diff --git a/3rdparty/vixl/AUTHORS b/3rdparty/vixl/AUTHORS new file mode 100644 index 0000000000..257ec9d32b --- /dev/null +++ b/3rdparty/vixl/AUTHORS @@ -0,0 +1,8 @@ +# Below is a list of people and organisations that have contributed to the VIXL +# project. Entries should be added to the list as: +# +# Name/Organization + +ARM Ltd. <*@arm.com> +Google Inc. <*@google.com> +Linaro <*@linaro.org> diff --git a/3rdparty/vixl/CMakeLists.txt b/3rdparty/vixl/CMakeLists.txt new file mode 100644 index 0000000000..8b73577b38 --- /dev/null +++ b/3rdparty/vixl/CMakeLists.txt @@ -0,0 +1,63 @@ +add_library(vixl + include/vixl/aarch64/abi-aarch64.h + include/vixl/aarch64/assembler-aarch64.h + include/vixl/aarch64/constants-aarch64.h + include/vixl/aarch64/cpu-aarch64.h + include/vixl/aarch64/cpu-features-auditor-aarch64.h + include/vixl/aarch64/decoder-aarch64.h + include/vixl/aarch64/decoder-constants-aarch64.h + include/vixl/aarch64/decoder-visitor-map-aarch64.h + include/vixl/aarch64/disasm-aarch64.h + include/vixl/aarch64/instructions-aarch64.h + include/vixl/aarch64/macro-assembler-aarch64.h + include/vixl/aarch64/operands-aarch64.h + include/vixl/aarch64/registers-aarch64.h + include/vixl/aarch64/simulator-aarch64.h + include/vixl/aarch64/simulator-constants-aarch64.h + include/vixl/assembler-base-vixl.h + include/vixl/code-buffer-vixl.h + include/vixl/code-generation-scopes-vixl.h + include/vixl/compiler-intrinsics-vixl.h + include/vixl/cpu-features.h + include/vixl/globals-vixl.h + include/vixl/invalset-vixl.h + include/vixl/macro-assembler-interface.h + include/vixl/platform-vixl.h + include/vixl/pool-manager-impl.h + include/vixl/pool-manager.h + include/vixl/utils-vixl.h + src/aarch64/assembler-aarch64.cc + src/aarch64/assembler-sve-aarch64.cc + src/aarch64/cpu-aarch64.cc + src/aarch64/cpu-features-auditor-aarch64.cc + src/aarch64/decoder-aarch64.cc + src/aarch64/disasm-aarch64.cc + src/aarch64/instructions-aarch64.cc + src/aarch64/logic-aarch64.cc + src/aarch64/macro-assembler-aarch64.cc + src/aarch64/macro-assembler-sve-aarch64.cc + src/aarch64/operands-aarch64.cc + src/aarch64/pointer-auth-aarch64.cc + src/aarch64/registers-aarch64.cc + src/code-buffer-vixl.cc + src/compiler-intrinsics-vixl.cc + src/cpu-features.cc + src/utils-vixl.cc +) + +target_include_directories(vixl PUBLIC + ${CMAKE_CURRENT_SOURCE_DIR}/include +) +target_include_directories(vixl PRIVATE + ${CMAKE_CURRENT_SOURCE_DIR}/include/vixl + ${CMAKE_CURRENT_SOURCE_DIR}/include/vixl/aarch64 +) +target_compile_definitions(vixl PUBLIC + VIXL_INCLUDE_TARGET_A64 +) + +if("${CMAKE_BUILD_TYPE}" STREQUAL "Debug") + message("Enabling vixl debug assertions") + target_compile_definitions(vixl PUBLIC VIXL_DEBUG) +endif() + diff --git a/3rdparty/vixl/LICENCE b/3rdparty/vixl/LICENCE new file mode 100644 index 0000000000..0acd8ebd63 --- /dev/null +++ b/3rdparty/vixl/LICENCE @@ -0,0 +1,30 @@ +LICENCE +======= + +The software in this repository is covered by the following licence. + +// Copyright 2015, VIXL authors +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// * Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. 
+// * Neither the name of ARM Limited nor the names of its contributors may be +// used to endorse or promote products derived from this software without +// specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS CONTRIBUTORS "AS IS" AND +// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE +// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/3rdparty/vixl/README.md b/3rdparty/vixl/README.md new file mode 100644 index 0000000000..acf226051b --- /dev/null +++ b/3rdparty/vixl/README.md @@ -0,0 +1,186 @@ +VIXL: Armv8 Runtime Code Generation Library, 3.0.0 +================================================== + +Contents: + + * Overview + * Licence + * Requirements + * Known limitations + * Usage + + +Overview +======== + +VIXL contains three components. + + 1. Programmatic **assemblers** to generate A64, A32 or T32 code at runtime. The + assemblers abstract some of the constraints of each ISA; for example, most + instructions support any immediate. + 2. **Disassemblers** that can print any instruction emitted by the assemblers. + 3. A **simulator** that can simulate any instruction emitted by the A64 + assembler. The simulator allows generated code to be run on another + architecture without the need for a full ISA model. + +The VIXL git repository can be found [on 'https://git.linaro.org'][vixl]. + +Changes from previous versions of VIXL can be found in the +[Changelog](doc/changelog.md). + + +Licence +======= + +This software is covered by the licence described in the [LICENCE](LICENCE) +file. + + +Requirements +============ + +To build VIXL the following software is required: + + 1. Python 2.7 + 2. SCons 2.0 + 3. GCC 4.8+ or Clang 3.4+ + +A 64-bit host machine is required, implementing an LP64 data model. VIXL has +been tested using GCC on AArch64 Debian, GCC and Clang on amd64 Ubuntu +systems. + +To run the linter and code formatting stages of the tests, the following +software is also required: + + 1. Git + 2. [Google's `cpplint.py`][cpplint] + 3. clang-format-3.8 + +Refer to the 'Usage' section for details. + + +Known Limitations for AArch64 code generation +============================================= + +VIXL was developed for JavaScript engines so a number of features from A64 were +deemed unnecessary: + + * Limited rounding mode support for floating point. + * Limited support for synchronisation instructions. + * Limited support for system instructions. + * A few miscellaneous integer and floating point instructions are missing. + +The VIXL simulator supports only those instructions that the VIXL assembler can +generate. The `doc` directory contains a +[list of supported A64 instructions](doc/aarch64/supported-instructions-aarch64.md). + +The VIXL simulator was developed to run on 64-bit amd64 platforms. 
Whilst it +builds and mostly works for 32-bit x86 platforms, there are a number of +floating-point operations which do not work correctly, and a number of tests +fail as a result. + +VIXL may not build using Clang 3.7, due to a compiler warning. A workaround is +to disable conversion of warnings to errors, or to delete the offending +`return` statement reported and rebuild. This problem will be fixed in the next +release. + +Debug Builds +------------ + +Your project's build system must define `VIXL_DEBUG` (eg. `-DVIXL_DEBUG`) +when using a VIXL library that has been built with debug enabled. + +Some classes defined in VIXL header files contain fields that are only present +in debug builds, so if `VIXL_DEBUG` is defined when the library is built, but +not defined for the header files included in your project, you will see runtime +failures. + +Exclusive-Access Instructions +----------------------------- + +All exclusive-access instructions are supported, but the simulator cannot +accurately simulate their behaviour as described in the ARMv8 Architecture +Reference Manual. + + * A local monitor is simulated, so simulated exclusive loads and stores execute + as expected in a single-threaded environment. + * The global monitor is simulated by occasionally causing exclusive-access + instructions to fail regardless of the local monitor state. + * Load-acquire, store-release semantics are approximated by issuing a host + memory barrier after loads or before stores. The built-in + `__sync_synchronize()` is used for this purpose. + +The simulator tries to be strict, and implements the following restrictions that +the ARMv8 ARM allows: + + * A pair of load-/store-exclusive instructions will only succeed if they have + the same address and access size. + * Most of the time, cache-maintenance operations or explicit memory accesses + will clear the exclusive monitor. + * To ensure that simulated code does not depend on this behaviour, the + exclusive monitor will sometimes be left intact after these instructions. + +Instructions affected by these limitations: + `stxrb`, `stxrh`, `stxr`, `ldxrb`, `ldxrh`, `ldxr`, `stxp`, `ldxp`, `stlxrb`, + `stlxrh`, `stlxr`, `ldaxrb`, `ldaxrh`, `ldaxr`, `stlxp`, `ldaxp`, `stlrb`, + `stlrh`, `stlr`, `ldarb`, `ldarh`, `ldar`, `clrex`. + + +Usage +===== + +Running all Tests +----------------- + +The helper script `tools/test.py` will build and run every test that is provided +with VIXL, in both release and debug mode. It is a useful script for verifying +that all of VIXL's dependencies are in place and that VIXL is working as it +should. + +By default, the `tools/test.py` script runs a linter to check that the source +code conforms with the code style guide, and to detect several common errors +that the compiler may not warn about. This is most useful for VIXL developers. +The linter has the following dependencies: + + 1. Git must be installed, and the VIXL project must be in a valid Git + repository, such as one produced using `git clone`. + 2. `cpplint.py`, [as provided by Google][cpplint], must be available (and + executable) on the `PATH`. + +It is possible to tell `tools/test.py` to skip the linter stage by passing +`--nolint`. This removes the dependency on `cpplint.py` and Git. The `--nolint` +option is implied if the VIXL project is a snapshot (with no `.git` directory). + +Additionally, `tools/test.py` tests code formatting using `clang-format-3.8`. +If you don't have `clang-format-3.8`, disable the test using the +`--noclang-format` option. 
+ +Also note that the tests for the tracing features depend upon external `diff` +and `sed` tools. If these tools are not available in `PATH`, these tests will +fail. + +Getting Started +--------------- + +We have separate guides for introducing VIXL, depending on what architecture you +are targeting. A guide for working with AArch32 can be found +[here][getting-started-aarch32], while the AArch64 guide is +[here][getting-started-aarch64]. Example source code is provided in the +[examples](examples) directory. You can build examples with either `scons +aarch32_examples` or `scons aarch64_examples` from the root directory, or use +`scons --help` to get a detailed list of available build targets. + + + + +[cpplint]: http://google-styleguide.googlecode.com/svn/trunk/cpplint/cpplint.py + "Google's cpplint.py script." + +[vixl]: https://git.linaro.org/arm/vixl.git + "The VIXL repository at 'https://git.linaro.org'." + +[getting-started-aarch32]: doc/aarch32/getting-started-aarch32.md + "Introduction to VIXL for AArch32." + +[getting-started-aarch64]: doc/aarch64/getting-started-aarch64.md + "Introduction to VIXL for AArch64." diff --git a/3rdparty/vixl/VERSIONS.md b/3rdparty/vixl/VERSIONS.md new file mode 100644 index 0000000000..87bc0dbae3 --- /dev/null +++ b/3rdparty/vixl/VERSIONS.md @@ -0,0 +1,30 @@ +Versioning +========== + +Since version 3.0.0, VIXL uses [Semantic Versioning 2.0.0][semver]. + +Briefly: + +- Backwards-incompatible changes update the _major_ version. +- New features update the _minor_ version. +- Bug fixes update the _patch_ version. + +Why 3.0.0? +---------- + +VIXL was originally released as 1.x using snapshot releases. When we moved VIXL +into Linaro, we started working directly on `master` and stopped tagging +named releases. However, we informally called this "VIXL 2", so we are skipping +2.0.0 to avoid potential confusion. + +Using `master` +-------------- + +Users who want to take the latest development version of VIXL can still take +commits from `master`. Our day-to-day development process hasn't changed and +these commits should still pass their own tests. However, note that commits not +explicitly tagged with a given version should be considered to be unversioned, +with no backwards-compatibility guarantees. + +[semver]: https://semver.org/spec/v2.0.0.html + "Semantic Versioning 2.0.0 Specification" diff --git a/3rdparty/vixl/include/vixl/aarch64/abi-aarch64.h b/3rdparty/vixl/include/vixl/aarch64/abi-aarch64.h new file mode 100644 index 0000000000..7e6cd9a41f --- /dev/null +++ b/3rdparty/vixl/include/vixl/aarch64/abi-aarch64.h @@ -0,0 +1,167 @@ +// Copyright 2016, VIXL authors +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// * Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// * Neither the name of ARM Limited nor the names of its contributors may be +// used to endorse or promote products derived from this software without +// specific prior written permission. 
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
+// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+// The ABI features are only supported with C++11 or later.
+#if __cplusplus >= 201103L
+// This should not be defined manually.
+#define VIXL_HAS_ABI_SUPPORT
+#elif defined(VIXL_HAS_ABI_SUPPORT)
+#error "The ABI support requires C++11 or later."
+#endif
+
+#ifdef VIXL_HAS_ABI_SUPPORT
+
+#ifndef VIXL_AARCH64_ABI_AARCH64_H_
+#define VIXL_AARCH64_ABI_AARCH64_H_
+
+#include <algorithm>
+#include <type_traits>
+
+#include "../globals-vixl.h"
+
+#include "instructions-aarch64.h"
+#include "operands-aarch64.h"
+
+namespace vixl {
+namespace aarch64 {
+
+// Class describing the AArch64 procedure call standard, as defined in "ARM
+// Procedure Call Standard for the ARM 64-bit Architecture (AArch64)",
+// release 1.0 (AAPCS below).
+//
+// The stages in the comments match the description in that document.
+//
+// Stage B does not apply to arguments handled by this class.
+class ABI {
+ public:
+  explicit ABI(Register stack_pointer = sp) : stack_pointer_(stack_pointer) {
+    // Stage A - Initialization
+    Reset();
+  }
+
+  void Reset() {
+    NGRN_ = 0;
+    NSRN_ = 0;
+    stack_offset_ = 0;
+  }
+
+  int GetStackSpaceRequired() { return stack_offset_; }
+
+  // The logic is described in section 5.5 of the AAPCS.
+  template <typename T>
+  GenericOperand GetReturnGenericOperand() const {
+    ABI abi(stack_pointer_);
+    GenericOperand result = abi.GetNextParameterGenericOperand<T>();
+    VIXL_ASSERT(result.IsCPURegister());
+    return result;
+  }
+
+  // The logic is described in section 5.4.2 of the AAPCS.
+  // The `GenericOperand` returned describes the location reserved for the
+  // argument from the point of view of the callee.
+  template <typename T>
+  GenericOperand GetNextParameterGenericOperand() {
+    const bool is_floating_point_type = std::is_floating_point<T>::value;
+    const bool is_integral_type =
+        std::is_integral<T>::value || std::is_enum<T>::value;
+    const bool is_pointer_type = std::is_pointer<T>::value;
+    int type_alignment = std::alignment_of<T>::value;
+
+    // We only support basic types.
+    VIXL_ASSERT(is_floating_point_type || is_integral_type || is_pointer_type);
+
+    // To ensure we get the correct type of operand when simulating on a 32-bit
+    // host, force the size of pointer types to the native AArch64 pointer size.
+    unsigned size = is_pointer_type ? 8 : sizeof(T);
+    // The size of the 'operand' reserved for the argument.
+    unsigned operand_size = AlignUp(size, kWRegSizeInBytes);
+    if (size > 8) {
+      VIXL_UNIMPLEMENTED();
+      return GenericOperand();
+    }
+
+    // Stage C.1
+    if (is_floating_point_type && (NSRN_ < 8)) {
+      return GenericOperand(VRegister(NSRN_++, size * kBitsPerByte));
+    }
+    // Stages C.2, C.3, and C.4: Unsupported. Caught by the assertions above.
+    // Stages C.5 and C.6
+    if (is_floating_point_type) {
+      VIXL_STATIC_ASSERT(
+          !is_floating_point_type ||
+          (std::is_same<T, float>::value || std::is_same<T, double>::value));
+      int offset = stack_offset_;
+      stack_offset_ += 8;
+      return GenericOperand(MemOperand(stack_pointer_, offset), operand_size);
+    }
+    // Stage C.7
+    if ((is_integral_type || is_pointer_type) && (size <= 8) && (NGRN_ < 8)) {
+      return GenericOperand(Register(NGRN_++, operand_size * kBitsPerByte));
+    }
+    // Stage C.8
+    if (type_alignment == 16) {
+      NGRN_ = AlignUp(NGRN_, 2);
+    }
+    // Stage C.9
+    if (is_integral_type && (size == 16) && (NGRN_ < 7)) {
+      VIXL_UNIMPLEMENTED();
+      return GenericOperand();
+    }
+    // Stage C.10: Unsupported. Caught by the assertions above.
+    // Stage C.11
+    NGRN_ = 8;
+    // Stage C.12
+    stack_offset_ = AlignUp(stack_offset_, std::max(type_alignment, 8));
+    // Stage C.13: Unsupported. Caught by the assertions above.
+    // Stage C.14
+    VIXL_ASSERT(size <= 8u);
+    size = std::max(size, 8u);
+    int offset = stack_offset_;
+    stack_offset_ += size;
+    return GenericOperand(MemOperand(stack_pointer_, offset), operand_size);
+  }
+
+ private:
+  Register stack_pointer_;
+  // Next General-purpose Register Number.
+  int NGRN_;
+  // Next SIMD and Floating-point Register Number.
+  int NSRN_;
+  // The acronym "NSAA" used in the standard refers to the "Next Stacked
+  // Argument Address". Here we deal with offsets from the stack pointer.
+  int stack_offset_;
+};
+
+template <>
+inline GenericOperand ABI::GetReturnGenericOperand<void>() const {
+  return GenericOperand();
+}
+}
+}  // namespace vixl::aarch64
+
+#endif  // VIXL_AARCH64_ABI_AARCH64_H_
+
+#endif  // VIXL_HAS_ABI_SUPPORT
diff --git a/3rdparty/vixl/include/vixl/aarch64/assembler-aarch64.h b/3rdparty/vixl/include/vixl/aarch64/assembler-aarch64.h
new file mode 100644
index 0000000000..636fbdb913
--- /dev/null
+++ b/3rdparty/vixl/include/vixl/aarch64/assembler-aarch64.h
@@ -0,0 +1,8328 @@
+// Copyright 2015, VIXL authors
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright notice,
+// this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above copyright notice,
+// this list of conditions and the following disclaimer in the documentation
+// and/or other materials provided with the distribution.
+// * Neither the name of ARM Limited nor the names of its contributors may be
+// used to endorse or promote products derived from this software without
+// specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
+// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
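// The assembler defined in this header can be driven roughly as follows (an
// illustrative sketch only; the buffer size and immediate values are
// arbitrary and not taken from the VIXL sources):
//
//   byte buffer[1024];
//   Assembler assm(buffer, sizeof(buffer));
//   assm.movz(x0, 42);     // Emit `movz x0, #42`.
//   assm.ret();            // Emit `ret` (returns via lr).
//   assm.FinalizeCode();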
+ +#ifndef VIXL_AARCH64_ASSEMBLER_AARCH64_H_ +#define VIXL_AARCH64_ASSEMBLER_AARCH64_H_ + +#include "../assembler-base-vixl.h" +#include "../code-generation-scopes-vixl.h" +#include "../cpu-features.h" +#include "../globals-vixl.h" +#include "../invalset-vixl.h" +#include "../utils-vixl.h" +#include "operands-aarch64.h" + +namespace vixl { +namespace aarch64 { + +class LabelTestHelper; // Forward declaration. + + +class Label { + public: + Label() : location_(kLocationUnbound) {} + ~Label() { + // All links to a label must have been resolved before it is destructed. + VIXL_ASSERT(!IsLinked()); + } + + bool IsBound() const { return location_ >= 0; } + bool IsLinked() const { return !links_.empty(); } + + ptrdiff_t GetLocation() const { return location_; } + VIXL_DEPRECATED("GetLocation", ptrdiff_t location() const) { + return GetLocation(); + } + + static const int kNPreallocatedLinks = 4; + static const ptrdiff_t kInvalidLinkKey = PTRDIFF_MAX; + static const size_t kReclaimFrom = 512; + static const size_t kReclaimFactor = 2; + + typedef InvalSet + LinksSetBase; + typedef InvalSetIterator LabelLinksIteratorBase; + + private: + class LinksSet : public LinksSetBase { + public: + LinksSet() : LinksSetBase() {} + }; + + // Allows iterating over the links of a label. The behaviour is undefined if + // the list of links is modified in any way while iterating. + class LabelLinksIterator : public LabelLinksIteratorBase { + public: + explicit LabelLinksIterator(Label* label) + : LabelLinksIteratorBase(&label->links_) {} + + // TODO: Remove these and use the STL-like interface instead. + using LabelLinksIteratorBase::Advance; + using LabelLinksIteratorBase::Current; + }; + + void Bind(ptrdiff_t location) { + // Labels can only be bound once. + VIXL_ASSERT(!IsBound()); + location_ = location; + } + + void AddLink(ptrdiff_t instruction) { + // If a label is bound, the assembler already has the information it needs + // to write the instruction, so there is no need to add it to links_. + VIXL_ASSERT(!IsBound()); + links_.insert(instruction); + } + + void DeleteLink(ptrdiff_t instruction) { links_.erase(instruction); } + + void ClearAllLinks() { links_.clear(); } + + // TODO: The comment below considers average case complexity for our + // usual use-cases. The elements of interest are: + // - Branches to a label are emitted in order: branch instructions to a label + // are generated at an offset in the code generation buffer greater than any + // other branch to that same label already generated. As an example, this can + // be broken when an instruction is patched to become a branch. Note that the + // code will still work, but the complexity considerations below may locally + // not apply any more. + // - Veneers are generated in order: for multiple branches of the same type + // branching to the same unbound label going out of range, veneers are + // generated in growing order of the branch instruction offset from the start + // of the buffer. + // + // When creating a veneer for a branch going out of range, the link for this + // branch needs to be removed from this `links_`. Since all branches are + // tracked in one underlying InvalSet, the complexity for this deletion is the + // same as for finding the element, ie. O(n), where n is the number of links + // in the set. + // This could be reduced to O(1) by using the same trick as used when tracking + // branch information for veneers: split the container to use one set per type + // of branch. 
With that setup, when a veneer is created and the link needs to + // be deleted, if the two points above hold, it must be the minimum element of + // the set for its type of branch, and that minimum element will be accessible + // in O(1). + + // The offsets of the instructions that have linked to this label. + LinksSet links_; + // The label location. + ptrdiff_t location_; + + static const ptrdiff_t kLocationUnbound = -1; + +// It is not safe to copy labels, so disable the copy constructor and operator +// by declaring them private (without an implementation). +#if __cplusplus >= 201103L + Label(const Label&) = delete; + void operator=(const Label&) = delete; +#else + Label(const Label&); + void operator=(const Label&); +#endif + + // The Assembler class is responsible for binding and linking labels, since + // the stored offsets need to be consistent with the Assembler's buffer. + friend class Assembler; + // The MacroAssembler and VeneerPool handle resolution of branches to distant + // targets. + friend class MacroAssembler; + friend class VeneerPool; +}; + + +class Assembler; +class LiteralPool; + +// A literal is a 32-bit or 64-bit piece of data stored in the instruction +// stream and loaded through a pc relative load. The same literal can be +// referred to by multiple instructions but a literal can only reside at one +// place in memory. A literal can be used by a load before or after being +// placed in memory. +// +// Internally an offset of 0 is associated with a literal which has been +// neither used nor placed. Then two possibilities arise: +// 1) the label is placed, the offset (stored as offset + 1) is used to +// resolve any subsequent load using the label. +// 2) the label is not placed and offset is the offset of the last load using +// the literal (stored as -offset -1). If multiple loads refer to this +// literal then the last load holds the offset of the preceding load and +// all loads form a chain. Once the offset is placed all the loads in the +// chain are resolved and future loads fall back to possibility 1. +class RawLiteral { + public: + enum DeletionPolicy { + kDeletedOnPlacementByPool, + kDeletedOnPoolDestruction, + kManuallyDeleted + }; + + RawLiteral(size_t size, + LiteralPool* literal_pool, + DeletionPolicy deletion_policy = kManuallyDeleted); + + // The literal pool only sees and deletes `RawLiteral*` pointers, but they are + // actually pointing to `Literal` objects. 
+ virtual ~RawLiteral() {} + + size_t GetSize() const { + VIXL_STATIC_ASSERT(kDRegSizeInBytes == kXRegSizeInBytes); + VIXL_STATIC_ASSERT(kSRegSizeInBytes == kWRegSizeInBytes); + VIXL_ASSERT((size_ == kXRegSizeInBytes) || (size_ == kWRegSizeInBytes) || + (size_ == kQRegSizeInBytes)); + return size_; + } + VIXL_DEPRECATED("GetSize", size_t size()) { return GetSize(); } + + uint64_t GetRawValue128Low64() const { + VIXL_ASSERT(size_ == kQRegSizeInBytes); + return low64_; + } + VIXL_DEPRECATED("GetRawValue128Low64", uint64_t raw_value128_low64()) { + return GetRawValue128Low64(); + } + + uint64_t GetRawValue128High64() const { + VIXL_ASSERT(size_ == kQRegSizeInBytes); + return high64_; + } + VIXL_DEPRECATED("GetRawValue128High64", uint64_t raw_value128_high64()) { + return GetRawValue128High64(); + } + + uint64_t GetRawValue64() const { + VIXL_ASSERT(size_ == kXRegSizeInBytes); + VIXL_ASSERT(high64_ == 0); + return low64_; + } + VIXL_DEPRECATED("GetRawValue64", uint64_t raw_value64()) { + return GetRawValue64(); + } + + uint32_t GetRawValue32() const { + VIXL_ASSERT(size_ == kWRegSizeInBytes); + VIXL_ASSERT(high64_ == 0); + VIXL_ASSERT(IsUint32(low64_) || IsInt32(low64_)); + return static_cast(low64_); + } + VIXL_DEPRECATED("GetRawValue32", uint32_t raw_value32()) { + return GetRawValue32(); + } + + bool IsUsed() const { return offset_ < 0; } + bool IsPlaced() const { return offset_ > 0; } + + LiteralPool* GetLiteralPool() const { return literal_pool_; } + + ptrdiff_t GetOffset() const { + VIXL_ASSERT(IsPlaced()); + return offset_ - 1; + } + VIXL_DEPRECATED("GetOffset", ptrdiff_t offset()) { return GetOffset(); } + + protected: + void SetOffset(ptrdiff_t offset) { + VIXL_ASSERT(offset >= 0); + VIXL_ASSERT(IsWordAligned(offset)); + VIXL_ASSERT(!IsPlaced()); + offset_ = offset + 1; + } + VIXL_DEPRECATED("SetOffset", void set_offset(ptrdiff_t offset)) { + SetOffset(offset); + } + + ptrdiff_t GetLastUse() const { + VIXL_ASSERT(IsUsed()); + return -offset_ - 1; + } + VIXL_DEPRECATED("GetLastUse", ptrdiff_t last_use()) { return GetLastUse(); } + + void SetLastUse(ptrdiff_t offset) { + VIXL_ASSERT(offset >= 0); + VIXL_ASSERT(IsWordAligned(offset)); + VIXL_ASSERT(!IsPlaced()); + offset_ = -offset - 1; + } + VIXL_DEPRECATED("SetLastUse", void set_last_use(ptrdiff_t offset)) { + SetLastUse(offset); + } + + size_t size_; + ptrdiff_t offset_; + uint64_t low64_; + uint64_t high64_; + + private: + LiteralPool* literal_pool_; + DeletionPolicy deletion_policy_; + + friend class Assembler; + friend class LiteralPool; +}; + + +template +class Literal : public RawLiteral { + public: + explicit Literal(T value, + LiteralPool* literal_pool = NULL, + RawLiteral::DeletionPolicy ownership = kManuallyDeleted) + : RawLiteral(sizeof(value), literal_pool, ownership) { + VIXL_STATIC_ASSERT(sizeof(value) <= kXRegSizeInBytes); + UpdateValue(value); + } + + Literal(T high64, + T low64, + LiteralPool* literal_pool = NULL, + RawLiteral::DeletionPolicy ownership = kManuallyDeleted) + : RawLiteral(kQRegSizeInBytes, literal_pool, ownership) { + VIXL_STATIC_ASSERT(sizeof(low64) == (kQRegSizeInBytes / 2)); + UpdateValue(high64, low64); + } + + virtual ~Literal() {} + + // Update the value of this literal, if necessary by rewriting the value in + // the pool. + // If the literal has already been placed in a literal pool, the address of + // the start of the code buffer must be provided, as the literal only knows it + // offset from there. This also allows patching the value after the code has + // been moved in memory. 
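  // For example (an illustrative sketch, not taken from the VIXL sources;
  // `assm` is assumed to be an Assembler with a live buffer, and
  // `code_buffer_start` a hypothetical `uint8_t*` pointing at the start of
  // that buffer):
  //
  //   Literal<uint64_t> lit(0x0123456789abcdef);
  //   assm.ldr(x0, &lit);    // Emit a literal load; `lit` is not yet placed.
  //   assm.place(&lit);      // Place the literal in the instruction stream.
  //   // Later, patch the value already emitted into the buffer:
  //   lit.UpdateValue(UINT64_C(42), code_buffer_start);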
+ void UpdateValue(T new_value, uint8_t* code_buffer = NULL) { + VIXL_ASSERT(sizeof(new_value) == size_); + memcpy(&low64_, &new_value, sizeof(new_value)); + if (IsPlaced()) { + VIXL_ASSERT(code_buffer != NULL); + RewriteValueInCode(code_buffer); + } + } + + void UpdateValue(T high64, T low64, uint8_t* code_buffer = NULL) { + VIXL_ASSERT(sizeof(low64) == size_ / 2); + memcpy(&low64_, &low64, sizeof(low64)); + memcpy(&high64_, &high64, sizeof(high64)); + if (IsPlaced()) { + VIXL_ASSERT(code_buffer != NULL); + RewriteValueInCode(code_buffer); + } + } + + void UpdateValue(T new_value, const Assembler* assembler); + void UpdateValue(T high64, T low64, const Assembler* assembler); + + private: + void RewriteValueInCode(uint8_t* code_buffer) { + VIXL_ASSERT(IsPlaced()); + VIXL_STATIC_ASSERT(sizeof(T) <= kXRegSizeInBytes); + switch (GetSize()) { + case kSRegSizeInBytes: + *reinterpret_cast(code_buffer + GetOffset()) = + GetRawValue32(); + break; + case kDRegSizeInBytes: + *reinterpret_cast(code_buffer + GetOffset()) = + GetRawValue64(); + break; + default: + VIXL_ASSERT(GetSize() == kQRegSizeInBytes); + uint64_t* base_address = + reinterpret_cast(code_buffer + GetOffset()); + *base_address = GetRawValue128Low64(); + *(base_address + 1) = GetRawValue128High64(); + } + } +}; + + +// Control whether or not position-independent code should be emitted. +enum PositionIndependentCodeOption { + // All code generated will be position-independent; all branches and + // references to labels generated with the Label class will use PC-relative + // addressing. + PositionIndependentCode, + + // Allow VIXL to generate code that refers to absolute addresses. With this + // option, it will not be possible to copy the code buffer and run it from a + // different address; code must be generated in its final location. + PositionDependentCode, + + // Allow VIXL to assume that the bottom 12 bits of the address will be + // constant, but that the top 48 bits may change. This allows `adrp` to + // function in systems which copy code between pages, but otherwise maintain + // 4KB page alignment. + PageOffsetDependentCode +}; + + +// Control how scaled- and unscaled-offset loads and stores are generated. +enum LoadStoreScalingOption { + // Prefer scaled-immediate-offset instructions, but emit unscaled-offset, + // register-offset, pre-index or post-index instructions if necessary. + PreferScaledOffset, + + // Prefer unscaled-immediate-offset instructions, but emit scaled-offset, + // register-offset, pre-index or post-index instructions if necessary. + PreferUnscaledOffset, + + // Require scaled-immediate-offset instructions. + RequireScaledOffset, + + // Require unscaled-immediate-offset instructions. + RequireUnscaledOffset +}; + + +// Assembler. +class Assembler : public vixl::internal::AssemblerBase { + public: + Assembler(byte* buffer, + size_t capacity, + PositionIndependentCodeOption pic = PositionIndependentCode) + : AssemblerBase(buffer, capacity), + pic_(pic), + cpu_features_(CPUFeatures::AArch64LegacyBaseline()) {} + + // Upon destruction, the code will assert that one of the following is true: + // * The Assembler object has not been used. + // * Nothing has been emitted since the last Reset() call. + // * Nothing has been emitted since the last FinalizeCode() call. + ~Assembler() {} + + // System functions. + + // Start generating code from the beginning of the buffer, discarding any code + // and data that has already been emitted into the buffer. + void Reset(); + + // Bind a label to the current PC. 
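  // For example, a simple countdown loop can be assembled as follows (an
  // illustrative sketch; `assm` is assumed to be an Assembler and w0 to hold
  // the loop counter):
  //
  //   Label loop;
  //   assm.bind(&loop);       // Define the branch target here.
  //   assm.sub(w0, w0, 1);    // Decrement the counter.
  //   assm.cbnz(w0, &loop);   // Branch back while w0 is non-zero.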
+ void bind(Label* label); + + // Bind a label to a specified offset from the start of the buffer. + void BindToOffset(Label* label, ptrdiff_t offset); + + // Place a literal at the current PC. + void place(RawLiteral* literal); + + VIXL_DEPRECATED("GetCursorOffset", ptrdiff_t CursorOffset() const) { + return GetCursorOffset(); + } + + VIXL_DEPRECATED("GetBuffer().GetCapacity()", + ptrdiff_t GetBufferEndOffset() const) { + return static_cast(GetBuffer().GetCapacity()); + } + VIXL_DEPRECATED("GetBuffer().GetCapacity()", + ptrdiff_t BufferEndOffset() const) { + return GetBuffer().GetCapacity(); + } + + // Return the address of a bound label. + template + T GetLabelAddress(const Label* label) const { + VIXL_ASSERT(label->IsBound()); + VIXL_STATIC_ASSERT(sizeof(T) >= sizeof(uintptr_t)); + return GetBuffer().GetOffsetAddress(label->GetLocation()); + } + + Instruction* GetInstructionAt(ptrdiff_t instruction_offset) { + return GetBuffer()->GetOffsetAddress(instruction_offset); + } + VIXL_DEPRECATED("GetInstructionAt", + Instruction* InstructionAt(ptrdiff_t instruction_offset)) { + return GetInstructionAt(instruction_offset); + } + + ptrdiff_t GetInstructionOffset(Instruction* instruction) { + VIXL_STATIC_ASSERT(sizeof(*instruction) == 1); + ptrdiff_t offset = + instruction - GetBuffer()->GetStartAddress(); + VIXL_ASSERT((0 <= offset) && + (offset < static_cast(GetBuffer()->GetCapacity()))); + return offset; + } + VIXL_DEPRECATED("GetInstructionOffset", + ptrdiff_t InstructionOffset(Instruction* instruction)) { + return GetInstructionOffset(instruction); + } + + // Instruction set functions. + + // Branch / Jump instructions. + + // Branch to register. + void br(const Register& xn); + + // Branch with link to register. + void blr(const Register& xn); + + // Branch to register with return hint. + void ret(const Register& xn = lr); + + // Branch to register, with pointer authentication. Using key A and a modifier + // of zero [Armv8.3]. + void braaz(const Register& xn); + + // Branch to register, with pointer authentication. Using key B and a modifier + // of zero [Armv8.3]. + void brabz(const Register& xn); + + // Branch with link to register, with pointer authentication. Using key A and + // a modifier of zero [Armv8.3]. + void blraaz(const Register& xn); + + // Branch with link to register, with pointer authentication. Using key B and + // a modifier of zero [Armv8.3]. + void blrabz(const Register& xn); + + // Return from subroutine, with pointer authentication. Using key A [Armv8.3]. + void retaa(); + + // Return from subroutine, with pointer authentication. Using key B [Armv8.3]. + void retab(); + + // Branch to register, with pointer authentication. Using key A [Armv8.3]. + void braa(const Register& xn, const Register& xm); + + // Branch to register, with pointer authentication. Using key B [Armv8.3]. + void brab(const Register& xn, const Register& xm); + + // Branch with link to register, with pointer authentication. Using key A + // [Armv8.3]. + void blraa(const Register& xn, const Register& xm); + + // Branch with link to register, with pointer authentication. Using key B + // [Armv8.3]. + void blrab(const Register& xn, const Register& xm); + + // Unconditional branch to label. + void b(Label* label); + + // Conditional branch to label. + void b(Label* label, Condition cond); + + // Unconditional branch to PC offset. + void b(int64_t imm26); + + // Conditional branch to PC offset. + void b(int64_t imm19, Condition cond); + + // Branch with link to label. 
+ void bl(Label* label); + + // Branch with link to PC offset. + void bl(int64_t imm26); + + // Compare and branch to label if zero. + void cbz(const Register& rt, Label* label); + + // Compare and branch to PC offset if zero. + void cbz(const Register& rt, int64_t imm19); + + // Compare and branch to label if not zero. + void cbnz(const Register& rt, Label* label); + + // Compare and branch to PC offset if not zero. + void cbnz(const Register& rt, int64_t imm19); + + // Table lookup from one register. + void tbl(const VRegister& vd, const VRegister& vn, const VRegister& vm); + + // Table lookup from two registers. + void tbl(const VRegister& vd, + const VRegister& vn, + const VRegister& vn2, + const VRegister& vm); + + // Table lookup from three registers. + void tbl(const VRegister& vd, + const VRegister& vn, + const VRegister& vn2, + const VRegister& vn3, + const VRegister& vm); + + // Table lookup from four registers. + void tbl(const VRegister& vd, + const VRegister& vn, + const VRegister& vn2, + const VRegister& vn3, + const VRegister& vn4, + const VRegister& vm); + + // Table lookup extension from one register. + void tbx(const VRegister& vd, const VRegister& vn, const VRegister& vm); + + // Table lookup extension from two registers. + void tbx(const VRegister& vd, + const VRegister& vn, + const VRegister& vn2, + const VRegister& vm); + + // Table lookup extension from three registers. + void tbx(const VRegister& vd, + const VRegister& vn, + const VRegister& vn2, + const VRegister& vn3, + const VRegister& vm); + + // Table lookup extension from four registers. + void tbx(const VRegister& vd, + const VRegister& vn, + const VRegister& vn2, + const VRegister& vn3, + const VRegister& vn4, + const VRegister& vm); + + // Test bit and branch to label if zero. + void tbz(const Register& rt, unsigned bit_pos, Label* label); + + // Test bit and branch to PC offset if zero. + void tbz(const Register& rt, unsigned bit_pos, int64_t imm14); + + // Test bit and branch to label if not zero. + void tbnz(const Register& rt, unsigned bit_pos, Label* label); + + // Test bit and branch to PC offset if not zero. + void tbnz(const Register& rt, unsigned bit_pos, int64_t imm14); + + // Address calculation instructions. + // Calculate a PC-relative address. Unlike for branches the offset in adr is + // unscaled (i.e. the result can be unaligned). + + // Calculate the address of a label. + void adr(const Register& xd, Label* label); + + // Calculate the address of a PC offset. + void adr(const Register& xd, int64_t imm21); + + // Calculate the page address of a label. + void adrp(const Register& xd, Label* label); + + // Calculate the page address of a PC offset. + void adrp(const Register& xd, int64_t imm21); + + // Data Processing instructions. + + // Add. + void add(const Register& rd, const Register& rn, const Operand& operand); + + // Add and update status flags. + void adds(const Register& rd, const Register& rn, const Operand& operand); + + // Compare negative. + void cmn(const Register& rn, const Operand& operand); + + // Subtract. + void sub(const Register& rd, const Register& rn, const Operand& operand); + + // Subtract and update status flags. + void subs(const Register& rd, const Register& rn, const Operand& operand); + + // Compare. + void cmp(const Register& rn, const Operand& operand); + + // Negate. + void neg(const Register& rd, const Operand& operand); + + // Negate and update status flags. + void negs(const Register& rd, const Operand& operand); + + // Add with carry bit. 
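  // For example, a 128-bit addition can be built from `adds` and `adc` (an
  // illustrative sketch; the value in x3:x2 is added to x1:x0, and `assm` is
  // assumed to be an Assembler):
  //
  //   assm.adds(x0, x0, x2);   // Low halves; sets the carry flag.
  //   assm.adc(x1, x1, x3);    // High halves; consumes the carry flag.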
+ void adc(const Register& rd, const Register& rn, const Operand& operand); + + // Add with carry bit and update status flags. + void adcs(const Register& rd, const Register& rn, const Operand& operand); + + // Subtract with carry bit. + void sbc(const Register& rd, const Register& rn, const Operand& operand); + + // Subtract with carry bit and update status flags. + void sbcs(const Register& rd, const Register& rn, const Operand& operand); + + // Rotate register right and insert into NZCV flags under the control of a + // mask [Armv8.4]. + void rmif(const Register& xn, unsigned rotation, StatusFlags flags); + + // Set NZCV flags from register, treated as an 8-bit value [Armv8.4]. + void setf8(const Register& rn); + + // Set NZCV flags from register, treated as an 16-bit value [Armv8.4]. + void setf16(const Register& rn); + + // Negate with carry bit. + void ngc(const Register& rd, const Operand& operand); + + // Negate with carry bit and update status flags. + void ngcs(const Register& rd, const Operand& operand); + + // Logical instructions. + + // Bitwise and (A & B). + void and_(const Register& rd, const Register& rn, const Operand& operand); + + // Bitwise and (A & B) and update status flags. + void ands(const Register& rd, const Register& rn, const Operand& operand); + + // Bit test and set flags. + void tst(const Register& rn, const Operand& operand); + + // Bit clear (A & ~B). + void bic(const Register& rd, const Register& rn, const Operand& operand); + + // Bit clear (A & ~B) and update status flags. + void bics(const Register& rd, const Register& rn, const Operand& operand); + + // Bitwise or (A | B). + void orr(const Register& rd, const Register& rn, const Operand& operand); + + // Bitwise nor (A | ~B). + void orn(const Register& rd, const Register& rn, const Operand& operand); + + // Bitwise eor/xor (A ^ B). + void eor(const Register& rd, const Register& rn, const Operand& operand); + + // Bitwise enor/xnor (A ^ ~B). + void eon(const Register& rd, const Register& rn, const Operand& operand); + + // Logical shift left by variable. + void lslv(const Register& rd, const Register& rn, const Register& rm); + + // Logical shift right by variable. + void lsrv(const Register& rd, const Register& rn, const Register& rm); + + // Arithmetic shift right by variable. + void asrv(const Register& rd, const Register& rn, const Register& rm); + + // Rotate right by variable. + void rorv(const Register& rd, const Register& rn, const Register& rm); + + // Bitfield instructions. + + // Bitfield move. + void bfm(const Register& rd, + const Register& rn, + unsigned immr, + unsigned imms); + + // Signed bitfield move. + void sbfm(const Register& rd, + const Register& rn, + unsigned immr, + unsigned imms); + + // Unsigned bitfield move. + void ubfm(const Register& rd, + const Register& rn, + unsigned immr, + unsigned imms); + + // Bfm aliases. + + // Bitfield insert. + void bfi(const Register& rd, + const Register& rn, + unsigned lsb, + unsigned width) { + VIXL_ASSERT(width >= 1); + VIXL_ASSERT(lsb + width <= static_cast(rn.GetSizeInBits())); + bfm(rd, + rn, + (rd.GetSizeInBits() - lsb) & (rd.GetSizeInBits() - 1), + width - 1); + } + + // Bitfield extract and insert low. + void bfxil(const Register& rd, + const Register& rn, + unsigned lsb, + unsigned width) { + VIXL_ASSERT(width >= 1); + VIXL_ASSERT(lsb + width <= static_cast(rn.GetSizeInBits())); + bfm(rd, rn, lsb, lsb + width - 1); + } + + // Bitfield clear [Armv8.2]. 
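  // For example (an illustrative sketch; `assm` is assumed to be an
  // Assembler):
  //
  //   assm.bfc(w2, 4, 8);        // Clear bits [11:4] of w2.
  //   assm.ubfx(w0, w1, 8, 8);   // w0 = bits [15:8] of w1, zero-extended.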
+ void bfc(const Register& rd, unsigned lsb, unsigned width) { + bfi(rd, AppropriateZeroRegFor(rd), lsb, width); + } + + // Sbfm aliases. + + // Arithmetic shift right. + void asr(const Register& rd, const Register& rn, unsigned shift) { + VIXL_ASSERT(shift < static_cast(rd.GetSizeInBits())); + sbfm(rd, rn, shift, rd.GetSizeInBits() - 1); + } + + // Signed bitfield insert with zero at right. + void sbfiz(const Register& rd, + const Register& rn, + unsigned lsb, + unsigned width) { + VIXL_ASSERT(width >= 1); + VIXL_ASSERT(lsb + width <= static_cast(rn.GetSizeInBits())); + sbfm(rd, + rn, + (rd.GetSizeInBits() - lsb) & (rd.GetSizeInBits() - 1), + width - 1); + } + + // Signed bitfield extract. + void sbfx(const Register& rd, + const Register& rn, + unsigned lsb, + unsigned width) { + VIXL_ASSERT(width >= 1); + VIXL_ASSERT(lsb + width <= static_cast(rn.GetSizeInBits())); + sbfm(rd, rn, lsb, lsb + width - 1); + } + + // Signed extend byte. + void sxtb(const Register& rd, const Register& rn) { sbfm(rd, rn, 0, 7); } + + // Signed extend halfword. + void sxth(const Register& rd, const Register& rn) { sbfm(rd, rn, 0, 15); } + + // Signed extend word. + void sxtw(const Register& rd, const Register& rn) { sbfm(rd, rn, 0, 31); } + + // Ubfm aliases. + + // Logical shift left. + void lsl(const Register& rd, const Register& rn, unsigned shift) { + unsigned reg_size = rd.GetSizeInBits(); + VIXL_ASSERT(shift < reg_size); + ubfm(rd, rn, (reg_size - shift) % reg_size, reg_size - shift - 1); + } + + // Logical shift right. + void lsr(const Register& rd, const Register& rn, unsigned shift) { + VIXL_ASSERT(shift < static_cast(rd.GetSizeInBits())); + ubfm(rd, rn, shift, rd.GetSizeInBits() - 1); + } + + // Unsigned bitfield insert with zero at right. + void ubfiz(const Register& rd, + const Register& rn, + unsigned lsb, + unsigned width) { + VIXL_ASSERT(width >= 1); + VIXL_ASSERT(lsb + width <= static_cast(rn.GetSizeInBits())); + ubfm(rd, + rn, + (rd.GetSizeInBits() - lsb) & (rd.GetSizeInBits() - 1), + width - 1); + } + + // Unsigned bitfield extract. + void ubfx(const Register& rd, + const Register& rn, + unsigned lsb, + unsigned width) { + VIXL_ASSERT(width >= 1); + VIXL_ASSERT(lsb + width <= static_cast(rn.GetSizeInBits())); + ubfm(rd, rn, lsb, lsb + width - 1); + } + + // Unsigned extend byte. + void uxtb(const Register& rd, const Register& rn) { ubfm(rd, rn, 0, 7); } + + // Unsigned extend halfword. + void uxth(const Register& rd, const Register& rn) { ubfm(rd, rn, 0, 15); } + + // Unsigned extend word. + void uxtw(const Register& rd, const Register& rn) { ubfm(rd, rn, 0, 31); } + + // Extract. + void extr(const Register& rd, + const Register& rn, + const Register& rm, + unsigned lsb); + + // Conditional select: rd = cond ? rn : rm. + void csel(const Register& rd, + const Register& rn, + const Register& rm, + Condition cond); + + // Conditional select increment: rd = cond ? rn : rm + 1. + void csinc(const Register& rd, + const Register& rn, + const Register& rm, + Condition cond); + + // Conditional select inversion: rd = cond ? rn : ~rm. + void csinv(const Register& rd, + const Register& rn, + const Register& rm, + Condition cond); + + // Conditional select negation: rd = cond ? rn : -rm. + void csneg(const Register& rd, + const Register& rn, + const Register& rm, + Condition cond); + + // Conditional set: rd = cond ? 1 : 0. + void cset(const Register& rd, Condition cond); + + // Conditional set mask: rd = cond ? -1 : 0. 
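  // For example, a branch-free maximum can be computed with the conditional
  // instructions (an illustrative sketch; `assm` is assumed to be an
  // Assembler):
  //
  //   assm.cmp(w0, w1);           // Set flags for w0 - w1.
  //   assm.csel(w2, w0, w1, gt);  // w2 = (w0 > w1) ? w0 : w1.
  //   assm.cset(w3, gt);          // w3 = (w0 > w1) ? 1 : 0.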
+ void csetm(const Register& rd, Condition cond); + + // Conditional increment: rd = cond ? rn + 1 : rn. + void cinc(const Register& rd, const Register& rn, Condition cond); + + // Conditional invert: rd = cond ? ~rn : rn. + void cinv(const Register& rd, const Register& rn, Condition cond); + + // Conditional negate: rd = cond ? -rn : rn. + void cneg(const Register& rd, const Register& rn, Condition cond); + + // Rotate right. + void ror(const Register& rd, const Register& rs, unsigned shift) { + extr(rd, rs, rs, shift); + } + + // Conditional comparison. + + // Conditional compare negative. + void ccmn(const Register& rn, + const Operand& operand, + StatusFlags nzcv, + Condition cond); + + // Conditional compare. + void ccmp(const Register& rn, + const Operand& operand, + StatusFlags nzcv, + Condition cond); + + // CRC-32 checksum from byte. + void crc32b(const Register& wd, const Register& wn, const Register& wm); + + // CRC-32 checksum from half-word. + void crc32h(const Register& wd, const Register& wn, const Register& wm); + + // CRC-32 checksum from word. + void crc32w(const Register& wd, const Register& wn, const Register& wm); + + // CRC-32 checksum from double word. + void crc32x(const Register& wd, const Register& wn, const Register& xm); + + // CRC-32 C checksum from byte. + void crc32cb(const Register& wd, const Register& wn, const Register& wm); + + // CRC-32 C checksum from half-word. + void crc32ch(const Register& wd, const Register& wn, const Register& wm); + + // CRC-32 C checksum from word. + void crc32cw(const Register& wd, const Register& wn, const Register& wm); + + // CRC-32C checksum from double word. + void crc32cx(const Register& wd, const Register& wn, const Register& xm); + + // Multiply. + void mul(const Register& rd, const Register& rn, const Register& rm); + + // Negated multiply. + void mneg(const Register& rd, const Register& rn, const Register& rm); + + // Signed long multiply: 32 x 32 -> 64-bit. + void smull(const Register& xd, const Register& wn, const Register& wm); + + // Signed multiply high: 64 x 64 -> 64-bit <127:64>. + void smulh(const Register& xd, const Register& xn, const Register& xm); + + // Multiply and accumulate. + void madd(const Register& rd, + const Register& rn, + const Register& rm, + const Register& ra); + + // Multiply and subtract. + void msub(const Register& rd, + const Register& rn, + const Register& rm, + const Register& ra); + + // Signed long multiply and accumulate: 32 x 32 + 64 -> 64-bit. + void smaddl(const Register& xd, + const Register& wn, + const Register& wm, + const Register& xa); + + // Unsigned long multiply and accumulate: 32 x 32 + 64 -> 64-bit. + void umaddl(const Register& xd, + const Register& wn, + const Register& wm, + const Register& xa); + + // Unsigned long multiply: 32 x 32 -> 64-bit. + void umull(const Register& xd, const Register& wn, const Register& wm) { + umaddl(xd, wn, wm, xzr); + } + + // Unsigned multiply high: 64 x 64 -> 64-bit <127:64>. + void umulh(const Register& xd, const Register& xn, const Register& xm); + + // Signed long multiply and subtract: 64 - (32 x 32) -> 64-bit. + void smsubl(const Register& xd, + const Register& wn, + const Register& wm, + const Register& xa); + + // Unsigned long multiply and subtract: 64 - (32 x 32) -> 64-bit. + void umsubl(const Register& xd, + const Register& wn, + const Register& wm, + const Register& xa); + + // Signed integer divide. + void sdiv(const Register& rd, const Register& rn, const Register& rm); + + // Unsigned integer divide. 
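  // For example, there is no remainder instruction, but a remainder can be
  // formed from a divide and a multiply-subtract (an illustrative sketch;
  // `assm` is assumed to be an Assembler):
  //
  //   assm.udiv(x2, x0, x1);      // x2 = x0 / x1 (x2 = 0 if x1 == 0).
  //   assm.msub(x3, x2, x1, x0);  // x3 = x0 - (x2 * x1), i.e. x0 % x1.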
+ void udiv(const Register& rd, const Register& rn, const Register& rm); + + // Bit reverse. + void rbit(const Register& rd, const Register& rn); + + // Reverse bytes in 16-bit half words. + void rev16(const Register& rd, const Register& rn); + + // Reverse bytes in 32-bit words. + void rev32(const Register& xd, const Register& xn); + + // Reverse bytes in 64-bit general purpose register, an alias for rev + // [Armv8.2]. + void rev64(const Register& xd, const Register& xn) { + VIXL_ASSERT(xd.Is64Bits() && xn.Is64Bits()); + rev(xd, xn); + } + + // Reverse bytes. + void rev(const Register& rd, const Register& rn); + + // Count leading zeroes. + void clz(const Register& rd, const Register& rn); + + // Count leading sign bits. + void cls(const Register& rd, const Register& rn); + + // Pointer Authentication Code for Instruction address, using key A [Armv8.3]. + void pacia(const Register& xd, const Register& rn); + + // Pointer Authentication Code for Instruction address, using key A and a + // modifier of zero [Armv8.3]. + void paciza(const Register& xd); + + // Pointer Authentication Code for Instruction address, using key A, with + // address in x17 and modifier in x16 [Armv8.3]. + void pacia1716(); + + // Pointer Authentication Code for Instruction address, using key A, with + // address in LR and modifier in SP [Armv8.3]. + void paciasp(); + + // Pointer Authentication Code for Instruction address, using key A, with + // address in LR and a modifier of zero [Armv8.3]. + void paciaz(); + + // Pointer Authentication Code for Instruction address, using key B [Armv8.3]. + void pacib(const Register& xd, const Register& xn); + + // Pointer Authentication Code for Instruction address, using key B and a + // modifier of zero [Armv8.3]. + void pacizb(const Register& xd); + + // Pointer Authentication Code for Instruction address, using key B, with + // address in x17 and modifier in x16 [Armv8.3]. + void pacib1716(); + + // Pointer Authentication Code for Instruction address, using key B, with + // address in LR and modifier in SP [Armv8.3]. + void pacibsp(); + + // Pointer Authentication Code for Instruction address, using key B, with + // address in LR and a modifier of zero [Armv8.3]. + void pacibz(); + + // Pointer Authentication Code for Data address, using key A [Armv8.3]. + void pacda(const Register& xd, const Register& xn); + + // Pointer Authentication Code for Data address, using key A and a modifier of + // zero [Armv8.3]. + void pacdza(const Register& xd); + + // Pointer Authentication Code for Data address, using key B [Armv8.3]. + void pacdb(const Register& xd, const Register& xn); + + // Pointer Authentication Code for Data address, using key B and a modifier of + // zero [Armv8.3]. + void pacdzb(const Register& xd); + + // Pointer Authentication Code, using Generic key [Armv8.3]. + void pacga(const Register& xd, const Register& xn, const Register& xm); + + // Authenticate Instruction address, using key A [Armv8.3]. + void autia(const Register& xd, const Register& xn); + + // Authenticate Instruction address, using key A and a modifier of zero + // [Armv8.3]. + void autiza(const Register& xd); + + // Authenticate Instruction address, using key A, with address in x17 and + // modifier in x16 [Armv8.3]. + void autia1716(); + + // Authenticate Instruction address, using key A, with address in LR and + // modifier in SP [Armv8.3]. + void autiasp(); + + // Authenticate Instruction address, using key A, with address in LR and a + // modifier of zero [Armv8.3]. 
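  // For example, return-address signing typically pairs `paciasp` in the
  // prologue with `autiasp` in the epilogue (an illustrative sketch; `assm`
  // is assumed to be an Assembler targeting a core with pointer
  // authentication):
  //
  //   assm.paciasp();                                   // Sign LR with key A and SP.
  //   assm.stp(x29, x30, MemOperand(sp, -16, PreIndex));
  //   ...                                               // Function body.
  //   assm.ldp(x29, x30, MemOperand(sp, 16, PostIndex));
  //   assm.autiasp();                                   // Authenticate LR.
  //   assm.ret();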
+ void autiaz(); + + // Authenticate Instruction address, using key B [Armv8.3]. + void autib(const Register& xd, const Register& xn); + + // Authenticate Instruction address, using key B and a modifier of zero + // [Armv8.3]. + void autizb(const Register& xd); + + // Authenticate Instruction address, using key B, with address in x17 and + // modifier in x16 [Armv8.3]. + void autib1716(); + + // Authenticate Instruction address, using key B, with address in LR and + // modifier in SP [Armv8.3]. + void autibsp(); + + // Authenticate Instruction address, using key B, with address in LR and a + // modifier of zero [Armv8.3]. + void autibz(); + + // Authenticate Data address, using key A [Armv8.3]. + void autda(const Register& xd, const Register& xn); + + // Authenticate Data address, using key A and a modifier of zero [Armv8.3]. + void autdza(const Register& xd); + + // Authenticate Data address, using key B [Armv8.3]. + void autdb(const Register& xd, const Register& xn); + + // Authenticate Data address, using key B and a modifier of zero [Armv8.3]. + void autdzb(const Register& xd); + + // Strip Pointer Authentication Code of Data address [Armv8.3]. + void xpacd(const Register& xd); + + // Strip Pointer Authentication Code of Instruction address [Armv8.3]. + void xpaci(const Register& xd); + + // Strip Pointer Authentication Code of Instruction address in LR [Armv8.3]. + void xpaclri(); + + // Memory instructions. + + // Load integer or FP register. + void ldr(const CPURegister& rt, + const MemOperand& src, + LoadStoreScalingOption option = PreferScaledOffset); + + // Store integer or FP register. + void str(const CPURegister& rt, + const MemOperand& dst, + LoadStoreScalingOption option = PreferScaledOffset); + + // Load word with sign extension. + void ldrsw(const Register& xt, + const MemOperand& src, + LoadStoreScalingOption option = PreferScaledOffset); + + // Load byte. + void ldrb(const Register& rt, + const MemOperand& src, + LoadStoreScalingOption option = PreferScaledOffset); + + // Store byte. + void strb(const Register& rt, + const MemOperand& dst, + LoadStoreScalingOption option = PreferScaledOffset); + + // Load byte with sign extension. + void ldrsb(const Register& rt, + const MemOperand& src, + LoadStoreScalingOption option = PreferScaledOffset); + + // Load half-word. + void ldrh(const Register& rt, + const MemOperand& src, + LoadStoreScalingOption option = PreferScaledOffset); + + // Store half-word. + void strh(const Register& rt, + const MemOperand& dst, + LoadStoreScalingOption option = PreferScaledOffset); + + // Load half-word with sign extension. + void ldrsh(const Register& rt, + const MemOperand& src, + LoadStoreScalingOption option = PreferScaledOffset); + + // Load integer or FP register (with unscaled offset). + void ldur(const CPURegister& rt, + const MemOperand& src, + LoadStoreScalingOption option = PreferUnscaledOffset); + + // Store integer or FP register (with unscaled offset). + void stur(const CPURegister& rt, + const MemOperand& src, + LoadStoreScalingOption option = PreferUnscaledOffset); + + // Load word with sign extension. + void ldursw(const Register& xt, + const MemOperand& src, + LoadStoreScalingOption option = PreferUnscaledOffset); + + // Load byte (with unscaled offset). + void ldurb(const Register& rt, + const MemOperand& src, + LoadStoreScalingOption option = PreferUnscaledOffset); + + // Store byte (with unscaled offset). 
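  // For example, the scaled and unscaled forms accept different immediate
  // offsets (an illustrative sketch; `assm` is assumed to be an Assembler):
  //
  //   assm.ldr(x0, MemOperand(x1, 8));    // Scaled form: offset is a multiple
  //                                       // of the access size.
  //   assm.ldur(x0, MemOperand(x1, -4));  // Unscaled form: small signed byte
  //                                       // offsets are allowed.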
+ void sturb(const Register& rt, + const MemOperand& dst, + LoadStoreScalingOption option = PreferUnscaledOffset); + + // Load byte with sign extension (and unscaled offset). + void ldursb(const Register& rt, + const MemOperand& src, + LoadStoreScalingOption option = PreferUnscaledOffset); + + // Load half-word (with unscaled offset). + void ldurh(const Register& rt, + const MemOperand& src, + LoadStoreScalingOption option = PreferUnscaledOffset); + + // Store half-word (with unscaled offset). + void sturh(const Register& rt, + const MemOperand& dst, + LoadStoreScalingOption option = PreferUnscaledOffset); + + // Load half-word with sign extension (and unscaled offset). + void ldursh(const Register& rt, + const MemOperand& src, + LoadStoreScalingOption option = PreferUnscaledOffset); + + // Load double-word with pointer authentication, using data key A and a + // modifier of zero [Armv8.3]. + void ldraa(const Register& xt, const MemOperand& src); + + // Load double-word with pointer authentication, using data key B and a + // modifier of zero [Armv8.3]. + void ldrab(const Register& xt, const MemOperand& src); + + // Load integer or FP register pair. + void ldp(const CPURegister& rt, + const CPURegister& rt2, + const MemOperand& src); + + // Store integer or FP register pair. + void stp(const CPURegister& rt, + const CPURegister& rt2, + const MemOperand& dst); + + // Load word pair with sign extension. + void ldpsw(const Register& xt, const Register& xt2, const MemOperand& src); + + // Load integer or FP register pair, non-temporal. + void ldnp(const CPURegister& rt, + const CPURegister& rt2, + const MemOperand& src); + + // Store integer or FP register pair, non-temporal. + void stnp(const CPURegister& rt, + const CPURegister& rt2, + const MemOperand& dst); + + // Load integer or FP register from literal pool. + void ldr(const CPURegister& rt, RawLiteral* literal); + + // Load word with sign extension from literal pool. + void ldrsw(const Register& xt, RawLiteral* literal); + + // Load integer or FP register from pc + imm19 << 2. + void ldr(const CPURegister& rt, int64_t imm19); + + // Load word with sign extension from pc + imm19 << 2. + void ldrsw(const Register& xt, int64_t imm19); + + // Store exclusive byte. + void stxrb(const Register& rs, const Register& rt, const MemOperand& dst); + + // Store exclusive half-word. + void stxrh(const Register& rs, const Register& rt, const MemOperand& dst); + + // Store exclusive register. + void stxr(const Register& rs, const Register& rt, const MemOperand& dst); + + // Load exclusive byte. + void ldxrb(const Register& rt, const MemOperand& src); + + // Load exclusive half-word. + void ldxrh(const Register& rt, const MemOperand& src); + + // Load exclusive register. + void ldxr(const Register& rt, const MemOperand& src); + + // Store exclusive register pair. + void stxp(const Register& rs, + const Register& rt, + const Register& rt2, + const MemOperand& dst); + + // Load exclusive register pair. + void ldxp(const Register& rt, const Register& rt2, const MemOperand& src); + + // Store-release exclusive byte. + void stlxrb(const Register& rs, const Register& rt, const MemOperand& dst); + + // Store-release exclusive half-word. + void stlxrh(const Register& rs, const Register& rt, const MemOperand& dst); + + // Store-release exclusive register. + void stlxr(const Register& rs, const Register& rt, const MemOperand& dst); + + // Load-acquire exclusive byte. 
+ void ldaxrb(const Register& rt, const MemOperand& src); + + // Load-acquire exclusive half-word. + void ldaxrh(const Register& rt, const MemOperand& src); + + // Load-acquire exclusive register. + void ldaxr(const Register& rt, const MemOperand& src); + + // Store-release exclusive register pair. + void stlxp(const Register& rs, + const Register& rt, + const Register& rt2, + const MemOperand& dst); + + // Load-acquire exclusive register pair. + void ldaxp(const Register& rt, const Register& rt2, const MemOperand& src); + + // Store-release byte. + void stlrb(const Register& rt, const MemOperand& dst); + + // Store-release half-word. + void stlrh(const Register& rt, const MemOperand& dst); + + // Store-release register. + void stlr(const Register& rt, const MemOperand& dst); + + // Load-acquire byte. + void ldarb(const Register& rt, const MemOperand& src); + + // Load-acquire half-word. + void ldarh(const Register& rt, const MemOperand& src); + + // Load-acquire register. + void ldar(const Register& rt, const MemOperand& src); + + // Store LORelease byte [Armv8.1]. + void stllrb(const Register& rt, const MemOperand& dst); + + // Store LORelease half-word [Armv8.1]. + void stllrh(const Register& rt, const MemOperand& dst); + + // Store LORelease register [Armv8.1]. + void stllr(const Register& rt, const MemOperand& dst); + + // Load LORelease byte [Armv8.1]. + void ldlarb(const Register& rt, const MemOperand& src); + + // Load LORelease half-word [Armv8.1]. + void ldlarh(const Register& rt, const MemOperand& src); + + // Load LORelease register [Armv8.1]. + void ldlar(const Register& rt, const MemOperand& src); + + // Compare and Swap word or doubleword in memory [Armv8.1]. + void cas(const Register& rs, const Register& rt, const MemOperand& src); + + // Compare and Swap word or doubleword in memory [Armv8.1]. + void casa(const Register& rs, const Register& rt, const MemOperand& src); + + // Compare and Swap word or doubleword in memory [Armv8.1]. + void casl(const Register& rs, const Register& rt, const MemOperand& src); + + // Compare and Swap word or doubleword in memory [Armv8.1]. + void casal(const Register& rs, const Register& rt, const MemOperand& src); + + // Compare and Swap byte in memory [Armv8.1]. + void casb(const Register& rs, const Register& rt, const MemOperand& src); + + // Compare and Swap byte in memory [Armv8.1]. + void casab(const Register& rs, const Register& rt, const MemOperand& src); + + // Compare and Swap byte in memory [Armv8.1]. + void caslb(const Register& rs, const Register& rt, const MemOperand& src); + + // Compare and Swap byte in memory [Armv8.1]. + void casalb(const Register& rs, const Register& rt, const MemOperand& src); + + // Compare and Swap halfword in memory [Armv8.1]. + void cash(const Register& rs, const Register& rt, const MemOperand& src); + + // Compare and Swap halfword in memory [Armv8.1]. + void casah(const Register& rs, const Register& rt, const MemOperand& src); + + // Compare and Swap halfword in memory [Armv8.1]. + void caslh(const Register& rs, const Register& rt, const MemOperand& src); + + // Compare and Swap halfword in memory [Armv8.1]. + void casalh(const Register& rs, const Register& rt, const MemOperand& src); + + // Compare and Swap Pair of words or doublewords in memory [Armv8.1]. + void casp(const Register& rs, + const Register& rs2, + const Register& rt, + const Register& rt2, + const MemOperand& src); + + // Compare and Swap Pair of words or doublewords in memory [Armv8.1]. 
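Another illustrative sketch (same includes and `using` directive as the PAC example above; the helper name is hypothetical): the exclusive load/store forms above are the pre-Armv8.1 way to build a read-modify-write atomic, with a status register and a retry loop. The conditional branch `cbnz` used here is declared earlier in this header.

```cpp
// Hypothetical helper: atomically add x2 to the 64-bit value at [x0].
void EmitAtomicAddExclusive(Assembler* assm) {
  Label retry;
  assm->bind(&retry);
  assm->ldaxr(x1, MemOperand(x0));      // load-acquire exclusive
  assm->add(x1, x1, x2);                // modify
  assm->stlxr(w3, x1, MemOperand(x0));  // store-release exclusive; w3 = status
  assm->cbnz(w3, &retry);               // non-zero status: the exclusive store failed
}
```

On Armv8.1 targets, the `cas`/`casal` declarations above collapse a compare-and-swap into a single instruction.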
+ void caspa(const Register& rs, + const Register& rs2, + const Register& rt, + const Register& rt2, + const MemOperand& src); + + // Compare and Swap Pair of words or doublewords in memory [Armv8.1]. + void caspl(const Register& rs, + const Register& rs2, + const Register& rt, + const Register& rt2, + const MemOperand& src); + + // Compare and Swap Pair of words or doublewords in memory [Armv8.1]. + void caspal(const Register& rs, + const Register& rs2, + const Register& rt, + const Register& rt2, + const MemOperand& src); + + // Store-release byte (with unscaled offset) [Armv8.4]. + void stlurb(const Register& rt, const MemOperand& dst); + + // Load-acquire RCpc Register byte (with unscaled offset) [Armv8.4]. + void ldapurb(const Register& rt, const MemOperand& src); + + // Load-acquire RCpc Register signed byte (with unscaled offset) [Armv8.4]. + void ldapursb(const Register& rt, const MemOperand& src); + + // Store-release half-word (with unscaled offset) [Armv8.4]. + void stlurh(const Register& rt, const MemOperand& dst); + + // Load-acquire RCpc Register half-word (with unscaled offset) [Armv8.4]. + void ldapurh(const Register& rt, const MemOperand& src); + + // Load-acquire RCpc Register signed half-word (with unscaled offset) + // [Armv8.4]. + void ldapursh(const Register& rt, const MemOperand& src); + + // Store-release word or double-word (with unscaled offset) [Armv8.4]. + void stlur(const Register& rt, const MemOperand& dst); + + // Load-acquire RCpc Register word or double-word (with unscaled offset) + // [Armv8.4]. + void ldapur(const Register& rt, const MemOperand& src); + + // Load-acquire RCpc Register signed word (with unscaled offset) [Armv8.4]. + void ldapursw(const Register& xt, const MemOperand& src); + + // Atomic add on byte in memory [Armv8.1] + void ldaddb(const Register& rs, const Register& rt, const MemOperand& src); + + // Atomic add on byte in memory, with Load-acquire semantics [Armv8.1] + void ldaddab(const Register& rs, const Register& rt, const MemOperand& src); + + // Atomic add on byte in memory, with Store-release semantics [Armv8.1] + void ldaddlb(const Register& rs, const Register& rt, const MemOperand& src); + + // Atomic add on byte in memory, with Load-acquire and Store-release semantics + // [Armv8.1] + void ldaddalb(const Register& rs, const Register& rt, const MemOperand& src); + + // Atomic add on halfword in memory [Armv8.1] + void ldaddh(const Register& rs, const Register& rt, const MemOperand& src); + + // Atomic add on halfword in memory, with Load-acquire semantics [Armv8.1] + void ldaddah(const Register& rs, const Register& rt, const MemOperand& src); + + // Atomic add on halfword in memory, with Store-release semantics [Armv8.1] + void ldaddlh(const Register& rs, const Register& rt, const MemOperand& src); + + // Atomic add on halfword in memory, with Load-acquire and Store-release + // semantics [Armv8.1] + void ldaddalh(const Register& rs, const Register& rt, const MemOperand& src); + + // Atomic add on word or doubleword in memory [Armv8.1] + void ldadd(const Register& rs, const Register& rt, const MemOperand& src); + + // Atomic add on word or doubleword in memory, with Load-acquire semantics + // [Armv8.1] + void ldadda(const Register& rs, const Register& rt, const MemOperand& src); + + // Atomic add on word or doubleword in memory, with Store-release semantics + // [Armv8.1] + void ldaddl(const Register& rs, const Register& rt, const MemOperand& src); + + // Atomic add on word or doubleword in memory, with Load-acquire and + // 
Store-release semantics [Armv8.1] + void ldaddal(const Register& rs, const Register& rt, const MemOperand& src); + + // Atomic bit clear on byte in memory [Armv8.1] + void ldclrb(const Register& rs, const Register& rt, const MemOperand& src); + + // Atomic bit clear on byte in memory, with Load-acquire semantics [Armv8.1] + void ldclrab(const Register& rs, const Register& rt, const MemOperand& src); + + // Atomic bit clear on byte in memory, with Store-release semantics [Armv8.1] + void ldclrlb(const Register& rs, const Register& rt, const MemOperand& src); + + // Atomic bit clear on byte in memory, with Load-acquire and Store-release + // semantics [Armv8.1] + void ldclralb(const Register& rs, const Register& rt, const MemOperand& src); + + // Atomic bit clear on halfword in memory [Armv8.1] + void ldclrh(const Register& rs, const Register& rt, const MemOperand& src); + + // Atomic bit clear on halfword in memory, with Load-acquire semantics + // [Armv8.1] + void ldclrah(const Register& rs, const Register& rt, const MemOperand& src); + + // Atomic bit clear on halfword in memory, with Store-release semantics + // [Armv8.1] + void ldclrlh(const Register& rs, const Register& rt, const MemOperand& src); + + // Atomic bit clear on halfword in memory, with Load-acquire and Store-release + // semantics [Armv8.1] + void ldclralh(const Register& rs, const Register& rt, const MemOperand& src); + + // Atomic bit clear on word or doubleword in memory [Armv8.1] + void ldclr(const Register& rs, const Register& rt, const MemOperand& src); + + // Atomic bit clear on word or doubleword in memory, with Load-acquire + // semantics [Armv8.1] + void ldclra(const Register& rs, const Register& rt, const MemOperand& src); + + // Atomic bit clear on word or doubleword in memory, with Store-release + // semantics [Armv8.1] + void ldclrl(const Register& rs, const Register& rt, const MemOperand& src); + + // Atomic bit clear on word or doubleword in memory, with Load-acquire and + // Store-release semantics [Armv8.1] + void ldclral(const Register& rs, const Register& rt, const MemOperand& src); + + // Atomic exclusive OR on byte in memory [Armv8.1] + void ldeorb(const Register& rs, const Register& rt, const MemOperand& src); + + // Atomic exclusive OR on byte in memory, with Load-acquire semantics + // [Armv8.1] + void ldeorab(const Register& rs, const Register& rt, const MemOperand& src); + + // Atomic exclusive OR on byte in memory, with Store-release semantics + // [Armv8.1] + void ldeorlb(const Register& rs, const Register& rt, const MemOperand& src); + + // Atomic exclusive OR on byte in memory, with Load-acquire and Store-release + // semantics [Armv8.1] + void ldeoralb(const Register& rs, const Register& rt, const MemOperand& src); + + // Atomic exclusive OR on halfword in memory [Armv8.1] + void ldeorh(const Register& rs, const Register& rt, const MemOperand& src); + + // Atomic exclusive OR on halfword in memory, with Load-acquire semantics + // [Armv8.1] + void ldeorah(const Register& rs, const Register& rt, const MemOperand& src); + + // Atomic exclusive OR on halfword in memory, with Store-release semantics + // [Armv8.1] + void ldeorlh(const Register& rs, const Register& rt, const MemOperand& src); + + // Atomic exclusive OR on halfword in memory, with Load-acquire and + // Store-release semantics [Armv8.1] + void ldeoralh(const Register& rs, const Register& rt, const MemOperand& src); + + // Atomic exclusive OR on word or doubleword in memory [Armv8.1] + void ldeor(const Register& rs, const Register& 
rt, const MemOperand& src); + + // Atomic exclusive OR on word or doubleword in memory, with Load-acquire + // semantics [Armv8.1] + void ldeora(const Register& rs, const Register& rt, const MemOperand& src); + + // Atomic exclusive OR on word or doubleword in memory, with Store-release + // semantics [Armv8.1] + void ldeorl(const Register& rs, const Register& rt, const MemOperand& src); + + // Atomic exclusive OR on word or doubleword in memory, with Load-acquire and + // Store-release semantics [Armv8.1] + void ldeoral(const Register& rs, const Register& rt, const MemOperand& src); + + // Atomic bit set on byte in memory [Armv8.1] + void ldsetb(const Register& rs, const Register& rt, const MemOperand& src); + + // Atomic bit set on byte in memory, with Load-acquire semantics [Armv8.1] + void ldsetab(const Register& rs, const Register& rt, const MemOperand& src); + + // Atomic bit set on byte in memory, with Store-release semantics [Armv8.1] + void ldsetlb(const Register& rs, const Register& rt, const MemOperand& src); + + // Atomic bit set on byte in memory, with Load-acquire and Store-release + // semantics [Armv8.1] + void ldsetalb(const Register& rs, const Register& rt, const MemOperand& src); + + // Atomic bit set on halfword in memory [Armv8.1] + void ldseth(const Register& rs, const Register& rt, const MemOperand& src); + + // Atomic bit set on halfword in memory, with Load-acquire semantics [Armv8.1] + void ldsetah(const Register& rs, const Register& rt, const MemOperand& src); + + // Atomic bit set on halfword in memory, with Store-release semantics + // [Armv8.1] + void ldsetlh(const Register& rs, const Register& rt, const MemOperand& src); + + // Atomic bit set on halfword in memory, with Load-acquire and Store-release + // semantics [Armv8.1] + void ldsetalh(const Register& rs, const Register& rt, const MemOperand& src); + + // Atomic bit set on word or doubleword in memory [Armv8.1] + void ldset(const Register& rs, const Register& rt, const MemOperand& src); + + // Atomic bit set on word or doubleword in memory, with Load-acquire semantics + // [Armv8.1] + void ldseta(const Register& rs, const Register& rt, const MemOperand& src); + + // Atomic bit set on word or doubleword in memory, with Store-release + // semantics [Armv8.1] + void ldsetl(const Register& rs, const Register& rt, const MemOperand& src); + + // Atomic bit set on word or doubleword in memory, with Load-acquire and + // Store-release semantics [Armv8.1] + void ldsetal(const Register& rs, const Register& rt, const MemOperand& src); + + // Atomic signed maximum on byte in memory [Armv8.1] + void ldsmaxb(const Register& rs, const Register& rt, const MemOperand& src); + + // Atomic signed maximum on byte in memory, with Load-acquire semantics + // [Armv8.1] + void ldsmaxab(const Register& rs, const Register& rt, const MemOperand& src); + + // Atomic signed maximum on byte in memory, with Store-release semantics + // [Armv8.1] + void ldsmaxlb(const Register& rs, const Register& rt, const MemOperand& src); + + // Atomic signed maximum on byte in memory, with Load-acquire and + // Store-release semantics [Armv8.1] + void ldsmaxalb(const Register& rs, const Register& rt, const MemOperand& src); + + // Atomic signed maximum on halfword in memory [Armv8.1] + void ldsmaxh(const Register& rs, const Register& rt, const MemOperand& src); + + // Atomic signed maximum on halfword in memory, with Load-acquire semantics + // [Armv8.1] + void ldsmaxah(const Register& rs, const Register& rt, const MemOperand& src); + + // Atomic 
signed maximum on halfword in memory, with Store-release semantics + // [Armv8.1] + void ldsmaxlh(const Register& rs, const Register& rt, const MemOperand& src); + + // Atomic signed maximum on halfword in memory, with Load-acquire and + // Store-release semantics [Armv8.1] + void ldsmaxalh(const Register& rs, const Register& rt, const MemOperand& src); + + // Atomic signed maximum on word or doubleword in memory [Armv8.1] + void ldsmax(const Register& rs, const Register& rt, const MemOperand& src); + + // Atomic signed maximum on word or doubleword in memory, with Load-acquire + // semantics [Armv8.1] + void ldsmaxa(const Register& rs, const Register& rt, const MemOperand& src); + + // Atomic signed maximum on word or doubleword in memory, with Store-release + // semantics [Armv8.1] + void ldsmaxl(const Register& rs, const Register& rt, const MemOperand& src); + + // Atomic signed maximum on word or doubleword in memory, with Load-acquire + // and Store-release semantics [Armv8.1] + void ldsmaxal(const Register& rs, const Register& rt, const MemOperand& src); + + // Atomic signed minimum on byte in memory [Armv8.1] + void ldsminb(const Register& rs, const Register& rt, const MemOperand& src); + + // Atomic signed minimum on byte in memory, with Load-acquire semantics + // [Armv8.1] + void ldsminab(const Register& rs, const Register& rt, const MemOperand& src); + + // Atomic signed minimum on byte in memory, with Store-release semantics + // [Armv8.1] + void ldsminlb(const Register& rs, const Register& rt, const MemOperand& src); + + // Atomic signed minimum on byte in memory, with Load-acquire and + // Store-release semantics [Armv8.1] + void ldsminalb(const Register& rs, const Register& rt, const MemOperand& src); + + // Atomic signed minimum on halfword in memory [Armv8.1] + void ldsminh(const Register& rs, const Register& rt, const MemOperand& src); + + // Atomic signed minimum on halfword in memory, with Load-acquire semantics + // [Armv8.1] + void ldsminah(const Register& rs, const Register& rt, const MemOperand& src); + + // Atomic signed minimum on halfword in memory, with Store-release semantics + // [Armv8.1] + void ldsminlh(const Register& rs, const Register& rt, const MemOperand& src); + + // Atomic signed minimum on halfword in memory, with Load-acquire and + // Store-release semantics [Armv8.1] + void ldsminalh(const Register& rs, const Register& rt, const MemOperand& src); + + // Atomic signed minimum on word or doubleword in memory [Armv8.1] + void ldsmin(const Register& rs, const Register& rt, const MemOperand& src); + + // Atomic signed minimum on word or doubleword in memory, with Load-acquire + // semantics [Armv8.1] + void ldsmina(const Register& rs, const Register& rt, const MemOperand& src); + + // Atomic signed minimum on word or doubleword in memory, with Store-release + // semantics [Armv8.1] + void ldsminl(const Register& rs, const Register& rt, const MemOperand& src); + + // Atomic signed minimum on word or doubleword in memory, with Load-acquire + // and Store-release semantics [Armv8.1] + void ldsminal(const Register& rs, const Register& rt, const MemOperand& src); + + // Atomic unsigned maximum on byte in memory [Armv8.1] + void ldumaxb(const Register& rs, const Register& rt, const MemOperand& src); + + // Atomic unsigned maximum on byte in memory, with Load-acquire semantics + // [Armv8.1] + void ldumaxab(const Register& rs, const Register& rt, const MemOperand& src); + + // Atomic unsigned maximum on byte in memory, with Store-release semantics + // 
[Armv8.1] + void ldumaxlb(const Register& rs, const Register& rt, const MemOperand& src); + + // Atomic unsigned maximum on byte in memory, with Load-acquire and + // Store-release semantics [Armv8.1] + void ldumaxalb(const Register& rs, const Register& rt, const MemOperand& src); + + // Atomic unsigned maximum on halfword in memory [Armv8.1] + void ldumaxh(const Register& rs, const Register& rt, const MemOperand& src); + + // Atomic unsigned maximum on halfword in memory, with Load-acquire semantics + // [Armv8.1] + void ldumaxah(const Register& rs, const Register& rt, const MemOperand& src); + + // Atomic unsigned maximum on halfword in memory, with Store-release semantics + // [Armv8.1] + void ldumaxlh(const Register& rs, const Register& rt, const MemOperand& src); + + // Atomic unsigned maximum on halfword in memory, with Load-acquire and + // Store-release semantics [Armv8.1] + void ldumaxalh(const Register& rs, const Register& rt, const MemOperand& src); + + // Atomic unsigned maximum on word or doubleword in memory [Armv8.1] + void ldumax(const Register& rs, const Register& rt, const MemOperand& src); + + // Atomic unsigned maximum on word or doubleword in memory, with Load-acquire + // semantics [Armv8.1] + void ldumaxa(const Register& rs, const Register& rt, const MemOperand& src); + + // Atomic unsigned maximum on word or doubleword in memory, with Store-release + // semantics [Armv8.1] + void ldumaxl(const Register& rs, const Register& rt, const MemOperand& src); + + // Atomic unsigned maximum on word or doubleword in memory, with Load-acquire + // and Store-release semantics [Armv8.1] + void ldumaxal(const Register& rs, const Register& rt, const MemOperand& src); + + // Atomic unsigned minimum on byte in memory [Armv8.1] + void lduminb(const Register& rs, const Register& rt, const MemOperand& src); + + // Atomic unsigned minimum on byte in memory, with Load-acquire semantics + // [Armv8.1] + void lduminab(const Register& rs, const Register& rt, const MemOperand& src); + + // Atomic unsigned minimum on byte in memory, with Store-release semantics + // [Armv8.1] + void lduminlb(const Register& rs, const Register& rt, const MemOperand& src); + + // Atomic unsigned minimum on byte in memory, with Load-acquire and + // Store-release semantics [Armv8.1] + void lduminalb(const Register& rs, const Register& rt, const MemOperand& src); + + // Atomic unsigned minimum on halfword in memory [Armv8.1] + void lduminh(const Register& rs, const Register& rt, const MemOperand& src); + + // Atomic unsigned minimum on halfword in memory, with Load-acquire semantics + // [Armv8.1] + void lduminah(const Register& rs, const Register& rt, const MemOperand& src); + + // Atomic unsigned minimum on halfword in memory, with Store-release semantics + // [Armv8.1] + void lduminlh(const Register& rs, const Register& rt, const MemOperand& src); + + // Atomic unsigned minimum on halfword in memory, with Load-acquire and + // Store-release semantics [Armv8.1] + void lduminalh(const Register& rs, const Register& rt, const MemOperand& src); + + // Atomic unsigned minimum on word or doubleword in memory [Armv8.1] + void ldumin(const Register& rs, const Register& rt, const MemOperand& src); + + // Atomic unsigned minimum on word or doubleword in memory, with Load-acquire + // semantics [Armv8.1] + void ldumina(const Register& rs, const Register& rt, const MemOperand& src); + + // Atomic unsigned minimum on word or doubleword in memory, with Store-release + // semantics [Armv8.1] + void lduminl(const Register& rs, 
const Register& rt, const MemOperand& src); + + // Atomic unsigned minimum on word or doubleword in memory, with Load-acquire + // and Store-release semantics [Armv8.1] + void lduminal(const Register& rs, const Register& rt, const MemOperand& src); + + // Atomic add on byte in memory, without return. [Armv8.1] + void staddb(const Register& rs, const MemOperand& src); + + // Atomic add on byte in memory, with Store-release semantics and without + // return. [Armv8.1] + void staddlb(const Register& rs, const MemOperand& src); + + // Atomic add on halfword in memory, without return. [Armv8.1] + void staddh(const Register& rs, const MemOperand& src); + + // Atomic add on halfword in memory, with Store-release semantics and without + // return. [Armv8.1] + void staddlh(const Register& rs, const MemOperand& src); + + // Atomic add on word or doubleword in memory, without return. [Armv8.1] + void stadd(const Register& rs, const MemOperand& src); + + // Atomic add on word or doubleword in memory, with Store-release semantics + // and without return. [Armv8.1] + void staddl(const Register& rs, const MemOperand& src); + + // Atomic bit clear on byte in memory, without return. [Armv8.1] + void stclrb(const Register& rs, const MemOperand& src); + + // Atomic bit clear on byte in memory, with Store-release semantics and + // without return. [Armv8.1] + void stclrlb(const Register& rs, const MemOperand& src); + + // Atomic bit clear on halfword in memory, without return. [Armv8.1] + void stclrh(const Register& rs, const MemOperand& src); + + // Atomic bit clear on halfword in memory, with Store-release semantics and + // without return. [Armv8.1] + void stclrlh(const Register& rs, const MemOperand& src); + + // Atomic bit clear on word or doubleword in memory, without return. [Armv8.1] + void stclr(const Register& rs, const MemOperand& src); + + // Atomic bit clear on word or doubleword in memory, with Store-release + // semantics and without return. [Armv8.1] + void stclrl(const Register& rs, const MemOperand& src); + + // Atomic exclusive OR on byte in memory, without return. [Armv8.1] + void steorb(const Register& rs, const MemOperand& src); + + // Atomic exclusive OR on byte in memory, with Store-release semantics and + // without return. [Armv8.1] + void steorlb(const Register& rs, const MemOperand& src); + + // Atomic exclusive OR on halfword in memory, without return. [Armv8.1] + void steorh(const Register& rs, const MemOperand& src); + + // Atomic exclusive OR on halfword in memory, with Store-release semantics + // and without return. [Armv8.1] + void steorlh(const Register& rs, const MemOperand& src); + + // Atomic exclusive OR on word or doubleword in memory, without return. + // [Armv8.1] + void steor(const Register& rs, const MemOperand& src); + + // Atomic exclusive OR on word or doubleword in memory, with Store-release + // semantics and without return. [Armv8.1] + void steorl(const Register& rs, const MemOperand& src); + + // Atomic bit set on byte in memory, without return. [Armv8.1] + void stsetb(const Register& rs, const MemOperand& src); + + // Atomic bit set on byte in memory, with Store-release semantics and without + // return. [Armv8.1] + void stsetlb(const Register& rs, const MemOperand& src); + + // Atomic bit set on halfword in memory, without return. [Armv8.1] + void stseth(const Register& rs, const MemOperand& src); + + // Atomic bit set on halfword in memory, with Store-release semantics and + // without return. 
[Armv8.1] + void stsetlh(const Register& rs, const MemOperand& src); + + // Atomic bit set on word or doubleword in memory, without return. [Armv8.1] + void stset(const Register& rs, const MemOperand& src); + + // Atomic bit set on word or doubleword in memory, with Store-release + // semantics and without return. [Armv8.1] + void stsetl(const Register& rs, const MemOperand& src); + + // Atomic signed maximum on byte in memory, without return. [Armv8.1] + void stsmaxb(const Register& rs, const MemOperand& src); + + // Atomic signed maximum on byte in memory, with Store-release semantics and + // without return. [Armv8.1] + void stsmaxlb(const Register& rs, const MemOperand& src); + + // Atomic signed maximum on halfword in memory, without return. [Armv8.1] + void stsmaxh(const Register& rs, const MemOperand& src); + + // Atomic signed maximum on halfword in memory, with Store-release semantics + // and without return. [Armv8.1] + void stsmaxlh(const Register& rs, const MemOperand& src); + + // Atomic signed maximum on word or doubleword in memory, without return. + // [Armv8.1] + void stsmax(const Register& rs, const MemOperand& src); + + // Atomic signed maximum on word or doubleword in memory, with Store-release + // semantics and without return. [Armv8.1] + void stsmaxl(const Register& rs, const MemOperand& src); + + // Atomic signed minimum on byte in memory, without return. [Armv8.1] + void stsminb(const Register& rs, const MemOperand& src); + + // Atomic signed minimum on byte in memory, with Store-release semantics and + // without return. [Armv8.1] + void stsminlb(const Register& rs, const MemOperand& src); + + // Atomic signed minimum on halfword in memory, without return. [Armv8.1] + void stsminh(const Register& rs, const MemOperand& src); + + // Atomic signed minimum on halfword in memory, with Store-release semantics + // and without return. [Armv8.1] + void stsminlh(const Register& rs, const MemOperand& src); + + // Atomic signed minimum on word or doubleword in memory, without return. + // [Armv8.1] + void stsmin(const Register& rs, const MemOperand& src); + + // Atomic signed minimum on word or doubleword in memory, with Store-release + // semantics and without return. [Armv8.1] + void stsminl(const Register& rs, const MemOperand& src); + + // Atomic unsigned maximum on byte in memory, without return. [Armv8.1] + void stumaxb(const Register& rs, const MemOperand& src); + + // Atomic unsigned maximum on byte in memory, with Store-release semantics and + // without return. [Armv8.1] + void stumaxlb(const Register& rs, const MemOperand& src); + + // Atomic unsigned maximum on halfword in memory, without return. [Armv8.1] + void stumaxh(const Register& rs, const MemOperand& src); + + // Atomic unsigned maximum on halfword in memory, with Store-release semantics + // and without return. [Armv8.1] + void stumaxlh(const Register& rs, const MemOperand& src); + + // Atomic unsigned maximum on word or doubleword in memory, without return. + // [Armv8.1] + void stumax(const Register& rs, const MemOperand& src); + + // Atomic unsigned maximum on word or doubleword in memory, with Store-release + // semantics and without return. [Armv8.1] + void stumaxl(const Register& rs, const MemOperand& src); + + // Atomic unsigned minimum on byte in memory, without return. [Armv8.1] + void stuminb(const Register& rs, const MemOperand& src); + + // Atomic unsigned minimum on byte in memory, with Store-release semantics and + // without return.
[Armv8.1] + void stuminlb(const Register& rs, const MemOperand& src); + + // Atomic unsigned minimum on halfword in memory, without return. [Armv8.1] + void stuminh(const Register& rs, const MemOperand& src); + + // Atomic unsigned minimum on halfword in memory, with Store-release semantics + // and without return. [Armv8.1] + void stuminlh(const Register& rs, const MemOperand& src); + + // Atomic unsigned minimum on word or doubleword in memory, without return. + // [Armv8.1] + void stumin(const Register& rs, const MemOperand& src); + + // Atomic unsigned minimum on word or doubleword in memory, with Store-release + // semantics and without return. [Armv8.1] + void stuminl(const Register& rs, const MemOperand& src); + + // Swap byte in memory [Armv8.1] + void swpb(const Register& rs, const Register& rt, const MemOperand& src); + + // Swap byte in memory, with Load-acquire semantics [Armv8.1] + void swpab(const Register& rs, const Register& rt, const MemOperand& src); + + // Swap byte in memory, with Store-release semantics [Armv8.1] + void swplb(const Register& rs, const Register& rt, const MemOperand& src); + + // Swap byte in memory, with Load-acquire and Store-release semantics + // [Armv8.1] + void swpalb(const Register& rs, const Register& rt, const MemOperand& src); + + // Swap halfword in memory [Armv8.1] + void swph(const Register& rs, const Register& rt, const MemOperand& src); + + // Swap halfword in memory, with Load-acquire semantics [Armv8.1] + void swpah(const Register& rs, const Register& rt, const MemOperand& src); + + // Swap halfword in memory, with Store-release semantics [Armv8.1] + void swplh(const Register& rs, const Register& rt, const MemOperand& src); + + // Swap halfword in memory, with Load-acquire and Store-release semantics + // [Armv8.1] + void swpalh(const Register& rs, const Register& rt, const MemOperand& src); + + // Swap word or doubleword in memory [Armv8.1] + void swp(const Register& rs, const Register& rt, const MemOperand& src); + + // Swap word or doubleword in memory, with Load-acquire semantics [Armv8.1] + void swpa(const Register& rs, const Register& rt, const MemOperand& src); + + // Swap word or doubleword in memory, with Store-release semantics [Armv8.1] + void swpl(const Register& rs, const Register& rt, const MemOperand& src); + + // Swap word or doubleword in memory, with Load-acquire and Store-release + // semantics [Armv8.1] + void swpal(const Register& rs, const Register& rt, const MemOperand& src); + + // Load-Acquire RCpc Register byte [Armv8.3] + void ldaprb(const Register& rt, const MemOperand& src); + + // Load-Acquire RCpc Register halfword [Armv8.3] + void ldaprh(const Register& rt, const MemOperand& src); + + // Load-Acquire RCpc Register word or doubleword [Armv8.3] + void ldapr(const Register& rt, const MemOperand& src); + + // Prefetch memory. + void prfm(PrefetchOperation op, + const MemOperand& addr, + LoadStoreScalingOption option = PreferScaledOffset); + + // Prefetch memory (with unscaled offset). + void prfum(PrefetchOperation op, + const MemOperand& addr, + LoadStoreScalingOption option = PreferUnscaledOffset); + + // Prefetch memory in the literal pool. + void prfm(PrefetchOperation op, RawLiteral* literal); + + // Prefetch from pc + imm19 << 2. + void prfm(PrefetchOperation op, int64_t imm19); + + // Prefetch memory (allowing unallocated hints). + void prfm(int op, + const MemOperand& addr, + LoadStoreScalingOption option = PreferScaledOffset); + + // Prefetch memory (with unscaled offset, allowing unallocated hints). 
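Continuing that example under the same assumptions (helper name hypothetical): the Armv8.1 `ld*`, `st*` and `swp*` declarations above replace the exclusive retry loop with single-instruction atomics, where the `a`, `l` and `al` suffixes select acquire, release, or both.

```cpp
// Hypothetical helper: single-instruction atomics on the 64-bit value at [x0].
void EmitLseAtomics(Assembler* assm) {
  assm->ldaddal(x2, x1, MemOperand(x0));  // [x0] += x2; old value -> x1 (acquire + release)
  assm->staddl(x2, MemOperand(x0));       // same add, old value discarded (release only)
  assm->swpal(x4, x5, MemOperand(x0));    // x5 = old [x0]; [x0] = x4 (acquire + release)
}
```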
+ void prfum(int op, + const MemOperand& addr, + LoadStoreScalingOption option = PreferUnscaledOffset); + + // Prefetch memory in the literal pool (allowing unallocated hints). + void prfm(int op, RawLiteral* literal); + + // Prefetch from pc + imm19 << 2 (allowing unallocated hints). + void prfm(int op, int64_t imm19); + + // Move instructions. The default shift of -1 indicates that the move + // instruction will calculate an appropriate 16-bit immediate and left shift + // that is equal to the 64-bit immediate argument. If an explicit left shift + // is specified (0, 16, 32 or 48), the immediate must be a 16-bit value. + // + // For movk, an explicit shift can be used to indicate which half word should + // be overwritten, eg. movk(x0, 0, 0) will overwrite the least-significant + // half word with zero, whereas movk(x0, 0, 48) will overwrite the + // most-significant. + + // Move immediate and keep. + void movk(const Register& rd, uint64_t imm, int shift = -1) { + MoveWide(rd, imm, shift, MOVK); + } + + // Move inverted immediate. + void movn(const Register& rd, uint64_t imm, int shift = -1) { + MoveWide(rd, imm, shift, MOVN); + } + + // Move immediate. + void movz(const Register& rd, uint64_t imm, int shift = -1) { + MoveWide(rd, imm, shift, MOVZ); + } + + // Move immediate, aliases for movz, movn, orr. + void mov(const Register& rd, uint64_t imm) { + if (!OneInstrMoveImmediateHelper(this, rd, imm)) { + VIXL_UNIMPLEMENTED(); + } + } + + // Misc instructions. + + // Monitor debug-mode breakpoint. + void brk(int code); + + // Halting debug-mode breakpoint. + void hlt(int code); + + // Generate exception targeting EL1. + void svc(int code); + + // Generate undefined instruction exception. + void udf(int code); + + // Move register to register. + void mov(const Register& rd, const Register& rn); + + // Move inverted operand to register. + void mvn(const Register& rd, const Operand& operand); + + // System instructions. + + // Move to register from system register. + void mrs(const Register& xt, SystemRegister sysreg); + + // Move from register to system register. + void msr(SystemRegister sysreg, const Register& xt); + + // Invert carry flag [Armv8.4]. + void cfinv(); + + // Convert floating-point condition flags from alternative format to Arm + // format [Armv8.5]. + void xaflag(); + + // Convert floating-point condition flags from Arm format to alternative + // format [Armv8.5]. + void axflag(); + + // System instruction. + void sys(int op1, int crn, int crm, int op2, const Register& xt = xzr); + + // System instruction with pre-encoded op (op1:crn:crm:op2). + void sys(int op, const Register& xt = xzr); + + // System data cache operation. + void dc(DataCacheOp op, const Register& rt); + + // System instruction cache operation. + void ic(InstructionCacheOp op, const Register& rt); + + // System hint (named type). + void hint(SystemHint code); + + // System hint (numbered type). + void hint(int imm7); + + // Clear exclusive monitor. + void clrex(int imm4 = 0xf); + + // Data memory barrier. + void dmb(BarrierDomain domain, BarrierType type); + + // Data synchronization barrier. + void dsb(BarrierDomain domain, BarrierType type); + + // Instruction synchronization barrier. + void isb(); + + // Error synchronization barrier. + void esb(); + + // Conditional speculation dependency barrier. + void csdb(); + + // No-op. + void nop() { hint(NOP); } + + // Branch target identification. + void bti(BranchTargetIdentifier id); + + // FP and NEON instructions. 
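Before the FP and NEON block, a sketch of the move-wide immediates described in the comment above (same assumptions as the earlier examples; the constant is arbitrary). An explicit shift of 0, 16, 32 or 48 selects which half-word is written, while the default shift of -1 lets the assembler derive the encoding.

```cpp
// Hypothetical helper: build x0 = 0x123456789abcdef0 from 16-bit chunks.
void EmitBuildConstant(Assembler* assm) {
  assm->movz(x0, 0x1234, 48);  // x0 = 0x1234'0000'0000'0000
  assm->movk(x0, 0x5678, 32);  // keep the other bits, patch bits [47:32]
  assm->movk(x0, 0x9abc, 16);
  assm->movk(x0, 0xdef0, 0);   // patch bits [15:0]
  // Default shift of -1: the assembler picks the shift itself, provided the
  // value is representable by a single move-wide immediate.
  assm->movz(x1, 0xffff0000);  // encoded as movz x1, #0xffff, lsl #16
}
```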
+ + // Move double precision immediate to FP register. + void fmov(const VRegister& vd, double imm); + + // Move single precision immediate to FP register. + void fmov(const VRegister& vd, float imm); + + // Move half precision immediate to FP register [Armv8.2]. + void fmov(const VRegister& vd, Float16 imm); + + // Move FP register to register. + void fmov(const Register& rd, const VRegister& fn); + + // Move register to FP register. + void fmov(const VRegister& vd, const Register& rn); + + // Move FP register to FP register. + void fmov(const VRegister& vd, const VRegister& fn); + + // Move 64-bit register to top half of 128-bit FP register. + void fmov(const VRegister& vd, int index, const Register& rn); + + // Move top half of 128-bit FP register to 64-bit register. + void fmov(const Register& rd, const VRegister& vn, int index); + + // FP add. + void fadd(const VRegister& vd, const VRegister& vn, const VRegister& vm); + + // FP subtract. + void fsub(const VRegister& vd, const VRegister& vn, const VRegister& vm); + + // FP multiply. + void fmul(const VRegister& vd, const VRegister& vn, const VRegister& vm); + + // FP fused multiply-add. + void fmadd(const VRegister& vd, + const VRegister& vn, + const VRegister& vm, + const VRegister& va); + + // FP fused multiply-subtract. + void fmsub(const VRegister& vd, + const VRegister& vn, + const VRegister& vm, + const VRegister& va); + + // FP fused multiply-add and negate. + void fnmadd(const VRegister& vd, + const VRegister& vn, + const VRegister& vm, + const VRegister& va); + + // FP fused multiply-subtract and negate. + void fnmsub(const VRegister& vd, + const VRegister& vn, + const VRegister& vm, + const VRegister& va); + + // FP multiply-negate scalar. + void fnmul(const VRegister& vd, const VRegister& vn, const VRegister& vm); + + // FP reciprocal exponent scalar. + void frecpx(const VRegister& vd, const VRegister& vn); + + // FP divide. + void fdiv(const VRegister& vd, const VRegister& fn, const VRegister& vm); + + // FP maximum. + void fmax(const VRegister& vd, const VRegister& fn, const VRegister& vm); + + // FP minimum. + void fmin(const VRegister& vd, const VRegister& fn, const VRegister& vm); + + // FP maximum number. + void fmaxnm(const VRegister& vd, const VRegister& fn, const VRegister& vm); + + // FP minimum number. + void fminnm(const VRegister& vd, const VRegister& fn, const VRegister& vm); + + // FP absolute. + void fabs(const VRegister& vd, const VRegister& vn); + + // FP negate. + void fneg(const VRegister& vd, const VRegister& vn); + + // FP square root. + void fsqrt(const VRegister& vd, const VRegister& vn); + + // FP round to integer, nearest with ties to away. + void frinta(const VRegister& vd, const VRegister& vn); + + // FP round to integer, implicit rounding. + void frinti(const VRegister& vd, const VRegister& vn); + + // FP round to integer, toward minus infinity. + void frintm(const VRegister& vd, const VRegister& vn); + + // FP round to integer, nearest with ties to even. + void frintn(const VRegister& vd, const VRegister& vn); + + // FP round to integer, toward plus infinity. + void frintp(const VRegister& vd, const VRegister& vn); + + // FP round to integer, exact, implicit rounding. + void frintx(const VRegister& vd, const VRegister& vn); + + // FP round to integer, towards zero. + void frintz(const VRegister& vd, const VRegister& vn); + + // FP round to 32-bit integer, exact, implicit rounding [Armv8.5]. 
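A brief sketch of the scalar FP arithmetic declared above (same assumptions; helper name hypothetical). Note that the immediate `fmov` overloads only accept values encodable as an 8-bit FP immediate, such as 1.0.

```cpp
// Hypothetical helper: d0 = d1 * d2 + 1.0, then take the square root.
void EmitFpMac(Assembler* assm) {
  assm->fmov(d3, 1.0);          // FP8-encodable double immediate
  assm->fmadd(d0, d1, d2, d3);  // fused multiply-add: d0 = d1 * d2 + d3
  assm->fsqrt(d0, d0);
}
```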
+ void frint32x(const VRegister& vd, const VRegister& vn); + + // FP round to 32-bit integer, towards zero [Armv8.5]. + void frint32z(const VRegister& vd, const VRegister& vn); + + // FP round to 64-bit integer, exact, implicit rounding [Armv8.5]. + void frint64x(const VRegister& vd, const VRegister& vn); + + // FP round to 64-bit integer, towards zero [Armv8.5]. + void frint64z(const VRegister& vd, const VRegister& vn); + + void FPCompareMacro(const VRegister& vn, double value, FPTrapFlags trap); + + void FPCompareMacro(const VRegister& vn, + const VRegister& vm, + FPTrapFlags trap); + + // FP compare registers. + void fcmp(const VRegister& vn, const VRegister& vm); + + // FP compare immediate. + void fcmp(const VRegister& vn, double value); + + void FPCCompareMacro(const VRegister& vn, + const VRegister& vm, + StatusFlags nzcv, + Condition cond, + FPTrapFlags trap); + + // FP conditional compare. + void fccmp(const VRegister& vn, + const VRegister& vm, + StatusFlags nzcv, + Condition cond); + + // FP signaling compare registers. + void fcmpe(const VRegister& vn, const VRegister& vm); + + // FP signaling compare immediate. + void fcmpe(const VRegister& vn, double value); + + // FP conditional signaling compare. + void fccmpe(const VRegister& vn, + const VRegister& vm, + StatusFlags nzcv, + Condition cond); + + // FP conditional select. + void fcsel(const VRegister& vd, + const VRegister& vn, + const VRegister& vm, + Condition cond); + + // Common FP Convert functions. + void NEONFPConvertToInt(const Register& rd, const VRegister& vn, Instr op); + void NEONFPConvertToInt(const VRegister& vd, const VRegister& vn, Instr op); + void NEONFP16ConvertToInt(const VRegister& vd, const VRegister& vn, Instr op); + + // FP convert between precisions. + void fcvt(const VRegister& vd, const VRegister& vn); + + // FP convert to higher precision. + void fcvtl(const VRegister& vd, const VRegister& vn); + + // FP convert to higher precision (second part). + void fcvtl2(const VRegister& vd, const VRegister& vn); + + // FP convert to lower precision. + void fcvtn(const VRegister& vd, const VRegister& vn); + + // FP convert to lower precision (second part). + void fcvtn2(const VRegister& vd, const VRegister& vn); + + // FP convert to lower precision, rounding to odd. + void fcvtxn(const VRegister& vd, const VRegister& vn); + + // FP convert to lower precision, rounding to odd (second part). + void fcvtxn2(const VRegister& vd, const VRegister& vn); + + // FP convert to signed integer, nearest with ties to away. + void fcvtas(const Register& rd, const VRegister& vn); + + // FP convert to unsigned integer, nearest with ties to away. + void fcvtau(const Register& rd, const VRegister& vn); + + // FP convert to signed integer, nearest with ties to away. + void fcvtas(const VRegister& vd, const VRegister& vn); + + // FP convert to unsigned integer, nearest with ties to away. + void fcvtau(const VRegister& vd, const VRegister& vn); + + // FP convert to signed integer, round towards -infinity. + void fcvtms(const Register& rd, const VRegister& vn); + + // FP convert to unsigned integer, round towards -infinity. + void fcvtmu(const Register& rd, const VRegister& vn); + + // FP convert to signed integer, round towards -infinity. + void fcvtms(const VRegister& vd, const VRegister& vn); + + // FP convert to unsigned integer, round towards -infinity. + void fcvtmu(const VRegister& vd, const VRegister& vn); + + // FP convert to signed integer, nearest with ties to even.
+ void fcvtns(const Register& rd, const VRegister& vn); + + // FP JavaScript convert to signed integer, rounding toward zero [Armv8.3]. + void fjcvtzs(const Register& rd, const VRegister& vn); + + // FP convert to unsigned integer, nearest with ties to even. + void fcvtnu(const Register& rd, const VRegister& vn); + + // FP convert to signed integer, nearest with ties to even. + void fcvtns(const VRegister& rd, const VRegister& vn); + + // FP convert to unsigned integer, nearest with ties to even. + void fcvtnu(const VRegister& rd, const VRegister& vn); + + // FP convert to signed integer or fixed-point, round towards zero. + void fcvtzs(const Register& rd, const VRegister& vn, int fbits = 0); + + // FP convert to unsigned integer or fixed-point, round towards zero. + void fcvtzu(const Register& rd, const VRegister& vn, int fbits = 0); + + // FP convert to signed integer or fixed-point, round towards zero. + void fcvtzs(const VRegister& vd, const VRegister& vn, int fbits = 0); + + // FP convert to unsigned integer or fixed-point, round towards zero. + void fcvtzu(const VRegister& vd, const VRegister& vn, int fbits = 0); + + // FP convert to signed integer, round towards +infinity. + void fcvtps(const Register& rd, const VRegister& vn); + + // FP convert to unsigned integer, round towards +infinity. + void fcvtpu(const Register& rd, const VRegister& vn); + + // FP convert to signed integer, round towards +infinity. + void fcvtps(const VRegister& vd, const VRegister& vn); + + // FP convert to unsigned integer, round towards +infinity. + void fcvtpu(const VRegister& vd, const VRegister& vn); + + // Convert signed integer or fixed point to FP. + void scvtf(const VRegister& fd, const Register& rn, int fbits = 0); + + // Convert unsigned integer or fixed point to FP. + void ucvtf(const VRegister& fd, const Register& rn, int fbits = 0); + + // Convert signed integer or fixed-point to FP. + void scvtf(const VRegister& fd, const VRegister& vn, int fbits = 0); + + // Convert unsigned integer or fixed-point to FP. + void ucvtf(const VRegister& fd, const VRegister& vn, int fbits = 0); + + // Unsigned absolute difference. + void uabd(const VRegister& vd, const VRegister& vn, const VRegister& vm); + + // Signed absolute difference. + void sabd(const VRegister& vd, const VRegister& vn, const VRegister& vm); + + // Unsigned absolute difference and accumulate. + void uaba(const VRegister& vd, const VRegister& vn, const VRegister& vm); + + // Signed absolute difference and accumulate. + void saba(const VRegister& vd, const VRegister& vn, const VRegister& vm); + + // Add. + void add(const VRegister& vd, const VRegister& vn, const VRegister& vm); + + // Subtract. + void sub(const VRegister& vd, const VRegister& vn, const VRegister& vm); + + // Unsigned halving add. + void uhadd(const VRegister& vd, const VRegister& vn, const VRegister& vm); + + // Signed halving add. + void shadd(const VRegister& vd, const VRegister& vn, const VRegister& vm); + + // Unsigned rounding halving add. + void urhadd(const VRegister& vd, const VRegister& vn, const VRegister& vm); + + // Signed rounding halving add. + void srhadd(const VRegister& vd, const VRegister& vn, const VRegister& vm); + + // Unsigned halving sub. + void uhsub(const VRegister& vd, const VRegister& vn, const VRegister& vm); + + // Signed halving sub. + void shsub(const VRegister& vd, const VRegister& vn, const VRegister& vm); + + // Unsigned saturating add. + void uqadd(const VRegister& vd, const VRegister& vn, const VRegister& vm); + + // Signed saturating add. 
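The `fbits` argument on the conversions above selects a fixed-point interpretation of the integer operand; the default of 0 gives a plain integer conversion. A minimal sketch under the same assumptions as the earlier examples:

```cpp
// Hypothetical helper: Q16.16 fixed-point round trip through double precision.
void EmitFixedPointConvert(Assembler* assm) {
  assm->scvtf(d0, w1, 16);   // d0 = (double)w1 / 65536.0
  assm->fcvtzs(w2, d0, 16);  // w2 = (int32_t)(d0 * 65536.0), rounded toward zero
  assm->fcvtzs(x3, d0);      // fbits = 0: ordinary convert-to-integer
}
```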
+ void sqadd(const VRegister& vd, const VRegister& vn, const VRegister& vm); + + // Unsigned saturating subtract. + void uqsub(const VRegister& vd, const VRegister& vn, const VRegister& vm); + + // Signed saturating subtract. + void sqsub(const VRegister& vd, const VRegister& vn, const VRegister& vm); + + // Add pairwise. + void addp(const VRegister& vd, const VRegister& vn, const VRegister& vm); + + // Add pair of elements scalar. + void addp(const VRegister& vd, const VRegister& vn); + + // Multiply-add to accumulator. + void mla(const VRegister& vd, const VRegister& vn, const VRegister& vm); + + // Multiply-subtract to accumulator. + void mls(const VRegister& vd, const VRegister& vn, const VRegister& vm); + + // Multiply. + void mul(const VRegister& vd, const VRegister& vn, const VRegister& vm); + + // Multiply by scalar element. + void mul(const VRegister& vd, + const VRegister& vn, + const VRegister& vm, + int vm_index); + + // Multiply-add by scalar element. + void mla(const VRegister& vd, + const VRegister& vn, + const VRegister& vm, + int vm_index); + + // Multiply-subtract by scalar element. + void mls(const VRegister& vd, + const VRegister& vn, + const VRegister& vm, + int vm_index); + + // Signed long multiply-add by scalar element. + void smlal(const VRegister& vd, + const VRegister& vn, + const VRegister& vm, + int vm_index); + + // Signed long multiply-add by scalar element (second part). + void smlal2(const VRegister& vd, + const VRegister& vn, + const VRegister& vm, + int vm_index); + + // Unsigned long multiply-add by scalar element. + void umlal(const VRegister& vd, + const VRegister& vn, + const VRegister& vm, + int vm_index); + + // Unsigned long multiply-add by scalar element (second part). + void umlal2(const VRegister& vd, + const VRegister& vn, + const VRegister& vm, + int vm_index); + + // Signed long multiply-sub by scalar element. + void smlsl(const VRegister& vd, + const VRegister& vn, + const VRegister& vm, + int vm_index); + + // Signed long multiply-sub by scalar element (second part). + void smlsl2(const VRegister& vd, + const VRegister& vn, + const VRegister& vm, + int vm_index); + + // Unsigned long multiply-sub by scalar element. + void umlsl(const VRegister& vd, + const VRegister& vn, + const VRegister& vm, + int vm_index); + + // Unsigned long multiply-sub by scalar element (second part). + void umlsl2(const VRegister& vd, + const VRegister& vn, + const VRegister& vm, + int vm_index); + + // Signed long multiply by scalar element. + void smull(const VRegister& vd, + const VRegister& vn, + const VRegister& vm, + int vm_index); + + // Signed long multiply by scalar element (second part). + void smull2(const VRegister& vd, + const VRegister& vn, + const VRegister& vm, + int vm_index); + + // Unsigned long multiply by scalar element. + void umull(const VRegister& vd, + const VRegister& vn, + const VRegister& vm, + int vm_index); + + // Unsigned long multiply by scalar element (second part). + void umull2(const VRegister& vd, + const VRegister& vn, + const VRegister& vm, + int vm_index); + + // Signed saturating double long multiply by element. + void sqdmull(const VRegister& vd, + const VRegister& vn, + const VRegister& vm, + int vm_index); + + // Signed saturating double long multiply by element (second part). + void sqdmull2(const VRegister& vd, + const VRegister& vn, + const VRegister& vm, + int vm_index); + + // Signed saturating doubling long multiply-add by element. 
+ void sqdmlal(const VRegister& vd, + const VRegister& vn, + const VRegister& vm, + int vm_index); + + // Signed saturating doubling long multiply-add by element (second part). + void sqdmlal2(const VRegister& vd, + const VRegister& vn, + const VRegister& vm, + int vm_index); + + // Signed saturating doubling long multiply-sub by element. + void sqdmlsl(const VRegister& vd, + const VRegister& vn, + const VRegister& vm, + int vm_index); + + // Signed saturating doubling long multiply-sub by element (second part). + void sqdmlsl2(const VRegister& vd, + const VRegister& vn, + const VRegister& vm, + int vm_index); + + // Compare equal. + void cmeq(const VRegister& vd, const VRegister& vn, const VRegister& vm); + + // Compare signed greater than or equal. + void cmge(const VRegister& vd, const VRegister& vn, const VRegister& vm); + + // Compare signed greater than. + void cmgt(const VRegister& vd, const VRegister& vn, const VRegister& vm); + + // Compare unsigned higher. + void cmhi(const VRegister& vd, const VRegister& vn, const VRegister& vm); + + // Compare unsigned higher or same. + void cmhs(const VRegister& vd, const VRegister& vn, const VRegister& vm); + + // Compare bitwise test bits nonzero. + void cmtst(const VRegister& vd, const VRegister& vn, const VRegister& vm); + + // Compare bitwise to zero. + void cmeq(const VRegister& vd, const VRegister& vn, int value); + + // Compare signed greater than or equal to zero. + void cmge(const VRegister& vd, const VRegister& vn, int value); + + // Compare signed greater than zero. + void cmgt(const VRegister& vd, const VRegister& vn, int value); + + // Compare signed less than or equal to zero. + void cmle(const VRegister& vd, const VRegister& vn, int value); + + // Compare signed less than zero. + void cmlt(const VRegister& vd, const VRegister& vn, int value); + + // Signed shift left by register. + void sshl(const VRegister& vd, const VRegister& vn, const VRegister& vm); + + // Unsigned shift left by register. + void ushl(const VRegister& vd, const VRegister& vn, const VRegister& vm); + + // Signed saturating shift left by register. + void sqshl(const VRegister& vd, const VRegister& vn, const VRegister& vm); + + // Unsigned saturating shift left by register. + void uqshl(const VRegister& vd, const VRegister& vn, const VRegister& vm); + + // Signed rounding shift left by register. + void srshl(const VRegister& vd, const VRegister& vn, const VRegister& vm); + + // Unsigned rounding shift left by register. + void urshl(const VRegister& vd, const VRegister& vn, const VRegister& vm); + + // Signed saturating rounding shift left by register. + void sqrshl(const VRegister& vd, const VRegister& vn, const VRegister& vm); + + // Unsigned saturating rounding shift left by register. + void uqrshl(const VRegister& vd, const VRegister& vn, const VRegister& vm); + + // Bitwise and. + void and_(const VRegister& vd, const VRegister& vn, const VRegister& vm); + + // Bitwise or. + void orr(const VRegister& vd, const VRegister& vn, const VRegister& vm); + + // Bitwise or immediate. + void orr(const VRegister& vd, const int imm8, const int left_shift = 0); + + // Move register to register. + void mov(const VRegister& vd, const VRegister& vn); + + // Bitwise orn. + void orn(const VRegister& vd, const VRegister& vn, const VRegister& vm); + + // Bitwise eor. + void eor(const VRegister& vd, const VRegister& vn, const VRegister& vm); + + // Bit clear immediate. + void bic(const VRegister& vd, const int imm8, const int left_shift = 0); + + // Bit clear. 
+ void bic(const VRegister& vd, const VRegister& vn, const VRegister& vm); + + // Bitwise insert if false. + void bif(const VRegister& vd, const VRegister& vn, const VRegister& vm); + + // Bitwise insert if true. + void bit(const VRegister& vd, const VRegister& vn, const VRegister& vm); + + // Bitwise select. + void bsl(const VRegister& vd, const VRegister& vn, const VRegister& vm); + + // Polynomial multiply. + void pmul(const VRegister& vd, const VRegister& vn, const VRegister& vm); + + // Vector move immediate. + void movi(const VRegister& vd, + const uint64_t imm, + Shift shift = LSL, + const int shift_amount = 0); + + // Bitwise not. + void mvn(const VRegister& vd, const VRegister& vn); + + // Vector move inverted immediate. + void mvni(const VRegister& vd, + const int imm8, + Shift shift = LSL, + const int shift_amount = 0); + + // Signed saturating accumulate of unsigned value. + void suqadd(const VRegister& vd, const VRegister& vn); + + // Unsigned saturating accumulate of signed value. + void usqadd(const VRegister& vd, const VRegister& vn); + + // Absolute value. + void abs(const VRegister& vd, const VRegister& vn); + + // Signed saturating absolute value. + void sqabs(const VRegister& vd, const VRegister& vn); + + // Negate. + void neg(const VRegister& vd, const VRegister& vn); + + // Signed saturating negate. + void sqneg(const VRegister& vd, const VRegister& vn); + + // Bitwise not. + void not_(const VRegister& vd, const VRegister& vn); + + // Extract narrow. + void xtn(const VRegister& vd, const VRegister& vn); + + // Extract narrow (second part). + void xtn2(const VRegister& vd, const VRegister& vn); + + // Signed saturating extract narrow. + void sqxtn(const VRegister& vd, const VRegister& vn); + + // Signed saturating extract narrow (second part). + void sqxtn2(const VRegister& vd, const VRegister& vn); + + // Unsigned saturating extract narrow. + void uqxtn(const VRegister& vd, const VRegister& vn); + + // Unsigned saturating extract narrow (second part). + void uqxtn2(const VRegister& vd, const VRegister& vn); + + // Signed saturating extract unsigned narrow. + void sqxtun(const VRegister& vd, const VRegister& vn); + + // Signed saturating extract unsigned narrow (second part). + void sqxtun2(const VRegister& vd, const VRegister& vn); + + // Extract vector from pair of vectors. + void ext(const VRegister& vd, + const VRegister& vn, + const VRegister& vm, + int index); + + // Duplicate vector element to vector or scalar. + void dup(const VRegister& vd, const VRegister& vn, int vn_index); + + // Move vector element to scalar. + void mov(const VRegister& vd, const VRegister& vn, int vn_index); + + // Duplicate general-purpose register to vector. + void dup(const VRegister& vd, const Register& rn); + + // Insert vector element from another vector element. + void ins(const VRegister& vd, + int vd_index, + const VRegister& vn, + int vn_index); + + // Move vector element to another vector element. + void mov(const VRegister& vd, + int vd_index, + const VRegister& vn, + int vn_index); + + // Insert vector element from general-purpose register. + void ins(const VRegister& vd, int vd_index, const Register& rn); + + // Move general-purpose register to a vector element. + void mov(const VRegister& vd, int vd_index, const Register& rn); + + // Unsigned move vector element to general-purpose register. + void umov(const Register& rd, const VRegister& vn, int vn_index); + + // Move vector element to general-purpose register. 
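A short sketch of the vector immediate and lane-move forms above (same assumptions; lane choices are arbitrary). The `V4S()`-style methods on `VRegister` select the arrangement.

```cpp
// Hypothetical helper: broadcast, insert and extract 32-bit lanes.
void EmitLaneMoves(Assembler* assm) {
  assm->movi(v0.V4S(), 0);      // clear all four lanes of v0
  assm->dup(v1.V4S(), w2);      // broadcast w2 into every lane of v1
  assm->ins(v0.V4S(), 3, w2);   // insert w2 into lane 3 of v0
  assm->umov(w3, v1.V4S(), 1);  // read lane 1 of v1 back into w3
}
```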
+ void mov(const Register& rd, const VRegister& vn, int vn_index); + + // Signed move vector element to general-purpose register. + void smov(const Register& rd, const VRegister& vn, int vn_index); + + // One-element structure load to one register. + void ld1(const VRegister& vt, const MemOperand& src); + + // One-element structure load to two registers. + void ld1(const VRegister& vt, const VRegister& vt2, const MemOperand& src); + + // One-element structure load to three registers. + void ld1(const VRegister& vt, + const VRegister& vt2, + const VRegister& vt3, + const MemOperand& src); + + // One-element structure load to four registers. + void ld1(const VRegister& vt, + const VRegister& vt2, + const VRegister& vt3, + const VRegister& vt4, + const MemOperand& src); + + // One-element single structure load to one lane. + void ld1(const VRegister& vt, int lane, const MemOperand& src); + + // One-element single structure load to all lanes. + void ld1r(const VRegister& vt, const MemOperand& src); + + // Two-element structure load. + void ld2(const VRegister& vt, const VRegister& vt2, const MemOperand& src); + + // Two-element single structure load to one lane. + void ld2(const VRegister& vt, + const VRegister& vt2, + int lane, + const MemOperand& src); + + // Two-element single structure load to all lanes. + void ld2r(const VRegister& vt, const VRegister& vt2, const MemOperand& src); + + // Three-element structure load. + void ld3(const VRegister& vt, + const VRegister& vt2, + const VRegister& vt3, + const MemOperand& src); + + // Three-element single structure load to one lane. + void ld3(const VRegister& vt, + const VRegister& vt2, + const VRegister& vt3, + int lane, + const MemOperand& src); + + // Three-element single structure load to all lanes. + void ld3r(const VRegister& vt, + const VRegister& vt2, + const VRegister& vt3, + const MemOperand& src); + + // Four-element structure load. + void ld4(const VRegister& vt, + const VRegister& vt2, + const VRegister& vt3, + const VRegister& vt4, + const MemOperand& src); + + // Four-element single structure load to one lane. + void ld4(const VRegister& vt, + const VRegister& vt2, + const VRegister& vt3, + const VRegister& vt4, + int lane, + const MemOperand& src); + + // Four-element single structure load to all lanes. + void ld4r(const VRegister& vt, + const VRegister& vt2, + const VRegister& vt3, + const VRegister& vt4, + const MemOperand& src); + + // Count leading sign bits. + void cls(const VRegister& vd, const VRegister& vn); + + // Count leading zero bits (vector). + void clz(const VRegister& vd, const VRegister& vn); + + // Population count per byte. + void cnt(const VRegister& vd, const VRegister& vn); + + // Reverse bit order. + void rbit(const VRegister& vd, const VRegister& vn); + + // Reverse elements in 16-bit halfwords. + void rev16(const VRegister& vd, const VRegister& vn); + + // Reverse elements in 32-bit words. + void rev32(const VRegister& vd, const VRegister& vn); + + // Reverse elements in 64-bit doublewords. + void rev64(const VRegister& vd, const VRegister& vn); + + // Unsigned reciprocal square root estimate. + void ursqrte(const VRegister& vd, const VRegister& vn); + + // Unsigned reciprocal estimate. + void urecpe(const VRegister& vd, const VRegister& vn); + + // Signed pairwise long add. + void saddlp(const VRegister& vd, const VRegister& vn); + + // Unsigned pairwise long add. + void uaddlp(const VRegister& vd, const VRegister& vn); + + // Signed pairwise long add and accumulate. 
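+ // Adds adjacent pairs of signed elements from vn, widens each sum and
+ // accumulates it into the corresponding double-width element of vd, e.g.
+ // sadalp(v0.V4S(), v1.V8H()) for "sadalp v0.4s, v1.8h".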
+ void sadalp(const VRegister& vd, const VRegister& vn); + + // Unsigned pairwise long add and accumulate. + void uadalp(const VRegister& vd, const VRegister& vn); + + // Shift left by immediate. + void shl(const VRegister& vd, const VRegister& vn, int shift); + + // Signed saturating shift left by immediate. + void sqshl(const VRegister& vd, const VRegister& vn, int shift); + + // Signed saturating shift left unsigned by immediate. + void sqshlu(const VRegister& vd, const VRegister& vn, int shift); + + // Unsigned saturating shift left by immediate. + void uqshl(const VRegister& vd, const VRegister& vn, int shift); + + // Signed shift left long by immediate. + void sshll(const VRegister& vd, const VRegister& vn, int shift); + + // Signed shift left long by immediate (second part). + void sshll2(const VRegister& vd, const VRegister& vn, int shift); + + // Signed extend long. + void sxtl(const VRegister& vd, const VRegister& vn); + + // Signed extend long (second part). + void sxtl2(const VRegister& vd, const VRegister& vn); + + // Unsigned shift left long by immediate. + void ushll(const VRegister& vd, const VRegister& vn, int shift); + + // Unsigned shift left long by immediate (second part). + void ushll2(const VRegister& vd, const VRegister& vn, int shift); + + // Shift left long by element size. + void shll(const VRegister& vd, const VRegister& vn, int shift); + + // Shift left long by element size (second part). + void shll2(const VRegister& vd, const VRegister& vn, int shift); + + // Unsigned extend long. + void uxtl(const VRegister& vd, const VRegister& vn); + + // Unsigned extend long (second part). + void uxtl2(const VRegister& vd, const VRegister& vn); + + // Shift left by immediate and insert. + void sli(const VRegister& vd, const VRegister& vn, int shift); + + // Shift right by immediate and insert. + void sri(const VRegister& vd, const VRegister& vn, int shift); + + // Signed maximum. + void smax(const VRegister& vd, const VRegister& vn, const VRegister& vm); + + // Signed pairwise maximum. + void smaxp(const VRegister& vd, const VRegister& vn, const VRegister& vm); + + // Add across vector. + void addv(const VRegister& vd, const VRegister& vn); + + // Signed add long across vector. + void saddlv(const VRegister& vd, const VRegister& vn); + + // Unsigned add long across vector. + void uaddlv(const VRegister& vd, const VRegister& vn); + + // FP maximum number across vector. + void fmaxnmv(const VRegister& vd, const VRegister& vn); + + // FP maximum across vector. + void fmaxv(const VRegister& vd, const VRegister& vn); + + // FP minimum number across vector. + void fminnmv(const VRegister& vd, const VRegister& vn); + + // FP minimum across vector. + void fminv(const VRegister& vd, const VRegister& vn); + + // Signed maximum across vector. + void smaxv(const VRegister& vd, const VRegister& vn); + + // Signed minimum. + void smin(const VRegister& vd, const VRegister& vn, const VRegister& vm); + + // Signed minimum pairwise. + void sminp(const VRegister& vd, const VRegister& vn, const VRegister& vm); + + // Signed minimum across vector. + void sminv(const VRegister& vd, const VRegister& vn); + + // One-element structure store from one register. + void st1(const VRegister& vt, const MemOperand& src); + + // One-element structure store from two registers. + void st1(const VRegister& vt, const VRegister& vt2, const MemOperand& src); + + // One-element structure store from three registers. 
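+ // The source registers must be sequentially numbered, e.g. (illustratively)
+ // st1(v4.V4S(), v5.V4S(), v6.V4S(), MemOperand(x0)) stores the three
+ // vectors back to back at the address in x0:
+ // "st1 {v4.4s, v5.4s, v6.4s}, [x0]".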
+ void st1(const VRegister& vt, + const VRegister& vt2, + const VRegister& vt3, + const MemOperand& src); + + // One-element structure store from four registers. + void st1(const VRegister& vt, + const VRegister& vt2, + const VRegister& vt3, + const VRegister& vt4, + const MemOperand& src); + + // One-element single structure store from one lane. + void st1(const VRegister& vt, int lane, const MemOperand& src); + + // Two-element structure store from two registers. + void st2(const VRegister& vt, const VRegister& vt2, const MemOperand& src); + + // Two-element single structure store from two lanes. + void st2(const VRegister& vt, + const VRegister& vt2, + int lane, + const MemOperand& src); + + // Three-element structure store from three registers. + void st3(const VRegister& vt, + const VRegister& vt2, + const VRegister& vt3, + const MemOperand& src); + + // Three-element single structure store from three lanes. + void st3(const VRegister& vt, + const VRegister& vt2, + const VRegister& vt3, + int lane, + const MemOperand& src); + + // Four-element structure store from four registers. + void st4(const VRegister& vt, + const VRegister& vt2, + const VRegister& vt3, + const VRegister& vt4, + const MemOperand& src); + + // Four-element single structure store from four lanes. + void st4(const VRegister& vt, + const VRegister& vt2, + const VRegister& vt3, + const VRegister& vt4, + int lane, + const MemOperand& src); + + // Unsigned add long. + void uaddl(const VRegister& vd, const VRegister& vn, const VRegister& vm); + + // Unsigned add long (second part). + void uaddl2(const VRegister& vd, const VRegister& vn, const VRegister& vm); + + // Unsigned add wide. + void uaddw(const VRegister& vd, const VRegister& vn, const VRegister& vm); + + // Unsigned add wide (second part). + void uaddw2(const VRegister& vd, const VRegister& vn, const VRegister& vm); + + // Signed add long. + void saddl(const VRegister& vd, const VRegister& vn, const VRegister& vm); + + // Signed add long (second part). + void saddl2(const VRegister& vd, const VRegister& vn, const VRegister& vm); + + // Signed add wide. + void saddw(const VRegister& vd, const VRegister& vn, const VRegister& vm); + + // Signed add wide (second part). + void saddw2(const VRegister& vd, const VRegister& vn, const VRegister& vm); + + // Unsigned subtract long. + void usubl(const VRegister& vd, const VRegister& vn, const VRegister& vm); + + // Unsigned subtract long (second part). + void usubl2(const VRegister& vd, const VRegister& vn, const VRegister& vm); + + // Unsigned subtract wide. + void usubw(const VRegister& vd, const VRegister& vn, const VRegister& vm); + + // Unsigned subtract wide (second part). + void usubw2(const VRegister& vd, const VRegister& vn, const VRegister& vm); + + // Signed subtract long. + void ssubl(const VRegister& vd, const VRegister& vn, const VRegister& vm); + + // Signed subtract long (second part). + void ssubl2(const VRegister& vd, const VRegister& vn, const VRegister& vm); + + // Signed integer subtract wide. + void ssubw(const VRegister& vd, const VRegister& vn, const VRegister& vm); + + // Signed integer subtract wide (second part). + void ssubw2(const VRegister& vd, const VRegister& vn, const VRegister& vm); + + // Unsigned maximum. + void umax(const VRegister& vd, const VRegister& vn, const VRegister& vm); + + // Unsigned pairwise maximum. + void umaxp(const VRegister& vd, const VRegister& vn, const VRegister& vm); + + // Unsigned maximum across vector. 
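+ // Reduces vn to its largest unsigned element, written to the scalar
+ // destination, e.g. umaxv(h0, v1.V8H()) for "umaxv h0, v1.8h".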
+ void umaxv(const VRegister& vd, const VRegister& vn); + + // Unsigned minimum. + void umin(const VRegister& vd, const VRegister& vn, const VRegister& vm); + + // Unsigned pairwise minimum. + void uminp(const VRegister& vd, const VRegister& vn, const VRegister& vm); + + // Unsigned minimum across vector. + void uminv(const VRegister& vd, const VRegister& vn); + + // Transpose vectors (primary). + void trn1(const VRegister& vd, const VRegister& vn, const VRegister& vm); + + // Transpose vectors (secondary). + void trn2(const VRegister& vd, const VRegister& vn, const VRegister& vm); + + // Unzip vectors (primary). + void uzp1(const VRegister& vd, const VRegister& vn, const VRegister& vm); + + // Unzip vectors (secondary). + void uzp2(const VRegister& vd, const VRegister& vn, const VRegister& vm); + + // Zip vectors (primary). + void zip1(const VRegister& vd, const VRegister& vn, const VRegister& vm); + + // Zip vectors (secondary). + void zip2(const VRegister& vd, const VRegister& vn, const VRegister& vm); + + // Signed shift right by immediate. + void sshr(const VRegister& vd, const VRegister& vn, int shift); + + // Unsigned shift right by immediate. + void ushr(const VRegister& vd, const VRegister& vn, int shift); + + // Signed rounding shift right by immediate. + void srshr(const VRegister& vd, const VRegister& vn, int shift); + + // Unsigned rounding shift right by immediate. + void urshr(const VRegister& vd, const VRegister& vn, int shift); + + // Signed shift right by immediate and accumulate. + void ssra(const VRegister& vd, const VRegister& vn, int shift); + + // Unsigned shift right by immediate and accumulate. + void usra(const VRegister& vd, const VRegister& vn, int shift); + + // Signed rounding shift right by immediate and accumulate. + void srsra(const VRegister& vd, const VRegister& vn, int shift); + + // Unsigned rounding shift right by immediate and accumulate. + void ursra(const VRegister& vd, const VRegister& vn, int shift); + + // Shift right narrow by immediate. + void shrn(const VRegister& vd, const VRegister& vn, int shift); + + // Shift right narrow by immediate (second part). + void shrn2(const VRegister& vd, const VRegister& vn, int shift); + + // Rounding shift right narrow by immediate. + void rshrn(const VRegister& vd, const VRegister& vn, int shift); + + // Rounding shift right narrow by immediate (second part). + void rshrn2(const VRegister& vd, const VRegister& vn, int shift); + + // Unsigned saturating shift right narrow by immediate. + void uqshrn(const VRegister& vd, const VRegister& vn, int shift); + + // Unsigned saturating shift right narrow by immediate (second part). + void uqshrn2(const VRegister& vd, const VRegister& vn, int shift); + + // Unsigned saturating rounding shift right narrow by immediate. + void uqrshrn(const VRegister& vd, const VRegister& vn, int shift); + + // Unsigned saturating rounding shift right narrow by immediate (second part). + void uqrshrn2(const VRegister& vd, const VRegister& vn, int shift); + + // Signed saturating shift right narrow by immediate. + void sqshrn(const VRegister& vd, const VRegister& vn, int shift); + + // Signed saturating shift right narrow by immediate (second part). + void sqshrn2(const VRegister& vd, const VRegister& vn, int shift); + + // Signed saturating rounded shift right narrow by immediate. + void sqrshrn(const VRegister& vd, const VRegister& vn, int shift); + + // Signed saturating rounded shift right narrow by immediate (second part). 
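+ // As with the other narrowing "2" forms, the result is written to the
+ // upper half of vd and the lower half is left unchanged, e.g.
+ // sqrshrn2(v0.V8H(), v1.V4S(), 3) for "sqrshrn2 v0.8h, v1.4s, #3".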
+ void sqrshrn2(const VRegister& vd, const VRegister& vn, int shift); + + // Signed saturating shift right unsigned narrow by immediate. + void sqshrun(const VRegister& vd, const VRegister& vn, int shift); + + // Signed saturating shift right unsigned narrow by immediate (second part). + void sqshrun2(const VRegister& vd, const VRegister& vn, int shift); + + // Signed sat rounded shift right unsigned narrow by immediate. + void sqrshrun(const VRegister& vd, const VRegister& vn, int shift); + + // Signed sat rounded shift right unsigned narrow by immediate (second part). + void sqrshrun2(const VRegister& vd, const VRegister& vn, int shift); + + // FP reciprocal step. + void frecps(const VRegister& vd, const VRegister& vn, const VRegister& vm); + + // FP reciprocal estimate. + void frecpe(const VRegister& vd, const VRegister& vn); + + // FP reciprocal square root estimate. + void frsqrte(const VRegister& vd, const VRegister& vn); + + // FP reciprocal square root step. + void frsqrts(const VRegister& vd, const VRegister& vn, const VRegister& vm); + + // Signed absolute difference and accumulate long. + void sabal(const VRegister& vd, const VRegister& vn, const VRegister& vm); + + // Signed absolute difference and accumulate long (second part). + void sabal2(const VRegister& vd, const VRegister& vn, const VRegister& vm); + + // Unsigned absolute difference and accumulate long. + void uabal(const VRegister& vd, const VRegister& vn, const VRegister& vm); + + // Unsigned absolute difference and accumulate long (second part). + void uabal2(const VRegister& vd, const VRegister& vn, const VRegister& vm); + + // Signed absolute difference long. + void sabdl(const VRegister& vd, const VRegister& vn, const VRegister& vm); + + // Signed absolute difference long (second part). + void sabdl2(const VRegister& vd, const VRegister& vn, const VRegister& vm); + + // Unsigned absolute difference long. + void uabdl(const VRegister& vd, const VRegister& vn, const VRegister& vm); + + // Unsigned absolute difference long (second part). + void uabdl2(const VRegister& vd, const VRegister& vn, const VRegister& vm); + + // Polynomial multiply long. + void pmull(const VRegister& vd, const VRegister& vn, const VRegister& vm); + + // Polynomial multiply long (second part). + void pmull2(const VRegister& vd, const VRegister& vn, const VRegister& vm); + + // Signed long multiply-add. + void smlal(const VRegister& vd, const VRegister& vn, const VRegister& vm); + + // Signed long multiply-add (second part). + void smlal2(const VRegister& vd, const VRegister& vn, const VRegister& vm); + + // Unsigned long multiply-add. + void umlal(const VRegister& vd, const VRegister& vn, const VRegister& vm); + + // Unsigned long multiply-add (second part). + void umlal2(const VRegister& vd, const VRegister& vn, const VRegister& vm); + + // Signed long multiply-sub. + void smlsl(const VRegister& vd, const VRegister& vn, const VRegister& vm); + + // Signed long multiply-sub (second part). + void smlsl2(const VRegister& vd, const VRegister& vn, const VRegister& vm); + + // Unsigned long multiply-sub. + void umlsl(const VRegister& vd, const VRegister& vn, const VRegister& vm); + + // Unsigned long multiply-sub (second part). + void umlsl2(const VRegister& vd, const VRegister& vn, const VRegister& vm); + + // Signed long multiply. + void smull(const VRegister& vd, const VRegister& vn, const VRegister& vm); + + // Signed long multiply (second part). 
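+ // The widening "2" forms take their inputs from the upper halves of the
+ // source vectors, e.g. smull2(v0.V4S(), v1.V8H(), v2.V8H()) for
+ // "smull2 v0.4s, v1.8h, v2.8h".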
+ void smull2(const VRegister& vd, const VRegister& vn, const VRegister& vm); + + // Signed saturating doubling long multiply-add. + void sqdmlal(const VRegister& vd, const VRegister& vn, const VRegister& vm); + + // Signed saturating doubling long multiply-add (second part). + void sqdmlal2(const VRegister& vd, const VRegister& vn, const VRegister& vm); + + // Signed saturating doubling long multiply-subtract. + void sqdmlsl(const VRegister& vd, const VRegister& vn, const VRegister& vm); + + // Signed saturating doubling long multiply-subtract (second part). + void sqdmlsl2(const VRegister& vd, const VRegister& vn, const VRegister& vm); + + // Signed saturating doubling long multiply. + void sqdmull(const VRegister& vd, const VRegister& vn, const VRegister& vm); + + // Signed saturating doubling long multiply (second part). + void sqdmull2(const VRegister& vd, const VRegister& vn, const VRegister& vm); + + // Signed saturating doubling multiply returning high half. + void sqdmulh(const VRegister& vd, const VRegister& vn, const VRegister& vm); + + // Signed saturating rounding doubling multiply returning high half. + void sqrdmulh(const VRegister& vd, const VRegister& vn, const VRegister& vm); + + // Signed dot product [Armv8.2]. + void sdot(const VRegister& vd, const VRegister& vn, const VRegister& vm); + + // Signed saturating rounding doubling multiply accumulate returning high + // half [Armv8.1]. + void sqrdmlah(const VRegister& vd, const VRegister& vn, const VRegister& vm); + + // Unsigned dot product [Armv8.2]. + void udot(const VRegister& vd, const VRegister& vn, const VRegister& vm); + + // Dot Product with unsigned and signed integers (vector). + void usdot(const VRegister& vd, const VRegister& vn, const VRegister& vm); + + // Dot product with signed and unsigned integers (vector, by element). + void sudot(const VRegister& vd, + const VRegister& vn, + const VRegister& vm, + int vm_index); + + // Dot product with unsigned and signed integers (vector, by element). + void usdot(const VRegister& vd, + const VRegister& vn, + const VRegister& vm, + int vm_index); + + // Signed saturating rounding doubling multiply subtract returning high half + // [Armv8.1]. + void sqrdmlsh(const VRegister& vd, const VRegister& vn, const VRegister& vm); + + // Signed saturating doubling multiply element returning high half. + void sqdmulh(const VRegister& vd, + const VRegister& vn, + const VRegister& vm, + int vm_index); + + // Signed saturating rounding doubling multiply element returning high half. + void sqrdmulh(const VRegister& vd, + const VRegister& vn, + const VRegister& vm, + int vm_index); + + // Signed dot product by element [Armv8.2]. + void sdot(const VRegister& vd, + const VRegister& vn, + const VRegister& vm, + int vm_index); + + // Signed saturating rounding doubling multiply accumulate element returning + // high half [Armv8.1]. + void sqrdmlah(const VRegister& vd, + const VRegister& vn, + const VRegister& vm, + int vm_index); + + // Unsigned dot product by element [Armv8.2]. + void udot(const VRegister& vd, + const VRegister& vn, + const VRegister& vm, + int vm_index); + + // Signed saturating rounding doubling multiply subtract element returning + // high half [Armv8.1]. + void sqrdmlsh(const VRegister& vd, + const VRegister& vn, + const VRegister& vm, + int vm_index); + + // Unsigned long multiply long. + void umull(const VRegister& vd, const VRegister& vn, const VRegister& vm); + + // Unsigned long multiply (second part). 
+ void umull2(const VRegister& vd, const VRegister& vn, const VRegister& vm); + + // Add narrow returning high half. + void addhn(const VRegister& vd, const VRegister& vn, const VRegister& vm); + + // Add narrow returning high half (second part). + void addhn2(const VRegister& vd, const VRegister& vn, const VRegister& vm); + + // Rounding add narrow returning high half. + void raddhn(const VRegister& vd, const VRegister& vn, const VRegister& vm); + + // Rounding add narrow returning high half (second part). + void raddhn2(const VRegister& vd, const VRegister& vn, const VRegister& vm); + + // Subtract narrow returning high half. + void subhn(const VRegister& vd, const VRegister& vn, const VRegister& vm); + + // Subtract narrow returning high half (second part). + void subhn2(const VRegister& vd, const VRegister& vn, const VRegister& vm); + + // Rounding subtract narrow returning high half. + void rsubhn(const VRegister& vd, const VRegister& vn, const VRegister& vm); + + // Rounding subtract narrow returning high half (second part). + void rsubhn2(const VRegister& vd, const VRegister& vn, const VRegister& vm); + + // FP vector multiply accumulate. + void fmla(const VRegister& vd, const VRegister& vn, const VRegister& vm); + + // FP fused multiply-add long to accumulator. + void fmlal(const VRegister& vd, const VRegister& vn, const VRegister& vm); + + // FP fused multiply-add long to accumulator (second part). + void fmlal2(const VRegister& vd, const VRegister& vn, const VRegister& vm); + + // FP fused multiply-add long to accumulator by element. + void fmlal(const VRegister& vd, + const VRegister& vn, + const VRegister& vm, + int vm_index); + + // FP fused multiply-add long to accumulator by element (second part). + void fmlal2(const VRegister& vd, + const VRegister& vn, + const VRegister& vm, + int vm_index); + + // FP vector multiply subtract. + void fmls(const VRegister& vd, const VRegister& vn, const VRegister& vm); + + // FP fused multiply-subtract long to accumulator. + void fmlsl(const VRegister& vd, const VRegister& vn, const VRegister& vm); + + // FP fused multiply-subtract long to accumulator (second part). + void fmlsl2(const VRegister& vd, const VRegister& vn, const VRegister& vm); + + // FP fused multiply-subtract long to accumulator by element. + void fmlsl(const VRegister& vd, + const VRegister& vn, + const VRegister& vm, + int vm_index); + + // FP fused multiply-subtract long to accumulator by element (second part). + void fmlsl2(const VRegister& vd, + const VRegister& vn, + const VRegister& vm, + int vm_index); + + // FP vector multiply extended. + void fmulx(const VRegister& vd, const VRegister& vn, const VRegister& vm); + + // FP absolute greater than or equal. + void facge(const VRegister& vd, const VRegister& vn, const VRegister& vm); + + // FP absolute greater than. + void facgt(const VRegister& vd, const VRegister& vn, const VRegister& vm); + + // FP multiply by element. + void fmul(const VRegister& vd, + const VRegister& vn, + const VRegister& vm, + int vm_index); + + // FP fused multiply-add to accumulator by element. + void fmla(const VRegister& vd, + const VRegister& vn, + const VRegister& vm, + int vm_index); + + // FP fused multiply-sub from accumulator by element. + void fmls(const VRegister& vd, + const VRegister& vn, + const VRegister& vm, + int vm_index); + + // FP multiply extended by element. + void fmulx(const VRegister& vd, + const VRegister& vn, + const VRegister& vm, + int vm_index); + + // FP compare equal. 
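+ // Each destination lane is set to all ones if the comparison is true for
+ // that lane, and to zero otherwise, e.g.
+ // fcmeq(v0.V4S(), v1.V4S(), v2.V4S()) for "fcmeq v0.4s, v1.4s, v2.4s".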
+ void fcmeq(const VRegister& vd, const VRegister& vn, const VRegister& vm); + + // FP greater than. + void fcmgt(const VRegister& vd, const VRegister& vn, const VRegister& vm); + + // FP greater than or equal. + void fcmge(const VRegister& vd, const VRegister& vn, const VRegister& vm); + + // FP compare equal to zero. + void fcmeq(const VRegister& vd, const VRegister& vn, double imm); + + // FP greater than zero. + void fcmgt(const VRegister& vd, const VRegister& vn, double imm); + + // FP greater than or equal to zero. + void fcmge(const VRegister& vd, const VRegister& vn, double imm); + + // FP less than or equal to zero. + void fcmle(const VRegister& vd, const VRegister& vn, double imm); + + // FP less than to zero. + void fcmlt(const VRegister& vd, const VRegister& vn, double imm); + + // FP absolute difference. + void fabd(const VRegister& vd, const VRegister& vn, const VRegister& vm); + + // FP pairwise add vector. + void faddp(const VRegister& vd, const VRegister& vn, const VRegister& vm); + + // FP pairwise add scalar. + void faddp(const VRegister& vd, const VRegister& vn); + + // FP pairwise maximum vector. + void fmaxp(const VRegister& vd, const VRegister& vn, const VRegister& vm); + + // FP pairwise maximum scalar. + void fmaxp(const VRegister& vd, const VRegister& vn); + + // FP pairwise minimum vector. + void fminp(const VRegister& vd, const VRegister& vn, const VRegister& vm); + + // FP pairwise minimum scalar. + void fminp(const VRegister& vd, const VRegister& vn); + + // FP pairwise maximum number vector. + void fmaxnmp(const VRegister& vd, const VRegister& vn, const VRegister& vm); + + // FP pairwise maximum number scalar. + void fmaxnmp(const VRegister& vd, const VRegister& vn); + + // FP pairwise minimum number vector. + void fminnmp(const VRegister& vd, const VRegister& vn, const VRegister& vm); + + // FP pairwise minimum number scalar. + void fminnmp(const VRegister& vd, const VRegister& vn); + + // v8.3 complex numbers - note that these are only partial/helper functions + // and must be used in series in order to perform full CN operations. + + // FP complex multiply accumulate (by element) [Armv8.3]. + void fcmla(const VRegister& vd, + const VRegister& vn, + const VRegister& vm, + int vm_index, + int rot); + + // FP complex multiply accumulate [Armv8.3]. + void fcmla(const VRegister& vd, + const VRegister& vn, + const VRegister& vm, + int rot); + + // FP complex add [Armv8.3]. + void fcadd(const VRegister& vd, + const VRegister& vn, + const VRegister& vm, + int rot); + + // Signed 8-bit integer matrix multiply-accumulate (vector). + void smmla(const VRegister& vd, const VRegister& vn, const VRegister& vm); + + // Unsigned and signed 8-bit integer matrix multiply-accumulate (vector). + void usmmla(const VRegister& vd, const VRegister& vn, const VRegister& vm); + + // Unsigned 8-bit integer matrix multiply-accumulate (vector). + void ummla(const VRegister& vd, const VRegister& vn, const VRegister& vm); + + // Scalable Vector Extensions. + + // Absolute value (predicated). + void abs(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn); + + // Add vectors (predicated). + void add(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm); + + // Add vectors (unpredicated). + void add(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm); + + // Add immediate (unpredicated). + void add(const ZRegister& zd, const ZRegister& zn, int imm8, int shift = -1); + + // Add multiple of predicate register size to scalar register. 
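+ // Sets xd = xn + imm6 * (the predicate register size in bytes, i.e. one
+ // eighth of the vector register size in bytes). Useful for addressing SVE
+ // spill slots when the vector length is not known at compile time, e.g.
+ // addpl(x0, x1, 1).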
+ void addpl(const Register& xd, const Register& xn, int imm6); + + // Add multiple of vector register size to scalar register. + void addvl(const Register& xd, const Register& xn, int imm6); + + // Compute vector address. + void adr(const ZRegister& zd, const SVEMemOperand& addr); + + // Bitwise AND predicates. + void and_(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const PRegisterWithLaneSize& pn, + const PRegisterWithLaneSize& pm); + + // Bitwise AND vectors (predicated). + void and_(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm); + + // Bitwise AND with immediate (unpredicated). + void and_(const ZRegister& zd, const ZRegister& zn, uint64_t imm); + + // Bitwise AND vectors (unpredicated). + void and_(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm); + + // Bitwise AND predicates. + void ands(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const PRegisterWithLaneSize& pn, + const PRegisterWithLaneSize& pm); + + // Bitwise AND reduction to scalar. + void andv(const VRegister& vd, const PRegister& pg, const ZRegister& zn); + + // Arithmetic shift right by immediate (predicated). + void asr(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + int shift); + + // Arithmetic shift right by 64-bit wide elements (predicated). + void asr(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm); + + // Arithmetic shift right by immediate (unpredicated). + void asr(const ZRegister& zd, const ZRegister& zn, int shift); + + // Arithmetic shift right by 64-bit wide elements (unpredicated). + void asr(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm); + + // Arithmetic shift right for divide by immediate (predicated). + void asrd(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + int shift); + + // Reversed arithmetic shift right by vector (predicated). + void asrr(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm); + + // Bitwise clear predicates. + void bic(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const PRegisterWithLaneSize& pn, + const PRegisterWithLaneSize& pm); + + // Bitwise clear vectors (predicated). + void bic(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm); + + // Bitwise clear bits using immediate (unpredicated). + void bic(const ZRegister& zd, const ZRegister& zn, uint64_t imm); + + // Bitwise clear vectors (unpredicated). + void bic(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm); + + // Bitwise clear predicates. + void bics(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const PRegisterWithLaneSize& pn, + const PRegisterWithLaneSize& pm); + + // Break after first true condition. + void brka(const PRegisterWithLaneSize& pd, + const PRegister& pg, + const PRegisterWithLaneSize& pn); + + // Break after first true condition. + void brkas(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const PRegisterWithLaneSize& pn); + + // Break before first true condition. + void brkb(const PRegisterWithLaneSize& pd, + const PRegister& pg, + const PRegisterWithLaneSize& pn); + + // Break before first true condition. + void brkbs(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const PRegisterWithLaneSize& pn); + + // Propagate break to next partition. 
+ void brkn(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const PRegisterWithLaneSize& pn, + const PRegisterWithLaneSize& pm); + + // Propagate break to next partition. + void brkns(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const PRegisterWithLaneSize& pn, + const PRegisterWithLaneSize& pm); + + // Break after first true condition, propagating from previous partition. + void brkpa(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const PRegisterWithLaneSize& pn, + const PRegisterWithLaneSize& pm); + + // Break after first true condition, propagating from previous partition. + void brkpas(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const PRegisterWithLaneSize& pn, + const PRegisterWithLaneSize& pm); + + // Break before first true condition, propagating from previous partition. + void brkpb(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const PRegisterWithLaneSize& pn, + const PRegisterWithLaneSize& pm); + + // Break before first true condition, propagating from previous partition. + void brkpbs(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const PRegisterWithLaneSize& pn, + const PRegisterWithLaneSize& pm); + + // Conditionally extract element after last to general-purpose register. + void clasta(const Register& rd, + const PRegister& pg, + const Register& rn, + const ZRegister& zm); + + // Conditionally extract element after last to SIMD&FP scalar register. + void clasta(const VRegister& vd, + const PRegister& pg, + const VRegister& vn, + const ZRegister& zm); + + // Conditionally extract element after last to vector register. + void clasta(const ZRegister& zd, + const PRegister& pg, + const ZRegister& zn, + const ZRegister& zm); + + // Conditionally extract last element to general-purpose register. + void clastb(const Register& rd, + const PRegister& pg, + const Register& rn, + const ZRegister& zm); + + // Conditionally extract last element to SIMD&FP scalar register. + void clastb(const VRegister& vd, + const PRegister& pg, + const VRegister& vn, + const ZRegister& zm); + + // Conditionally extract last element to vector register. + void clastb(const ZRegister& zd, + const PRegister& pg, + const ZRegister& zn, + const ZRegister& zm); + + // Count leading sign bits (predicated). + void cls(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn); + + // Count leading zero bits (predicated). + void clz(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn); + + void cmp(Condition cond, + const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const ZRegister& zn, + const ZRegister& zm); + + // Compare vector to 64-bit wide elements. + void cmpeq(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const ZRegister& zn, + const ZRegister& zm); + + // Compare vector to immediate. + void cmpeq(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const ZRegister& zn, + int imm5); + + // Compare vector to 64-bit wide elements. + void cmpge(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const ZRegister& zn, + const ZRegister& zm); + + // Compare vector to immediate. + void cmpge(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const ZRegister& zn, + int imm5); + + // Compare vector to 64-bit wide elements. + void cmpgt(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const ZRegister& zn, + const ZRegister& zm); + + // Compare vector to immediate. 
+ void cmpgt(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const ZRegister& zn, + int imm5); + + // Compare vector to 64-bit wide elements. + void cmphi(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const ZRegister& zn, + const ZRegister& zm); + + // Compare vector to immediate. + void cmphi(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const ZRegister& zn, + unsigned imm7); + + // Compare vector to 64-bit wide elements. + void cmphs(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const ZRegister& zn, + const ZRegister& zm); + + // Compare vector to immediate. + void cmphs(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const ZRegister& zn, + unsigned imm7); + + // Compare vector to 64-bit wide elements. + void cmple(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const ZRegister& zn, + const ZRegister& zm); + + // Compare vector to immediate. + void cmple(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const ZRegister& zn, + int imm5); + + // Compare vector to 64-bit wide elements. + void cmplo(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const ZRegister& zn, + const ZRegister& zm); + + // Compare vector to immediate. + void cmplo(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const ZRegister& zn, + unsigned imm7); + + // Compare vector to 64-bit wide elements. + void cmpls(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const ZRegister& zn, + const ZRegister& zm); + + // Compare vector to immediate. + void cmpls(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const ZRegister& zn, + unsigned imm7); + + // Compare vector to 64-bit wide elements. + void cmplt(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const ZRegister& zn, + const ZRegister& zm); + + // Compare vector to immediate. + void cmplt(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const ZRegister& zn, + int imm5); + + // Compare vector to 64-bit wide elements. + void cmpne(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const ZRegister& zn, + const ZRegister& zm); + + // Compare vector to immediate. + void cmpne(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const ZRegister& zn, + int imm5); + + // Logically invert boolean condition in vector (predicated). + void cnot(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn); + + // Count non-zero bits (predicated). + void cnt(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn); + + // Set scalar to multiple of predicate constraint element count. + void cntb(const Register& rd, int pattern = SVE_ALL, int multiplier = 1); + + // Set scalar to multiple of predicate constraint element count. + void cntd(const Register& rd, int pattern = SVE_ALL, int multiplier = 1); + + // Set scalar to multiple of predicate constraint element count. + void cnth(const Register& rd, int pattern = SVE_ALL, int multiplier = 1); + + // Set scalar to active predicate element count. + void cntp(const Register& xd, + const PRegister& pg, + const PRegisterWithLaneSize& pn); + + // Set scalar to multiple of predicate constraint element count. + void cntw(const Register& rd, int pattern = SVE_ALL, int multiplier = 1); + + // Shuffle active elements of vector to the right and fill with zero. + void compact(const ZRegister& zd, const PRegister& pg, const ZRegister& zn); + + // Copy signed integer immediate to vector elements (predicated). 
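+ // Active elements of zd are set to the (optionally shifted) signed
+ // immediate. The governing predicate is typically given as pg.Merging(),
+ // leaving inactive elements unchanged, or pg.Zeroing(), clearing them,
+ // e.g. (illustratively) cpy(z0.VnB(), p0.Merging(), -1).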
+ void cpy(const ZRegister& zd, const PRegister& pg, int imm8, int shift = -1); + + // Copy general-purpose register to vector elements (predicated). + void cpy(const ZRegister& zd, const PRegisterM& pg, const Register& rn); + + // Copy SIMD&FP scalar register to vector elements (predicated). + void cpy(const ZRegister& zd, const PRegisterM& pg, const VRegister& vn); + + // Compare and terminate loop. + void ctermeq(const Register& rn, const Register& rm); + + // Compare and terminate loop. + void ctermne(const Register& rn, const Register& rm); + + // Decrement scalar by multiple of predicate constraint element count. + void decb(const Register& xdn, int pattern = SVE_ALL, int multiplier = 1); + + // Decrement scalar by multiple of predicate constraint element count. + void decd(const Register& xdn, int pattern = SVE_ALL, int multiplier = 1); + + // Decrement vector by multiple of predicate constraint element count. + void decd(const ZRegister& zdn, int pattern = SVE_ALL, int multiplier = 1); + + // Decrement scalar by multiple of predicate constraint element count. + void dech(const Register& xdn, int pattern = SVE_ALL, int multiplier = 1); + + // Decrement vector by multiple of predicate constraint element count. + void dech(const ZRegister& zdn, int pattern = SVE_ALL, int multiplier = 1); + + // Decrement scalar by active predicate element count. + void decp(const Register& rdn, const PRegisterWithLaneSize& pg); + + // Decrement vector by active predicate element count. + void decp(const ZRegister& zdn, const PRegister& pg); + + // Decrement scalar by multiple of predicate constraint element count. + void decw(const Register& xdn, int pattern = SVE_ALL, int multiplier = 1); + + // Decrement vector by multiple of predicate constraint element count. + void decw(const ZRegister& zdn, int pattern = SVE_ALL, int multiplier = 1); + + // Broadcast general-purpose register to vector elements (unpredicated). + void dup(const ZRegister& zd, const Register& xn); + + // Broadcast indexed element to vector (unpredicated). + void dup(const ZRegister& zd, const ZRegister& zn, unsigned index); + + // As for movz/movk/movn, if the default shift of -1 is specified to dup, the + // assembler will pick an appropriate immediate and left shift that is + // equivalent to the immediate argument. If an explicit left shift is + // specified (0 or 8), the immediate must be a signed 8-bit integer. + + // Broadcast signed immediate to vector elements (unpredicated). + void dup(const ZRegister& zd, int imm8, int shift = -1); + + // Broadcast logical bitmask immediate to vector (unpredicated). + void dupm(const ZRegister& zd, uint64_t imm); + + // Bitwise exclusive OR with inverted immediate (unpredicated). + void eon(const ZRegister& zd, const ZRegister& zn, uint64_t imm); + + // Bitwise exclusive OR predicates. + void eor(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const PRegisterWithLaneSize& pn, + const PRegisterWithLaneSize& pm); + + // Bitwise exclusive OR vectors (predicated). + void eor(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm); + + // Bitwise exclusive OR with immediate (unpredicated). + void eor(const ZRegister& zd, const ZRegister& zn, uint64_t imm); + + // Bitwise exclusive OR vectors (unpredicated). + void eor(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm); + + // Bitwise exclusive OR predicates. 
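+ // As eor on predicates above, but also sets the NZCV flags according to
+ // the resulting predicate, so a conditional branch can follow directly.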
+ void eors(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const PRegisterWithLaneSize& pn, + const PRegisterWithLaneSize& pm); + + // Bitwise XOR reduction to scalar. + void eorv(const VRegister& vd, const PRegister& pg, const ZRegister& zn); + + // Extract vector from pair of vectors. + void ext(const ZRegister& zd, + const ZRegister& zn, + const ZRegister& zm, + unsigned offset); + + // Floating-point absolute difference (predicated). + void fabd(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm); + + // Floating-point absolute value (predicated). + void fabs(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn); + + // Floating-point absolute compare vectors. + void facge(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const ZRegister& zn, + const ZRegister& zm); + + // Floating-point absolute compare vectors. + void facgt(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const ZRegister& zn, + const ZRegister& zm); + + // Floating-point add immediate (predicated). + void fadd(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + double imm); + + // Floating-point add vector (predicated). + void fadd(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm); + + // Floating-point add vector (unpredicated). + void fadd(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm); + + // Floating-point add strictly-ordered reduction, accumulating in scalar. + void fadda(const VRegister& vd, + const PRegister& pg, + const VRegister& vn, + const ZRegister& zm); + + // Floating-point add recursive reduction to scalar. + void faddv(const VRegister& vd, const PRegister& pg, const ZRegister& zn); + + // Floating-point complex add with rotate (predicated). + void fcadd(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm, + int rot); + + // Floating-point compare vector with zero. + void fcmeq(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const ZRegister& zn, + double zero); + + // Floating-point compare vectors. + void fcmeq(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const ZRegister& zn, + const ZRegister& zm); + + // Floating-point compare vector with zero. + void fcmge(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const ZRegister& zn, + double zero); + + // Floating-point compare vectors. + void fcmge(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const ZRegister& zn, + const ZRegister& zm); + + // Floating-point compare vector with zero. + void fcmgt(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const ZRegister& zn, + double zero); + + // Floating-point compare vectors. + void fcmgt(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const ZRegister& zn, + const ZRegister& zm); + + // Floating-point complex multiply-add with rotate (predicated). + void fcmla(const ZRegister& zda, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm, + int rot); + + // Floating-point complex multiply-add by indexed values with rotate. + void fcmla(const ZRegister& zda, + const ZRegister& zn, + const ZRegister& zm, + int index, + int rot); + + // Floating-point compare vector with zero. + void fcmle(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const ZRegister& zn, + double zero); + + // Floating-point compare vector with zero. 
+ void fcmlt(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const ZRegister& zn, + double zero); + + // Floating-point compare vector with zero. + void fcmne(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const ZRegister& zn, + double zero); + + // Floating-point compare vectors. + void fcmne(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const ZRegister& zn, + const ZRegister& zm); + + // Floating-point compare vectors. + void fcmuo(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const ZRegister& zn, + const ZRegister& zm); + + // Copy floating-point immediate to vector elements (predicated). + void fcpy(const ZRegister& zd, const PRegisterM& pg, double imm); + + // Copy half-precision floating-point immediate to vector elements + // (predicated). + void fcpy(const ZRegister& zd, const PRegisterM& pg, Float16 imm) { + fcpy(zd, pg, FPToDouble(imm, kIgnoreDefaultNaN)); + } + + // Floating-point convert precision (predicated). + void fcvt(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn); + + // Floating-point convert to signed integer, rounding toward zero + // (predicated). + void fcvtzs(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn); + + // Floating-point convert to unsigned integer, rounding toward zero + // (predicated). + void fcvtzu(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn); + + // Floating-point divide by vector (predicated). + void fdiv(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm); + + // Floating-point reversed divide by vector (predicated). + void fdivr(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm); + + // Broadcast floating-point immediate to vector elements. + void fdup(const ZRegister& zd, double imm); + + // Broadcast half-precision floating-point immediate to vector elements. + void fdup(const ZRegister& zd, Float16 imm) { + fdup(zd, FPToDouble(imm, kIgnoreDefaultNaN)); + } + + // Floating-point exponential accelerator. + void fexpa(const ZRegister& zd, const ZRegister& zn); + + // Floating-point fused multiply-add vectors (predicated), writing + // multiplicand [Zdn = Za + Zdn * Zm]. + void fmad(const ZRegister& zdn, + const PRegisterM& pg, + const ZRegister& zm, + const ZRegister& za); + + // Floating-point maximum with immediate (predicated). + void fmax(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + double imm); + + // Floating-point maximum (predicated). + void fmax(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm); + + // Floating-point maximum number with immediate (predicated). + void fmaxnm(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + double imm); + + // Floating-point maximum number (predicated). + void fmaxnm(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm); + + // Floating-point maximum number recursive reduction to scalar. + void fmaxnmv(const VRegister& vd, const PRegister& pg, const ZRegister& zn); + + // Floating-point maximum recursive reduction to scalar. + void fmaxv(const VRegister& vd, const PRegister& pg, const ZRegister& zn); + + // Floating-point minimum with immediate (predicated). + void fmin(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + double imm); + + // Floating-point minimum (predicated). 
+ void fmin(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm); + + // Floating-point minimum number with immediate (predicated). + void fminnm(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + double imm); + + // Floating-point minimum number (predicated). + void fminnm(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm); + + // Floating-point minimum number recursive reduction to scalar. + void fminnmv(const VRegister& vd, const PRegister& pg, const ZRegister& zn); + + // Floating-point minimum recursive reduction to scalar. + void fminv(const VRegister& vd, const PRegister& pg, const ZRegister& zn); + + // Floating-point fused multiply-add vectors (predicated), writing addend + // [Zda = Zda + Zn * Zm]. + void fmla(const ZRegister& zda, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm); + + // Floating-point fused multiply-add by indexed elements + // (Zda = Zda + Zn * Zm[indexed]). + void fmla(const ZRegister& zda, + const ZRegister& zn, + const ZRegister& zm, + int index); + + // Floating-point fused multiply-subtract vectors (predicated), writing + // addend [Zda = Zda + -Zn * Zm]. + void fmls(const ZRegister& zda, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm); + + // Floating-point fused multiply-subtract by indexed elements + // (Zda = Zda + -Zn * Zm[indexed]). + void fmls(const ZRegister& zda, + const ZRegister& zn, + const ZRegister& zm, + int index); + + // Move 8-bit floating-point immediate to vector elements (unpredicated). + void fmov(const ZRegister& zd, double imm); + + // Move 8-bit floating-point immediate to vector elements (predicated). + void fmov(const ZRegister& zd, const PRegisterM& pg, double imm); + + // Floating-point fused multiply-subtract vectors (predicated), writing + // multiplicand [Zdn = Za + -Zdn * Zm]. + void fmsb(const ZRegister& zdn, + const PRegisterM& pg, + const ZRegister& zm, + const ZRegister& za); + + // Floating-point multiply by immediate (predicated). + void fmul(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + double imm); + + // Floating-point multiply vectors (predicated). + void fmul(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm); + + // Floating-point multiply by indexed elements. + void fmul(const ZRegister& zd, + const ZRegister& zn, + const ZRegister& zm, + unsigned index); + + // Floating-point multiply vectors (unpredicated). + void fmul(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm); + + // Floating-point multiply-extended vectors (predicated). + void fmulx(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm); + + // Floating-point negate (predicated). + void fneg(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn); + + // Floating-point negated fused multiply-add vectors (predicated), writing + // multiplicand [Zdn = -Za + -Zdn * Zm]. + void fnmad(const ZRegister& zdn, + const PRegisterM& pg, + const ZRegister& zm, + const ZRegister& za); + + // Floating-point negated fused multiply-add vectors (predicated), writing + // addend [Zda = -Zda + -Zn * Zm]. + void fnmla(const ZRegister& zda, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm); + + // Floating-point negated fused multiply-subtract vectors (predicated), + // writing addend [Zda = -Zda + Zn * Zm]. 
+ void fnmls(const ZRegister& zda, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm); + + // Floating-point negated fused multiply-subtract vectors (predicated), + // writing multiplicand [Zdn = -Za + Zdn * Zm]. + void fnmsb(const ZRegister& zdn, + const PRegisterM& pg, + const ZRegister& zm, + const ZRegister& za); + + // Floating-point reciprocal estimate (unpredicated). + void frecpe(const ZRegister& zd, const ZRegister& zn); + + // Floating-point reciprocal step (unpredicated). + void frecps(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm); + + // Floating-point reciprocal exponent (predicated). + void frecpx(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn); + + // Floating-point round to integral value (predicated). + void frinta(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn); + + // Floating-point round to integral value (predicated). + void frinti(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn); + + // Floating-point round to integral value (predicated). + void frintm(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn); + + // Floating-point round to integral value (predicated). + void frintn(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn); + + // Floating-point round to integral value (predicated). + void frintp(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn); + + // Floating-point round to integral value (predicated). + void frintx(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn); + + // Floating-point round to integral value (predicated). + void frintz(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn); + + // Floating-point reciprocal square root estimate (unpredicated). + void frsqrte(const ZRegister& zd, const ZRegister& zn); + + // Floating-point reciprocal square root step (unpredicated). + void frsqrts(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm); + + // Floating-point adjust exponent by vector (predicated). + void fscale(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm); + + // Floating-point square root (predicated). + void fsqrt(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn); + + // Floating-point subtract immediate (predicated). + void fsub(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + double imm); + + // Floating-point subtract vectors (predicated). + void fsub(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm); + + // Floating-point subtract vectors (unpredicated). + void fsub(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm); + + // Floating-point reversed subtract from immediate (predicated). + void fsubr(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + double imm); + + // Floating-point reversed subtract vectors (predicated). + void fsubr(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm); + + // Floating-point trigonometric multiply-add coefficient. + void ftmad(const ZRegister& zd, + const ZRegister& zn, + const ZRegister& zm, + int imm3); + + // Floating-point trigonometric starting value. + void ftsmul(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm); + + // Floating-point trigonometric select coefficient. + void ftssel(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm); + + // Increment scalar by multiple of predicate constraint element count. 
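+ // With the default SVE_ALL pattern and a multiplier of 1, this simply adds
+ // the current vector length in bytes, e.g. incb(x0) steps a pointer
+ // through packed byte data one whole vector at a time.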
+ void incb(const Register& xdn, int pattern = SVE_ALL, int multiplier = 1); + + // Increment scalar by multiple of predicate constraint element count. + void incd(const Register& xdn, int pattern = SVE_ALL, int multiplier = 1); + + // Increment vector by multiple of predicate constraint element count. + void incd(const ZRegister& zdn, int pattern = SVE_ALL, int multiplier = 1); + + // Increment scalar by multiple of predicate constraint element count. + void inch(const Register& xdn, int pattern = SVE_ALL, int multiplier = 1); + + // Increment vector by multiple of predicate constraint element count. + void inch(const ZRegister& zdn, int pattern = SVE_ALL, int multiplier = 1); + + // Increment scalar by active predicate element count. + void incp(const Register& rdn, const PRegisterWithLaneSize& pg); + + // Increment vector by active predicate element count. + void incp(const ZRegister& zdn, const PRegister& pg); + + // Increment scalar by multiple of predicate constraint element count. + void incw(const Register& xdn, int pattern = SVE_ALL, int multiplier = 1); + + // Increment vector by multiple of predicate constraint element count. + void incw(const ZRegister& zdn, int pattern = SVE_ALL, int multiplier = 1); + + // Create index starting from and incremented by immediate. + void index(const ZRegister& zd, int start, int step); + + // Create index starting from and incremented by general-purpose register. + void index(const ZRegister& zd, const Register& rn, const Register& rm); + + // Create index starting from general-purpose register and incremented by + // immediate. + void index(const ZRegister& zd, const Register& rn, int imm5); + + // Create index starting from immediate and incremented by general-purpose + // register. + void index(const ZRegister& zd, int imm5, const Register& rm); + + // Insert general-purpose register in shifted vector. + void insr(const ZRegister& zdn, const Register& rm); + + // Insert SIMD&FP scalar register in shifted vector. + void insr(const ZRegister& zdn, const VRegister& vm); + + // Extract element after last to general-purpose register. + void lasta(const Register& rd, const PRegister& pg, const ZRegister& zn); + + // Extract element after last to SIMD&FP scalar register. + void lasta(const VRegister& vd, const PRegister& pg, const ZRegister& zn); + + // Extract last element to general-purpose register. + void lastb(const Register& rd, const PRegister& pg, const ZRegister& zn); + + // Extract last element to SIMD&FP scalar register. + void lastb(const VRegister& vd, const PRegister& pg, const ZRegister& zn); + + // Contiguous/gather load bytes to vector. + void ld1b(const ZRegister& zt, + const PRegisterZ& pg, + const SVEMemOperand& addr); + + // Contiguous/gather load halfwords to vector. + void ld1h(const ZRegister& zt, + const PRegisterZ& pg, + const SVEMemOperand& addr); + + // Contiguous/gather load words to vector. + void ld1w(const ZRegister& zt, + const PRegisterZ& pg, + const SVEMemOperand& addr); + + // Contiguous/gather load doublewords to vector. + void ld1d(const ZRegister& zt, + const PRegisterZ& pg, + const SVEMemOperand& addr); + + // TODO: Merge other loads into the SVEMemOperand versions. + + // Load and broadcast unsigned byte to vector. + void ld1rb(const ZRegister& zt, + const PRegisterZ& pg, + const SVEMemOperand& addr); + + // Load and broadcast unsigned halfword to vector. + void ld1rh(const ZRegister& zt, + const PRegisterZ& pg, + const SVEMemOperand& addr); + + // Load and broadcast unsigned word to vector. 
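+ // Loads a single 32-bit word and replicates it to every active element of
+ // zt, e.g. (illustratively) ld1rw(z0.VnS(), p0.Zeroing(), SVEMemOperand(x0))
+ // for "ld1rw {z0.s}, p0/z, [x0]".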
+ void ld1rw(const ZRegister& zt, + const PRegisterZ& pg, + const SVEMemOperand& addr); + + // Load and broadcast doubleword to vector. + void ld1rd(const ZRegister& zt, + const PRegisterZ& pg, + const SVEMemOperand& addr); + + // Contiguous load and replicate sixteen bytes. + void ld1rqb(const ZRegister& zt, + const PRegisterZ& pg, + const SVEMemOperand& addr); + + // Contiguous load and replicate eight halfwords. + void ld1rqh(const ZRegister& zt, + const PRegisterZ& pg, + const SVEMemOperand& addr); + + // Contiguous load and replicate four words. + void ld1rqw(const ZRegister& zt, + const PRegisterZ& pg, + const SVEMemOperand& addr); + + // Contiguous load and replicate two doublewords. + void ld1rqd(const ZRegister& zt, + const PRegisterZ& pg, + const SVEMemOperand& addr); + + // Contiguous load and replicate thirty-two bytes. + void ld1rob(const ZRegister& zt, + const PRegisterZ& pg, + const SVEMemOperand& addr); + + // Contiguous load and replicate sixteen halfwords. + void ld1roh(const ZRegister& zt, + const PRegisterZ& pg, + const SVEMemOperand& addr); + + // Contiguous load and replicate eight words. + void ld1row(const ZRegister& zt, + const PRegisterZ& pg, + const SVEMemOperand& addr); + + // Contiguous load and replicate four doublewords. + void ld1rod(const ZRegister& zt, + const PRegisterZ& pg, + const SVEMemOperand& addr); + + // Load and broadcast signed byte to vector. + void ld1rsb(const ZRegister& zt, + const PRegisterZ& pg, + const SVEMemOperand& addr); + + // Load and broadcast signed halfword to vector. + void ld1rsh(const ZRegister& zt, + const PRegisterZ& pg, + const SVEMemOperand& addr); + + // Load and broadcast signed word to vector. + void ld1rsw(const ZRegister& zt, + const PRegisterZ& pg, + const SVEMemOperand& addr); + + // Contiguous/gather load signed bytes to vector. + void ld1sb(const ZRegister& zt, + const PRegisterZ& pg, + const SVEMemOperand& addr); + + // Contiguous/gather load signed halfwords to vector. + void ld1sh(const ZRegister& zt, + const PRegisterZ& pg, + const SVEMemOperand& addr); + + // Contiguous/gather load signed words to vector. + void ld1sw(const ZRegister& zt, + const PRegisterZ& pg, + const SVEMemOperand& addr); + + // TODO: Merge other loads into the SVEMemOperand versions. + + // Contiguous load two-byte structures to two vectors. + void ld2b(const ZRegister& zt1, + const ZRegister& zt2, + const PRegisterZ& pg, + const SVEMemOperand& addr); + + // Contiguous load two-halfword structures to two vectors. + void ld2h(const ZRegister& zt1, + const ZRegister& zt2, + const PRegisterZ& pg, + const SVEMemOperand& addr); + + // Contiguous load two-word structures to two vectors. + void ld2w(const ZRegister& zt1, + const ZRegister& zt2, + const PRegisterZ& pg, + const SVEMemOperand& addr); + + // Contiguous load two-doubleword structures to two vectors. + void ld2d(const ZRegister& zt1, + const ZRegister& zt2, + const PRegisterZ& pg, + const SVEMemOperand& addr); + + // Contiguous load three-byte structures to three vectors. + void ld3b(const ZRegister& zt1, + const ZRegister& zt2, + const ZRegister& zt3, + const PRegisterZ& pg, + const SVEMemOperand& addr); + + // Contiguous load three-halfword structures to three vectors. + void ld3h(const ZRegister& zt1, + const ZRegister& zt2, + const ZRegister& zt3, + const PRegisterZ& pg, + const SVEMemOperand& addr); + + // Contiguous load three-word structures to three vectors. 
+ void ld3w(const ZRegister& zt1, + const ZRegister& zt2, + const ZRegister& zt3, + const PRegisterZ& pg, + const SVEMemOperand& addr); + + // Contiguous load three-doubleword structures to three vectors. + void ld3d(const ZRegister& zt1, + const ZRegister& zt2, + const ZRegister& zt3, + const PRegisterZ& pg, + const SVEMemOperand& addr); + + // Contiguous load four-byte structures to four vectors. + void ld4b(const ZRegister& zt1, + const ZRegister& zt2, + const ZRegister& zt3, + const ZRegister& zt4, + const PRegisterZ& pg, + const SVEMemOperand& addr); + + // Contiguous load four-halfword structures to four vectors. + void ld4h(const ZRegister& zt1, + const ZRegister& zt2, + const ZRegister& zt3, + const ZRegister& zt4, + const PRegisterZ& pg, + const SVEMemOperand& addr); + + // Contiguous load four-word structures to four vectors. + void ld4w(const ZRegister& zt1, + const ZRegister& zt2, + const ZRegister& zt3, + const ZRegister& zt4, + const PRegisterZ& pg, + const SVEMemOperand& addr); + + // Contiguous load four-doubleword structures to four vectors. + void ld4d(const ZRegister& zt1, + const ZRegister& zt2, + const ZRegister& zt3, + const ZRegister& zt4, + const PRegisterZ& pg, + const SVEMemOperand& addr); + + // Contiguous load first-fault unsigned bytes to vector. + void ldff1b(const ZRegister& zt, + const PRegisterZ& pg, + const SVEMemOperand& addr); + + // Contiguous load first-fault unsigned halfwords to vector. + void ldff1h(const ZRegister& zt, + const PRegisterZ& pg, + const SVEMemOperand& addr); + + // Contiguous load first-fault unsigned words to vector. + void ldff1w(const ZRegister& zt, + const PRegisterZ& pg, + const SVEMemOperand& addr); + + // Contiguous load first-fault doublewords to vector. + void ldff1d(const ZRegister& zt, + const PRegisterZ& pg, + const SVEMemOperand& addr); + + // Contiguous load first-fault signed bytes to vector. + void ldff1sb(const ZRegister& zt, + const PRegisterZ& pg, + const SVEMemOperand& addr); + + // Contiguous load first-fault signed halfwords to vector. + void ldff1sh(const ZRegister& zt, + const PRegisterZ& pg, + const SVEMemOperand& addr); + + // Contiguous load first-fault signed words to vector. + void ldff1sw(const ZRegister& zt, + const PRegisterZ& pg, + const SVEMemOperand& addr); + + // Gather load first-fault unsigned bytes to vector. + void ldff1b(const ZRegister& zt, + const PRegisterZ& pg, + const Register& xn, + const ZRegister& zm); + + // Gather load first-fault unsigned bytes to vector (immediate index). + void ldff1b(const ZRegister& zt, + const PRegisterZ& pg, + const ZRegister& zn, + int imm5); + + // Gather load first-fault doublewords to vector (vector index). + void ldff1d(const ZRegister& zt, + const PRegisterZ& pg, + const Register& xn, + const ZRegister& zm); + + // Gather load first-fault doublewords to vector (immediate index). + void ldff1d(const ZRegister& zt, + const PRegisterZ& pg, + const ZRegister& zn, + int imm5); + + // Gather load first-fault unsigned halfwords to vector (vector index). + void ldff1h(const ZRegister& zt, + const PRegisterZ& pg, + const Register& xn, + const ZRegister& zm); + + // Gather load first-fault unsigned halfwords to vector (immediate index). + void ldff1h(const ZRegister& zt, + const PRegisterZ& pg, + const ZRegister& zn, + int imm5); + + // Gather load first-fault signed bytes to vector (vector index). 
+ void ldff1sb(const ZRegister& zt, + const PRegisterZ& pg, + const Register& xn, + const ZRegister& zm); + + // Gather load first-fault signed bytes to vector (immediate index). + void ldff1sb(const ZRegister& zt, + const PRegisterZ& pg, + const ZRegister& zn, + int imm5); + + // Gather load first-fault signed halfwords to vector (vector index). + void ldff1sh(const ZRegister& zt, + const PRegisterZ& pg, + const Register& xn, + const ZRegister& zm); + + // Gather load first-fault signed halfwords to vector (immediate index). + void ldff1sh(const ZRegister& zt, + const PRegisterZ& pg, + const ZRegister& zn, + int imm5); + + // Gather load first-fault signed words to vector (vector index). + void ldff1sw(const ZRegister& zt, + const PRegisterZ& pg, + const Register& xn, + const ZRegister& zm); + + // Gather load first-fault signed words to vector (immediate index). + void ldff1sw(const ZRegister& zt, + const PRegisterZ& pg, + const ZRegister& zn, + int imm5); + + // Gather load first-fault unsigned words to vector (vector index). + void ldff1w(const ZRegister& zt, + const PRegisterZ& pg, + const Register& xn, + const ZRegister& zm); + + // Gather load first-fault unsigned words to vector (immediate index). + void ldff1w(const ZRegister& zt, + const PRegisterZ& pg, + const ZRegister& zn, + int imm5); + + // Contiguous load non-fault unsigned bytes to vector (immediate index). + void ldnf1b(const ZRegister& zt, + const PRegisterZ& pg, + const SVEMemOperand& addr); + + // Contiguous load non-fault doublewords to vector (immediate index). + void ldnf1d(const ZRegister& zt, + const PRegisterZ& pg, + const SVEMemOperand& addr); + + // Contiguous load non-fault unsigned halfwords to vector (immediate + // index). + void ldnf1h(const ZRegister& zt, + const PRegisterZ& pg, + const SVEMemOperand& addr); + + // Contiguous load non-fault signed bytes to vector (immediate index). + void ldnf1sb(const ZRegister& zt, + const PRegisterZ& pg, + const SVEMemOperand& addr); + + // Contiguous load non-fault signed halfwords to vector (immediate index). + void ldnf1sh(const ZRegister& zt, + const PRegisterZ& pg, + const SVEMemOperand& addr); + + // Contiguous load non-fault signed words to vector (immediate index). + void ldnf1sw(const ZRegister& zt, + const PRegisterZ& pg, + const SVEMemOperand& addr); + + // Contiguous load non-fault unsigned words to vector (immediate index). + void ldnf1w(const ZRegister& zt, + const PRegisterZ& pg, + const SVEMemOperand& addr); + + // Contiguous load non-temporal bytes to vector. + void ldnt1b(const ZRegister& zt, + const PRegisterZ& pg, + const SVEMemOperand& addr); + + // Contiguous load non-temporal halfwords to vector. + void ldnt1h(const ZRegister& zt, + const PRegisterZ& pg, + const SVEMemOperand& addr); + + // Contiguous load non-temporal words to vector. + void ldnt1w(const ZRegister& zt, + const PRegisterZ& pg, + const SVEMemOperand& addr); + + // Contiguous load non-temporal doublewords to vector. + void ldnt1d(const ZRegister& zt, + const PRegisterZ& pg, + const SVEMemOperand& addr); + + // Load SVE predicate/vector register. + void ldr(const CPURegister& rt, const SVEMemOperand& addr); + + // Logical shift left by immediate (predicated). + void lsl(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + int shift); + + // Logical shift left by 64-bit wide elements (predicated). + void lsl(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm); + + // Logical shift left by immediate (unpredicated). 
+ void lsl(const ZRegister& zd, const ZRegister& zn, int shift); + + // Logical shift left by 64-bit wide elements (unpredicated). + void lsl(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm); + + // Reversed logical shift left by vector (predicated). + void lslr(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm); + + // Logical shift right by immediate (predicated). + void lsr(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + int shift); + + // Logical shift right by 64-bit wide elements (predicated). + void lsr(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm); + + // Logical shift right by immediate (unpredicated). + void lsr(const ZRegister& zd, const ZRegister& zn, int shift); + + // Logical shift right by 64-bit wide elements (unpredicated). + void lsr(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm); + + // Reversed logical shift right by vector (predicated). + void lsrr(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm); + + // Bitwise invert predicate. + void not_(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const PRegisterWithLaneSize& pn); + + // Bitwise invert predicate, setting the condition flags. + void nots(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const PRegisterWithLaneSize& pn); + + // Multiply-add vectors (predicated), writing multiplicand + // [Zdn = Za + Zdn * Zm]. + void mad(const ZRegister& zdn, + const PRegisterM& pg, + const ZRegister& zm, + const ZRegister& za); + + // Multiply-add vectors (predicated), writing addend + // [Zda = Zda + Zn * Zm]. + void mla(const ZRegister& zda, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm); + + // Multiply-subtract vectors (predicated), writing addend + // [Zda = Zda - Zn * Zm]. + void mls(const ZRegister& zda, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm); + + // Move predicates (unpredicated) + void mov(const PRegister& pd, const PRegister& pn); + + // Move predicates (merging) + void mov(const PRegisterWithLaneSize& pd, + const PRegisterM& pg, + const PRegisterWithLaneSize& pn); + + // Move predicates (zeroing) + void mov(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const PRegisterWithLaneSize& pn); + + // Move general-purpose register to vector elements (unpredicated) + void mov(const ZRegister& zd, const Register& xn); + + // Move SIMD&FP scalar register to vector elements (unpredicated) + void mov(const ZRegister& zd, const VRegister& vn); + + // Move vector register (unpredicated) + void mov(const ZRegister& zd, const ZRegister& zn); + + // Move indexed element to vector elements (unpredicated) + void mov(const ZRegister& zd, const ZRegister& zn, unsigned index); + + // Move general-purpose register to vector elements (predicated) + void mov(const ZRegister& zd, const PRegisterM& pg, const Register& rn); + + // Move SIMD&FP scalar register to vector elements (predicated) + void mov(const ZRegister& zd, const PRegisterM& pg, const VRegister& vn); + + // Move vector elements (predicated) + void mov(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn); + + // Move signed integer immediate to vector elements (predicated) + void mov(const ZRegister& zd, const PRegister& pg, int imm8, int shift = -1); + + // Move signed immediate to vector elements (unpredicated). 
+ void mov(const ZRegister& zd, int imm8, int shift); + + // Move logical bitmask immediate to vector (unpredicated). + void mov(const ZRegister& zd, uint64_t imm); + + // Move predicate (unpredicated), setting the condition flags + void movs(const PRegister& pd, const PRegister& pn); + + // Move predicates (zeroing), setting the condition flags + void movs(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const PRegisterWithLaneSize& pn); + + // Move prefix (predicated). + void movprfx(const ZRegister& zd, const PRegister& pg, const ZRegister& zn); + + // Move prefix (unpredicated). + void movprfx(const ZRegister& zd, const ZRegister& zn); + + // Multiply-subtract vectors (predicated), writing multiplicand + // [Zdn = Za - Zdn * Zm]. + void msb(const ZRegister& zdn, + const PRegisterM& pg, + const ZRegister& zm, + const ZRegister& za); + + // Multiply vectors (predicated). + void mul(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm); + + // Multiply by immediate (unpredicated). + void mul(const ZRegister& zd, const ZRegister& zn, int imm8); + + // Bitwise NAND predicates. + void nand(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const PRegisterWithLaneSize& pn, + const PRegisterWithLaneSize& pm); + + // Bitwise NAND predicates. + void nands(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const PRegisterWithLaneSize& pn, + const PRegisterWithLaneSize& pm); + + // Negate (predicated). + void neg(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn); + + // Bitwise NOR predicates. + void nor(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const PRegisterWithLaneSize& pn, + const PRegisterWithLaneSize& pm); + + // Bitwise NOR predicates. + void nors(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const PRegisterWithLaneSize& pn, + const PRegisterWithLaneSize& pm); + + // Bitwise invert vector (predicated). + void not_(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn); + + // Bitwise OR inverted predicate. + void orn(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const PRegisterWithLaneSize& pn, + const PRegisterWithLaneSize& pm); + + // Bitwise OR inverted predicate. + void orns(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const PRegisterWithLaneSize& pn, + const PRegisterWithLaneSize& pm); + + // Bitwise OR with inverted immediate (unpredicated). + void orn(const ZRegister& zd, const ZRegister& zn, uint64_t imm); + + // Bitwise OR predicate. + void orr(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const PRegisterWithLaneSize& pn, + const PRegisterWithLaneSize& pm); + + // Bitwise OR vectors (predicated). + void orr(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm); + + // Bitwise OR with immediate (unpredicated). + void orr(const ZRegister& zd, const ZRegister& zn, uint64_t imm); + + // Bitwise OR vectors (unpredicated). + void orr(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm); + + // Bitwise OR predicate. + void orrs(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const PRegisterWithLaneSize& pn, + const PRegisterWithLaneSize& pm); + + // Bitwise OR reduction to scalar. + void orv(const VRegister& vd, const PRegister& pg, const ZRegister& zn); + + // Set all predicate elements to false. + void pfalse(const PRegisterWithLaneSize& pd); + + // Set the first active predicate element to true. 
+ void pfirst(const PRegisterWithLaneSize& pd, + const PRegister& pg, + const PRegisterWithLaneSize& pn); + + // Find next active predicate. + void pnext(const PRegisterWithLaneSize& pd, + const PRegister& pg, + const PRegisterWithLaneSize& pn); + + // Prefetch bytes. + void prfb(PrefetchOperation prfop, + const PRegister& pg, + const SVEMemOperand& addr); + + // Prefetch halfwords. + void prfh(PrefetchOperation prfop, + const PRegister& pg, + const SVEMemOperand& addr); + + // Prefetch words. + void prfw(PrefetchOperation prfop, + const PRegister& pg, + const SVEMemOperand& addr); + + // Prefetch doublewords. + void prfd(PrefetchOperation prfop, + const PRegister& pg, + const SVEMemOperand& addr); + + // Set condition flags for predicate. + void ptest(const PRegister& pg, const PRegisterWithLaneSize& pn); + + // Initialise predicate from named constraint. + void ptrue(const PRegisterWithLaneSize& pd, int pattern = SVE_ALL); + + // Initialise predicate from named constraint. + void ptrues(const PRegisterWithLaneSize& pd, int pattern = SVE_ALL); + + // Unpack and widen half of predicate. + void punpkhi(const PRegisterWithLaneSize& pd, + const PRegisterWithLaneSize& pn); + + // Unpack and widen half of predicate. + void punpklo(const PRegisterWithLaneSize& pd, + const PRegisterWithLaneSize& pn); + + // Reverse bits (predicated). + void rbit(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn); + + // Read the first-fault register. + void rdffr(const PRegisterWithLaneSize& pd); + + // Return predicate of successfully loaded elements. + void rdffr(const PRegisterWithLaneSize& pd, const PRegisterZ& pg); + + // Return predicate of successfully loaded elements. + void rdffrs(const PRegisterWithLaneSize& pd, const PRegisterZ& pg); + + // Read multiple of vector register size to scalar register. + void rdvl(const Register& xd, int imm6); + + // Reverse all elements in a predicate. + void rev(const PRegisterWithLaneSize& pd, const PRegisterWithLaneSize& pn); + + // Reverse all elements in a vector (unpredicated). + void rev(const ZRegister& zd, const ZRegister& zn); + + // Reverse bytes / halfwords / words within elements (predicated). + void revb(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn); + + // Reverse bytes / halfwords / words within elements (predicated). + void revh(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn); + + // Reverse bytes / halfwords / words within elements (predicated). + void revw(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn); + + // Signed absolute difference (predicated). + void sabd(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm); + + // Signed add reduction to scalar. + void saddv(const VRegister& dd, const PRegister& pg, const ZRegister& zn); + + // Signed integer convert to floating-point (predicated). + void scvtf(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn); + + // Signed divide (predicated). + void sdiv(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm); + + // Signed reversed divide (predicated). + void sdivr(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm); + + // Signed dot product by indexed quadtuplet. + void sdot(const ZRegister& zda, + const ZRegister& zn, + const ZRegister& zm, + int index); + + // Signed dot product. + void sdot(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm);
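+ + // NOTE: illustrative sketch only, not part of the declarations being added + // here; it shows how the first-fault predicate machinery declared nearby + // might be driven. The operand helpers assumed below (p0/p1/z2/x1, VnB(), + // VnS(), Zeroing(), and the SVEMemOperand constructor) come from the wider + // VIXL API rather than from this excerpt: + // + //   setffr();                                           // Set every FFR bit. + //   ldff1w(z2.VnS(), p0.Zeroing(), SVEMemOperand(x1));  // May stop at a fault. + //   rdffr(p1.VnB(), p0.Zeroing());                      // Which lanes loaded? + //   ptest(p0, p1.VnB());                                // Set NZCV from p1. + + // Conditionally select elements from two predicates.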
+ void sel(const PRegisterWithLaneSize& pd, + const PRegister& pg, + const PRegisterWithLaneSize& pn, + const PRegisterWithLaneSize& pm); + + // Conditionally select elements from two vectors. + void sel(const ZRegister& zd, + const PRegister& pg, + const ZRegister& zn, + const ZRegister& zm); + + // Initialise the first-fault register to all true. + void setffr(); + + // Signed maximum vectors (predicated). + void smax(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm); + + // Signed maximum with immediate (unpredicated). + void smax(const ZRegister& zd, const ZRegister& zn, int imm8); + + // Signed maximum reduction to scalar. + void smaxv(const VRegister& vd, const PRegister& pg, const ZRegister& zn); + + // Signed minimum vectors (predicated). + void smin(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm); + + // Signed minimum with immediate (unpredicated). + void smin(const ZRegister& zd, const ZRegister& zn, int imm8); + + // Signed minimum reduction to scalar. + void sminv(const VRegister& vd, const PRegister& pg, const ZRegister& zn); + + // Signed multiply returning high half (predicated). + void smulh(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm); + + // Splice two vectors under predicate control. + void splice(const ZRegister& zd, + const PRegister& pg, + const ZRegister& zn, + const ZRegister& zm); + + // Splice two vectors under predicate control (constructive). + void splice_con(const ZRegister& zd, + const PRegister& pg, + const ZRegister& zn, + const ZRegister& zm); + + // Signed saturating add vectors (unpredicated). + void sqadd(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm); + + // Signed saturating add immediate (unpredicated). + void sqadd(const ZRegister& zd, + const ZRegister& zn, + int imm8, + int shift = -1); + + // Signed saturating decrement scalar by multiple of 8-bit predicate + // constraint element count. + void sqdecb(const Register& xd, + const Register& wn, + int pattern, + int multiplier); + + // Signed saturating decrement scalar by multiple of 8-bit predicate + // constraint element count. + void sqdecb(const Register& rdn, int pattern = SVE_ALL, int multiplier = 1); + + // Signed saturating decrement scalar by multiple of 64-bit predicate + // constraint element count. + void sqdecd(const Register& xd, + const Register& wn, + int pattern = SVE_ALL, + int multiplier = 1); + + // Signed saturating decrement scalar by multiple of 64-bit predicate + // constraint element count. + void sqdecd(const Register& rdn, int pattern = SVE_ALL, int multiplier = 1); + + // Signed saturating decrement vector by multiple of 64-bit predicate + // constraint element count. + void sqdecd(const ZRegister& zdn, int pattern = SVE_ALL, int multiplier = 1); + + // Signed saturating decrement scalar by multiple of 16-bit predicate + // constraint element count. + void sqdech(const Register& xd, + const Register& wn, + int pattern = SVE_ALL, + int multiplier = 1); + + // Signed saturating decrement scalar by multiple of 16-bit predicate + // constraint element count. + void sqdech(const Register& rdn, int pattern = SVE_ALL, int multiplier = 1); + + // Signed saturating decrement vector by multiple of 16-bit predicate + // constraint element count. + void sqdech(const ZRegister& zdn, int pattern = SVE_ALL, int multiplier = 1); + + // Signed saturating decrement scalar by active predicate element count. 
+ void sqdecp(const Register& xd, + const PRegisterWithLaneSize& pg, + const Register& wn); + + // Signed saturating decrement scalar by active predicate element count. + void sqdecp(const Register& xdn, const PRegisterWithLaneSize& pg); + + // Signed saturating decrement vector by active predicate element count. + void sqdecp(const ZRegister& zdn, const PRegister& pg); + + // Signed saturating decrement scalar by multiple of 32-bit predicate + // constraint element count. + void sqdecw(const Register& xd, + const Register& wn, + int pattern = SVE_ALL, + int multiplier = 1); + + // Signed saturating decrement scalar by multiple of 32-bit predicate + // constraint element count. + void sqdecw(const Register& rdn, int pattern = SVE_ALL, int multiplier = 1); + + // Signed saturating decrement vector by multiple of 32-bit predicate + // constraint element count. + void sqdecw(const ZRegister& zdn, int pattern = SVE_ALL, int multiplier = 1); + + // Signed saturating increment scalar by multiple of 8-bit predicate + // constraint element count. + void sqincb(const Register& xd, + const Register& wn, + int pattern = SVE_ALL, + int multiplier = 1); + + // Signed saturating increment scalar by multiple of 8-bit predicate + // constraint element count. + void sqincb(const Register& rdn, int pattern = SVE_ALL, int multiplier = 1); + + // Signed saturating increment scalar by multiple of 64-bit predicate + // constraint element count. + void sqincd(const Register& xd, + const Register& wn, + int pattern, + int multiplier); + + // Signed saturating increment scalar by multiple of 64-bit predicate + // constraint element count. + void sqincd(const Register& rdn, int pattern = SVE_ALL, int multiplier = 1); + + // Signed saturating increment vector by multiple of 64-bit predicate + // constraint element count. + void sqincd(const ZRegister& zdn, int pattern = SVE_ALL, int multiplier = 1); + + // Signed saturating increment scalar by multiple of 16-bit predicate + // constraint element count. + void sqinch(const Register& xd, + const Register& wn, + int pattern = SVE_ALL, + int multiplier = 1); + + // Signed saturating increment scalar by multiple of 16-bit predicate + // constraint element count. + void sqinch(const Register& rdn, int pattern = SVE_ALL, int multiplier = 1); + + // Signed saturating increment vector by multiple of 16-bit predicate + // constraint element count. + void sqinch(const ZRegister& zdn, int pattern = SVE_ALL, int multiplier = 1); + + // Signed saturating increment scalar by active predicate element count. + void sqincp(const Register& xd, + const PRegisterWithLaneSize& pg, + const Register& wn); + + // Signed saturating increment scalar by active predicate element count. + void sqincp(const Register& xdn, const PRegisterWithLaneSize& pg); + + // Signed saturating increment vector by active predicate element count. + void sqincp(const ZRegister& zdn, const PRegister& pg); + + // Signed saturating increment scalar by multiple of 32-bit predicate + // constraint element count. + void sqincw(const Register& xd, + const Register& wn, + int pattern = SVE_ALL, + int multiplier = 1); + + // Signed saturating increment scalar by multiple of 32-bit predicate + // constraint element count. + void sqincw(const Register& rdn, int pattern = SVE_ALL, int multiplier = 1); + + // Signed saturating increment vector by multiple of 32-bit predicate + // constraint element count. 
+ void sqincw(const ZRegister& zdn, int pattern = SVE_ALL, int multiplier = 1); + + // Signed saturating subtract vectors (unpredicated). + void sqsub(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm); + + // Signed saturating subtract immediate (unpredicated). + void sqsub(const ZRegister& zd, + const ZRegister& zn, + int imm8, + int shift = -1); + + // Contiguous/scatter store bytes from vector. + void st1b(const ZRegister& zt, + const PRegister& pg, + const SVEMemOperand& addr); + + // Contiguous/scatter store halfwords from vector. + void st1h(const ZRegister& zt, + const PRegister& pg, + const SVEMemOperand& addr); + + // Contiguous/scatter store words from vector. + void st1w(const ZRegister& zt, + const PRegister& pg, + const SVEMemOperand& addr); + + // Contiguous/scatter store doublewords from vector. + void st1d(const ZRegister& zt, + const PRegister& pg, + const SVEMemOperand& addr); + + // Contiguous store two-byte structures from two vectors. + void st2b(const ZRegister& zt1, + const ZRegister& zt2, + const PRegister& pg, + const SVEMemOperand& addr); + + // Contiguous store two-halfword structures from two vectors. + void st2h(const ZRegister& zt1, + const ZRegister& zt2, + const PRegister& pg, + const SVEMemOperand& addr); + + // Contiguous store two-word structures from two vectors. + void st2w(const ZRegister& zt1, + const ZRegister& zt2, + const PRegister& pg, + const SVEMemOperand& addr); + + // Contiguous store two-doubleword structures from two vectors. + void st2d(const ZRegister& zt1, + const ZRegister& zt2, + const PRegister& pg, + const SVEMemOperand& addr); + + // Contiguous store three-byte structures from three vectors. + void st3b(const ZRegister& zt1, + const ZRegister& zt2, + const ZRegister& zt3, + const PRegister& pg, + const SVEMemOperand& addr); + + // Contiguous store three-halfword structures from three vectors. + void st3h(const ZRegister& zt1, + const ZRegister& zt2, + const ZRegister& zt3, + const PRegister& pg, + const SVEMemOperand& addr); + + // Contiguous store three-word structures from three vectors. + void st3w(const ZRegister& zt1, + const ZRegister& zt2, + const ZRegister& zt3, + const PRegister& pg, + const SVEMemOperand& addr); + + // Contiguous store three-doubleword structures from three vectors. + void st3d(const ZRegister& zt1, + const ZRegister& zt2, + const ZRegister& zt3, + const PRegister& pg, + const SVEMemOperand& addr); + + // Contiguous store four-byte structures from four vectors. + void st4b(const ZRegister& zt1, + const ZRegister& zt2, + const ZRegister& zt3, + const ZRegister& zt4, + const PRegister& pg, + const SVEMemOperand& addr); + + // Contiguous store four-halfword structures from four vectors. + void st4h(const ZRegister& zt1, + const ZRegister& zt2, + const ZRegister& zt3, + const ZRegister& zt4, + const PRegister& pg, + const SVEMemOperand& addr); + + // Contiguous store four-word structures from four vectors. + void st4w(const ZRegister& zt1, + const ZRegister& zt2, + const ZRegister& zt3, + const ZRegister& zt4, + const PRegister& pg, + const SVEMemOperand& addr); + + // Contiguous store four-doubleword structures from four vectors. + void st4d(const ZRegister& zt1, + const ZRegister& zt2, + const ZRegister& zt3, + const ZRegister& zt4, + const PRegister& pg, + const SVEMemOperand& addr); + + // Contiguous store non-temporal bytes from vector. + void stnt1b(const ZRegister& zt, + const PRegister& pg, + const SVEMemOperand& addr);
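+ + // NOTE: illustrative sketch only, not part of the declarations being added + // here; it shows how the predicated load/compute/store methods above might + // be combined, assuming the usual VIXL operand helpers (z0/z1/p0/x0, VnS(), + // Zeroing(), Merging(), SVEMemOperand) defined elsewhere in this header set: + // + //   ptrue(p0.VnS(), SVE_ALL);                         // All word lanes active. + //   index(z0.VnS(), 0, 1);                            // z0.s = {0, 1, 2, ...}. + //   ld1w(z1.VnS(), p0.Zeroing(), SVEMemOperand(x0));  // Load words from [x0]. + //   mul(z1.VnS(), p0.Merging(), z1.VnS(), z0.VnS());  // z1.s *= z0.s. + //   st1w(z1.VnS(), p0, SVEMemOperand(x0));            // Store the results. + + // Contiguous store non-temporal halfwords from vector.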
+ void stnt1h(const ZRegister& zt, + const PRegister& pg, + const SVEMemOperand& addr); + + // Contiguous store non-temporal words from vector. + void stnt1w(const ZRegister& zt, + const PRegister& pg, + const SVEMemOperand& addr); + + // Contiguous store non-temporal doublewords from vector. + void stnt1d(const ZRegister& zt, + const PRegister& pg, + const SVEMemOperand& addr); + + // Store SVE predicate/vector register. + void str(const CPURegister& rt, const SVEMemOperand& addr); + + // Subtract vectors (predicated). + void sub(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm); + + // Subtract vectors (unpredicated). + void sub(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm); + + // Subtract immediate (unpredicated). + void sub(const ZRegister& zd, const ZRegister& zn, int imm8, int shift = -1); + + // Reversed subtract vectors (predicated). + void subr(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm); + + // Reversed subtract from immediate (unpredicated). + void subr(const ZRegister& zd, const ZRegister& zn, int imm8, int shift = -1); + + // Signed unpack and extend half of vector. + void sunpkhi(const ZRegister& zd, const ZRegister& zn); + + // Signed unpack and extend half of vector. + void sunpklo(const ZRegister& zd, const ZRegister& zn); + + // Signed byte extend (predicated). + void sxtb(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn); + + // Signed halfword extend (predicated). + void sxth(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn); + + // Signed word extend (predicated). + void sxtw(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn); + + // Programmable table lookup/permute using vector of indices into a + // vector. + void tbl(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm); + + // Interleave even or odd elements from two predicates. + void trn1(const PRegisterWithLaneSize& pd, + const PRegisterWithLaneSize& pn, + const PRegisterWithLaneSize& pm); + + // Interleave even or odd elements from two vectors. + void trn1(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm); + + // Interleave even or odd elements from two predicates. + void trn2(const PRegisterWithLaneSize& pd, + const PRegisterWithLaneSize& pn, + const PRegisterWithLaneSize& pm); + + // Interleave even or odd elements from two vectors. + void trn2(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm); + + // Unsigned absolute difference (predicated). + void uabd(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm); + + // Unsigned add reduction to scalar. + void uaddv(const VRegister& dd, const PRegister& pg, const ZRegister& zn); + + // Unsigned integer convert to floating-point (predicated). + void ucvtf(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn); + + // Unsigned divide (predicated). + void udiv(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm); + + // Unsigned reversed divide (predicated). + void udivr(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm); + + // Unsigned dot product by indexed quadtuplet. + void udot(const ZRegister& zda, + const ZRegister& zn, + const ZRegister& zm, + int index); + + // Unsigned dot product. + void udot(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm); + + // Unsigned maximum vectors (predicated). 
+ void umax(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm); + + // Unsigned maximum with immediate (unpredicated). + void umax(const ZRegister& zd, const ZRegister& zn, int imm8); + + // Unsigned maximum reduction to scalar. + void umaxv(const VRegister& vd, const PRegister& pg, const ZRegister& zn); + + // Unsigned minimum vectors (predicated). + void umin(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm); + + // Unsigned minimum with immediate (unpredicated). + void umin(const ZRegister& zd, const ZRegister& zn, int imm8); + + // Unsigned minimum reduction to scalar. + void uminv(const VRegister& vd, const PRegister& pg, const ZRegister& zn); + + // Unsigned multiply returning high half (predicated). + void umulh(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm); + + // Unsigned saturating add vectors (unpredicated). + void uqadd(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm); + + // Unsigned saturating add immediate (unpredicated). + void uqadd(const ZRegister& zd, + const ZRegister& zn, + int imm8, + int shift = -1); + + // Unsigned saturating decrement scalar by multiple of 8-bit predicate + // constraint element count. + void uqdecb(const Register& rdn, int pattern = SVE_ALL, int multiplier = 1); + + // Unsigned saturating decrement scalar by multiple of 64-bit predicate + // constraint element count. + void uqdecd(const Register& rdn, int pattern = SVE_ALL, int multiplier = 1); + + // Unsigned saturating decrement vector by multiple of 64-bit predicate + // constraint element count. + void uqdecd(const ZRegister& zdn, int pattern = SVE_ALL, int multiplier = 1); + + // Unsigned saturating decrement scalar by multiple of 16-bit predicate + // constraint element count. + void uqdech(const Register& rdn, int pattern = SVE_ALL, int multiplier = 1); + + // Unsigned saturating decrement vector by multiple of 16-bit predicate + // constraint element count. + void uqdech(const ZRegister& zdn, int pattern = SVE_ALL, int multiplier = 1); + + // Unsigned saturating decrement scalar by active predicate element count. + void uqdecp(const Register& rdn, const PRegisterWithLaneSize& pg); + + // Unsigned saturating decrement vector by active predicate element count. + void uqdecp(const ZRegister& zdn, const PRegister& pg); + + // Unsigned saturating decrement scalar by multiple of 32-bit predicate + // constraint element count. + void uqdecw(const Register& rdn, int pattern = SVE_ALL, int multiplier = 1); + + // Unsigned saturating decrement vector by multiple of 32-bit predicate + // constraint element count. + void uqdecw(const ZRegister& zdn, int pattern = SVE_ALL, int multiplier = 1); + + // Unsigned saturating increment scalar by multiple of 8-bit predicate + // constraint element count. + void uqincb(const Register& rdn, int pattern = SVE_ALL, int multiplier = 1); + + // Unsigned saturating increment scalar by multiple of 64-bit predicate + // constraint element count. + void uqincd(const Register& rdn, int pattern = SVE_ALL, int multiplier = 1); + + // Unsigned saturating increment vector by multiple of 64-bit predicate + // constraint element count. + void uqincd(const ZRegister& zdn, int pattern = SVE_ALL, int multiplier = 1); + + // Unsigned saturating increment scalar by multiple of 16-bit predicate + // constraint element count. 
+ void uqinch(const Register& rdn, int pattern = SVE_ALL, int multiplier = 1); + + // Unsigned saturating increment vector by multiple of 16-bit predicate + // constraint element count. + void uqinch(const ZRegister& zdn, int pattern = SVE_ALL, int multiplier = 1); + + // Unsigned saturating increment scalar by active predicate element count. + void uqincp(const Register& rdn, const PRegisterWithLaneSize& pg); + + // Unsigned saturating increment vector by active predicate element count. + void uqincp(const ZRegister& zdn, const PRegister& pg); + + // Unsigned saturating increment scalar by multiple of 32-bit predicate + // constraint element count. + void uqincw(const Register& rdn, int pattern = SVE_ALL, int multiplier = 1); + + // Unsigned saturating increment vector by multiple of 32-bit predicate + // constraint element count. + void uqincw(const ZRegister& zdn, int pattern = SVE_ALL, int multiplier = 1); + + // Unsigned saturating subtract vectors (unpredicated). + void uqsub(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm); + + // Unsigned saturating subtract immediate (unpredicated). + void uqsub(const ZRegister& zd, + const ZRegister& zn, + int imm8, + int shift = -1); + + // Unsigned unpack and extend half of vector. + void uunpkhi(const ZRegister& zd, const ZRegister& zn); + + // Unsigned unpack and extend half of vector. + void uunpklo(const ZRegister& zd, const ZRegister& zn); + + // Unsigned byte extend (predicated). + void uxtb(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn); + + // Unsigned halfword extend (predicated). + void uxth(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn); + + // Unsigned word extend (predicated). + void uxtw(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn); + + // Concatenate even or odd elements from two predicates. + void uzp1(const PRegisterWithLaneSize& pd, + const PRegisterWithLaneSize& pn, + const PRegisterWithLaneSize& pm); + + // Concatenate even or odd elements from two vectors. + void uzp1(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm); + + // Concatenate even or odd elements from two predicates. + void uzp2(const PRegisterWithLaneSize& pd, + const PRegisterWithLaneSize& pn, + const PRegisterWithLaneSize& pm); + + // Concatenate even or odd elements from two vectors. + void uzp2(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm); + + // While incrementing signed scalar less than or equal to scalar. + void whilele(const PRegisterWithLaneSize& pd, + const Register& rn, + const Register& rm); + + // While incrementing unsigned scalar lower than scalar. + void whilelo(const PRegisterWithLaneSize& pd, + const Register& rn, + const Register& rm); + + // While incrementing unsigned scalar lower or same as scalar. + void whilels(const PRegisterWithLaneSize& pd, + const Register& rn, + const Register& rm); + + // While incrementing signed scalar less than scalar. + void whilelt(const PRegisterWithLaneSize& pd, + const Register& rn, + const Register& rm); + + // Write the first-fault register. + void wrffr(const PRegisterWithLaneSize& pn); + + // Interleave elements from two half predicates. + void zip1(const PRegisterWithLaneSize& pd, + const PRegisterWithLaneSize& pn, + const PRegisterWithLaneSize& pm); + + // Interleave elements from two half vectors. + void zip1(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm); + + // Interleave elements from two half predicates. 
+ void zip2(const PRegisterWithLaneSize& pd, + const PRegisterWithLaneSize& pn, + const PRegisterWithLaneSize& pm); + + // Interleave elements from two half vectors. + void zip2(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm); + + // Add with carry long (bottom). + void adclb(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm); + + // Add with carry long (top). + void adclt(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm); + + // Add narrow high part (bottom). + void addhnb(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm); + + // Add narrow high part (top). + void addhnt(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm); + + // Add pairwise. + void addp(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm); + + // Bitwise clear and exclusive OR. + void bcax(const ZRegister& zd, + const ZRegister& zn, + const ZRegister& zm, + const ZRegister& zk); + + // Scatter lower bits into positions selected by bitmask. + void bdep(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm); + + // Gather lower bits from positions selected by bitmask. + void bext(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm); + + // Group bits to right or left as selected by bitmask. + void bgrp(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm); + + // Bitwise select. + void bsl(const ZRegister& zd, + const ZRegister& zn, + const ZRegister& zm, + const ZRegister& zk); + + // Bitwise select with first input inverted. + void bsl1n(const ZRegister& zd, + const ZRegister& zn, + const ZRegister& zm, + const ZRegister& zk); + + // Bitwise select with second input inverted. + void bsl2n(const ZRegister& zd, + const ZRegister& zn, + const ZRegister& zm, + const ZRegister& zk); + + // Complex integer add with rotate. + void cadd(const ZRegister& zd, + const ZRegister& zn, + const ZRegister& zm, + int rot); + + // Complex integer dot product (indexed). + void cdot(const ZRegister& zda, + const ZRegister& zn, + const ZRegister& zm, + int index, + int rot); + + // Complex integer dot product. + void cdot(const ZRegister& zda, + const ZRegister& zn, + const ZRegister& zm, + int rot); + + // Complex integer multiply-add with rotate (indexed). + void cmla(const ZRegister& zda, + const ZRegister& zn, + const ZRegister& zm, + int index, + int rot); + + // Complex integer multiply-add with rotate. + void cmla(const ZRegister& zda, + const ZRegister& zn, + const ZRegister& zm, + int rot); + + // Bitwise exclusive OR of three vectors. + void eor3(const ZRegister& zd, + const ZRegister& zn, + const ZRegister& zm, + const ZRegister& zk); + + // Interleaving exclusive OR (bottom, top). + void eorbt(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm); + + // Interleaving exclusive OR (top, bottom). + void eortb(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm); + + // Floating-point add pairwise. + void faddp(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm); + + // Floating-point up convert long (top, predicated). + void fcvtlt(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn); + + // Floating-point down convert and narrow (top, predicated). + void fcvtnt(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn); + + // Floating-point down convert, rounding to odd (predicated). 
+ void fcvtx(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn); + + // Floating-point down convert, rounding to odd (top, predicated). + void fcvtxnt(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn); + + // Floating-point base 2 logarithm as integer. + void flogb(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn); + + // Floating-point maximum number pairwise. + void fmaxnmp(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm); + + // Floating-point maximum pairwise. + void fmaxp(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm); + + // Floating-point minimum number pairwise. + void fminnmp(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm); + + // Floating-point minimum pairwise. + void fminp(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm); + + // Half-precision floating-point multiply-add long to single-precision + // (bottom). + void fmlalb(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm); + + // Half-precision floating-point multiply-add long to single-precision + // (top). + void fmlalt(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm); + + // Half-precision floating-point multiply-subtract long from + // single-precision (bottom). + void fmlslb(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm); + + // Half-precision floating-point multiply-subtract long from + // single-precision (top, indexed). + void fmlslt(const ZRegister& zda, + const ZRegister& zn, + const ZRegister& zm, + int index); + + // Half-precision floating-point multiply-add long to single-precision + // (bottom, indexed). + void fmlalb(const ZRegister& zda, + const ZRegister& zn, + const ZRegister& zm, + int index); + + // Half-precision floating-point multiply-add long to single-precision + // (top, indexed). + void fmlalt(const ZRegister& zda, + const ZRegister& zn, + const ZRegister& zm, + int index); + + // Half-precision floating-point multiply-subtract long from + // single-precision (bottom, indexed). + void fmlslb(const ZRegister& zda, + const ZRegister& zn, + const ZRegister& zm, + int index); + + // Half-precision floating-point multiply-subtract long from + // single-precision (top). + void fmlslt(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm); + + // Count matching elements in vector. + void histcnt(const ZRegister& zd, + const PRegisterZ& pg, + const ZRegister& zn, + const ZRegister& zm); + + // Count matching elements in vector segments. + void histseg(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm); + + // Gather load non-temporal signed bytes. + void ldnt1sb(const ZRegister& zt, + const PRegisterZ& pg, + const SVEMemOperand& addr); + + // Gather load non-temporal signed halfwords. + void ldnt1sh(const ZRegister& zt, + const PRegisterZ& pg, + const SVEMemOperand& addr); + + // Gather load non-temporal signed words. + void ldnt1sw(const ZRegister& zt, + const PRegisterZ& pg, + const SVEMemOperand& addr); + + // Detect any matching elements, setting the condition flags. + void match(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const ZRegister& zn, + const ZRegister& zm); + + // Multiply-add to accumulator (indexed). + void mla(const ZRegister& zda, + const ZRegister& zn, + const ZRegister& zm, + int index); + + // Multiply-subtract from accumulator (indexed). 
+ void mls(const ZRegister& zda, + const ZRegister& zn, + const ZRegister& zm, + int index); + + // Multiply (indexed). + void mul(const ZRegister& zd, + const ZRegister& zn, + const ZRegister& zm, + int index); + + // Multiply vectors (unpredicated). + void mul(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm); + + // Bitwise inverted select. + void nbsl(const ZRegister& zd, + const ZRegister& zn, + const ZRegister& zm, + const ZRegister& zk); + + // Detect no matching elements, setting the condition flags. + void nmatch(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const ZRegister& zn, + const ZRegister& zm); + + // Polynomial multiply vectors (unpredicated). + void pmul(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm); + + // Polynomial multiply long (bottom). + void pmullb(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm); + + // Polynomial multiply long (top). + void pmullt(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm); + + // Rounding add narrow high part (bottom). + void raddhnb(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm); + + // Rounding add narrow high part (top). + void raddhnt(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm); + + // Rounding shift right narrow by immediate (bottom). + void rshrnb(const ZRegister& zd, const ZRegister& zn, int shift); + + // Rounding shift right narrow by immediate (top). + void rshrnt(const ZRegister& zd, const ZRegister& zn, int shift); + + // Rounding subtract narrow high part (bottom). + void rsubhnb(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm); + + // Rounding subtract narrow high part (top). + void rsubhnt(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm); + + // Signed absolute difference and accumulate. + void saba(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm); + + // Signed absolute difference and accumulate long (bottom). + void sabalb(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm); + + // Signed absolute difference and accumulate long (top). + void sabalt(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm); + + // Signed absolute difference long (bottom). + void sabdlb(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm); + + // Signed absolute difference long (top). + void sabdlt(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm); + + // Signed add and accumulate long pairwise. + void sadalp(const ZRegister& zda, const PRegisterM& pg, const ZRegister& zn); + + // Signed add long (bottom). + void saddlb(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm); + + // Signed add long (bottom + top). + void saddlbt(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm); + + // Signed add long (top). + void saddlt(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm); + + // Signed add wide (bottom). + void saddwb(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm); + + // Signed add wide (top). + void saddwt(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm); + + // Subtract with carry long (bottom). + void sbclb(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm); + + // Subtract with carry long (top). + void sbclt(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm); + + // Signed halving addition. + void shadd(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm); + + // Shift right narrow by immediate (bottom). 
+ void shrnb(const ZRegister& zd, const ZRegister& zn, int shift); + + // Shift right narrow by immediate (top). + void shrnt(const ZRegister& zd, const ZRegister& zn, int shift); + + // Signed halving subtract. + void shsub(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm); + + // Signed halving subtract reversed vectors. + void shsubr(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm); + + // Shift left and insert (immediate). + void sli(const ZRegister& zd, const ZRegister& zn, int shift); + + // Signed maximum pairwise. + void smaxp(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm); + + // Signed minimum pairwise. + void sminp(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm); + + // Signed multiply-add long to accumulator (bottom, indexed). + void smlalb(const ZRegister& zda, + const ZRegister& zn, + const ZRegister& zm, + int index); + + // Signed multiply-add long to accumulator (bottom). + void smlalb(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm); + + // Signed multiply-add long to accumulator (top, indexed). + void smlalt(const ZRegister& zda, + const ZRegister& zn, + const ZRegister& zm, + int index); + + // Signed multiply-add long to accumulator (top). + void smlalt(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm); + + // Signed multiply-subtract long from accumulator (bottom, indexed). + void smlslb(const ZRegister& zda, + const ZRegister& zn, + const ZRegister& zm, + int index); + + // Signed multiply-subtract long from accumulator (bottom). + void smlslb(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm); + + // Signed multiply-subtract long from accumulator (top, indexed). + void smlslt(const ZRegister& zda, + const ZRegister& zn, + const ZRegister& zm, + int index); + + // Signed multiply-subtract long from accumulator (top). + void smlslt(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm); + + // Signed multiply returning high half (unpredicated). + void smulh(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm); + + // Signed multiply long (bottom, indexed). + void smullb(const ZRegister& zd, + const ZRegister& zn, + const ZRegister& zm, + int index); + + // Signed multiply long (bottom). + void smullb(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm); + + // Signed multiply long (top, indexed). + void smullt(const ZRegister& zd, + const ZRegister& zn, + const ZRegister& zm, + int index); + + // Signed multiply long (top). + void smullt(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm); + + // Signed saturating absolute value. + void sqabs(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn); + + // Signed saturating addition (predicated). + void sqadd(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm); + + // Saturating complex integer add with rotate. + void sqcadd(const ZRegister& zd, + const ZRegister& zn, + const ZRegister& zm, + int rot); + + // Signed saturating doubling multiply-add long to accumulator (bottom, + // indexed). + void sqdmlalb(const ZRegister& zda, + const ZRegister& zn, + const ZRegister& zm, + int index); + + // Signed saturating doubling multiply-add long to accumulator (bottom). 
+ void sqdmlalb(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm); + + // Signed saturating doubling multiply-add long to accumulator (bottom x + // top). + void sqdmlalbt(const ZRegister& zda, + const ZRegister& zn, + const ZRegister& zm); + + // Signed saturating doubling multiply-add long to accumulator (top, + // indexed). + void sqdmlalt(const ZRegister& zda, + const ZRegister& zn, + const ZRegister& zm, + int index); + + // Signed saturating doubling multiply-add long to accumulator (top). + void sqdmlalt(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm); + + // Signed saturating doubling multiply-subtract long from accumulator + // (bottom, indexed). + void sqdmlslb(const ZRegister& zda, + const ZRegister& zn, + const ZRegister& zm, + int index); + + // Signed saturating doubling multiply-subtract long from accumulator + // (bottom). + void sqdmlslb(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm); + + // Signed saturating doubling multiply-subtract long from accumulator + // (bottom x top). + void sqdmlslbt(const ZRegister& zda, + const ZRegister& zn, + const ZRegister& zm); + + // Signed saturating doubling multiply-subtract long from accumulator + // (top, indexed). + void sqdmlslt(const ZRegister& zda, + const ZRegister& zn, + const ZRegister& zm, + int index); + + // Signed saturating doubling multiply-subtract long from accumulator + // (top). + void sqdmlslt(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm); + + // Signed saturating doubling multiply high (indexed). + void sqdmulh(const ZRegister& zd, + const ZRegister& zn, + const ZRegister& zm, + int index); + + // Signed saturating doubling multiply high (unpredicated). + void sqdmulh(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm); + + // Signed saturating doubling multiply long (bottom, indexed). + void sqdmullb(const ZRegister& zd, + const ZRegister& zn, + const ZRegister& zm, + int index); + + // Signed saturating doubling multiply long (bottom). + void sqdmullb(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm); + + // Signed saturating doubling multiply long (top, indexed). + void sqdmullt(const ZRegister& zd, + const ZRegister& zn, + const ZRegister& zm, + int index); + + // Signed saturating doubling multiply long (top). + void sqdmullt(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm); + + // Signed saturating negate. + void sqneg(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn); + + // Saturating rounding doubling complex integer multiply-add high with + // rotate (indexed). + void sqrdcmlah(const ZRegister& zda, + const ZRegister& zn, + const ZRegister& zm, + int index, + int rot); + + // Saturating rounding doubling complex integer multiply-add high with + // rotate. + void sqrdcmlah(const ZRegister& zda, + const ZRegister& zn, + const ZRegister& zm, + int rot); + + // Signed saturating rounding doubling multiply-add high to accumulator + // (indexed). + void sqrdmlah(const ZRegister& zda, + const ZRegister& zn, + const ZRegister& zm, + int index); + + // Signed saturating rounding doubling multiply-add high to accumulator + // (unpredicated). + void sqrdmlah(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm); + + // Signed saturating rounding doubling multiply-subtract high from + // accumulator (indexed). 
+ void sqrdmlsh(const ZRegister& zda, + const ZRegister& zn, + const ZRegister& zm, + int index); + + // Signed saturating rounding doubling multiply-subtract high from + // accumulator (unpredicated). + void sqrdmlsh(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm); + + // Signed saturating rounding doubling multiply high (indexed). + void sqrdmulh(const ZRegister& zd, + const ZRegister& zn, + const ZRegister& zm, + int index); + + // Signed saturating rounding doubling multiply high (unpredicated). + void sqrdmulh(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm); + + // Signed saturating rounding shift left by vector (predicated). + void sqrshl(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm); + + // Signed saturating rounding shift left reversed vectors (predicated). + void sqrshlr(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm); + + // Signed saturating rounding shift right narrow by immediate (bottom). + void sqrshrnb(const ZRegister& zd, const ZRegister& zn, int shift); + + // Signed saturating rounding shift right narrow by immediate (top). + void sqrshrnt(const ZRegister& zd, const ZRegister& zn, int shift); + + // Signed saturating rounding shift right unsigned narrow by immediate + // (bottom). + void sqrshrunb(const ZRegister& zd, const ZRegister& zn, int shift); + + // Signed saturating rounding shift right unsigned narrow by immediate + // (top). + void sqrshrunt(const ZRegister& zd, const ZRegister& zn, int shift); + + // Signed saturating shift left by immediate. + void sqshl(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + int shift); + + // Signed saturating shift left by vector (predicated). + void sqshl(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm); + + // Signed saturating shift left reversed vectors (predicated). + void sqshlr(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm); + + // Signed saturating shift left unsigned by immediate. + void sqshlu(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + int shift); + + // Signed saturating shift right narrow by immediate (bottom). + void sqshrnb(const ZRegister& zd, const ZRegister& zn, int shift); + + // Signed saturating shift right narrow by immediate (top). + void sqshrnt(const ZRegister& zd, const ZRegister& zn, int shift); + + // Signed saturating shift right unsigned narrow by immediate (bottom). + void sqshrunb(const ZRegister& zd, const ZRegister& zn, int shift); + + // Signed saturating shift right unsigned narrow by immediate (top). + void sqshrunt(const ZRegister& zd, const ZRegister& zn, int shift); + + // Signed saturating subtraction (predicated). + void sqsub(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm); + + // Signed saturating subtraction reversed vectors (predicated). + void sqsubr(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm); + + // Signed saturating extract narrow (bottom). + void sqxtnb(const ZRegister& zd, const ZRegister& zn); + + // Signed saturating extract narrow (top). + void sqxtnt(const ZRegister& zd, const ZRegister& zn); + + // Signed saturating unsigned extract narrow (bottom). + void sqxtunb(const ZRegister& zd, const ZRegister& zn); + + // Signed saturating unsigned extract narrow (top). 
+ void sqxtunt(const ZRegister& zd, const ZRegister& zn); + + // Signed rounding halving addition. + void srhadd(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm); + + // Shift right and insert (immediate). + void sri(const ZRegister& zd, const ZRegister& zn, int shift); + + // Signed rounding shift left by vector (predicated). + void srshl(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm); + + // Signed rounding shift left reversed vectors (predicated). + void srshlr(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm); + + // Signed rounding shift right by immediate. + void srshr(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + int shift); + + // Signed rounding shift right and accumulate (immediate). + void srsra(const ZRegister& zda, const ZRegister& zn, int shift); + + // Signed shift left long by immediate (bottom). + void sshllb(const ZRegister& zd, const ZRegister& zn, int shift); + + // Signed shift left long by immediate (top). + void sshllt(const ZRegister& zd, const ZRegister& zn, int shift); + + // Signed shift right and accumulate (immediate). + void ssra(const ZRegister& zda, const ZRegister& zn, int shift); + + // Signed subtract long (bottom). + void ssublb(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm); + + // Signed subtract long (bottom - top). + void ssublbt(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm); + + // Signed subtract long (top). + void ssublt(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm); + + // Signed subtract long (top - bottom). + void ssubltb(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm); + + // Signed subtract wide (bottom). + void ssubwb(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm); + + // Signed subtract wide (top). + void ssubwt(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm); + + // Subtract narrow high part (bottom). + void subhnb(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm); + + // Subtract narrow high part (top). + void subhnt(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm); + + // Signed saturating addition of unsigned value. + void suqadd(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm); + + // Programmable table lookup in one or two vector table (zeroing). + void tbl(const ZRegister& zd, + const ZRegister& zn1, + const ZRegister& zn2, + const ZRegister& zm); + + // Programmable table lookup in single vector table (merging). + void tbx(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm); + + // Unsigned absolute difference and accumulate. + void uaba(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm); + + // Unsigned absolute difference and accumulate long (bottom). + void uabalb(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm); + + // Unsigned absolute difference and accumulate long (top). + void uabalt(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm); + + // Unsigned absolute difference long (bottom). + void uabdlb(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm); + + // Unsigned absolute difference long (top). + void uabdlt(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm); + + // Unsigned add and accumulate long pairwise. + void uadalp(const ZRegister& zda, const PRegisterM& pg, const ZRegister& zn); + + // Unsigned add long (bottom). 
+ void uaddlb(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm); + + // Unsigned add long (top). + void uaddlt(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm); + + // Unsigned add wide (bottom). + void uaddwb(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm); + + // Unsigned add wide (top). + void uaddwt(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm); + + // Unsigned halving addition. + void uhadd(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm); + + // Unsigned halving subtract. + void uhsub(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm); + + // Unsigned halving subtract reversed vectors. + void uhsubr(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm); + + // Unsigned maximum pairwise. + void umaxp(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm); + + // Unsigned minimum pairwise. + void uminp(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm); + + // Unsigned multiply-add long to accumulator (bottom, indexed). + void umlalb(const ZRegister& zda, + const ZRegister& zn, + const ZRegister& zm, + int index); + + // Unsigned multiply-add long to accumulator (bottom). + void umlalb(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm); + + // Unsigned multiply-add long to accumulator (top, indexed). + void umlalt(const ZRegister& zda, + const ZRegister& zn, + const ZRegister& zm, + int index); + + // Unsigned multiply-add long to accumulator (top). + void umlalt(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm); + + // Unsigned multiply-subtract long from accumulator (bottom, indexed). + void umlslb(const ZRegister& zda, + const ZRegister& zn, + const ZRegister& zm, + int index); + + // Unsigned multiply-subtract long from accumulator (bottom). + void umlslb(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm); + + // Unsigned multiply-subtract long from accumulator (top, indexed). + void umlslt(const ZRegister& zda, + const ZRegister& zn, + const ZRegister& zm, + int index); + + // Unsigned multiply-subtract long from accumulator (top). + void umlslt(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm); + + // Unsigned multiply returning high half (unpredicated). + void umulh(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm); + + // Unsigned multiply long (bottom, indexed). + void umullb(const ZRegister& zd, + const ZRegister& zn, + const ZRegister& zm, + int index); + + // Unsigned multiply long (bottom). + void umullb(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm); + + // Unsigned multiply long (top, indexed). + void umullt(const ZRegister& zd, + const ZRegister& zn, + const ZRegister& zm, + int index); + + // Unsigned multiply long (top). + void umullt(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm); + + // Unsigned saturating addition (predicated). + void uqadd(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm); + + // Unsigned saturating rounding shift left by vector (predicated). + void uqrshl(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm); + + // Unsigned saturating rounding shift left reversed vectors (predicated). 
+ void uqrshlr(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm); + + // Unsigned saturating rounding shift right narrow by immediate (bottom). + void uqrshrnb(const ZRegister& zd, const ZRegister& zn, int shift); + + // Unsigned saturating rounding shift right narrow by immediate (top). + void uqrshrnt(const ZRegister& zd, const ZRegister& zn, int shift); + + // Unsigned saturating shift left by immediate. + void uqshl(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + int shift); + + // Unsigned saturating shift left by vector (predicated). + void uqshl(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm); + + // Unsigned saturating shift left reversed vectors (predicated). + void uqshlr(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm); + + // Unsigned saturating shift right narrow by immediate (bottom). + void uqshrnb(const ZRegister& zd, const ZRegister& zn, int shift); + + // Unsigned saturating shift right narrow by immediate (top). + void uqshrnt(const ZRegister& zd, const ZRegister& zn, int shift); + + // Unsigned saturating subtraction (predicated). + void uqsub(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm); + + // Unsigned saturating subtraction reversed vectors (predicated). + void uqsubr(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm); + + // Unsigned saturating extract narrow (bottom). + void uqxtnb(const ZRegister& zd, const ZRegister& zn); + + // Unsigned saturating extract narrow (top). + void uqxtnt(const ZRegister& zd, const ZRegister& zn); + + // Unsigned reciprocal estimate (predicated). + void urecpe(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn); + + // Unsigned rounding halving addition. + void urhadd(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm); + + // Unsigned rounding shift left by vector (predicated). + void urshl(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm); + + // Unsigned rounding shift left reversed vectors (predicated). + void urshlr(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm); + + // Unsigned rounding shift right by immediate. + void urshr(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + int shift); + + // Unsigned reciprocal square root estimate (predicated). + void ursqrte(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn); + + // Unsigned rounding shift right and accumulate (immediate). + void ursra(const ZRegister& zda, const ZRegister& zn, int shift); + + // Unsigned shift left long by immediate (bottom). + void ushllb(const ZRegister& zd, const ZRegister& zn, int shift); + + // Unsigned shift left long by immediate (top). + void ushllt(const ZRegister& zd, const ZRegister& zn, int shift); + + // Unsigned saturating addition of signed value. + void usqadd(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm); + + // Unsigned shift right and accumulate (immediate). + void usra(const ZRegister& zda, const ZRegister& zn, int shift); + + // Unsigned subtract long (bottom). + void usublb(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm); + + // Unsigned subtract long (top). + void usublt(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm); + + // Unsigned subtract wide (bottom). 
+ void usubwb(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm); + + // Unsigned subtract wide (top). + void usubwt(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm); + + // While decrementing signed scalar greater than or equal to scalar. + void whilege(const PRegisterWithLaneSize& pd, + const Register& rn, + const Register& rm); + + // While decrementing signed scalar greater than scalar. + void whilegt(const PRegisterWithLaneSize& pd, + const Register& rn, + const Register& rm); + + // While decrementing unsigned scalar higher than scalar. + void whilehi(const PRegisterWithLaneSize& pd, + const Register& rn, + const Register& rm); + + // While decrementing unsigned scalar higher or same as scalar. + void whilehs(const PRegisterWithLaneSize& pd, + const Register& rn, + const Register& rm); + + // While free of read-after-write conflicts. + void whilerw(const PRegisterWithLaneSize& pd, + const Register& rn, + const Register& rm); + + // While free of write-after-read/write conflicts. + void whilewr(const PRegisterWithLaneSize& pd, + const Register& rn, + const Register& rm); + + // Bitwise exclusive OR and rotate right by immediate. + void xar(const ZRegister& zd, + const ZRegister& zn, + const ZRegister& zm, + int shift); + + // Floating-point matrix multiply-accumulate. + void fmmla(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm); + + // Signed integer matrix multiply-accumulate. + void smmla(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm); + + // Unsigned by signed integer matrix multiply-accumulate. + void usmmla(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm); + + // Unsigned integer matrix multiply-accumulate. + void ummla(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm); + + // Unsigned by signed integer dot product. + void usdot(const ZRegister& zda, const ZRegister& zn, const ZRegister& zm); + + // Unsigned by signed integer indexed dot product. + void usdot(const ZRegister& zda, + const ZRegister& zn, + const ZRegister& zm, + int index); + + // Signed by unsigned integer indexed dot product. + void sudot(const ZRegister& zda, + const ZRegister& zn, + const ZRegister& zm, + int index); + + // Add with Tag. + void addg(const Register& xd, const Register& xn, int offset, int tag_offset); + + // Tag Mask Insert. + void gmi(const Register& xd, const Register& xn, const Register& xm); + + // Insert Random Tag. + void irg(const Register& xd, const Register& xn, const Register& xm = xzr); + + // Load Allocation Tag. + void ldg(const Register& xt, const MemOperand& addr); + + void StoreTagHelper(const Register& xt, const MemOperand& addr, Instr op); + + // Store Allocation Tags. + void st2g(const Register& xt, const MemOperand& addr); + + // Store Allocation Tag. + void stg(const Register& xt, const MemOperand& addr); + + // Store Allocation Tag and Pair of registers. + void stgp(const Register& xt1, const Register& xt2, const MemOperand& addr); + + // Store Allocation Tags, Zeroing. + void stz2g(const Register& xt, const MemOperand& addr); + + // Store Allocation Tag, Zeroing. + void stzg(const Register& xt, const MemOperand& addr); + + // Subtract with Tag. + void subg(const Register& xd, const Register& xn, int offset, int tag_offset); + + // Subtract Pointer. + void subp(const Register& xd, const Register& xn, const Register& xm); + + // Subtract Pointer, setting Flags. + void subps(const Register& xd, const Register& xn, const Register& xm); + + // Compare with Tag. 
+ void cmpp(const Register& xn, const Register& xm) { subps(xzr, xn, xm); } + + // Memory Copy. + void cpye(const Register& rd, const Register& rs, const Register& rn); + + // Memory Copy, reads and writes non-temporal. + void cpyen(const Register& rd, const Register& rs, const Register& rn); + + // Memory Copy, reads non-temporal. + void cpyern(const Register& rd, const Register& rs, const Register& rn); + + // Memory Copy, writes non-temporal. + void cpyewn(const Register& rd, const Register& rs, const Register& rn); + + // Memory Copy Forward-only. + void cpyfe(const Register& rd, const Register& rs, const Register& rn); + + // Memory Copy Forward-only, reads and writes non-temporal. + void cpyfen(const Register& rd, const Register& rs, const Register& rn); + + // Memory Copy Forward-only, reads non-temporal. + void cpyfern(const Register& rd, const Register& rs, const Register& rn); + + // Memory Copy Forward-only, writes non-temporal. + void cpyfewn(const Register& rd, const Register& rs, const Register& rn); + + // Memory Copy Forward-only. + void cpyfm(const Register& rd, const Register& rs, const Register& rn); + + // Memory Copy Forward-only, reads and writes non-temporal. + void cpyfmn(const Register& rd, const Register& rs, const Register& rn); + + // Memory Copy Forward-only, reads non-temporal. + void cpyfmrn(const Register& rd, const Register& rs, const Register& rn); + + // Memory Copy Forward-only, writes non-temporal. + void cpyfmwn(const Register& rd, const Register& rs, const Register& rn); + + // Memory Copy Forward-only. + void cpyfp(const Register& rd, const Register& rs, const Register& rn); + + // Memory Copy Forward-only, reads and writes non-temporal. + void cpyfpn(const Register& rd, const Register& rs, const Register& rn); + + // Memory Copy Forward-only, reads non-temporal. + void cpyfprn(const Register& rd, const Register& rs, const Register& rn); + + // Memory Copy Forward-only, writes non-temporal. + void cpyfpwn(const Register& rd, const Register& rs, const Register& rn); + + // Memory Copy. + void cpym(const Register& rd, const Register& rs, const Register& rn); + + // Memory Copy, reads and writes non-temporal. + void cpymn(const Register& rd, const Register& rs, const Register& rn); + + // Memory Copy, reads non-temporal. + void cpymrn(const Register& rd, const Register& rs, const Register& rn); + + // Memory Copy, writes non-temporal. + void cpymwn(const Register& rd, const Register& rs, const Register& rn); + + // Memory Copy. + void cpyp(const Register& rd, const Register& rs, const Register& rn); + + // Memory Copy, reads and writes non-temporal. + void cpypn(const Register& rd, const Register& rs, const Register& rn); + + // Memory Copy, reads non-temporal. + void cpyprn(const Register& rd, const Register& rs, const Register& rn); + + // Memory Copy, writes non-temporal. + void cpypwn(const Register& rd, const Register& rs, const Register& rn); + + // Memory Set. + void sete(const Register& rd, const Register& rn, const Register& rs); + + // Memory Set, non-temporal. + void seten(const Register& rd, const Register& rn, const Register& rs); + + // Memory Set with tag setting. + void setge(const Register& rd, const Register& rn, const Register& rs); + + // Memory Set with tag setting, non-temporal. + void setgen(const Register& rd, const Register& rn, const Register& rs); + + // Memory Set with tag setting. + void setgm(const Register& rd, const Register& rn, const Register& rs); + + // Memory Set with tag setting, non-temporal. 
+ void setgmn(const Register& rd, const Register& rn, const Register& rs);
+
+ // Memory Set with tag setting.
+ void setgp(const Register& rd, const Register& rn, const Register& rs);
+
+ // Memory Set with tag setting, non-temporal.
+ void setgpn(const Register& rd, const Register& rn, const Register& rs);
+
+ // Memory Set.
+ void setm(const Register& rd, const Register& rn, const Register& rs);
+
+ // Memory Set, non-temporal.
+ void setmn(const Register& rd, const Register& rn, const Register& rs);
+
+ // Memory Set.
+ void setp(const Register& rd, const Register& rn, const Register& rs);
+
+ // Memory Set, non-temporal.
+ void setpn(const Register& rd, const Register& rn, const Register& rs);
+
+ // Absolute value.
+ void abs(const Register& rd, const Register& rn);
+
+ // Count bits.
+ void cnt(const Register& rd, const Register& rn);
+
+ // Count Trailing Zeros.
+ void ctz(const Register& rd, const Register& rn);
+
+ // Signed Maximum.
+ void smax(const Register& rd, const Register& rn, const Operand& op);
+
+ // Signed Minimum.
+ void smin(const Register& rd, const Register& rn, const Operand& op);
+
+ // Unsigned Maximum.
+ void umax(const Register& rd, const Register& rn, const Operand& op);
+
+ // Unsigned Minimum.
+ void umin(const Register& rd, const Register& rn, const Operand& op);
+
+ // Emit generic instructions.
+
+ // Emit raw instructions into the instruction stream.
+ void dci(Instr raw_inst) { Emit(raw_inst); }
+
+ // Emit 32 bits of data into the instruction stream.
+ void dc32(uint32_t data) { dc(data); }
+
+ // Emit 64 bits of data into the instruction stream.
+ void dc64(uint64_t data) { dc(data); }
+
+ // Emit data in the instruction stream.
+ template <typename T>
+ void dc(T data) {
+ VIXL_ASSERT(AllowAssembler());
+ GetBuffer()->Emit(data);
+ }
+
+ // Copy a string into the instruction stream, including the terminating NULL
+ // character. The instruction pointer is then aligned correctly for
+ // subsequent instructions.
+ void EmitString(const char* string) {
+ VIXL_ASSERT(string != NULL);
+ VIXL_ASSERT(AllowAssembler());
+
+ GetBuffer()->EmitString(string);
+ GetBuffer()->Align();
+ }
+
+ // Code generation helpers.
+ static bool OneInstrMoveImmediateHelper(Assembler* assm,
+ const Register& dst,
+ uint64_t imm);
+
+ // Register encoding.
+ template <int hibit, int lobit>
+ static Instr Rx(CPURegister rx) {
+ VIXL_ASSERT(rx.GetCode() != kSPRegInternalCode);
+ return ImmUnsignedField<hibit, lobit>(rx.GetCode());
+ }
+
+#define CPU_REGISTER_FIELD_NAMES(V) V(d) V(n) V(m) V(a) V(t) V(t2) V(s)
+#define REGISTER_ENCODER(N) \
+ static Instr R##N(CPURegister r##N) { \
+ return Rx<R##N##_offset + R##N##_width - 1, R##N##_offset>(r##N); \
+ }
+ CPU_REGISTER_FIELD_NAMES(REGISTER_ENCODER)
+#undef REGISTER_ENCODER
+#undef CPU_REGISTER_FIELD_NAMES
+
+ static Instr RmNot31(CPURegister rm) {
+ VIXL_ASSERT(rm.GetCode() != kSPRegInternalCode);
+ VIXL_ASSERT(!rm.IsZero());
+ return Rm(rm);
+ }
+
+ // These encoding functions allow the stack pointer to be encoded, and
+ // disallow the zero register.
+ static Instr RdSP(Register rd) { + VIXL_ASSERT(!rd.IsZero()); + return (rd.GetCode() & kRegCodeMask) << Rd_offset; + } + + static Instr RnSP(Register rn) { + VIXL_ASSERT(!rn.IsZero()); + return (rn.GetCode() & kRegCodeMask) << Rn_offset; + } + + static Instr RmSP(Register rm) { + VIXL_ASSERT(!rm.IsZero()); + return (rm.GetCode() & kRegCodeMask) << Rm_offset; + } + + static Instr Pd(PRegister pd) { + return Rx(pd); + } + + static Instr Pm(PRegister pm) { + return Rx(pm); + } + + static Instr Pn(PRegister pn) { + return Rx(pn); + } + + static Instr PgLow8(PRegister pg) { + // Governing predicates can be merging, zeroing, or unqualified. They should + // never have a lane size. + VIXL_ASSERT(!pg.HasLaneSize()); + return Rx(pg); + } + + template + static Instr Pg(PRegister pg) { + // Governing predicates can be merging, zeroing, or unqualified. They should + // never have a lane size. + VIXL_ASSERT(!pg.HasLaneSize()); + return Rx(pg); + } + + // Flags encoding. + static Instr Flags(FlagsUpdate S) { + if (S == SetFlags) { + return 1 << FlagsUpdate_offset; + } else if (S == LeaveFlags) { + return 0 << FlagsUpdate_offset; + } + VIXL_UNREACHABLE(); + return 0; + } + + static Instr Cond(Condition cond) { return cond << Condition_offset; } + + // Generic immediate encoding. + template + static Instr ImmField(int64_t imm) { + VIXL_STATIC_ASSERT((hibit >= lobit) && (lobit >= 0)); + VIXL_STATIC_ASSERT(hibit < (sizeof(Instr) * kBitsPerByte)); + int fieldsize = hibit - lobit + 1; + VIXL_ASSERT(IsIntN(fieldsize, imm)); + return static_cast(TruncateToUintN(fieldsize, imm) << lobit); + } + + // For unsigned immediate encoding. + // TODO: Handle signed and unsigned immediate in satisfactory way. + template + static Instr ImmUnsignedField(uint64_t imm) { + VIXL_STATIC_ASSERT((hibit >= lobit) && (lobit >= 0)); + VIXL_STATIC_ASSERT(hibit < (sizeof(Instr) * kBitsPerByte)); + VIXL_ASSERT(IsUintN(hibit - lobit + 1, imm)); + return static_cast(imm << lobit); + } + + // PC-relative address encoding. + static Instr ImmPCRelAddress(int64_t imm21) { + VIXL_ASSERT(IsInt21(imm21)); + Instr imm = static_cast(TruncateToUint21(imm21)); + Instr immhi = (imm >> ImmPCRelLo_width) << ImmPCRelHi_offset; + Instr immlo = imm << ImmPCRelLo_offset; + return (immhi & ImmPCRelHi_mask) | (immlo & ImmPCRelLo_mask); + } + + // Branch encoding. + static Instr ImmUncondBranch(int64_t imm26) { + VIXL_ASSERT(IsInt26(imm26)); + return TruncateToUint26(imm26) << ImmUncondBranch_offset; + } + + static Instr ImmCondBranch(int64_t imm19) { + VIXL_ASSERT(IsInt19(imm19)); + return TruncateToUint19(imm19) << ImmCondBranch_offset; + } + + static Instr ImmCmpBranch(int64_t imm19) { + VIXL_ASSERT(IsInt19(imm19)); + return TruncateToUint19(imm19) << ImmCmpBranch_offset; + } + + static Instr ImmTestBranch(int64_t imm14) { + VIXL_ASSERT(IsInt14(imm14)); + return TruncateToUint14(imm14) << ImmTestBranch_offset; + } + + static Instr ImmTestBranchBit(unsigned bit_pos) { + VIXL_ASSERT(IsUint6(bit_pos)); + // Subtract five from the shift offset, as we need bit 5 from bit_pos. + unsigned bit5 = bit_pos << (ImmTestBranchBit5_offset - 5); + unsigned bit40 = bit_pos << ImmTestBranchBit40_offset; + bit5 &= ImmTestBranchBit5_mask; + bit40 &= ImmTestBranchBit40_mask; + return bit5 | bit40; + } + + // Data Processing encoding. + static Instr SF(Register rd) { + return rd.Is64Bits() ? SixtyFourBits : ThirtyTwoBits; + } + + static Instr ImmAddSub(int imm) { + VIXL_ASSERT(IsImmAddSub(imm)); + if (IsUint12(imm)) { // No shift required. 
+ imm <<= ImmAddSub_offset; + } else { + imm = ((imm >> 12) << ImmAddSub_offset) | (1 << ImmAddSubShift_offset); + } + return imm; + } + + static Instr SVEImmSetBits(unsigned imms, unsigned lane_size) { + VIXL_ASSERT(IsUint6(imms)); + VIXL_ASSERT((lane_size == kDRegSize) || IsUint6(imms + 3)); + USE(lane_size); + return imms << SVEImmSetBits_offset; + } + + static Instr SVEImmRotate(unsigned immr, unsigned lane_size) { + VIXL_ASSERT(IsUintN(WhichPowerOf2(lane_size), immr)); + USE(lane_size); + return immr << SVEImmRotate_offset; + } + + static Instr SVEBitN(unsigned bitn) { + VIXL_ASSERT(IsUint1(bitn)); + return bitn << SVEBitN_offset; + } + + static Instr SVEDtype(unsigned msize_in_bytes_log2, + unsigned esize_in_bytes_log2, + bool is_signed, + int dtype_h_lsb = 23, + int dtype_l_lsb = 21) { + VIXL_ASSERT(msize_in_bytes_log2 <= kDRegSizeInBytesLog2); + VIXL_ASSERT(esize_in_bytes_log2 <= kDRegSizeInBytesLog2); + Instr dtype_h = msize_in_bytes_log2; + Instr dtype_l = esize_in_bytes_log2; + // Signed forms use the encodings where msize would be greater than esize. + if (is_signed) { + dtype_h = dtype_h ^ 0x3; + dtype_l = dtype_l ^ 0x3; + } + VIXL_ASSERT(IsUint2(dtype_h)); + VIXL_ASSERT(IsUint2(dtype_l)); + VIXL_ASSERT((dtype_h > dtype_l) == is_signed); + + return (dtype_h << dtype_h_lsb) | (dtype_l << dtype_l_lsb); + } + + static Instr SVEDtypeSplit(unsigned msize_in_bytes_log2, + unsigned esize_in_bytes_log2, + bool is_signed) { + return SVEDtype(msize_in_bytes_log2, + esize_in_bytes_log2, + is_signed, + 23, + 13); + } + + static Instr ImmS(unsigned imms, unsigned reg_size) { + VIXL_ASSERT(((reg_size == kXRegSize) && IsUint6(imms)) || + ((reg_size == kWRegSize) && IsUint5(imms))); + USE(reg_size); + return imms << ImmS_offset; + } + + static Instr ImmR(unsigned immr, unsigned reg_size) { + VIXL_ASSERT(((reg_size == kXRegSize) && IsUint6(immr)) || + ((reg_size == kWRegSize) && IsUint5(immr))); + USE(reg_size); + VIXL_ASSERT(IsUint6(immr)); + return immr << ImmR_offset; + } + + static Instr ImmSetBits(unsigned imms, unsigned reg_size) { + VIXL_ASSERT((reg_size == kWRegSize) || (reg_size == kXRegSize)); + VIXL_ASSERT(IsUint6(imms)); + VIXL_ASSERT((reg_size == kXRegSize) || IsUint6(imms + 3)); + USE(reg_size); + return imms << ImmSetBits_offset; + } + + static Instr ImmRotate(unsigned immr, unsigned reg_size) { + VIXL_ASSERT((reg_size == kWRegSize) || (reg_size == kXRegSize)); + VIXL_ASSERT(((reg_size == kXRegSize) && IsUint6(immr)) || + ((reg_size == kWRegSize) && IsUint5(immr))); + USE(reg_size); + return immr << ImmRotate_offset; + } + + static Instr ImmLLiteral(int64_t imm19) { + VIXL_ASSERT(IsInt19(imm19)); + return TruncateToUint19(imm19) << ImmLLiteral_offset; + } + + static Instr BitN(unsigned bitn, unsigned reg_size) { + VIXL_ASSERT((reg_size == kWRegSize) || (reg_size == kXRegSize)); + VIXL_ASSERT((reg_size == kXRegSize) || (bitn == 0)); + USE(reg_size); + return bitn << BitN_offset; + } + + static Instr ShiftDP(Shift shift) { + VIXL_ASSERT(shift == LSL || shift == LSR || shift == ASR || shift == ROR); + return shift << ShiftDP_offset; + } + + static Instr ImmDPShift(unsigned amount) { + VIXL_ASSERT(IsUint6(amount)); + return amount << ImmDPShift_offset; + } + + static Instr ExtendMode(Extend extend) { return extend << ExtendMode_offset; } + + static Instr ImmExtendShift(unsigned left_shift) { + VIXL_ASSERT(left_shift <= 4); + return left_shift << ImmExtendShift_offset; + } + + static Instr ImmCondCmp(unsigned imm) { + VIXL_ASSERT(IsUint5(imm)); + return imm << ImmCondCmp_offset; + } 
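The `ImmAddSub` encoder above relies on the A64 rule that an add/subtract immediate must be an unsigned 12-bit value, optionally shifted left by 12. As a standalone illustration (not part of the patch; `IsEncodableAddSubImmediate` is a name invented for this sketch), the same case split looks like this in isolation:

```cpp
#include <cstdint>
#include <cstdio>

// Mirrors the case split made by ImmAddSub/IsImmAddSub: either the value
// already fits in 12 bits, or it is a 12-bit value shifted left by 12.
static bool IsEncodableAddSubImmediate(uint64_t imm) {
  const uint64_t kUint12Mask = 0xfff;
  if ((imm & ~kUint12Mask) == 0) return true;    // uint12, no shift needed.
  return (imm & ~(kUint12Mask << 12)) == 0;      // uint12 << 12, shift bit set.
}

int main() {
  std::printf("%d %d %d\n",
              IsEncodableAddSubImmediate(0xabc),     // 1: fits in 12 bits.
              IsEncodableAddSubImmediate(0xabc000),  // 1: 12 bits, shifted by 12.
              IsEncodableAddSubImmediate(0x1001));   // 0: not encodable.
  return 0;
}
```

Anything outside these two forms has to be materialised into a register first (for example with the move-wide instructions) before it can be added or subtracted.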
+ + static Instr Nzcv(StatusFlags nzcv) { + return ((nzcv >> Flags_offset) & 0xf) << Nzcv_offset; + } + + // MemOperand offset encoding. + static Instr ImmLSUnsigned(int64_t imm12) { + VIXL_ASSERT(IsUint12(imm12)); + return TruncateToUint12(imm12) << ImmLSUnsigned_offset; + } + + static Instr ImmLS(int64_t imm9) { + VIXL_ASSERT(IsInt9(imm9)); + return TruncateToUint9(imm9) << ImmLS_offset; + } + + static Instr ImmLSPair(int64_t imm7, unsigned access_size_in_bytes_log2) { + const auto access_size_in_bytes = 1U << access_size_in_bytes_log2; + VIXL_ASSERT(IsMultiple(imm7, access_size_in_bytes)); + int64_t scaled_imm7 = imm7 / access_size_in_bytes; + VIXL_ASSERT(IsInt7(scaled_imm7)); + return TruncateToUint7(scaled_imm7) << ImmLSPair_offset; + } + + static Instr ImmShiftLS(unsigned shift_amount) { + VIXL_ASSERT(IsUint1(shift_amount)); + return shift_amount << ImmShiftLS_offset; + } + + static Instr ImmLSPAC(int64_t imm10) { + VIXL_ASSERT(IsMultiple(imm10, 1 << 3)); + int64_t scaled_imm10 = imm10 / (1 << 3); + VIXL_ASSERT(IsInt10(scaled_imm10)); + uint32_t s_bit = (scaled_imm10 >> 9) & 1; + return (s_bit << ImmLSPACHi_offset) | + (TruncateToUint9(scaled_imm10) << ImmLSPACLo_offset); + } + + static Instr ImmPrefetchOperation(int imm5) { + VIXL_ASSERT(IsUint5(imm5)); + return imm5 << ImmPrefetchOperation_offset; + } + + static Instr ImmException(int imm16) { + VIXL_ASSERT(IsUint16(imm16)); + return imm16 << ImmException_offset; + } + + static Instr ImmUdf(int imm16) { + VIXL_ASSERT(IsUint16(imm16)); + return imm16 << ImmUdf_offset; + } + + static Instr ImmSystemRegister(int imm16) { + VIXL_ASSERT(IsUint16(imm16)); + return imm16 << ImmSystemRegister_offset; + } + + static Instr ImmRMIFRotation(int imm6) { + VIXL_ASSERT(IsUint6(imm6)); + return imm6 << ImmRMIFRotation_offset; + } + + static Instr ImmHint(int imm7) { + VIXL_ASSERT(IsUint7(imm7)); + return imm7 << ImmHint_offset; + } + + static Instr CRm(int imm4) { + VIXL_ASSERT(IsUint4(imm4)); + return imm4 << CRm_offset; + } + + static Instr CRn(int imm4) { + VIXL_ASSERT(IsUint4(imm4)); + return imm4 << CRn_offset; + } + + static Instr SysOp(int imm14) { + VIXL_ASSERT(IsUint14(imm14)); + return imm14 << SysOp_offset; + } + + static Instr ImmSysOp1(int imm3) { + VIXL_ASSERT(IsUint3(imm3)); + return imm3 << SysOp1_offset; + } + + static Instr ImmSysOp2(int imm3) { + VIXL_ASSERT(IsUint3(imm3)); + return imm3 << SysOp2_offset; + } + + static Instr ImmBarrierDomain(int imm2) { + VIXL_ASSERT(IsUint2(imm2)); + return imm2 << ImmBarrierDomain_offset; + } + + static Instr ImmBarrierType(int imm2) { + VIXL_ASSERT(IsUint2(imm2)); + return imm2 << ImmBarrierType_offset; + } + + // Move immediates encoding. + static Instr ImmMoveWide(uint64_t imm) { + VIXL_ASSERT(IsUint16(imm)); + return static_cast(imm << ImmMoveWide_offset); + } + + static Instr ShiftMoveWide(int64_t shift) { + VIXL_ASSERT(IsUint2(shift)); + return static_cast(shift << ShiftMoveWide_offset); + } + + // FP Immediates. + static Instr ImmFP16(Float16 imm); + static Instr ImmFP32(float imm); + static Instr ImmFP64(double imm); + + // FP register type. + static Instr FPType(VRegister fd) { + VIXL_ASSERT(fd.IsScalar()); + switch (fd.GetSizeInBits()) { + case 16: + return FP16; + case 32: + return FP32; + case 64: + return FP64; + default: + VIXL_UNREACHABLE(); + return 0; + } + } + + static Instr FPScale(unsigned scale) { + VIXL_ASSERT(IsUint6(scale)); + return scale << FPScale_offset; + } + + // Immediate field checking helpers. 
+ static bool IsImmAddSub(int64_t immediate); + static bool IsImmConditionalCompare(int64_t immediate); + static bool IsImmFP16(Float16 imm); + + static bool IsImmFP32(float imm) { return IsImmFP32(FloatToRawbits(imm)); } + + static bool IsImmFP32(uint32_t bits); + + static bool IsImmFP64(double imm) { return IsImmFP64(DoubleToRawbits(imm)); } + + static bool IsImmFP64(uint64_t bits); + static bool IsImmLogical(uint64_t value, + unsigned width, + unsigned* n = NULL, + unsigned* imm_s = NULL, + unsigned* imm_r = NULL); + static bool IsImmLSPair(int64_t offset, unsigned access_size_in_bytes_log2); + static bool IsImmLSScaled(int64_t offset, unsigned access_size_in_bytes_log2); + static bool IsImmLSUnscaled(int64_t offset); + static bool IsImmMovn(uint64_t imm, unsigned reg_size); + static bool IsImmMovz(uint64_t imm, unsigned reg_size); + + // Instruction bits for vector format in data processing operations. + static Instr VFormat(VRegister vd) { + if (vd.Is64Bits()) { + switch (vd.GetLanes()) { + case 2: + return NEON_2S; + case 4: + return NEON_4H; + case 8: + return NEON_8B; + default: + return 0xffffffff; + } + } else { + VIXL_ASSERT(vd.Is128Bits()); + switch (vd.GetLanes()) { + case 2: + return NEON_2D; + case 4: + return NEON_4S; + case 8: + return NEON_8H; + case 16: + return NEON_16B; + default: + return 0xffffffff; + } + } + } + + // Instruction bits for vector format in floating point data processing + // operations. + static Instr FPFormat(VRegister vd) { + switch (vd.GetLanes()) { + case 1: + // Floating point scalar formats. + switch (vd.GetSizeInBits()) { + case 16: + return FP16; + case 32: + return FP32; + case 64: + return FP64; + default: + VIXL_UNREACHABLE(); + } + break; + case 2: + // Two lane floating point vector formats. + switch (vd.GetSizeInBits()) { + case 64: + return NEON_FP_2S; + case 128: + return NEON_FP_2D; + default: + VIXL_UNREACHABLE(); + } + break; + case 4: + // Four lane floating point vector formats. + switch (vd.GetSizeInBits()) { + case 64: + return NEON_FP_4H; + case 128: + return NEON_FP_4S; + default: + VIXL_UNREACHABLE(); + } + break; + case 8: + // Eight lane floating point vector format. + VIXL_ASSERT(vd.Is128Bits()); + return NEON_FP_8H; + default: + VIXL_UNREACHABLE(); + return 0; + } + VIXL_UNREACHABLE(); + return 0; + } + + // Instruction bits for vector format in load and store operations. + static Instr LSVFormat(VRegister vd) { + if (vd.Is64Bits()) { + switch (vd.GetLanes()) { + case 1: + return LS_NEON_1D; + case 2: + return LS_NEON_2S; + case 4: + return LS_NEON_4H; + case 8: + return LS_NEON_8B; + default: + return 0xffffffff; + } + } else { + VIXL_ASSERT(vd.Is128Bits()); + switch (vd.GetLanes()) { + case 2: + return LS_NEON_2D; + case 4: + return LS_NEON_4S; + case 8: + return LS_NEON_8H; + case 16: + return LS_NEON_16B; + default: + return 0xffffffff; + } + } + } + + // Instruction bits for scalar format in data processing operations. 
+ static Instr SFormat(VRegister vd) { + VIXL_ASSERT(vd.GetLanes() == 1); + switch (vd.GetSizeInBytes()) { + case 1: + return NEON_B; + case 2: + return NEON_H; + case 4: + return NEON_S; + case 8: + return NEON_D; + default: + return 0xffffffff; + } + } + + template + static Instr SVESize(const T& rd) { + VIXL_ASSERT(rd.IsZRegister() || rd.IsPRegister()); + VIXL_ASSERT(rd.HasLaneSize()); + switch (rd.GetLaneSizeInBytes()) { + case 1: + return SVE_B; + case 2: + return SVE_H; + case 4: + return SVE_S; + case 8: + return SVE_D; + default: + return 0xffffffff; + } + } + + static Instr ImmSVEPredicateConstraint(int pattern) { + VIXL_ASSERT(IsUint5(pattern)); + return (pattern << ImmSVEPredicateConstraint_offset) & + ImmSVEPredicateConstraint_mask; + } + + static Instr ImmNEONHLM(int index, int num_bits) { + int h, l, m; + if (num_bits == 3) { + VIXL_ASSERT(IsUint3(index)); + h = (index >> 2) & 1; + l = (index >> 1) & 1; + m = (index >> 0) & 1; + } else if (num_bits == 2) { + VIXL_ASSERT(IsUint2(index)); + h = (index >> 1) & 1; + l = (index >> 0) & 1; + m = 0; + } else { + VIXL_ASSERT(IsUint1(index) && (num_bits == 1)); + h = (index >> 0) & 1; + l = 0; + m = 0; + } + return (h << NEONH_offset) | (l << NEONL_offset) | (m << NEONM_offset); + } + + static Instr ImmRotFcadd(int rot) { + VIXL_ASSERT(rot == 90 || rot == 270); + return (((rot == 270) ? 1 : 0) << ImmRotFcadd_offset); + } + + static Instr ImmRotFcmlaSca(int rot) { + VIXL_ASSERT(rot == 0 || rot == 90 || rot == 180 || rot == 270); + return (rot / 90) << ImmRotFcmlaSca_offset; + } + + static Instr ImmRotFcmlaVec(int rot) { + VIXL_ASSERT(rot == 0 || rot == 90 || rot == 180 || rot == 270); + return (rot / 90) << ImmRotFcmlaVec_offset; + } + + static Instr ImmNEONExt(int imm4) { + VIXL_ASSERT(IsUint4(imm4)); + return imm4 << ImmNEONExt_offset; + } + + static Instr ImmNEON5(Instr format, int index) { + VIXL_ASSERT(IsUint4(index)); + int s = LaneSizeInBytesLog2FromFormat(static_cast(format)); + int imm5 = (index << (s + 1)) | (1 << s); + return imm5 << ImmNEON5_offset; + } + + static Instr ImmNEON4(Instr format, int index) { + VIXL_ASSERT(IsUint4(index)); + int s = LaneSizeInBytesLog2FromFormat(static_cast(format)); + int imm4 = index << s; + return imm4 << ImmNEON4_offset; + } + + static Instr ImmNEONabcdefgh(int imm8) { + VIXL_ASSERT(IsUint8(imm8)); + Instr instr; + instr = ((imm8 >> 5) & 7) << ImmNEONabc_offset; + instr |= (imm8 & 0x1f) << ImmNEONdefgh_offset; + return instr; + } + + static Instr NEONCmode(int cmode) { + VIXL_ASSERT(IsUint4(cmode)); + return cmode << NEONCmode_offset; + } + + static Instr NEONModImmOp(int op) { + VIXL_ASSERT(IsUint1(op)); + return op << NEONModImmOp_offset; + } + + // Size of the code generated since label to the current position. 
+ size_t GetSizeOfCodeGeneratedSince(Label* label) const { + VIXL_ASSERT(label->IsBound()); + return GetBuffer().GetOffsetFrom(label->GetLocation()); + } + VIXL_DEPRECATED("GetSizeOfCodeGeneratedSince", + size_t SizeOfCodeGeneratedSince(Label* label) const) { + return GetSizeOfCodeGeneratedSince(label); + } + + VIXL_DEPRECATED("GetBuffer().GetCapacity()", + size_t GetBufferCapacity() const) { + return GetBuffer().GetCapacity(); + } + VIXL_DEPRECATED("GetBuffer().GetCapacity()", size_t BufferCapacity() const) { + return GetBuffer().GetCapacity(); + } + + VIXL_DEPRECATED("GetBuffer().GetRemainingBytes()", + size_t GetRemainingBufferSpace() const) { + return GetBuffer().GetRemainingBytes(); + } + VIXL_DEPRECATED("GetBuffer().GetRemainingBytes()", + size_t RemainingBufferSpace() const) { + return GetBuffer().GetRemainingBytes(); + } + + PositionIndependentCodeOption GetPic() const { return pic_; } + VIXL_DEPRECATED("GetPic", PositionIndependentCodeOption pic() const) { + return GetPic(); + } + + CPUFeatures* GetCPUFeatures() { return &cpu_features_; } + + void SetCPUFeatures(const CPUFeatures& cpu_features) { + cpu_features_ = cpu_features; + } + + bool AllowPageOffsetDependentCode() const { + return (GetPic() == PageOffsetDependentCode) || + (GetPic() == PositionDependentCode); + } + + static Register AppropriateZeroRegFor(const CPURegister& reg) { + return reg.Is64Bits() ? Register(xzr) : Register(wzr); + } + + protected: + void LoadStore(const CPURegister& rt, + const MemOperand& addr, + LoadStoreOp op, + LoadStoreScalingOption option = PreferScaledOffset); + + void LoadStorePAC(const Register& xt, + const MemOperand& addr, + LoadStorePACOp op); + + void LoadStorePair(const CPURegister& rt, + const CPURegister& rt2, + const MemOperand& addr, + LoadStorePairOp op); + void LoadStoreStruct(const VRegister& vt, + const MemOperand& addr, + NEONLoadStoreMultiStructOp op); + void LoadStoreStruct1(const VRegister& vt, + int reg_count, + const MemOperand& addr); + void LoadStoreStructSingle(const VRegister& vt, + uint32_t lane, + const MemOperand& addr, + NEONLoadStoreSingleStructOp op); + void LoadStoreStructSingleAllLanes(const VRegister& vt, + const MemOperand& addr, + NEONLoadStoreSingleStructOp op); + void LoadStoreStructVerify(const VRegister& vt, + const MemOperand& addr, + Instr op); + + // Set `is_load` to false in default as it's only used in the + // scalar-plus-vector form. + Instr SVEMemOperandHelper(unsigned msize_in_bytes_log2, + int num_regs, + const SVEMemOperand& addr, + bool is_load = false); + + // E.g. st1b, st1h, ... + // This supports both contiguous and scatter stores. + void SVESt1Helper(unsigned msize_in_bytes_log2, + const ZRegister& zt, + const PRegister& pg, + const SVEMemOperand& addr); + + // E.g. ld1b, ld1h, ... + // This supports both contiguous and gather loads. + void SVELd1Helper(unsigned msize_in_bytes_log2, + const ZRegister& zt, + const PRegisterZ& pg, + const SVEMemOperand& addr, + bool is_signed); + + // E.g. ld1rb, ld1rh, ... + void SVELd1BroadcastHelper(unsigned msize_in_bytes_log2, + const ZRegister& zt, + const PRegisterZ& pg, + const SVEMemOperand& addr, + bool is_signed); + + // E.g. ldff1b, ldff1h, ... + // This supports both contiguous and gather loads. + void SVELdff1Helper(unsigned msize_in_bytes_log2, + const ZRegister& zt, + const PRegisterZ& pg, + const SVEMemOperand& addr, + bool is_signed); + + // Common code for the helpers above. 
+ void SVELdSt1Helper(unsigned msize_in_bytes_log2, + const ZRegister& zt, + const PRegister& pg, + const SVEMemOperand& addr, + bool is_signed, + Instr op); + + // Common code for the helpers above. + void SVEScatterGatherHelper(unsigned msize_in_bytes_log2, + const ZRegister& zt, + const PRegister& pg, + const SVEMemOperand& addr, + bool is_load, + bool is_signed, + bool is_first_fault); + + // E.g. st2b, st3h, ... + void SVESt234Helper(int num_regs, + const ZRegister& zt1, + const PRegister& pg, + const SVEMemOperand& addr); + + // E.g. ld2b, ld3h, ... + void SVELd234Helper(int num_regs, + const ZRegister& zt1, + const PRegisterZ& pg, + const SVEMemOperand& addr); + + // Common code for the helpers above. + void SVELdSt234Helper(int num_regs, + const ZRegister& zt1, + const PRegister& pg, + const SVEMemOperand& addr, + Instr op); + + // E.g. ld1qb, ld1qh, ldnt1b, ... + void SVELd1St1ScaImmHelper(const ZRegister& zt, + const PRegister& pg, + const SVEMemOperand& addr, + Instr regoffset_op, + Instr immoffset_op, + int imm_divisor = 1); + + void SVELd1VecScaHelper(const ZRegister& zt, + const PRegister& pg, + const SVEMemOperand& addr, + uint32_t msize, + bool is_signed); + void SVESt1VecScaHelper(const ZRegister& zt, + const PRegister& pg, + const SVEMemOperand& addr, + uint32_t msize); + + void Prefetch(PrefetchOperation op, + const MemOperand& addr, + LoadStoreScalingOption option = PreferScaledOffset); + void Prefetch(int op, + const MemOperand& addr, + LoadStoreScalingOption option = PreferScaledOffset); + + // TODO(all): The third parameter should be passed by reference but gcc 4.8.2 + // reports a bogus uninitialised warning then. + void Logical(const Register& rd, + const Register& rn, + const Operand operand, + LogicalOp op); + + void SVELogicalImmediate(const ZRegister& zd, uint64_t imm, Instr op); + + void LogicalImmediate(const Register& rd, + const Register& rn, + unsigned n, + unsigned imm_s, + unsigned imm_r, + LogicalOp op); + + void ConditionalCompare(const Register& rn, + const Operand& operand, + StatusFlags nzcv, + Condition cond, + ConditionalCompareOp op); + + void AddSubWithCarry(const Register& rd, + const Register& rn, + const Operand& operand, + FlagsUpdate S, + AddSubWithCarryOp op); + + void CompareVectors(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const ZRegister& zn, + const ZRegister& zm, + SVEIntCompareVectorsOp op); + + void CompareVectors(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const ZRegister& zn, + int imm, + SVEIntCompareSignedImmOp op); + + void CompareVectors(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const ZRegister& zn, + unsigned imm, + SVEIntCompareUnsignedImmOp op); + + void SVEIntAddSubtractImmUnpredicatedHelper( + SVEIntAddSubtractImm_UnpredicatedOp op, + const ZRegister& zd, + int imm8, + int shift); + + void SVEElementCountToRegisterHelper(Instr op, + const Register& rd, + int pattern, + int multiplier); + + Instr EncodeSVEShiftLeftImmediate(int shift, int lane_size_in_bits); + + Instr EncodeSVEShiftRightImmediate(int shift, int lane_size_in_bits); + + void SVEBitwiseShiftImmediate(const ZRegister& zd, + const ZRegister& zn, + Instr encoded_imm, + Instr op); + + void SVEBitwiseShiftImmediatePred(const ZRegister& zdn, + const PRegisterM& pg, + Instr encoded_imm, + Instr op); + + Instr SVEMulIndexHelper(unsigned lane_size_in_bytes_log2, + const ZRegister& zm, + int index, + Instr op_h, + Instr op_s, + Instr op_d); + + Instr SVEMulLongIndexHelper(const ZRegister& zm, int index); + + Instr 
SVEMulComplexIndexHelper(const ZRegister& zm, int index); + + void SVEContiguousPrefetchScalarPlusScalarHelper(PrefetchOperation prfop, + const PRegister& pg, + const SVEMemOperand& addr, + int prefetch_size); + + void SVEContiguousPrefetchScalarPlusVectorHelper(PrefetchOperation prfop, + const PRegister& pg, + const SVEMemOperand& addr, + int prefetch_size); + + void SVEGatherPrefetchVectorPlusImmediateHelper(PrefetchOperation prfop, + const PRegister& pg, + const SVEMemOperand& addr, + int prefetch_size); + + void SVEGatherPrefetchScalarPlusImmediateHelper(PrefetchOperation prfop, + const PRegister& pg, + const SVEMemOperand& addr, + int prefetch_size); + + void SVEPrefetchHelper(PrefetchOperation prfop, + const PRegister& pg, + const SVEMemOperand& addr, + int prefetch_size); + + static Instr SVEImmPrefetchOperation(PrefetchOperation prfop) { + // SVE only supports PLD and PST, not PLI. + VIXL_ASSERT(((prfop >= PLDL1KEEP) && (prfop <= PLDL3STRM)) || + ((prfop >= PSTL1KEEP) && (prfop <= PSTL3STRM))); + // Check that we can simply map bits. + VIXL_STATIC_ASSERT(PLDL1KEEP == 0b00000); + VIXL_STATIC_ASSERT(PSTL1KEEP == 0b10000); + // Remaining operations map directly. + return ((prfop & 0b10000) >> 1) | (prfop & 0b00111); + } + + // Functions for emulating operands not directly supported by the instruction + // set. + void EmitShift(const Register& rd, + const Register& rn, + Shift shift, + unsigned amount); + void EmitExtendShift(const Register& rd, + const Register& rn, + Extend extend, + unsigned left_shift); + + void AddSub(const Register& rd, + const Register& rn, + const Operand& operand, + FlagsUpdate S, + AddSubOp op); + + void NEONTable(const VRegister& vd, + const VRegister& vn, + const VRegister& vm, + NEONTableOp op); + + // Find an appropriate LoadStoreOp or LoadStorePairOp for the specified + // registers. Only simple loads are supported; sign- and zero-extension (such + // as in LDPSW_x or LDRB_w) are not supported. + static LoadStoreOp LoadOpFor(const CPURegister& rt); + static LoadStorePairOp LoadPairOpFor(const CPURegister& rt, + const CPURegister& rt2); + static LoadStoreOp StoreOpFor(const CPURegister& rt); + static LoadStorePairOp StorePairOpFor(const CPURegister& rt, + const CPURegister& rt2); + static LoadStorePairNonTemporalOp LoadPairNonTemporalOpFor( + const CPURegister& rt, const CPURegister& rt2); + static LoadStorePairNonTemporalOp StorePairNonTemporalOpFor( + const CPURegister& rt, const CPURegister& rt2); + static LoadLiteralOp LoadLiteralOpFor(const CPURegister& rt); + + // Convenience pass-through for CPU feature checks. + bool CPUHas(CPUFeatures::Feature feature0, + CPUFeatures::Feature feature1 = CPUFeatures::kNone, + CPUFeatures::Feature feature2 = CPUFeatures::kNone, + CPUFeatures::Feature feature3 = CPUFeatures::kNone) const { + return cpu_features_.Has(feature0, feature1, feature2, feature3); + } + + // Determine whether the target CPU has the specified registers, based on the + // currently-enabled CPU features. Presence of a register does not imply + // support for arbitrary operations on it. For example, CPUs with FP have H + // registers, but most half-precision operations require the FPHalf feature. + // + // These are used to check CPU features in loads and stores that have the same + // entry point for both integer and FP registers. 
+ bool CPUHas(const CPURegister& rt) const; + bool CPUHas(const CPURegister& rt, const CPURegister& rt2) const; + + bool CPUHas(SystemRegister sysreg) const; + + private: + static uint32_t FP16ToImm8(Float16 imm); + static uint32_t FP32ToImm8(float imm); + static uint32_t FP64ToImm8(double imm); + + // Instruction helpers. + void MoveWide(const Register& rd, + uint64_t imm, + int shift, + MoveWideImmediateOp mov_op); + void DataProcShiftedRegister(const Register& rd, + const Register& rn, + const Operand& operand, + FlagsUpdate S, + Instr op); + void DataProcExtendedRegister(const Register& rd, + const Register& rn, + const Operand& operand, + FlagsUpdate S, + Instr op); + void LoadStorePairNonTemporal(const CPURegister& rt, + const CPURegister& rt2, + const MemOperand& addr, + LoadStorePairNonTemporalOp op); + void LoadLiteral(const CPURegister& rt, uint64_t imm, LoadLiteralOp op); + void ConditionalSelect(const Register& rd, + const Register& rn, + const Register& rm, + Condition cond, + ConditionalSelectOp op); + void DataProcessing1Source(const Register& rd, + const Register& rn, + DataProcessing1SourceOp op); + void DataProcessing3Source(const Register& rd, + const Register& rn, + const Register& rm, + const Register& ra, + DataProcessing3SourceOp op); + void FPDataProcessing1Source(const VRegister& fd, + const VRegister& fn, + FPDataProcessing1SourceOp op); + void FPDataProcessing3Source(const VRegister& fd, + const VRegister& fn, + const VRegister& fm, + const VRegister& fa, + FPDataProcessing3SourceOp op); + void NEONAcrossLanesL(const VRegister& vd, + const VRegister& vn, + NEONAcrossLanesOp op); + void NEONAcrossLanes(const VRegister& vd, + const VRegister& vn, + NEONAcrossLanesOp op, + Instr op_half); + void NEONModifiedImmShiftLsl(const VRegister& vd, + const int imm8, + const int left_shift, + NEONModifiedImmediateOp op); + void NEONModifiedImmShiftMsl(const VRegister& vd, + const int imm8, + const int shift_amount, + NEONModifiedImmediateOp op); + void NEONFP2Same(const VRegister& vd, const VRegister& vn, Instr vop); + void NEON3Same(const VRegister& vd, + const VRegister& vn, + const VRegister& vm, + NEON3SameOp vop); + void NEON3SameFP16(const VRegister& vd, + const VRegister& vn, + const VRegister& vm, + Instr op); + void NEONFP3Same(const VRegister& vd, + const VRegister& vn, + const VRegister& vm, + Instr op); + void NEON3DifferentL(const VRegister& vd, + const VRegister& vn, + const VRegister& vm, + NEON3DifferentOp vop); + void NEON3DifferentW(const VRegister& vd, + const VRegister& vn, + const VRegister& vm, + NEON3DifferentOp vop); + void NEON3DifferentHN(const VRegister& vd, + const VRegister& vn, + const VRegister& vm, + NEON3DifferentOp vop); + void NEONFP2RegMisc(const VRegister& vd, + const VRegister& vn, + NEON2RegMiscOp vop, + double value = 0.0); + void NEONFP2RegMiscFP16(const VRegister& vd, + const VRegister& vn, + NEON2RegMiscFP16Op vop, + double value = 0.0); + void NEON2RegMisc(const VRegister& vd, + const VRegister& vn, + NEON2RegMiscOp vop, + int value = 0); + void NEONFP2RegMisc(const VRegister& vd, const VRegister& vn, Instr op); + void NEONFP2RegMiscFP16(const VRegister& vd, const VRegister& vn, Instr op); + void NEONAddlp(const VRegister& vd, const VRegister& vn, NEON2RegMiscOp op); + void NEONPerm(const VRegister& vd, + const VRegister& vn, + const VRegister& vm, + NEONPermOp op); + void NEONFPByElement(const VRegister& vd, + const VRegister& vn, + const VRegister& vm, + int vm_index, + NEONByIndexedElementOp op, + NEONByIndexedElementOp op_half); 
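The private `MoveWide` helper declared above is what encodes the MOVZ/MOVK/MOVN forms. As a usage-level sketch only (it assumes an `Assembler*` named `assm`, the public `movz`/`movk` mnemonics declared earlier in this header, and an include path that may differ in your build; `EmitLoadConstant` is an invented name), a 64-bit constant can be assembled one 16-bit halfword at a time:

```cpp
#include "aarch64/assembler-aarch64.h"  // Path assumption; adjust to your build.

using namespace vixl::aarch64;

// Builds 0x1234c0dedeadbeef in x0 with one MOVZ followed by three MOVKs,
// which is the instruction pattern the MoveWide() helper above encodes.
void EmitLoadConstant(Assembler* assm) {
  assm->movz(x0, 0xbeef, 0);   // x0 = 0x000000000000beef
  assm->movk(x0, 0xdead, 16);  // x0 = 0x00000000deadbeef
  assm->movk(x0, 0xc0de, 32);  // x0 = 0x0000c0dedeadbeef
  assm->movk(x0, 0x1234, 48);  // x0 = 0x1234c0dedeadbeef
}
```

In practice the MacroAssembler's `Mov` pseudo-instruction chooses an efficient sequence automatically; the explicit form is shown only to make the helper's job visible.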
+ void NEONByElement(const VRegister& vd, + const VRegister& vn, + const VRegister& vm, + int vm_index, + NEONByIndexedElementOp op); + void NEONByElementL(const VRegister& vd, + const VRegister& vn, + const VRegister& vm, + int vm_index, + NEONByIndexedElementOp op); + void NEONShiftImmediate(const VRegister& vd, + const VRegister& vn, + NEONShiftImmediateOp op, + int immh_immb); + void NEONShiftLeftImmediate(const VRegister& vd, + const VRegister& vn, + int shift, + NEONShiftImmediateOp op); + void NEONShiftRightImmediate(const VRegister& vd, + const VRegister& vn, + int shift, + NEONShiftImmediateOp op); + void NEONShiftImmediateL(const VRegister& vd, + const VRegister& vn, + int shift, + NEONShiftImmediateOp op); + void NEONShiftImmediateN(const VRegister& vd, + const VRegister& vn, + int shift, + NEONShiftImmediateOp op); + void NEONXtn(const VRegister& vd, const VRegister& vn, NEON2RegMiscOp vop); + + // If *shift is -1, find values of *imm8 and *shift such that IsInt8(*imm8) + // and *shift is either 0 or 8. Otherwise, leave the values unchanged. + void ResolveSVEImm8Shift(int* imm8, int* shift); + + Instr LoadStoreStructAddrModeField(const MemOperand& addr); + + // Encode the specified MemOperand for the specified access size and scaling + // preference. + Instr LoadStoreMemOperand(const MemOperand& addr, + unsigned access_size_in_bytes_log2, + LoadStoreScalingOption option); + + // Link the current (not-yet-emitted) instruction to the specified label, then + // return an offset to be encoded in the instruction. If the label is not yet + // bound, an offset of 0 is returned. + ptrdiff_t LinkAndGetByteOffsetTo(Label* label); + ptrdiff_t LinkAndGetInstructionOffsetTo(Label* label); + ptrdiff_t LinkAndGetPageOffsetTo(Label* label); + + // A common implementation for the LinkAndGetOffsetTo helpers. + template + ptrdiff_t LinkAndGetOffsetTo(Label* label); + + // Literal load offset are in words (32-bit). + ptrdiff_t LinkAndGetWordOffsetTo(RawLiteral* literal); + + // Emit the instruction in buffer_. + void Emit(Instr instruction) { + VIXL_STATIC_ASSERT(sizeof(instruction) == kInstructionSize); + VIXL_ASSERT(AllowAssembler()); + GetBuffer()->Emit32(instruction); + } + + PositionIndependentCodeOption pic_; + + CPUFeatures cpu_features_; +}; + + +template +void Literal::UpdateValue(T new_value, const Assembler* assembler) { + return UpdateValue(new_value, + assembler->GetBuffer().GetStartAddress()); +} + + +template +void Literal::UpdateValue(T high64, T low64, const Assembler* assembler) { + return UpdateValue(high64, + low64, + assembler->GetBuffer().GetStartAddress()); +} + + +} // namespace aarch64 + +// Required InvalSet template specialisations. +// TODO: These template specialisations should not live in this file. Move +// Label out of the aarch64 namespace in order to share its implementation +// later. 
+#define INVAL_SET_TEMPLATE_PARAMETERS \ + ptrdiff_t, aarch64::Label::kNPreallocatedLinks, ptrdiff_t, \ + aarch64::Label::kInvalidLinkKey, aarch64::Label::kReclaimFrom, \ + aarch64::Label::kReclaimFactor +template <> +inline ptrdiff_t InvalSet::GetKey( + const ptrdiff_t& element) { + return element; +} +template <> +inline void InvalSet::SetKey(ptrdiff_t* element, + ptrdiff_t key) { + *element = key; +} +#undef INVAL_SET_TEMPLATE_PARAMETERS + +} // namespace vixl + +#endif // VIXL_AARCH64_ASSEMBLER_AARCH64_H_ diff --git a/3rdparty/vixl/include/vixl/aarch64/constants-aarch64.h b/3rdparty/vixl/include/vixl/aarch64/constants-aarch64.h new file mode 100644 index 0000000000..6982271e7f --- /dev/null +++ b/3rdparty/vixl/include/vixl/aarch64/constants-aarch64.h @@ -0,0 +1,4456 @@ +// Copyright 2015, VIXL authors +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// * Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// * Neither the name of ARM Limited nor the names of its contributors may be +// used to endorse or promote products derived from this software without +// specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS CONTRIBUTORS "AS IS" AND +// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE +// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +#ifndef VIXL_AARCH64_CONSTANTS_AARCH64_H_ +#define VIXL_AARCH64_CONSTANTS_AARCH64_H_ + +#ifdef __clang__ +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wdeprecated-enum-enum-conversion" +#endif + +#include "../globals-vixl.h" + +namespace vixl { +namespace aarch64 { + +const unsigned kNumberOfRegisters = 32; +const unsigned kNumberOfVRegisters = 32; +const unsigned kNumberOfZRegisters = kNumberOfVRegisters; +const unsigned kNumberOfPRegisters = 16; +// Callee saved registers are x21-x30(lr). +const int kNumberOfCalleeSavedRegisters = 10; +const int kFirstCalleeSavedRegisterIndex = 21; +// Callee saved FP registers are d8-d15. Note that the high parts of v8-v15 are +// still caller-saved. +const int kNumberOfCalleeSavedFPRegisters = 8; +const int kFirstCalleeSavedFPRegisterIndex = 8; +// All predicated instructions accept at least p0-p7 as the governing predicate. 
+const unsigned kNumberOfGoverningPRegisters = 8; + +// clang-format off +#define AARCH64_P_REGISTER_CODE_LIST(R) \ + R(0) R(1) R(2) R(3) R(4) R(5) R(6) R(7) \ + R(8) R(9) R(10) R(11) R(12) R(13) R(14) R(15) + +#define AARCH64_REGISTER_CODE_LIST(R) \ + R(0) R(1) R(2) R(3) R(4) R(5) R(6) R(7) \ + R(8) R(9) R(10) R(11) R(12) R(13) R(14) R(15) \ + R(16) R(17) R(18) R(19) R(20) R(21) R(22) R(23) \ + R(24) R(25) R(26) R(27) R(28) R(29) R(30) R(31) + +// SVE loads and stores use "w" instead of "s" for word-sized accesses, so the +// mapping from the load/store variant to constants like k*RegSize is irregular. +#define VIXL_SVE_LOAD_STORE_VARIANT_LIST(V) \ + V(b, B) \ + V(h, H) \ + V(w, S) \ + V(d, D) + +// Sign-extending loads don't have double-word variants. +#define VIXL_SVE_LOAD_STORE_SIGNED_VARIANT_LIST(V) \ + V(b, B) \ + V(h, H) \ + V(w, S) + +#define INSTRUCTION_FIELDS_LIST(V_) \ +/* Register fields */ \ +V_(Rd, 4, 0, ExtractBits) /* Destination register. */ \ +V_(Rn, 9, 5, ExtractBits) /* First source register. */ \ +V_(Rm, 20, 16, ExtractBits) /* Second source register. */ \ +V_(RmLow16, 19, 16, ExtractBits) /* Second source register (code 0-15). */ \ +V_(Ra, 14, 10, ExtractBits) /* Third source register. */ \ +V_(Rt, 4, 0, ExtractBits) /* Load/store register. */ \ +V_(Rt2, 14, 10, ExtractBits) /* Load/store second register. */ \ +V_(Rs, 20, 16, ExtractBits) /* Exclusive access status. */ \ +V_(Pt, 3, 0, ExtractBits) /* Load/store register (p0-p7). */ \ +V_(Pd, 3, 0, ExtractBits) /* SVE destination predicate register. */ \ +V_(Pn, 8, 5, ExtractBits) /* SVE first source predicate register. */ \ +V_(Pm, 19, 16, ExtractBits) /* SVE second source predicate register.*/ \ +V_(PgLow8, 12, 10, ExtractBits) /* Governing predicate (p0-p7). */ \ + \ +/* Common bits */ \ +V_(SixtyFourBits, 31, 31, ExtractBits) \ +V_(FlagsUpdate, 29, 29, ExtractBits) \ + \ +/* PC relative addressing */ \ +V_(ImmPCRelHi, 23, 5, ExtractSignedBits) \ +V_(ImmPCRelLo, 30, 29, ExtractBits) \ + \ +/* Add/subtract/logical shift register */ \ +V_(ShiftDP, 23, 22, ExtractBits) \ +V_(ImmDPShift, 15, 10, ExtractBits) \ + \ +/* Add/subtract immediate */ \ +V_(ImmAddSub, 21, 10, ExtractBits) \ +V_(ImmAddSubShift, 22, 22, ExtractBits) \ + \ +/* Add/substract extend */ \ +V_(ImmExtendShift, 12, 10, ExtractBits) \ +V_(ExtendMode, 15, 13, ExtractBits) \ + \ +/* Move wide */ \ +V_(ImmMoveWide, 20, 5, ExtractBits) \ +V_(ShiftMoveWide, 22, 21, ExtractBits) \ + \ +/* Logical immediate, bitfield and extract */ \ +V_(BitN, 22, 22, ExtractBits) \ +V_(ImmRotate, 21, 16, ExtractBits) \ +V_(ImmSetBits, 15, 10, ExtractBits) \ +V_(ImmR, 21, 16, ExtractBits) \ +V_(ImmS, 15, 10, ExtractBits) \ + \ +/* Test and branch immediate */ \ +V_(ImmTestBranch, 18, 5, ExtractSignedBits) \ +V_(ImmTestBranchBit40, 23, 19, ExtractBits) \ +V_(ImmTestBranchBit5, 31, 31, ExtractBits) \ + \ +/* Conditionals */ \ +V_(Condition, 15, 12, ExtractBits) \ +V_(ConditionBranch, 3, 0, ExtractBits) \ +V_(Nzcv, 3, 0, ExtractBits) \ +V_(ImmCondCmp, 20, 16, ExtractBits) \ +V_(ImmCondBranch, 23, 5, ExtractSignedBits) \ + \ +/* Floating point */ \ +V_(FPType, 23, 22, ExtractBits) \ +V_(ImmFP, 20, 13, ExtractBits) \ +V_(FPScale, 15, 10, ExtractBits) \ + \ +/* Load Store */ \ +V_(ImmLS, 20, 12, ExtractSignedBits) \ +V_(ImmLSUnsigned, 21, 10, ExtractBits) \ +V_(ImmLSPair, 21, 15, ExtractSignedBits) \ +V_(ImmShiftLS, 12, 12, ExtractBits) \ +V_(LSOpc, 23, 22, ExtractBits) \ +V_(LSVector, 26, 26, ExtractBits) \ +V_(LSSize, 31, 30, ExtractBits) \ +V_(ImmPrefetchOperation, 4, 0, 
ExtractBits) \ +V_(PrefetchHint, 4, 3, ExtractBits) \ +V_(PrefetchTarget, 2, 1, ExtractBits) \ +V_(PrefetchStream, 0, 0, ExtractBits) \ +V_(ImmLSPACHi, 22, 22, ExtractSignedBits) \ +V_(ImmLSPACLo, 20, 12, ExtractBits) \ + \ +/* Other immediates */ \ +V_(ImmUncondBranch, 25, 0, ExtractSignedBits) \ +V_(ImmCmpBranch, 23, 5, ExtractSignedBits) \ +V_(ImmLLiteral, 23, 5, ExtractSignedBits) \ +V_(ImmException, 20, 5, ExtractBits) \ +V_(ImmHint, 11, 5, ExtractBits) \ +V_(ImmBarrierDomain, 11, 10, ExtractBits) \ +V_(ImmBarrierType, 9, 8, ExtractBits) \ +V_(ImmUdf, 15, 0, ExtractBits) \ + \ +/* System (MRS, MSR, SYS) */ \ +V_(ImmSystemRegister, 20, 5, ExtractBits) \ +V_(SysO0, 19, 19, ExtractBits) \ +V_(SysOp, 18, 5, ExtractBits) \ +V_(SysOp0, 20, 19, ExtractBits) \ +V_(SysOp1, 18, 16, ExtractBits) \ +V_(SysOp2, 7, 5, ExtractBits) \ +V_(CRn, 15, 12, ExtractBits) \ +V_(CRm, 11, 8, ExtractBits) \ +V_(ImmRMIFRotation, 20, 15, ExtractBits) \ + \ +/* Load-/store-exclusive */ \ +V_(LdStXLoad, 22, 22, ExtractBits) \ +V_(LdStXNotExclusive, 23, 23, ExtractBits) \ +V_(LdStXAcquireRelease, 15, 15, ExtractBits) \ +V_(LdStXSizeLog2, 31, 30, ExtractBits) \ +V_(LdStXPair, 21, 21, ExtractBits) \ + \ +/* NEON generic fields */ \ +V_(NEONQ, 30, 30, ExtractBits) \ +V_(NEONSize, 23, 22, ExtractBits) \ +V_(NEONLSSize, 11, 10, ExtractBits) \ +V_(NEONS, 12, 12, ExtractBits) \ +V_(NEONL, 21, 21, ExtractBits) \ +V_(NEONM, 20, 20, ExtractBits) \ +V_(NEONH, 11, 11, ExtractBits) \ +V_(ImmNEONExt, 14, 11, ExtractBits) \ +V_(ImmNEON5, 20, 16, ExtractBits) \ +V_(ImmNEON4, 14, 11, ExtractBits) \ + \ +/* NEON extra fields */ \ +V_(ImmRotFcadd, 12, 12, ExtractBits) \ +V_(ImmRotFcmlaVec, 12, 11, ExtractBits) \ +V_(ImmRotFcmlaSca, 14, 13, ExtractBits) \ + \ +/* NEON Modified Immediate fields */ \ +V_(ImmNEONabc, 18, 16, ExtractBits) \ +V_(ImmNEONdefgh, 9, 5, ExtractBits) \ +V_(NEONModImmOp, 29, 29, ExtractBits) \ +V_(NEONCmode, 15, 12, ExtractBits) \ + \ +/* NEON Shift Immediate fields */ \ +V_(ImmNEONImmhImmb, 22, 16, ExtractBits) \ +V_(ImmNEONImmh, 22, 19, ExtractBits) \ +V_(ImmNEONImmb, 18, 16, ExtractBits) \ + \ +/* SVE generic fields */ \ +V_(SVESize, 23, 22, ExtractBits) \ +V_(ImmSVEVLScale, 10, 5, ExtractSignedBits) \ +V_(ImmSVEIntWideSigned, 12, 5, ExtractSignedBits) \ +V_(ImmSVEIntWideUnsigned, 12, 5, ExtractBits) \ +V_(ImmSVEPredicateConstraint, 9, 5, ExtractBits) \ + \ +/* SVE Bitwise Immediate bitfield */ \ +V_(SVEBitN, 17, 17, ExtractBits) \ +V_(SVEImmRotate, 16, 11, ExtractBits) \ +V_(SVEImmSetBits, 10, 5, ExtractBits) \ + \ +V_(SVEImmPrefetchOperation, 3, 0, ExtractBits) \ +V_(SVEPrefetchHint, 3, 3, ExtractBits) + +// clang-format on + +#define SYSTEM_REGISTER_FIELDS_LIST(V_, M_) \ + /* NZCV */ \ + V_(Flags, 31, 28, ExtractBits) \ + V_(N, 31, 31, ExtractBits) \ + V_(Z, 30, 30, ExtractBits) \ + V_(C, 29, 29, ExtractBits) \ + V_(V, 28, 28, ExtractBits) \ + M_(NZCV, Flags_mask) \ + /* FPCR */ \ + V_(AHP, 26, 26, ExtractBits) \ + V_(DN, 25, 25, ExtractBits) \ + V_(FZ, 24, 24, ExtractBits) \ + V_(RMode, 23, 22, ExtractBits) \ + M_(FPCR, AHP_mask | DN_mask | FZ_mask | RMode_mask) + +// Fields offsets. 
+#define DECLARE_FIELDS_OFFSETS(Name, HighBit, LowBit, X) \ + const int Name##_offset = LowBit; \ + const int Name##_width = HighBit - LowBit + 1; \ + const uint32_t Name##_mask = ((1 << Name##_width) - 1) << LowBit; +#define NOTHING(A, B) +INSTRUCTION_FIELDS_LIST(DECLARE_FIELDS_OFFSETS) +SYSTEM_REGISTER_FIELDS_LIST(DECLARE_FIELDS_OFFSETS, NOTHING) +#undef NOTHING +#undef DECLARE_FIELDS_BITS + +// ImmPCRel is a compound field (not present in INSTRUCTION_FIELDS_LIST), formed +// from ImmPCRelLo and ImmPCRelHi. +const int ImmPCRel_mask = ImmPCRelLo_mask | ImmPCRelHi_mask; + +// Disable `clang-format` for the `enum`s below. We care about the manual +// formatting that `clang-format` would destroy. +// clang-format off + +// Condition codes. +enum Condition { + eq = 0, // Z set Equal. + ne = 1, // Z clear Not equal. + cs = 2, // C set Carry set. + cc = 3, // C clear Carry clear. + mi = 4, // N set Negative. + pl = 5, // N clear Positive or zero. + vs = 6, // V set Overflow. + vc = 7, // V clear No overflow. + hi = 8, // C set, Z clear Unsigned higher. + ls = 9, // C clear or Z set Unsigned lower or same. + ge = 10, // N == V Greater or equal. + lt = 11, // N != V Less than. + gt = 12, // Z clear, N == V Greater than. + le = 13, // Z set or N != V Less then or equal + al = 14, // Always. + nv = 15, // Behaves as always/al. + + // Aliases. + hs = cs, // C set Unsigned higher or same. + lo = cc, // C clear Unsigned lower. + + // Floating-point additional condition code. + uo, // Unordered comparison. + + // SVE predicate condition aliases. + sve_none = eq, // No active elements were true. + sve_any = ne, // An active element was true. + sve_nlast = cs, // The last element was not true. + sve_last = cc, // The last element was true. + sve_first = mi, // The first element was true. + sve_nfrst = pl, // The first element was not true. + sve_pmore = hi, // An active element was true but not the last element. + sve_plast = ls, // The last active element was true or no active elements were true. + sve_tcont = ge, // CTERM termination condition not deleted. + sve_tstop = lt // CTERM termination condition deleted. +}; + +inline Condition InvertCondition(Condition cond) { + // Conditions al and nv behave identically, as "always true". They can't be + // inverted, because there is no "always false" condition. + VIXL_ASSERT((cond != al) && (cond != nv)); + return static_cast(cond ^ 1); +} + +enum FPTrapFlags { + EnableTrap = 1, + DisableTrap = 0 +}; + +enum FlagsUpdate { + SetFlags = 1, + LeaveFlags = 0 +}; + +enum StatusFlags { + NoFlag = 0, + + // Derive the flag combinations from the system register bit descriptions. + NFlag = N_mask, + ZFlag = Z_mask, + CFlag = C_mask, + VFlag = V_mask, + NZFlag = NFlag | ZFlag, + NCFlag = NFlag | CFlag, + NVFlag = NFlag | VFlag, + ZCFlag = ZFlag | CFlag, + ZVFlag = ZFlag | VFlag, + CVFlag = CFlag | VFlag, + NZCFlag = NFlag | ZFlag | CFlag, + NZVFlag = NFlag | ZFlag | VFlag, + NCVFlag = NFlag | CFlag | VFlag, + ZCVFlag = ZFlag | CFlag | VFlag, + NZCVFlag = NFlag | ZFlag | CFlag | VFlag, + + // Floating-point comparison results. + FPEqualFlag = ZCFlag, + FPLessThanFlag = NFlag, + FPGreaterThanFlag = CFlag, + FPUnorderedFlag = CVFlag, + + // SVE condition flags. 
+ SVEFirstFlag = NFlag, + SVENoneFlag = ZFlag, + SVENotLastFlag = CFlag +}; + +enum Shift { + NO_SHIFT = -1, + LSL = 0x0, + LSR = 0x1, + ASR = 0x2, + ROR = 0x3, + MSL = 0x4 +}; + +enum Extend { + NO_EXTEND = -1, + UXTB = 0, + UXTH = 1, + UXTW = 2, + UXTX = 3, + SXTB = 4, + SXTH = 5, + SXTW = 6, + SXTX = 7 +}; + +enum SVEOffsetModifier { + NO_SVE_OFFSET_MODIFIER, + // Multiply (each element of) the offset by either the vector or predicate + // length, according to the context. + SVE_MUL_VL, + // Shift or extend modifiers (as in `Shift` or `Extend`). + SVE_LSL, + SVE_UXTW, + SVE_SXTW +}; + +enum SystemHint { + NOP = 0, + YIELD = 1, + WFE = 2, + WFI = 3, + SEV = 4, + SEVL = 5, + ESB = 16, + CSDB = 20, + BTI = 32, + BTI_c = 34, + BTI_j = 36, + BTI_jc = 38 +}; + +enum BranchTargetIdentifier { + EmitBTI_none = NOP, + EmitBTI = BTI, + EmitBTI_c = BTI_c, + EmitBTI_j = BTI_j, + EmitBTI_jc = BTI_jc, + + // These correspond to the values of the CRm:op2 fields in the equivalent HINT + // instruction. + EmitPACIASP = 25, + EmitPACIBSP = 27 +}; + +enum BarrierDomain { + OuterShareable = 0, + NonShareable = 1, + InnerShareable = 2, + FullSystem = 3 +}; + +enum BarrierType { + BarrierOther = 0, + BarrierReads = 1, + BarrierWrites = 2, + BarrierAll = 3 +}; + +enum PrefetchOperation { + PLDL1KEEP = 0x00, + PLDL1STRM = 0x01, + PLDL2KEEP = 0x02, + PLDL2STRM = 0x03, + PLDL3KEEP = 0x04, + PLDL3STRM = 0x05, + + PrfUnallocated06 = 0x06, + PrfUnallocated07 = 0x07, + + PLIL1KEEP = 0x08, + PLIL1STRM = 0x09, + PLIL2KEEP = 0x0a, + PLIL2STRM = 0x0b, + PLIL3KEEP = 0x0c, + PLIL3STRM = 0x0d, + + PrfUnallocated0e = 0x0e, + PrfUnallocated0f = 0x0f, + + PSTL1KEEP = 0x10, + PSTL1STRM = 0x11, + PSTL2KEEP = 0x12, + PSTL2STRM = 0x13, + PSTL3KEEP = 0x14, + PSTL3STRM = 0x15, + + PrfUnallocated16 = 0x16, + PrfUnallocated17 = 0x17, + PrfUnallocated18 = 0x18, + PrfUnallocated19 = 0x19, + PrfUnallocated1a = 0x1a, + PrfUnallocated1b = 0x1b, + PrfUnallocated1c = 0x1c, + PrfUnallocated1d = 0x1d, + PrfUnallocated1e = 0x1e, + PrfUnallocated1f = 0x1f, +}; + +constexpr bool IsNamedPrefetchOperation(int op) { + return ((op >= PLDL1KEEP) && (op <= PLDL3STRM)) || + ((op >= PLIL1KEEP) && (op <= PLIL3STRM)) || + ((op >= PSTL1KEEP) && (op <= PSTL3STRM)); +} + +enum BType { + // Set when executing any instruction on a guarded page, except those cases + // listed below. + DefaultBType = 0, + + // Set when an indirect branch is taken from an unguarded page to a guarded + // page, or from a guarded page to ip0 or ip1 (x16 or x17), eg "br ip0". + BranchFromUnguardedOrToIP = 1, + + // Set when an indirect branch and link (call) is taken, eg. "blr x0". + BranchAndLink = 2, + + // Set when an indirect branch is taken from a guarded page to a register + // that is not ip0 or ip1 (x16 or x17), eg, "br x0". + BranchFromGuardedNotToIP = 3 +}; + +template +class SystemRegisterEncoder { + public: + static const uint32_t value = + ((op0 << SysO0_offset) | + (op1 << SysOp1_offset) | + (crn << CRn_offset) | + (crm << CRm_offset) | + (op2 << SysOp2_offset)) >> ImmSystemRegister_offset; +}; + +// System/special register names. +// This information is not encoded as one field but as the concatenation of +// multiple fields (Op0, Op1, Crn, Crm, Op2). +enum SystemRegister { + NZCV = SystemRegisterEncoder<3, 3, 4, 2, 0>::value, + FPCR = SystemRegisterEncoder<3, 3, 4, 4, 0>::value, + RNDR = SystemRegisterEncoder<3, 3, 2, 4, 0>::value, // Random number. + RNDRRS = SystemRegisterEncoder<3, 3, 2, 4, 1>::value // Reseeded random number. 
+}; + +template +class CacheOpEncoder { + public: + static const uint32_t value = + ((op1 << SysOp1_offset) | + (crn << CRn_offset) | + (crm << CRm_offset) | + (op2 << SysOp2_offset)) >> SysOp_offset; +}; + +enum InstructionCacheOp { + IVAU = CacheOpEncoder<3, 7, 5, 1>::value +}; + +enum DataCacheOp { + CVAC = CacheOpEncoder<3, 7, 10, 1>::value, + CVAU = CacheOpEncoder<3, 7, 11, 1>::value, + CVAP = CacheOpEncoder<3, 7, 12, 1>::value, + CVADP = CacheOpEncoder<3, 7, 13, 1>::value, + CIVAC = CacheOpEncoder<3, 7, 14, 1>::value, + ZVA = CacheOpEncoder<3, 7, 4, 1>::value, + GVA = CacheOpEncoder<3, 7, 4, 3>::value, + GZVA = CacheOpEncoder<3, 7, 4, 4>::value, + CGVAC = CacheOpEncoder<3, 7, 10, 3>::value, + CGDVAC = CacheOpEncoder<3, 7, 10, 5>::value, + CGVAP = CacheOpEncoder<3, 7, 12, 3>::value, + CGDVAP = CacheOpEncoder<3, 7, 12, 5>::value, + CIGVAC = CacheOpEncoder<3, 7, 14, 3>::value, + CIGDVAC = CacheOpEncoder<3, 7, 14, 5>::value +}; + +// Some SVE instructions support a predicate constraint pattern. This is +// interpreted as a VL-dependent value, and is typically used to initialise +// predicates, or to otherwise limit the number of processed elements. +enum SVEPredicateConstraint { + // Select 2^N elements, for the largest possible N. + SVE_POW2 = 0x0, + // Each VL selects exactly N elements if possible, or zero if N is greater + // than the number of elements. Note that the encoding values for VL are + // not linearly related to N. + SVE_VL1 = 0x1, + SVE_VL2 = 0x2, + SVE_VL3 = 0x3, + SVE_VL4 = 0x4, + SVE_VL5 = 0x5, + SVE_VL6 = 0x6, + SVE_VL7 = 0x7, + SVE_VL8 = 0x8, + SVE_VL16 = 0x9, + SVE_VL32 = 0xa, + SVE_VL64 = 0xb, + SVE_VL128 = 0xc, + SVE_VL256 = 0xd, + // Each MUL selects the largest multiple of N elements that the vector + // length supports. Note that for D-sized lanes, this can be zero. + SVE_MUL4 = 0x1d, + SVE_MUL3 = 0x1e, + // Select all elements. + SVE_ALL = 0x1f +}; + +// Instruction enumerations. +// +// These are the masks that define a class of instructions, and the list of +// instructions within each class. Each enumeration has a Fixed, FMask and +// Mask value. +// +// Fixed: The fixed bits in this instruction class. +// FMask: The mask used to extract the fixed bits in the class. +// Mask: The mask used to identify the instructions within a class. +// +// The enumerations can be used like this: +// +// VIXL_ASSERT(instr->Mask(PCRelAddressingFMask) == PCRelAddressingFixed); +// switch(instr->Mask(PCRelAddressingMask)) { +// case ADR: Format("adr 'Xd, 'AddrPCRelByte"); break; +// case ADRP: Format("adrp 'Xd, 'AddrPCRelPage"); break; +// default: printf("Unknown instruction\n"); +// } + + +// Generic fields. 
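
For orientation (this note is not part of the imported VIXL sources): a minimal, standalone sketch of how `SystemRegisterEncoder` packs an (op0, op1, CRn, CRm, op2) tuple into the MRS/MSR immediate. The field offsets are copied from `INSTRUCTION_FIELDS_LIST` above; the expected constants are assumed to follow from the system-register names S3_3_C4_C2_0 (NZCV) and S3_3_C4_C4_0 (FPCR).

    // Sketch only: mirrors SystemRegisterEncoder using the offsets defined in
    // INSTRUCTION_FIELDS_LIST (SysO0 at bit 19, SysOp1 at 16, CRn at 12,
    // CRm at 8, SysOp2 at 5, ImmSystemRegister spanning bits 20:5).
    #include <cstdint>

    constexpr int kSysO0Offset = 19;
    constexpr int kSysOp1Offset = 16;
    constexpr int kCRnOffset = 12;
    constexpr int kCRmOffset = 8;
    constexpr int kSysOp2Offset = 5;
    constexpr int kImmSystemRegisterOffset = 5;

    constexpr uint32_t EncodeSystemRegister(uint32_t op0, uint32_t op1,
                                            uint32_t crn, uint32_t crm,
                                            uint32_t op2) {
      return ((op0 << kSysO0Offset) | (op1 << kSysOp1Offset) |
              (crn << kCRnOffset) | (crm << kCRmOffset) |
              (op2 << kSysOp2Offset)) >> kImmSystemRegisterOffset;
    }

    // NZCV is S3_3_C4_C2_0 and FPCR is S3_3_C4_C4_0.
    static_assert(EncodeSystemRegister(3, 3, 4, 2, 0) == 0xDA10, "NZCV");
    static_assert(EncodeSystemRegister(3, 3, 4, 4, 0) == 0xDA20, "FPCR");
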
+enum GenericInstrField : uint32_t { + SixtyFourBits = 0x80000000u, + ThirtyTwoBits = 0x00000000u, + + FPTypeMask = 0x00C00000u, + FP16 = 0x00C00000u, + FP32 = 0x00000000u, + FP64 = 0x00400000u +}; + +enum NEONFormatField : uint32_t { + NEONFormatFieldMask = 0x40C00000u, + NEON_Q = 0x40000000u, + NEON_8B = 0x00000000u, + NEON_16B = NEON_8B | NEON_Q, + NEON_4H = 0x00400000u, + NEON_8H = NEON_4H | NEON_Q, + NEON_2S = 0x00800000u, + NEON_4S = NEON_2S | NEON_Q, + NEON_1D = 0x00C00000u, + NEON_2D = 0x00C00000u | NEON_Q +}; + +enum NEONFPFormatField : uint32_t { + NEONFPFormatFieldMask = 0x40400000u, + NEON_FP_4H = FP16, + NEON_FP_2S = FP32, + NEON_FP_8H = FP16 | NEON_Q, + NEON_FP_4S = FP32 | NEON_Q, + NEON_FP_2D = FP64 | NEON_Q +}; + +enum NEONLSFormatField : uint32_t { + NEONLSFormatFieldMask = 0x40000C00u, + LS_NEON_8B = 0x00000000u, + LS_NEON_16B = LS_NEON_8B | NEON_Q, + LS_NEON_4H = 0x00000400u, + LS_NEON_8H = LS_NEON_4H | NEON_Q, + LS_NEON_2S = 0x00000800u, + LS_NEON_4S = LS_NEON_2S | NEON_Q, + LS_NEON_1D = 0x00000C00u, + LS_NEON_2D = LS_NEON_1D | NEON_Q +}; + +enum NEONScalarFormatField : uint32_t { + NEONScalarFormatFieldMask = 0x00C00000u, + NEONScalar = 0x10000000u, + NEON_B = 0x00000000u, + NEON_H = 0x00400000u, + NEON_S = 0x00800000u, + NEON_D = 0x00C00000u +}; + +enum SVESizeField { + SVESizeFieldMask = 0x00C00000, + SVE_B = 0x00000000, + SVE_H = 0x00400000, + SVE_S = 0x00800000, + SVE_D = 0x00C00000 +}; + +// PC relative addressing. +enum PCRelAddressingOp : uint32_t { + PCRelAddressingFixed = 0x10000000u, + PCRelAddressingFMask = 0x1F000000u, + PCRelAddressingMask = 0x9F000000u, + ADR = PCRelAddressingFixed | 0x00000000u, + ADRP = PCRelAddressingFixed | 0x80000000u +}; + +// Add/sub (immediate, shifted and extended.) +const int kSFOffset = 31; +enum AddSubOp : uint32_t { + AddSubOpMask = 0x60000000u, + AddSubSetFlagsBit = 0x20000000u, + ADD = 0x00000000u, + ADDS = ADD | AddSubSetFlagsBit, + SUB = 0x40000000u, + SUBS = SUB | AddSubSetFlagsBit +}; + +#define ADD_SUB_OP_LIST(V) \ + V(ADD), \ + V(ADDS), \ + V(SUB), \ + V(SUBS) + +enum AddSubImmediateOp : uint32_t { + AddSubImmediateFixed = 0x11000000u, + AddSubImmediateFMask = 0x1F800000u, + AddSubImmediateMask = 0xFF800000u, + #define ADD_SUB_IMMEDIATE(A) \ + A##_w_imm = AddSubImmediateFixed | A, \ + A##_x_imm = AddSubImmediateFixed | A | SixtyFourBits + ADD_SUB_OP_LIST(ADD_SUB_IMMEDIATE) + #undef ADD_SUB_IMMEDIATE +}; + +enum AddSubShiftedOp : uint32_t { + AddSubShiftedFixed = 0x0B000000u, + AddSubShiftedFMask = 0x1F200000u, + AddSubShiftedMask = 0xFF200000u, + #define ADD_SUB_SHIFTED(A) \ + A##_w_shift = AddSubShiftedFixed | A, \ + A##_x_shift = AddSubShiftedFixed | A | SixtyFourBits + ADD_SUB_OP_LIST(ADD_SUB_SHIFTED) + #undef ADD_SUB_SHIFTED +}; + +enum AddSubExtendedOp : uint32_t { + AddSubExtendedFixed = 0x0B200000u, + AddSubExtendedFMask = 0x1F200000u, + AddSubExtendedMask = 0xFFE00000u, + #define ADD_SUB_EXTENDED(A) \ + A##_w_ext = AddSubExtendedFixed | A, \ + A##_x_ext = AddSubExtendedFixed | A | SixtyFourBits + ADD_SUB_OP_LIST(ADD_SUB_EXTENDED) + #undef ADD_SUB_EXTENDED +}; + +// Add/sub with carry. 
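
To make the Fixed/FMask/Mask scheme described above concrete, here is a minimal standalone sketch (not part of the diff) using the PC-relative addressing class. The constants are copied from the enums in this header; the sample word 0x10000081 is taken here to be the encoding of "adr x1, #0x10" (immlo = 0, immhi = 4, Rd = 1), and Rd_mask = 0x1F follows from the field macro, since Rd occupies bits 4:0.

    #include <cstdint>
    #include <cstdio>

    constexpr uint32_t kPCRelAddressingFixed = 0x10000000u;
    constexpr uint32_t kPCRelAddressingFMask = 0x1F000000u;
    constexpr uint32_t kPCRelAddressingMask  = 0x9F000000u;
    constexpr uint32_t kADR  = kPCRelAddressingFixed | 0x00000000u;
    constexpr uint32_t kADRP = kPCRelAddressingFixed | 0x80000000u;
    constexpr uint32_t kRdMask = 0x0000001Fu;  // Rd field, bits 4:0.

    void Classify(uint32_t instr) {
      // Step 1: the fixed bits identify the instruction class.
      if ((instr & kPCRelAddressingFMask) != kPCRelAddressingFixed) {
        std::printf("not a PC-relative addressing instruction\n");
        return;
      }
      // Step 2: the class mask distinguishes the instructions within the class.
      switch (instr & kPCRelAddressingMask) {
        case kADR:
          std::printf("adr  x%u, ...\n", static_cast<unsigned>(instr & kRdMask));
          break;
        case kADRP:
          std::printf("adrp x%u, ...\n", static_cast<unsigned>(instr & kRdMask));
          break;
      }
    }

    int main() {
      Classify(0x10000081u);  // Expected output: "adr  x1, ..."
      return 0;
    }
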
+enum AddSubWithCarryOp : uint32_t { + AddSubWithCarryFixed = 0x1A000000u, + AddSubWithCarryFMask = 0x1FE00000u, + AddSubWithCarryMask = 0xFFE0FC00u, + ADC_w = AddSubWithCarryFixed | ADD, + ADC_x = AddSubWithCarryFixed | ADD | SixtyFourBits, + ADC = ADC_w, + ADCS_w = AddSubWithCarryFixed | ADDS, + ADCS_x = AddSubWithCarryFixed | ADDS | SixtyFourBits, + SBC_w = AddSubWithCarryFixed | SUB, + SBC_x = AddSubWithCarryFixed | SUB | SixtyFourBits, + SBC = SBC_w, + SBCS_w = AddSubWithCarryFixed | SUBS, + SBCS_x = AddSubWithCarryFixed | SUBS | SixtyFourBits +}; + +// Rotate right into flags. +enum RotateRightIntoFlagsOp : uint32_t { + RotateRightIntoFlagsFixed = 0x1A000400u, + RotateRightIntoFlagsFMask = 0x1FE07C00u, + RotateRightIntoFlagsMask = 0xFFE07C10u, + RMIF = RotateRightIntoFlagsFixed | 0xA0000000u +}; + +// Evaluate into flags. +enum EvaluateIntoFlagsOp : uint32_t { + EvaluateIntoFlagsFixed = 0x1A000800u, + EvaluateIntoFlagsFMask = 0x1FE03C00u, + EvaluateIntoFlagsMask = 0xFFE07C1Fu, + SETF8 = EvaluateIntoFlagsFixed | 0x2000000Du, + SETF16 = EvaluateIntoFlagsFixed | 0x2000400Du +}; + + +// Logical (immediate and shifted register). +enum LogicalOp : uint32_t { + LogicalOpMask = 0x60200000u, + NOT = 0x00200000u, + AND = 0x00000000u, + BIC = AND | NOT, + ORR = 0x20000000u, + ORN = ORR | NOT, + EOR = 0x40000000u, + EON = EOR | NOT, + ANDS = 0x60000000u, + BICS = ANDS | NOT +}; + +// Logical immediate. +enum LogicalImmediateOp : uint32_t { + LogicalImmediateFixed = 0x12000000u, + LogicalImmediateFMask = 0x1F800000u, + LogicalImmediateMask = 0xFF800000u, + AND_w_imm = LogicalImmediateFixed | AND, + AND_x_imm = LogicalImmediateFixed | AND | SixtyFourBits, + ORR_w_imm = LogicalImmediateFixed | ORR, + ORR_x_imm = LogicalImmediateFixed | ORR | SixtyFourBits, + EOR_w_imm = LogicalImmediateFixed | EOR, + EOR_x_imm = LogicalImmediateFixed | EOR | SixtyFourBits, + ANDS_w_imm = LogicalImmediateFixed | ANDS, + ANDS_x_imm = LogicalImmediateFixed | ANDS | SixtyFourBits +}; + +// Logical shifted register. +enum LogicalShiftedOp : uint32_t { + LogicalShiftedFixed = 0x0A000000u, + LogicalShiftedFMask = 0x1F000000u, + LogicalShiftedMask = 0xFF200000u, + AND_w = LogicalShiftedFixed | AND, + AND_x = LogicalShiftedFixed | AND | SixtyFourBits, + AND_shift = AND_w, + BIC_w = LogicalShiftedFixed | BIC, + BIC_x = LogicalShiftedFixed | BIC | SixtyFourBits, + BIC_shift = BIC_w, + ORR_w = LogicalShiftedFixed | ORR, + ORR_x = LogicalShiftedFixed | ORR | SixtyFourBits, + ORR_shift = ORR_w, + ORN_w = LogicalShiftedFixed | ORN, + ORN_x = LogicalShiftedFixed | ORN | SixtyFourBits, + ORN_shift = ORN_w, + EOR_w = LogicalShiftedFixed | EOR, + EOR_x = LogicalShiftedFixed | EOR | SixtyFourBits, + EOR_shift = EOR_w, + EON_w = LogicalShiftedFixed | EON, + EON_x = LogicalShiftedFixed | EON | SixtyFourBits, + EON_shift = EON_w, + ANDS_w = LogicalShiftedFixed | ANDS, + ANDS_x = LogicalShiftedFixed | ANDS | SixtyFourBits, + ANDS_shift = ANDS_w, + BICS_w = LogicalShiftedFixed | BICS, + BICS_x = LogicalShiftedFixed | BICS | SixtyFourBits, + BICS_shift = BICS_w +}; + +// Move wide immediate. 
+enum MoveWideImmediateOp : uint32_t { + MoveWideImmediateFixed = 0x12800000u, + MoveWideImmediateFMask = 0x1F800000u, + MoveWideImmediateMask = 0xFF800000u, + MOVN = 0x00000000u, + MOVZ = 0x40000000u, + MOVK = 0x60000000u, + MOVN_w = MoveWideImmediateFixed | MOVN, + MOVN_x = MoveWideImmediateFixed | MOVN | SixtyFourBits, + MOVZ_w = MoveWideImmediateFixed | MOVZ, + MOVZ_x = MoveWideImmediateFixed | MOVZ | SixtyFourBits, + MOVK_w = MoveWideImmediateFixed | MOVK, + MOVK_x = MoveWideImmediateFixed | MOVK | SixtyFourBits +}; + +// Bitfield. +const int kBitfieldNOffset = 22; +enum BitfieldOp : uint32_t { + BitfieldFixed = 0x13000000u, + BitfieldFMask = 0x1F800000u, + BitfieldMask = 0xFF800000u, + SBFM_w = BitfieldFixed | 0x00000000u, + SBFM_x = BitfieldFixed | 0x80000000u, + SBFM = SBFM_w, + BFM_w = BitfieldFixed | 0x20000000u, + BFM_x = BitfieldFixed | 0xA0000000u, + BFM = BFM_w, + UBFM_w = BitfieldFixed | 0x40000000u, + UBFM_x = BitfieldFixed | 0xC0000000u, + UBFM = UBFM_w + // Bitfield N field. +}; + +// Extract. +enum ExtractOp : uint32_t { + ExtractFixed = 0x13800000u, + ExtractFMask = 0x1F800000u, + ExtractMask = 0xFFA00000u, + EXTR_w = ExtractFixed | 0x00000000u, + EXTR_x = ExtractFixed | 0x80000000u, + EXTR = EXTR_w +}; + +// Unconditional branch. +enum UnconditionalBranchOp : uint32_t { + UnconditionalBranchFixed = 0x14000000u, + UnconditionalBranchFMask = 0x7C000000u, + UnconditionalBranchMask = 0xFC000000u, + B = UnconditionalBranchFixed | 0x00000000u, + BL = UnconditionalBranchFixed | 0x80000000u +}; + +// Unconditional branch to register. +enum UnconditionalBranchToRegisterOp : uint32_t { + UnconditionalBranchToRegisterFixed = 0xD6000000u, + UnconditionalBranchToRegisterFMask = 0xFE000000u, + UnconditionalBranchToRegisterMask = 0xFFFFFC00u, + BR = UnconditionalBranchToRegisterFixed | 0x001F0000u, + BLR = UnconditionalBranchToRegisterFixed | 0x003F0000u, + RET = UnconditionalBranchToRegisterFixed | 0x005F0000u, + + BRAAZ = UnconditionalBranchToRegisterFixed | 0x001F0800u, + BRABZ = UnconditionalBranchToRegisterFixed | 0x001F0C00u, + BLRAAZ = UnconditionalBranchToRegisterFixed | 0x003F0800u, + BLRABZ = UnconditionalBranchToRegisterFixed | 0x003F0C00u, + RETAA = UnconditionalBranchToRegisterFixed | 0x005F0800u, + RETAB = UnconditionalBranchToRegisterFixed | 0x005F0C00u, + BRAA = UnconditionalBranchToRegisterFixed | 0x011F0800u, + BRAB = UnconditionalBranchToRegisterFixed | 0x011F0C00u, + BLRAA = UnconditionalBranchToRegisterFixed | 0x013F0800u, + BLRAB = UnconditionalBranchToRegisterFixed | 0x013F0C00u +}; + +// Compare and branch. +enum CompareBranchOp : uint32_t { + CompareBranchFixed = 0x34000000u, + CompareBranchFMask = 0x7E000000u, + CompareBranchMask = 0xFF000000u, + CBZ_w = CompareBranchFixed | 0x00000000u, + CBZ_x = CompareBranchFixed | 0x80000000u, + CBZ = CBZ_w, + CBNZ_w = CompareBranchFixed | 0x01000000u, + CBNZ_x = CompareBranchFixed | 0x81000000u, + CBNZ = CBNZ_w +}; + +// Test and branch. +enum TestBranchOp : uint32_t { + TestBranchFixed = 0x36000000u, + TestBranchFMask = 0x7E000000u, + TestBranchMask = 0x7F000000u, + TBZ = TestBranchFixed | 0x00000000u, + TBNZ = TestBranchFixed | 0x01000000u +}; + +// Conditional branch. +enum ConditionalBranchOp : uint32_t { + ConditionalBranchFixed = 0x54000000u, + ConditionalBranchFMask = 0xFE000000u, + ConditionalBranchMask = 0xFF000010u, + B_cond = ConditionalBranchFixed | 0x00000000u +}; + +// System. +// System instruction encoding is complicated because some instructions use op +// and CR fields to encode parameters. 
To handle this cleanly, the system +// instructions are split into more than one enum. + +enum SystemOp : uint32_t { + SystemFixed = 0xD5000000u, + SystemFMask = 0xFFC00000u +}; + +enum SystemSysRegOp : uint32_t { + SystemSysRegFixed = 0xD5100000u, + SystemSysRegFMask = 0xFFD00000u, + SystemSysRegMask = 0xFFF00000u, + MRS = SystemSysRegFixed | 0x00200000u, + MSR = SystemSysRegFixed | 0x00000000u +}; + +enum SystemPStateOp : uint32_t { + SystemPStateFixed = 0xD5004000u, + SystemPStateFMask = 0xFFF8F000u, + SystemPStateMask = 0xFFFFF0FFu, + CFINV = SystemPStateFixed | 0x0000001Fu, + XAFLAG = SystemPStateFixed | 0x0000003Fu, + AXFLAG = SystemPStateFixed | 0x0000005Fu +}; + +enum SystemHintOp : uint32_t { + SystemHintFixed = 0xD503201Fu, + SystemHintFMask = 0xFFFFF01Fu, + SystemHintMask = 0xFFFFF01Fu, + HINT = SystemHintFixed | 0x00000000u +}; + +enum SystemSysOp : uint32_t { + SystemSysFixed = 0xD5080000u, + SystemSysFMask = 0xFFF80000u, + SystemSysMask = 0xFFF80000u, + SYS = SystemSysFixed | 0x00000000u +}; + +// Exception. +enum ExceptionOp : uint32_t { + ExceptionFixed = 0xD4000000u, + ExceptionFMask = 0xFF000000u, + ExceptionMask = 0xFFE0001Fu, + HLT = ExceptionFixed | 0x00400000u, + BRK = ExceptionFixed | 0x00200000u, + SVC = ExceptionFixed | 0x00000001u, + HVC = ExceptionFixed | 0x00000002u, + SMC = ExceptionFixed | 0x00000003u, + DCPS1 = ExceptionFixed | 0x00A00001u, + DCPS2 = ExceptionFixed | 0x00A00002u, + DCPS3 = ExceptionFixed | 0x00A00003u +}; + +enum MemBarrierOp : uint32_t { + MemBarrierFixed = 0xD503309Fu, + MemBarrierFMask = 0xFFFFF09Fu, + MemBarrierMask = 0xFFFFF0FFu, + DSB = MemBarrierFixed | 0x00000000u, + DMB = MemBarrierFixed | 0x00000020u, + ISB = MemBarrierFixed | 0x00000040u +}; + +enum SystemExclusiveMonitorOp : uint32_t { + SystemExclusiveMonitorFixed = 0xD503305Fu, + SystemExclusiveMonitorFMask = 0xFFFFF0FFu, + SystemExclusiveMonitorMask = 0xFFFFF0FFu, + CLREX = SystemExclusiveMonitorFixed +}; + +enum SystemPAuthOp : uint32_t { + SystemPAuthFixed = 0xD503211Fu, + SystemPAuthFMask = 0xFFFFFD1Fu, + SystemPAuthMask = 0xFFFFFFFFu, + PACIA1716 = SystemPAuthFixed | 0x00000100u, + PACIB1716 = SystemPAuthFixed | 0x00000140u, + AUTIA1716 = SystemPAuthFixed | 0x00000180u, + AUTIB1716 = SystemPAuthFixed | 0x000001C0u, + PACIAZ = SystemPAuthFixed | 0x00000300u, + PACIASP = SystemPAuthFixed | 0x00000320u, + PACIBZ = SystemPAuthFixed | 0x00000340u, + PACIBSP = SystemPAuthFixed | 0x00000360u, + AUTIAZ = SystemPAuthFixed | 0x00000380u, + AUTIASP = SystemPAuthFixed | 0x000003A0u, + AUTIBZ = SystemPAuthFixed | 0x000003C0u, + AUTIBSP = SystemPAuthFixed | 0x000003E0u, + + // XPACLRI has the same fixed mask as System Hints and needs to be handled + // differently. + XPACLRI = 0xD50320FFu +}; + +// Any load or store. +enum LoadStoreAnyOp : uint32_t { + LoadStoreAnyFMask = 0x0a000000u, + LoadStoreAnyFixed = 0x08000000u +}; + +// Any load pair or store pair. +enum LoadStorePairAnyOp : uint32_t { + LoadStorePairAnyFMask = 0x3a000000u, + LoadStorePairAnyFixed = 0x28000000u +}; + +#define LOAD_STORE_PAIR_OP_LIST(V) \ + V(STP, w, 0x00000000u), \ + V(LDP, w, 0x00400000u), \ + V(LDPSW, x, 0x40400000u), \ + V(STP, x, 0x80000000u), \ + V(LDP, x, 0x80400000u), \ + V(STP, s, 0x04000000u), \ + V(LDP, s, 0x04400000u), \ + V(STP, d, 0x44000000u), \ + V(LDP, d, 0x44400000u), \ + V(STP, q, 0x84000000u), \ + V(LDP, q, 0x84400000u) + +// Load/store pair (post, pre and offset.) 
+enum LoadStorePairOp : uint32_t { + LoadStorePairMask = 0xC4400000u, + LoadStorePairLBit = 1 << 22, + #define LOAD_STORE_PAIR(A, B, C) \ + A##_##B = C + LOAD_STORE_PAIR_OP_LIST(LOAD_STORE_PAIR) + #undef LOAD_STORE_PAIR +}; + +enum LoadStorePairPostIndexOp : uint32_t { + LoadStorePairPostIndexFixed = 0x28800000u, + LoadStorePairPostIndexFMask = 0x3B800000u, + LoadStorePairPostIndexMask = 0xFFC00000u, + #define LOAD_STORE_PAIR_POST_INDEX(A, B, C) \ + A##_##B##_post = LoadStorePairPostIndexFixed | A##_##B + LOAD_STORE_PAIR_OP_LIST(LOAD_STORE_PAIR_POST_INDEX) + #undef LOAD_STORE_PAIR_POST_INDEX +}; + +enum LoadStorePairPreIndexOp : uint32_t { + LoadStorePairPreIndexFixed = 0x29800000u, + LoadStorePairPreIndexFMask = 0x3B800000u, + LoadStorePairPreIndexMask = 0xFFC00000u, + #define LOAD_STORE_PAIR_PRE_INDEX(A, B, C) \ + A##_##B##_pre = LoadStorePairPreIndexFixed | A##_##B + LOAD_STORE_PAIR_OP_LIST(LOAD_STORE_PAIR_PRE_INDEX) + #undef LOAD_STORE_PAIR_PRE_INDEX +}; + +enum LoadStorePairOffsetOp : uint32_t { + LoadStorePairOffsetFixed = 0x29000000u, + LoadStorePairOffsetFMask = 0x3B800000u, + LoadStorePairOffsetMask = 0xFFC00000u, + #define LOAD_STORE_PAIR_OFFSET(A, B, C) \ + A##_##B##_off = LoadStorePairOffsetFixed | A##_##B + LOAD_STORE_PAIR_OP_LIST(LOAD_STORE_PAIR_OFFSET) + #undef LOAD_STORE_PAIR_OFFSET +}; + +enum LoadStorePairNonTemporalOp : uint32_t { + LoadStorePairNonTemporalFixed = 0x28000000u, + LoadStorePairNonTemporalFMask = 0x3B800000u, + LoadStorePairNonTemporalMask = 0xFFC00000u, + LoadStorePairNonTemporalLBit = 1 << 22, + STNP_w = LoadStorePairNonTemporalFixed | STP_w, + LDNP_w = LoadStorePairNonTemporalFixed | LDP_w, + STNP_x = LoadStorePairNonTemporalFixed | STP_x, + LDNP_x = LoadStorePairNonTemporalFixed | LDP_x, + STNP_s = LoadStorePairNonTemporalFixed | STP_s, + LDNP_s = LoadStorePairNonTemporalFixed | LDP_s, + STNP_d = LoadStorePairNonTemporalFixed | STP_d, + LDNP_d = LoadStorePairNonTemporalFixed | LDP_d, + STNP_q = LoadStorePairNonTemporalFixed | STP_q, + LDNP_q = LoadStorePairNonTemporalFixed | LDP_q +}; + +// Load with pointer authentication. +enum LoadStorePACOp { + LoadStorePACFixed = 0xF8200400u, + LoadStorePACFMask = 0xFF200400u, + LoadStorePACMask = 0xFFA00C00u, + LoadStorePACPreBit = 0x00000800u, + LDRAA = LoadStorePACFixed | 0x00000000u, + LDRAA_pre = LoadStorePACPreBit | LDRAA, + LDRAB = LoadStorePACFixed | 0x00800000u, + LDRAB_pre = LoadStorePACPreBit | LDRAB +}; + +// Load literal. 
+enum LoadLiteralOp : uint32_t { + LoadLiteralFixed = 0x18000000u, + LoadLiteralFMask = 0x3B000000u, + LoadLiteralMask = 0xFF000000u, + LDR_w_lit = LoadLiteralFixed | 0x00000000u, + LDR_x_lit = LoadLiteralFixed | 0x40000000u, + LDRSW_x_lit = LoadLiteralFixed | 0x80000000u, + PRFM_lit = LoadLiteralFixed | 0xC0000000u, + LDR_s_lit = LoadLiteralFixed | 0x04000000u, + LDR_d_lit = LoadLiteralFixed | 0x44000000u, + LDR_q_lit = LoadLiteralFixed | 0x84000000u +}; + +#define LOAD_STORE_OP_LIST(V) \ + V(ST, RB, w, 0x00000000u), \ + V(ST, RH, w, 0x40000000u), \ + V(ST, R, w, 0x80000000u), \ + V(ST, R, x, 0xC0000000u), \ + V(LD, RB, w, 0x00400000u), \ + V(LD, RH, w, 0x40400000u), \ + V(LD, R, w, 0x80400000u), \ + V(LD, R, x, 0xC0400000u), \ + V(LD, RSB, x, 0x00800000u), \ + V(LD, RSH, x, 0x40800000u), \ + V(LD, RSW, x, 0x80800000u), \ + V(LD, RSB, w, 0x00C00000u), \ + V(LD, RSH, w, 0x40C00000u), \ + V(ST, R, b, 0x04000000u), \ + V(ST, R, h, 0x44000000u), \ + V(ST, R, s, 0x84000000u), \ + V(ST, R, d, 0xC4000000u), \ + V(ST, R, q, 0x04800000u), \ + V(LD, R, b, 0x04400000u), \ + V(LD, R, h, 0x44400000u), \ + V(LD, R, s, 0x84400000u), \ + V(LD, R, d, 0xC4400000u), \ + V(LD, R, q, 0x04C00000u) + +// Load/store (post, pre, offset and unsigned.) +enum LoadStoreOp : uint32_t { + LoadStoreMask = 0xC4C00000u, + LoadStoreVMask = 0x04000000u, + #define LOAD_STORE(A, B, C, D) \ + A##B##_##C = D + LOAD_STORE_OP_LIST(LOAD_STORE), + #undef LOAD_STORE + PRFM = 0xC0800000u +}; + +// Load/store unscaled offset. +enum LoadStoreUnscaledOffsetOp : uint32_t { + LoadStoreUnscaledOffsetFixed = 0x38000000u, + LoadStoreUnscaledOffsetFMask = 0x3B200C00u, + LoadStoreUnscaledOffsetMask = 0xFFE00C00u, + PRFUM = LoadStoreUnscaledOffsetFixed | PRFM, + #define LOAD_STORE_UNSCALED(A, B, C, D) \ + A##U##B##_##C = LoadStoreUnscaledOffsetFixed | D + LOAD_STORE_OP_LIST(LOAD_STORE_UNSCALED) + #undef LOAD_STORE_UNSCALED +}; + +// Load/store post index. +enum LoadStorePostIndex : uint32_t { + LoadStorePostIndexFixed = 0x38000400u, + LoadStorePostIndexFMask = 0x3B200C00u, + LoadStorePostIndexMask = 0xFFE00C00u, + #define LOAD_STORE_POST_INDEX(A, B, C, D) \ + A##B##_##C##_post = LoadStorePostIndexFixed | D + LOAD_STORE_OP_LIST(LOAD_STORE_POST_INDEX) + #undef LOAD_STORE_POST_INDEX +}; + +// Load/store pre index. +enum LoadStorePreIndex : uint32_t { + LoadStorePreIndexFixed = 0x38000C00u, + LoadStorePreIndexFMask = 0x3B200C00u, + LoadStorePreIndexMask = 0xFFE00C00u, + #define LOAD_STORE_PRE_INDEX(A, B, C, D) \ + A##B##_##C##_pre = LoadStorePreIndexFixed | D + LOAD_STORE_OP_LIST(LOAD_STORE_PRE_INDEX) + #undef LOAD_STORE_PRE_INDEX +}; + +// Load/store unsigned offset. +enum LoadStoreUnsignedOffset : uint32_t { + LoadStoreUnsignedOffsetFixed = 0x39000000u, + LoadStoreUnsignedOffsetFMask = 0x3B000000u, + LoadStoreUnsignedOffsetMask = 0xFFC00000u, + PRFM_unsigned = LoadStoreUnsignedOffsetFixed | PRFM, + #define LOAD_STORE_UNSIGNED_OFFSET(A, B, C, D) \ + A##B##_##C##_unsigned = LoadStoreUnsignedOffsetFixed | D + LOAD_STORE_OP_LIST(LOAD_STORE_UNSIGNED_OFFSET) + #undef LOAD_STORE_UNSIGNED_OFFSET +}; + +// Load/store register offset. 
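
As a compile-time illustration (again, not part of the imported sources), the sketch below shows how a LOAD_STORE_OP_LIST entry combines with the addressing-mode "Fixed" values above; the same pattern generates the pair, unscaled, post- and pre-index variants. The constants are copied from this header, and the expected results are assumed to match the architectural class bits of "ldr/str x, [xn, #imm]".

    #include <cstdint>

    constexpr uint32_t kLDR_x = 0xC0400000u;  // V(LD, R, x, 0xC0400000u)
    constexpr uint32_t kSTR_x = 0xC0000000u;  // V(ST, R, x, 0xC0000000u)
    constexpr uint32_t kLoadStoreUnsignedOffsetFixed = 0x39000000u;
    constexpr uint32_t kLoadStorePreIndexFixed = 0x38000C00u;

    // LDR_x_unsigned = LoadStoreUnsignedOffsetFixed | LDR_x, and so on for
    // every (operation, addressing mode) pair the macros expand.
    static_assert((kLoadStoreUnsignedOffsetFixed | kLDR_x) == 0xF9400000u,
                  "ldr x, [xn, #imm] class bits");
    static_assert((kLoadStoreUnsignedOffsetFixed | kSTR_x) == 0xF9000000u,
                  "str x, [xn, #imm] class bits");
    static_assert((kLoadStorePreIndexFixed | kLDR_x) == 0xF8400C00u,
                  "ldr x, [xn, #imm]! class bits");
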
+enum LoadStoreRegisterOffset : uint32_t { + LoadStoreRegisterOffsetFixed = 0x38200800u, + LoadStoreRegisterOffsetFMask = 0x3B200C00u, + LoadStoreRegisterOffsetMask = 0xFFE00C00u, + PRFM_reg = LoadStoreRegisterOffsetFixed | PRFM, + #define LOAD_STORE_REGISTER_OFFSET(A, B, C, D) \ + A##B##_##C##_reg = LoadStoreRegisterOffsetFixed | D + LOAD_STORE_OP_LIST(LOAD_STORE_REGISTER_OFFSET) + #undef LOAD_STORE_REGISTER_OFFSET +}; + +enum LoadStoreExclusive : uint32_t { + LoadStoreExclusiveFixed = 0x08000000u, + LoadStoreExclusiveFMask = 0x3F000000u, + LoadStoreExclusiveMask = 0xFFE08000u, + STXRB_w = LoadStoreExclusiveFixed | 0x00000000u, + STXRH_w = LoadStoreExclusiveFixed | 0x40000000u, + STXR_w = LoadStoreExclusiveFixed | 0x80000000u, + STXR_x = LoadStoreExclusiveFixed | 0xC0000000u, + LDXRB_w = LoadStoreExclusiveFixed | 0x00400000u, + LDXRH_w = LoadStoreExclusiveFixed | 0x40400000u, + LDXR_w = LoadStoreExclusiveFixed | 0x80400000u, + LDXR_x = LoadStoreExclusiveFixed | 0xC0400000u, + STXP_w = LoadStoreExclusiveFixed | 0x80200000u, + STXP_x = LoadStoreExclusiveFixed | 0xC0200000u, + LDXP_w = LoadStoreExclusiveFixed | 0x80600000u, + LDXP_x = LoadStoreExclusiveFixed | 0xC0600000u, + STLXRB_w = LoadStoreExclusiveFixed | 0x00008000u, + STLXRH_w = LoadStoreExclusiveFixed | 0x40008000u, + STLXR_w = LoadStoreExclusiveFixed | 0x80008000u, + STLXR_x = LoadStoreExclusiveFixed | 0xC0008000u, + LDAXRB_w = LoadStoreExclusiveFixed | 0x00408000u, + LDAXRH_w = LoadStoreExclusiveFixed | 0x40408000u, + LDAXR_w = LoadStoreExclusiveFixed | 0x80408000u, + LDAXR_x = LoadStoreExclusiveFixed | 0xC0408000u, + STLXP_w = LoadStoreExclusiveFixed | 0x80208000u, + STLXP_x = LoadStoreExclusiveFixed | 0xC0208000u, + LDAXP_w = LoadStoreExclusiveFixed | 0x80608000u, + LDAXP_x = LoadStoreExclusiveFixed | 0xC0608000u, + STLRB_w = LoadStoreExclusiveFixed | 0x00808000u, + STLRH_w = LoadStoreExclusiveFixed | 0x40808000u, + STLR_w = LoadStoreExclusiveFixed | 0x80808000u, + STLR_x = LoadStoreExclusiveFixed | 0xC0808000u, + LDARB_w = LoadStoreExclusiveFixed | 0x00C08000u, + LDARH_w = LoadStoreExclusiveFixed | 0x40C08000u, + LDAR_w = LoadStoreExclusiveFixed | 0x80C08000u, + LDAR_x = LoadStoreExclusiveFixed | 0xC0C08000u, + + // v8.1 Load/store LORegion ops + STLLRB = LoadStoreExclusiveFixed | 0x00800000u, + LDLARB = LoadStoreExclusiveFixed | 0x00C00000u, + STLLRH = LoadStoreExclusiveFixed | 0x40800000u, + LDLARH = LoadStoreExclusiveFixed | 0x40C00000u, + STLLR_w = LoadStoreExclusiveFixed | 0x80800000u, + LDLAR_w = LoadStoreExclusiveFixed | 0x80C00000u, + STLLR_x = LoadStoreExclusiveFixed | 0xC0800000u, + LDLAR_x = LoadStoreExclusiveFixed | 0xC0C00000u, + + // v8.1 Load/store exclusive ops + LSEBit_l = 0x00400000u, + LSEBit_o0 = 0x00008000u, + LSEBit_sz = 0x40000000u, + CASFixed = LoadStoreExclusiveFixed | 0x80A00000u, + CASBFixed = LoadStoreExclusiveFixed | 0x00A00000u, + CASHFixed = LoadStoreExclusiveFixed | 0x40A00000u, + CASPFixed = LoadStoreExclusiveFixed | 0x00200000u, + CAS_w = CASFixed, + CAS_x = CASFixed | LSEBit_sz, + CASA_w = CASFixed | LSEBit_l, + CASA_x = CASFixed | LSEBit_l | LSEBit_sz, + CASL_w = CASFixed | LSEBit_o0, + CASL_x = CASFixed | LSEBit_o0 | LSEBit_sz, + CASAL_w = CASFixed | LSEBit_l | LSEBit_o0, + CASAL_x = CASFixed | LSEBit_l | LSEBit_o0 | LSEBit_sz, + CASB = CASBFixed, + CASAB = CASBFixed | LSEBit_l, + CASLB = CASBFixed | LSEBit_o0, + CASALB = CASBFixed | LSEBit_l | LSEBit_o0, + CASH = CASHFixed, + CASAH = CASHFixed | LSEBit_l, + CASLH = CASHFixed | LSEBit_o0, + CASALH = CASHFixed | LSEBit_l | LSEBit_o0, + CASP_w 
= CASPFixed, + CASP_x = CASPFixed | LSEBit_sz, + CASPA_w = CASPFixed | LSEBit_l, + CASPA_x = CASPFixed | LSEBit_l | LSEBit_sz, + CASPL_w = CASPFixed | LSEBit_o0, + CASPL_x = CASPFixed | LSEBit_o0 | LSEBit_sz, + CASPAL_w = CASPFixed | LSEBit_l | LSEBit_o0, + CASPAL_x = CASPFixed | LSEBit_l | LSEBit_o0 | LSEBit_sz +}; + +// Load/store RCpc unscaled offset. +enum LoadStoreRCpcUnscaledOffsetOp : uint32_t { + LoadStoreRCpcUnscaledOffsetFixed = 0x19000000u, + LoadStoreRCpcUnscaledOffsetFMask = 0x3F200C00u, + LoadStoreRCpcUnscaledOffsetMask = 0xFFE00C00u, + STLURB = LoadStoreRCpcUnscaledOffsetFixed | 0x00000000u, + LDAPURB = LoadStoreRCpcUnscaledOffsetFixed | 0x00400000u, + LDAPURSB_x = LoadStoreRCpcUnscaledOffsetFixed | 0x00800000u, + LDAPURSB_w = LoadStoreRCpcUnscaledOffsetFixed | 0x00C00000u, + STLURH = LoadStoreRCpcUnscaledOffsetFixed | 0x40000000u, + LDAPURH = LoadStoreRCpcUnscaledOffsetFixed | 0x40400000u, + LDAPURSH_x = LoadStoreRCpcUnscaledOffsetFixed | 0x40800000u, + LDAPURSH_w = LoadStoreRCpcUnscaledOffsetFixed | 0x40C00000u, + STLUR_w = LoadStoreRCpcUnscaledOffsetFixed | 0x80000000u, + LDAPUR_w = LoadStoreRCpcUnscaledOffsetFixed | 0x80400000u, + LDAPURSW = LoadStoreRCpcUnscaledOffsetFixed | 0x80800000u, + STLUR_x = LoadStoreRCpcUnscaledOffsetFixed | 0xC0000000u, + LDAPUR_x = LoadStoreRCpcUnscaledOffsetFixed | 0xC0400000u +}; + +#define ATOMIC_MEMORY_SIMPLE_OPC_LIST(V) \ + V(LDADD, 0x00000000u), \ + V(LDCLR, 0x00001000u), \ + V(LDEOR, 0x00002000u), \ + V(LDSET, 0x00003000u), \ + V(LDSMAX, 0x00004000u), \ + V(LDSMIN, 0x00005000u), \ + V(LDUMAX, 0x00006000u), \ + V(LDUMIN, 0x00007000u) + +// Atomic memory. +enum AtomicMemoryOp : uint32_t { + AtomicMemoryFixed = 0x38200000u, + AtomicMemoryFMask = 0x3B200C00u, + AtomicMemoryMask = 0xFFE0FC00u, + SWPB = AtomicMemoryFixed | 0x00008000u, + SWPAB = AtomicMemoryFixed | 0x00808000u, + SWPLB = AtomicMemoryFixed | 0x00408000u, + SWPALB = AtomicMemoryFixed | 0x00C08000u, + SWPH = AtomicMemoryFixed | 0x40008000u, + SWPAH = AtomicMemoryFixed | 0x40808000u, + SWPLH = AtomicMemoryFixed | 0x40408000u, + SWPALH = AtomicMemoryFixed | 0x40C08000u, + SWP_w = AtomicMemoryFixed | 0x80008000u, + SWPA_w = AtomicMemoryFixed | 0x80808000u, + SWPL_w = AtomicMemoryFixed | 0x80408000u, + SWPAL_w = AtomicMemoryFixed | 0x80C08000u, + SWP_x = AtomicMemoryFixed | 0xC0008000u, + SWPA_x = AtomicMemoryFixed | 0xC0808000u, + SWPL_x = AtomicMemoryFixed | 0xC0408000u, + SWPAL_x = AtomicMemoryFixed | 0xC0C08000u, + LDAPRB = AtomicMemoryFixed | 0x0080C000u, + LDAPRH = AtomicMemoryFixed | 0x4080C000u, + LDAPR_w = AtomicMemoryFixed | 0x8080C000u, + LDAPR_x = AtomicMemoryFixed | 0xC080C000u, + + AtomicMemorySimpleFMask = 0x3B208C00u, + AtomicMemorySimpleOpMask = 0x00007000u, +#define ATOMIC_MEMORY_SIMPLE(N, OP) \ + N##Op = OP, \ + N##B = AtomicMemoryFixed | OP, \ + N##AB = AtomicMemoryFixed | OP | 0x00800000u, \ + N##LB = AtomicMemoryFixed | OP | 0x00400000u, \ + N##ALB = AtomicMemoryFixed | OP | 0x00C00000u, \ + N##H = AtomicMemoryFixed | OP | 0x40000000u, \ + N##AH = AtomicMemoryFixed | OP | 0x40800000u, \ + N##LH = AtomicMemoryFixed | OP | 0x40400000u, \ + N##ALH = AtomicMemoryFixed | OP | 0x40C00000u, \ + N##_w = AtomicMemoryFixed | OP | 0x80000000u, \ + N##A_w = AtomicMemoryFixed | OP | 0x80800000u, \ + N##L_w = AtomicMemoryFixed | OP | 0x80400000u, \ + N##AL_w = AtomicMemoryFixed | OP | 0x80C00000u, \ + N##_x = AtomicMemoryFixed | OP | 0xC0000000u, \ + N##A_x = AtomicMemoryFixed | OP | 0xC0800000u, \ + N##L_x = AtomicMemoryFixed | OP | 0xC0400000u, \ + N##AL_x = 
AtomicMemoryFixed | OP | 0xC0C00000u + + ATOMIC_MEMORY_SIMPLE_OPC_LIST(ATOMIC_MEMORY_SIMPLE) +#undef ATOMIC_MEMORY_SIMPLE +}; + +// Conditional compare. +enum ConditionalCompareOp : uint32_t { + ConditionalCompareMask = 0x60000000u, + CCMN = 0x20000000u, + CCMP = 0x60000000u +}; + +// Conditional compare register. +enum ConditionalCompareRegisterOp : uint32_t { + ConditionalCompareRegisterFixed = 0x1A400000u, + ConditionalCompareRegisterFMask = 0x1FE00800u, + ConditionalCompareRegisterMask = 0xFFE00C10u, + CCMN_w = ConditionalCompareRegisterFixed | CCMN, + CCMN_x = ConditionalCompareRegisterFixed | SixtyFourBits | CCMN, + CCMP_w = ConditionalCompareRegisterFixed | CCMP, + CCMP_x = ConditionalCompareRegisterFixed | SixtyFourBits | CCMP +}; + +// Conditional compare immediate. +enum ConditionalCompareImmediateOp : uint32_t { + ConditionalCompareImmediateFixed = 0x1A400800u, + ConditionalCompareImmediateFMask = 0x1FE00800u, + ConditionalCompareImmediateMask = 0xFFE00C10u, + CCMN_w_imm = ConditionalCompareImmediateFixed | CCMN, + CCMN_x_imm = ConditionalCompareImmediateFixed | SixtyFourBits | CCMN, + CCMP_w_imm = ConditionalCompareImmediateFixed | CCMP, + CCMP_x_imm = ConditionalCompareImmediateFixed | SixtyFourBits | CCMP +}; + +// Conditional select. +enum ConditionalSelectOp : uint32_t { + ConditionalSelectFixed = 0x1A800000u, + ConditionalSelectFMask = 0x1FE00000u, + ConditionalSelectMask = 0xFFE00C00u, + CSEL_w = ConditionalSelectFixed | 0x00000000u, + CSEL_x = ConditionalSelectFixed | 0x80000000u, + CSEL = CSEL_w, + CSINC_w = ConditionalSelectFixed | 0x00000400u, + CSINC_x = ConditionalSelectFixed | 0x80000400u, + CSINC = CSINC_w, + CSINV_w = ConditionalSelectFixed | 0x40000000u, + CSINV_x = ConditionalSelectFixed | 0xC0000000u, + CSINV = CSINV_w, + CSNEG_w = ConditionalSelectFixed | 0x40000400u, + CSNEG_x = ConditionalSelectFixed | 0xC0000400u, + CSNEG = CSNEG_w +}; + +// Data processing 1 source. +enum DataProcessing1SourceOp : uint32_t { + DataProcessing1SourceFixed = 0x5AC00000u, + DataProcessing1SourceFMask = 0x5FE00000u, + DataProcessing1SourceMask = 0xFFFFFC00u, + RBIT = DataProcessing1SourceFixed | 0x00000000u, + RBIT_w = RBIT, + RBIT_x = RBIT | SixtyFourBits, + REV16 = DataProcessing1SourceFixed | 0x00000400u, + REV16_w = REV16, + REV16_x = REV16 | SixtyFourBits, + REV = DataProcessing1SourceFixed | 0x00000800u, + REV_w = REV, + REV32_x = REV | SixtyFourBits, + REV_x = DataProcessing1SourceFixed | SixtyFourBits | 0x00000C00u, + CLZ = DataProcessing1SourceFixed | 0x00001000u, + CLZ_w = CLZ, + CLZ_x = CLZ | SixtyFourBits, + CLS = DataProcessing1SourceFixed | 0x00001400u, + CLS_w = CLS, + CLS_x = CLS | SixtyFourBits, + + // Pointer authentication instructions in Armv8.3. 
+ PACIA = DataProcessing1SourceFixed | 0x80010000u, + PACIB = DataProcessing1SourceFixed | 0x80010400u, + PACDA = DataProcessing1SourceFixed | 0x80010800u, + PACDB = DataProcessing1SourceFixed | 0x80010C00u, + AUTIA = DataProcessing1SourceFixed | 0x80011000u, + AUTIB = DataProcessing1SourceFixed | 0x80011400u, + AUTDA = DataProcessing1SourceFixed | 0x80011800u, + AUTDB = DataProcessing1SourceFixed | 0x80011C00u, + PACIZA = DataProcessing1SourceFixed | 0x80012000u, + PACIZB = DataProcessing1SourceFixed | 0x80012400u, + PACDZA = DataProcessing1SourceFixed | 0x80012800u, + PACDZB = DataProcessing1SourceFixed | 0x80012C00u, + AUTIZA = DataProcessing1SourceFixed | 0x80013000u, + AUTIZB = DataProcessing1SourceFixed | 0x80013400u, + AUTDZA = DataProcessing1SourceFixed | 0x80013800u, + AUTDZB = DataProcessing1SourceFixed | 0x80013C00u, + XPACI = DataProcessing1SourceFixed | 0x80014000u, + XPACD = DataProcessing1SourceFixed | 0x80014400u +}; + +// Data processing 2 source. +enum DataProcessing2SourceOp : uint32_t { + DataProcessing2SourceFixed = 0x1AC00000u, + DataProcessing2SourceFMask = 0x5FE00000u, + DataProcessing2SourceMask = 0xFFE0FC00u, + UDIV_w = DataProcessing2SourceFixed | 0x00000800u, + UDIV_x = DataProcessing2SourceFixed | 0x80000800u, + UDIV = UDIV_w, + SDIV_w = DataProcessing2SourceFixed | 0x00000C00u, + SDIV_x = DataProcessing2SourceFixed | 0x80000C00u, + SDIV = SDIV_w, + LSLV_w = DataProcessing2SourceFixed | 0x00002000u, + LSLV_x = DataProcessing2SourceFixed | 0x80002000u, + LSLV = LSLV_w, + LSRV_w = DataProcessing2SourceFixed | 0x00002400u, + LSRV_x = DataProcessing2SourceFixed | 0x80002400u, + LSRV = LSRV_w, + ASRV_w = DataProcessing2SourceFixed | 0x00002800u, + ASRV_x = DataProcessing2SourceFixed | 0x80002800u, + ASRV = ASRV_w, + RORV_w = DataProcessing2SourceFixed | 0x00002C00u, + RORV_x = DataProcessing2SourceFixed | 0x80002C00u, + RORV = RORV_w, + PACGA = DataProcessing2SourceFixed | SixtyFourBits | 0x00003000u, + CRC32B = DataProcessing2SourceFixed | 0x00004000u, + CRC32H = DataProcessing2SourceFixed | 0x00004400u, + CRC32W = DataProcessing2SourceFixed | 0x00004800u, + CRC32X = DataProcessing2SourceFixed | SixtyFourBits | 0x00004C00u, + CRC32CB = DataProcessing2SourceFixed | 0x00005000u, + CRC32CH = DataProcessing2SourceFixed | 0x00005400u, + CRC32CW = DataProcessing2SourceFixed | 0x00005800u, + CRC32CX = DataProcessing2SourceFixed | SixtyFourBits | 0x00005C00u +}; + +// Data processing 3 source. +enum DataProcessing3SourceOp : uint32_t { + DataProcessing3SourceFixed = 0x1B000000u, + DataProcessing3SourceFMask = 0x1F000000u, + DataProcessing3SourceMask = 0xFFE08000u, + MADD_w = DataProcessing3SourceFixed | 0x00000000u, + MADD_x = DataProcessing3SourceFixed | 0x80000000u, + MADD = MADD_w, + MSUB_w = DataProcessing3SourceFixed | 0x00008000u, + MSUB_x = DataProcessing3SourceFixed | 0x80008000u, + MSUB = MSUB_w, + SMADDL_x = DataProcessing3SourceFixed | 0x80200000u, + SMSUBL_x = DataProcessing3SourceFixed | 0x80208000u, + SMULH_x = DataProcessing3SourceFixed | 0x80400000u, + UMADDL_x = DataProcessing3SourceFixed | 0x80A00000u, + UMSUBL_x = DataProcessing3SourceFixed | 0x80A08000u, + UMULH_x = DataProcessing3SourceFixed | 0x80C00000u +}; + +// Floating point compare. 
+enum FPCompareOp : uint32_t { + FPCompareFixed = 0x1E202000u, + FPCompareFMask = 0x5F203C00u, + FPCompareMask = 0xFFE0FC1Fu, + FCMP_h = FPCompareFixed | FP16 | 0x00000000u, + FCMP_s = FPCompareFixed | 0x00000000u, + FCMP_d = FPCompareFixed | FP64 | 0x00000000u, + FCMP = FCMP_s, + FCMP_h_zero = FPCompareFixed | FP16 | 0x00000008u, + FCMP_s_zero = FPCompareFixed | 0x00000008u, + FCMP_d_zero = FPCompareFixed | FP64 | 0x00000008u, + FCMP_zero = FCMP_s_zero, + FCMPE_h = FPCompareFixed | FP16 | 0x00000010u, + FCMPE_s = FPCompareFixed | 0x00000010u, + FCMPE_d = FPCompareFixed | FP64 | 0x00000010u, + FCMPE = FCMPE_s, + FCMPE_h_zero = FPCompareFixed | FP16 | 0x00000018u, + FCMPE_s_zero = FPCompareFixed | 0x00000018u, + FCMPE_d_zero = FPCompareFixed | FP64 | 0x00000018u, + FCMPE_zero = FCMPE_s_zero +}; + +// Floating point conditional compare. +enum FPConditionalCompareOp : uint32_t { + FPConditionalCompareFixed = 0x1E200400u, + FPConditionalCompareFMask = 0x5F200C00u, + FPConditionalCompareMask = 0xFFE00C10u, + FCCMP_h = FPConditionalCompareFixed | FP16 | 0x00000000u, + FCCMP_s = FPConditionalCompareFixed | 0x00000000u, + FCCMP_d = FPConditionalCompareFixed | FP64 | 0x00000000u, + FCCMP = FCCMP_s, + FCCMPE_h = FPConditionalCompareFixed | FP16 | 0x00000010u, + FCCMPE_s = FPConditionalCompareFixed | 0x00000010u, + FCCMPE_d = FPConditionalCompareFixed | FP64 | 0x00000010u, + FCCMPE = FCCMPE_s +}; + +// Floating point conditional select. +enum FPConditionalSelectOp : uint32_t { + FPConditionalSelectFixed = 0x1E200C00u, + FPConditionalSelectFMask = 0x5F200C00u, + FPConditionalSelectMask = 0xFFE00C00u, + FCSEL_h = FPConditionalSelectFixed | FP16 | 0x00000000u, + FCSEL_s = FPConditionalSelectFixed | 0x00000000u, + FCSEL_d = FPConditionalSelectFixed | FP64 | 0x00000000u, + FCSEL = FCSEL_s +}; + +// Floating point immediate. +enum FPImmediateOp : uint32_t { + FPImmediateFixed = 0x1E201000u, + FPImmediateFMask = 0x5F201C00u, + FPImmediateMask = 0xFFE01C00u, + FMOV_h_imm = FPImmediateFixed | FP16 | 0x00000000u, + FMOV_s_imm = FPImmediateFixed | 0x00000000u, + FMOV_d_imm = FPImmediateFixed | FP64 | 0x00000000u +}; + +// Floating point data processing 1 source. 
+enum FPDataProcessing1SourceOp : uint32_t { + FPDataProcessing1SourceFixed = 0x1E204000u, + FPDataProcessing1SourceFMask = 0x5F207C00u, + FPDataProcessing1SourceMask = 0xFFFFFC00u, + FMOV_h = FPDataProcessing1SourceFixed | FP16 | 0x00000000u, + FMOV_s = FPDataProcessing1SourceFixed | 0x00000000u, + FMOV_d = FPDataProcessing1SourceFixed | FP64 | 0x00000000u, + FMOV = FMOV_s, + FABS_h = FPDataProcessing1SourceFixed | FP16 | 0x00008000u, + FABS_s = FPDataProcessing1SourceFixed | 0x00008000u, + FABS_d = FPDataProcessing1SourceFixed | FP64 | 0x00008000u, + FABS = FABS_s, + FNEG_h = FPDataProcessing1SourceFixed | FP16 | 0x00010000u, + FNEG_s = FPDataProcessing1SourceFixed | 0x00010000u, + FNEG_d = FPDataProcessing1SourceFixed | FP64 | 0x00010000u, + FNEG = FNEG_s, + FSQRT_h = FPDataProcessing1SourceFixed | FP16 | 0x00018000u, + FSQRT_s = FPDataProcessing1SourceFixed | 0x00018000u, + FSQRT_d = FPDataProcessing1SourceFixed | FP64 | 0x00018000u, + FSQRT = FSQRT_s, + FCVT_ds = FPDataProcessing1SourceFixed | 0x00028000, + FCVT_sd = FPDataProcessing1SourceFixed | FP64 | 0x00020000, + FCVT_hs = FPDataProcessing1SourceFixed | 0x00038000, + FCVT_hd = FPDataProcessing1SourceFixed | FP64 | 0x00038000, + FCVT_sh = FPDataProcessing1SourceFixed | 0x00C20000, + FCVT_dh = FPDataProcessing1SourceFixed | 0x00C28000, + FRINT32X_s = FPDataProcessing1SourceFixed | 0x00088000u, + FRINT32X_d = FPDataProcessing1SourceFixed | FP64 | 0x00088000u, + FRINT32X = FRINT32X_s, + FRINT32Z_s = FPDataProcessing1SourceFixed | 0x00080000u, + FRINT32Z_d = FPDataProcessing1SourceFixed | FP64 | 0x00080000u, + FRINT32Z = FRINT32Z_s, + FRINT64X_s = FPDataProcessing1SourceFixed | 0x00098000u, + FRINT64X_d = FPDataProcessing1SourceFixed | FP64 | 0x00098000u, + FRINT64X = FRINT64X_s, + FRINT64Z_s = FPDataProcessing1SourceFixed | 0x00090000u, + FRINT64Z_d = FPDataProcessing1SourceFixed | FP64 | 0x00090000u, + FRINT64Z = FRINT64Z_s, + FRINTN_h = FPDataProcessing1SourceFixed | FP16 | 0x00040000u, + FRINTN_s = FPDataProcessing1SourceFixed | 0x00040000u, + FRINTN_d = FPDataProcessing1SourceFixed | FP64 | 0x00040000u, + FRINTN = FRINTN_s, + FRINTP_h = FPDataProcessing1SourceFixed | FP16 | 0x00048000u, + FRINTP_s = FPDataProcessing1SourceFixed | 0x00048000u, + FRINTP_d = FPDataProcessing1SourceFixed | FP64 | 0x00048000u, + FRINTP = FRINTP_s, + FRINTM_h = FPDataProcessing1SourceFixed | FP16 | 0x00050000u, + FRINTM_s = FPDataProcessing1SourceFixed | 0x00050000u, + FRINTM_d = FPDataProcessing1SourceFixed | FP64 | 0x00050000u, + FRINTM = FRINTM_s, + FRINTZ_h = FPDataProcessing1SourceFixed | FP16 | 0x00058000u, + FRINTZ_s = FPDataProcessing1SourceFixed | 0x00058000u, + FRINTZ_d = FPDataProcessing1SourceFixed | FP64 | 0x00058000u, + FRINTZ = FRINTZ_s, + FRINTA_h = FPDataProcessing1SourceFixed | FP16 | 0x00060000u, + FRINTA_s = FPDataProcessing1SourceFixed | 0x00060000u, + FRINTA_d = FPDataProcessing1SourceFixed | FP64 | 0x00060000u, + FRINTA = FRINTA_s, + FRINTX_h = FPDataProcessing1SourceFixed | FP16 | 0x00070000u, + FRINTX_s = FPDataProcessing1SourceFixed | 0x00070000u, + FRINTX_d = FPDataProcessing1SourceFixed | FP64 | 0x00070000u, + FRINTX = FRINTX_s, + FRINTI_h = FPDataProcessing1SourceFixed | FP16 | 0x00078000u, + FRINTI_s = FPDataProcessing1SourceFixed | 0x00078000u, + FRINTI_d = FPDataProcessing1SourceFixed | FP64 | 0x00078000u, + FRINTI = FRINTI_s +}; + +// Floating point data processing 2 source. 
+enum FPDataProcessing2SourceOp : uint32_t { + FPDataProcessing2SourceFixed = 0x1E200800u, + FPDataProcessing2SourceFMask = 0x5F200C00u, + FPDataProcessing2SourceMask = 0xFFE0FC00u, + FMUL = FPDataProcessing2SourceFixed | 0x00000000u, + FMUL_h = FMUL | FP16, + FMUL_s = FMUL, + FMUL_d = FMUL | FP64, + FDIV = FPDataProcessing2SourceFixed | 0x00001000u, + FDIV_h = FDIV | FP16, + FDIV_s = FDIV, + FDIV_d = FDIV | FP64, + FADD = FPDataProcessing2SourceFixed | 0x00002000u, + FADD_h = FADD | FP16, + FADD_s = FADD, + FADD_d = FADD | FP64, + FSUB = FPDataProcessing2SourceFixed | 0x00003000u, + FSUB_h = FSUB | FP16, + FSUB_s = FSUB, + FSUB_d = FSUB | FP64, + FMAX = FPDataProcessing2SourceFixed | 0x00004000u, + FMAX_h = FMAX | FP16, + FMAX_s = FMAX, + FMAX_d = FMAX | FP64, + FMIN = FPDataProcessing2SourceFixed | 0x00005000u, + FMIN_h = FMIN | FP16, + FMIN_s = FMIN, + FMIN_d = FMIN | FP64, + FMAXNM = FPDataProcessing2SourceFixed | 0x00006000u, + FMAXNM_h = FMAXNM | FP16, + FMAXNM_s = FMAXNM, + FMAXNM_d = FMAXNM | FP64, + FMINNM = FPDataProcessing2SourceFixed | 0x00007000u, + FMINNM_h = FMINNM | FP16, + FMINNM_s = FMINNM, + FMINNM_d = FMINNM | FP64, + FNMUL = FPDataProcessing2SourceFixed | 0x00008000u, + FNMUL_h = FNMUL | FP16, + FNMUL_s = FNMUL, + FNMUL_d = FNMUL | FP64 +}; + +// Floating point data processing 3 source. +enum FPDataProcessing3SourceOp : uint32_t { + FPDataProcessing3SourceFixed = 0x1F000000u, + FPDataProcessing3SourceFMask = 0x5F000000u, + FPDataProcessing3SourceMask = 0xFFE08000u, + FMADD_h = FPDataProcessing3SourceFixed | 0x00C00000u, + FMSUB_h = FPDataProcessing3SourceFixed | 0x00C08000u, + FNMADD_h = FPDataProcessing3SourceFixed | 0x00E00000u, + FNMSUB_h = FPDataProcessing3SourceFixed | 0x00E08000u, + FMADD_s = FPDataProcessing3SourceFixed | 0x00000000u, + FMSUB_s = FPDataProcessing3SourceFixed | 0x00008000u, + FNMADD_s = FPDataProcessing3SourceFixed | 0x00200000u, + FNMSUB_s = FPDataProcessing3SourceFixed | 0x00208000u, + FMADD_d = FPDataProcessing3SourceFixed | 0x00400000u, + FMSUB_d = FPDataProcessing3SourceFixed | 0x00408000u, + FNMADD_d = FPDataProcessing3SourceFixed | 0x00600000u, + FNMSUB_d = FPDataProcessing3SourceFixed | 0x00608000u +}; + +// Conversion between floating point and integer. 
+enum FPIntegerConvertOp : uint32_t { + FPIntegerConvertFixed = 0x1E200000u, + FPIntegerConvertFMask = 0x5F20FC00u, + FPIntegerConvertMask = 0xFFFFFC00u, + FCVTNS = FPIntegerConvertFixed | 0x00000000u, + FCVTNS_wh = FCVTNS | FP16, + FCVTNS_xh = FCVTNS | SixtyFourBits | FP16, + FCVTNS_ws = FCVTNS, + FCVTNS_xs = FCVTNS | SixtyFourBits, + FCVTNS_wd = FCVTNS | FP64, + FCVTNS_xd = FCVTNS | SixtyFourBits | FP64, + FCVTNU = FPIntegerConvertFixed | 0x00010000u, + FCVTNU_wh = FCVTNU | FP16, + FCVTNU_xh = FCVTNU | SixtyFourBits | FP16, + FCVTNU_ws = FCVTNU, + FCVTNU_xs = FCVTNU | SixtyFourBits, + FCVTNU_wd = FCVTNU | FP64, + FCVTNU_xd = FCVTNU | SixtyFourBits | FP64, + FCVTPS = FPIntegerConvertFixed | 0x00080000u, + FCVTPS_wh = FCVTPS | FP16, + FCVTPS_xh = FCVTPS | SixtyFourBits | FP16, + FCVTPS_ws = FCVTPS, + FCVTPS_xs = FCVTPS | SixtyFourBits, + FCVTPS_wd = FCVTPS | FP64, + FCVTPS_xd = FCVTPS | SixtyFourBits | FP64, + FCVTPU = FPIntegerConvertFixed | 0x00090000u, + FCVTPU_wh = FCVTPU | FP16, + FCVTPU_xh = FCVTPU | SixtyFourBits | FP16, + FCVTPU_ws = FCVTPU, + FCVTPU_xs = FCVTPU | SixtyFourBits, + FCVTPU_wd = FCVTPU | FP64, + FCVTPU_xd = FCVTPU | SixtyFourBits | FP64, + FCVTMS = FPIntegerConvertFixed | 0x00100000u, + FCVTMS_wh = FCVTMS | FP16, + FCVTMS_xh = FCVTMS | SixtyFourBits | FP16, + FCVTMS_ws = FCVTMS, + FCVTMS_xs = FCVTMS | SixtyFourBits, + FCVTMS_wd = FCVTMS | FP64, + FCVTMS_xd = FCVTMS | SixtyFourBits | FP64, + FCVTMU = FPIntegerConvertFixed | 0x00110000u, + FCVTMU_wh = FCVTMU | FP16, + FCVTMU_xh = FCVTMU | SixtyFourBits | FP16, + FCVTMU_ws = FCVTMU, + FCVTMU_xs = FCVTMU | SixtyFourBits, + FCVTMU_wd = FCVTMU | FP64, + FCVTMU_xd = FCVTMU | SixtyFourBits | FP64, + FCVTZS = FPIntegerConvertFixed | 0x00180000u, + FCVTZS_wh = FCVTZS | FP16, + FCVTZS_xh = FCVTZS | SixtyFourBits | FP16, + FCVTZS_ws = FCVTZS, + FCVTZS_xs = FCVTZS | SixtyFourBits, + FCVTZS_wd = FCVTZS | FP64, + FCVTZS_xd = FCVTZS | SixtyFourBits | FP64, + FCVTZU = FPIntegerConvertFixed | 0x00190000u, + FCVTZU_wh = FCVTZU | FP16, + FCVTZU_xh = FCVTZU | SixtyFourBits | FP16, + FCVTZU_ws = FCVTZU, + FCVTZU_xs = FCVTZU | SixtyFourBits, + FCVTZU_wd = FCVTZU | FP64, + FCVTZU_xd = FCVTZU | SixtyFourBits | FP64, + SCVTF = FPIntegerConvertFixed | 0x00020000u, + SCVTF_hw = SCVTF | FP16, + SCVTF_hx = SCVTF | SixtyFourBits | FP16, + SCVTF_sw = SCVTF, + SCVTF_sx = SCVTF | SixtyFourBits, + SCVTF_dw = SCVTF | FP64, + SCVTF_dx = SCVTF | SixtyFourBits | FP64, + UCVTF = FPIntegerConvertFixed | 0x00030000u, + UCVTF_hw = UCVTF | FP16, + UCVTF_hx = UCVTF | SixtyFourBits | FP16, + UCVTF_sw = UCVTF, + UCVTF_sx = UCVTF | SixtyFourBits, + UCVTF_dw = UCVTF | FP64, + UCVTF_dx = UCVTF | SixtyFourBits | FP64, + FCVTAS = FPIntegerConvertFixed | 0x00040000u, + FCVTAS_wh = FCVTAS | FP16, + FCVTAS_xh = FCVTAS | SixtyFourBits | FP16, + FCVTAS_ws = FCVTAS, + FCVTAS_xs = FCVTAS | SixtyFourBits, + FCVTAS_wd = FCVTAS | FP64, + FCVTAS_xd = FCVTAS | SixtyFourBits | FP64, + FCVTAU = FPIntegerConvertFixed | 0x00050000u, + FCVTAU_wh = FCVTAU | FP16, + FCVTAU_xh = FCVTAU | SixtyFourBits | FP16, + FCVTAU_ws = FCVTAU, + FCVTAU_xs = FCVTAU | SixtyFourBits, + FCVTAU_wd = FCVTAU | FP64, + FCVTAU_xd = FCVTAU | SixtyFourBits | FP64, + FMOV_wh = FPIntegerConvertFixed | 0x00060000u | FP16, + FMOV_hw = FPIntegerConvertFixed | 0x00070000u | FP16, + FMOV_xh = FMOV_wh | SixtyFourBits, + FMOV_hx = FMOV_hw | SixtyFourBits, + FMOV_ws = FPIntegerConvertFixed | 0x00060000u, + FMOV_sw = FPIntegerConvertFixed | 0x00070000u, + FMOV_xd = FMOV_ws | SixtyFourBits | FP64, + FMOV_dx = FMOV_sw | 
SixtyFourBits | FP64, + FMOV_d1_x = FPIntegerConvertFixed | SixtyFourBits | 0x008F0000u, + FMOV_x_d1 = FPIntegerConvertFixed | SixtyFourBits | 0x008E0000u, + FJCVTZS = FPIntegerConvertFixed | FP64 | 0x001E0000 +}; + +// Conversion between fixed point and floating point. +enum FPFixedPointConvertOp : uint32_t { + FPFixedPointConvertFixed = 0x1E000000u, + FPFixedPointConvertFMask = 0x5F200000u, + FPFixedPointConvertMask = 0xFFFF0000u, + FCVTZS_fixed = FPFixedPointConvertFixed | 0x00180000u, + FCVTZS_wh_fixed = FCVTZS_fixed | FP16, + FCVTZS_xh_fixed = FCVTZS_fixed | SixtyFourBits | FP16, + FCVTZS_ws_fixed = FCVTZS_fixed, + FCVTZS_xs_fixed = FCVTZS_fixed | SixtyFourBits, + FCVTZS_wd_fixed = FCVTZS_fixed | FP64, + FCVTZS_xd_fixed = FCVTZS_fixed | SixtyFourBits | FP64, + FCVTZU_fixed = FPFixedPointConvertFixed | 0x00190000u, + FCVTZU_wh_fixed = FCVTZU_fixed | FP16, + FCVTZU_xh_fixed = FCVTZU_fixed | SixtyFourBits | FP16, + FCVTZU_ws_fixed = FCVTZU_fixed, + FCVTZU_xs_fixed = FCVTZU_fixed | SixtyFourBits, + FCVTZU_wd_fixed = FCVTZU_fixed | FP64, + FCVTZU_xd_fixed = FCVTZU_fixed | SixtyFourBits | FP64, + SCVTF_fixed = FPFixedPointConvertFixed | 0x00020000u, + SCVTF_hw_fixed = SCVTF_fixed | FP16, + SCVTF_hx_fixed = SCVTF_fixed | SixtyFourBits | FP16, + SCVTF_sw_fixed = SCVTF_fixed, + SCVTF_sx_fixed = SCVTF_fixed | SixtyFourBits, + SCVTF_dw_fixed = SCVTF_fixed | FP64, + SCVTF_dx_fixed = SCVTF_fixed | SixtyFourBits | FP64, + UCVTF_fixed = FPFixedPointConvertFixed | 0x00030000u, + UCVTF_hw_fixed = UCVTF_fixed | FP16, + UCVTF_hx_fixed = UCVTF_fixed | SixtyFourBits | FP16, + UCVTF_sw_fixed = UCVTF_fixed, + UCVTF_sx_fixed = UCVTF_fixed | SixtyFourBits, + UCVTF_dw_fixed = UCVTF_fixed | FP64, + UCVTF_dx_fixed = UCVTF_fixed | SixtyFourBits | FP64 +}; + +// Crypto - two register SHA. +enum Crypto2RegSHAOp : uint32_t { + Crypto2RegSHAFixed = 0x5E280800u, + Crypto2RegSHAFMask = 0xFF3E0C00u +}; + +// Crypto - three register SHA. +enum Crypto3RegSHAOp : uint32_t { + Crypto3RegSHAFixed = 0x5E000000u, + Crypto3RegSHAFMask = 0xFF208C00u +}; + +// Crypto - AES. +enum CryptoAESOp : uint32_t { + CryptoAESFixed = 0x4E280800u, + CryptoAESFMask = 0xFF3E0C00u +}; + +// NEON instructions with two register operands. 
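In the two-register miscellaneous group that follows, each unsigned instruction is the signed opcode with the U bit (bit 29) set, e.g. NEON_UADDLP = NEON_SADDLP | NEON2RegMiscUBit. A minimal sketch of that relationship, assuming nothing beyond the values copied from the enum below (the k* names are stand-ins for this example):

    #include <cassert>
    #include <cstdint>

    // Values copied from NEON2RegMiscOp below.
    constexpr uint32_t kNEON2RegMiscFixed = 0x0E200800u;
    constexpr uint32_t kNEON2RegMiscUBit  = 0x20000000u;  // bit 29
    constexpr uint32_t kNEON_SADDLP       = kNEON2RegMiscFixed | 0x00002000u;
    constexpr uint32_t kNEON_UADDLP       = kNEON_SADDLP | kNEON2RegMiscUBit;

    int main() {
      // The signed and unsigned forms share every encoding bit except U, so a
      // decoder can strip the U bit and dispatch both to one handler.
      assert((kNEON_SADDLP ^ kNEON_UADDLP) == kNEON2RegMiscUBit);
      assert((kNEON_UADDLP & ~kNEON2RegMiscUBit) == kNEON_SADDLP);
      return 0;
    }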
+enum NEON2RegMiscOp : uint32_t { + NEON2RegMiscFixed = 0x0E200800u, + NEON2RegMiscFMask = 0x9F3E0C00u, + NEON2RegMiscMask = 0xBF3FFC00u, + NEON2RegMiscUBit = 0x20000000u, + NEON_REV64 = NEON2RegMiscFixed | 0x00000000u, + NEON_REV32 = NEON2RegMiscFixed | 0x20000000u, + NEON_REV16 = NEON2RegMiscFixed | 0x00001000u, + NEON_SADDLP = NEON2RegMiscFixed | 0x00002000u, + NEON_UADDLP = NEON_SADDLP | NEON2RegMiscUBit, + NEON_SUQADD = NEON2RegMiscFixed | 0x00003000u, + NEON_USQADD = NEON_SUQADD | NEON2RegMiscUBit, + NEON_CLS = NEON2RegMiscFixed | 0x00004000u, + NEON_CLZ = NEON2RegMiscFixed | 0x20004000u, + NEON_CNT = NEON2RegMiscFixed | 0x00005000u, + NEON_RBIT_NOT = NEON2RegMiscFixed | 0x20005000u, + NEON_SADALP = NEON2RegMiscFixed | 0x00006000u, + NEON_UADALP = NEON_SADALP | NEON2RegMiscUBit, + NEON_SQABS = NEON2RegMiscFixed | 0x00007000u, + NEON_SQNEG = NEON2RegMiscFixed | 0x20007000u, + NEON_CMGT_zero = NEON2RegMiscFixed | 0x00008000u, + NEON_CMGE_zero = NEON2RegMiscFixed | 0x20008000u, + NEON_CMEQ_zero = NEON2RegMiscFixed | 0x00009000u, + NEON_CMLE_zero = NEON2RegMiscFixed | 0x20009000u, + NEON_CMLT_zero = NEON2RegMiscFixed | 0x0000A000u, + NEON_ABS = NEON2RegMiscFixed | 0x0000B000u, + NEON_NEG = NEON2RegMiscFixed | 0x2000B000u, + NEON_XTN = NEON2RegMiscFixed | 0x00012000u, + NEON_SQXTUN = NEON2RegMiscFixed | 0x20012000u, + NEON_SHLL = NEON2RegMiscFixed | 0x20013000u, + NEON_SQXTN = NEON2RegMiscFixed | 0x00014000u, + NEON_UQXTN = NEON_SQXTN | NEON2RegMiscUBit, + + NEON2RegMiscOpcode = 0x0001F000u, + NEON_RBIT_NOT_opcode = NEON_RBIT_NOT & NEON2RegMiscOpcode, + NEON_NEG_opcode = NEON_NEG & NEON2RegMiscOpcode, + NEON_XTN_opcode = NEON_XTN & NEON2RegMiscOpcode, + NEON_UQXTN_opcode = NEON_UQXTN & NEON2RegMiscOpcode, + + // These instructions use only one bit of the size field. The other bit is + // used to distinguish between instructions. 
+ NEON2RegMiscFPMask = NEON2RegMiscMask | 0x00800000u, + NEON_FABS = NEON2RegMiscFixed | 0x0080F000u, + NEON_FNEG = NEON2RegMiscFixed | 0x2080F000u, + NEON_FCVTN = NEON2RegMiscFixed | 0x00016000u, + NEON_FCVTXN = NEON2RegMiscFixed | 0x20016000u, + NEON_FCVTL = NEON2RegMiscFixed | 0x00017000u, + NEON_FRINT32X = NEON2RegMiscFixed | 0x2001E000u, + NEON_FRINT32Z = NEON2RegMiscFixed | 0x0001E000u, + NEON_FRINT64X = NEON2RegMiscFixed | 0x2001F000u, + NEON_FRINT64Z = NEON2RegMiscFixed | 0x0001F000u, + NEON_FRINTN = NEON2RegMiscFixed | 0x00018000u, + NEON_FRINTA = NEON2RegMiscFixed | 0x20018000u, + NEON_FRINTP = NEON2RegMiscFixed | 0x00818000u, + NEON_FRINTM = NEON2RegMiscFixed | 0x00019000u, + NEON_FRINTX = NEON2RegMiscFixed | 0x20019000u, + NEON_FRINTZ = NEON2RegMiscFixed | 0x00819000u, + NEON_FRINTI = NEON2RegMiscFixed | 0x20819000u, + NEON_FCVTNS = NEON2RegMiscFixed | 0x0001A000u, + NEON_FCVTNU = NEON_FCVTNS | NEON2RegMiscUBit, + NEON_FCVTPS = NEON2RegMiscFixed | 0x0081A000u, + NEON_FCVTPU = NEON_FCVTPS | NEON2RegMiscUBit, + NEON_FCVTMS = NEON2RegMiscFixed | 0x0001B000u, + NEON_FCVTMU = NEON_FCVTMS | NEON2RegMiscUBit, + NEON_FCVTZS = NEON2RegMiscFixed | 0x0081B000u, + NEON_FCVTZU = NEON_FCVTZS | NEON2RegMiscUBit, + NEON_FCVTAS = NEON2RegMiscFixed | 0x0001C000u, + NEON_FCVTAU = NEON_FCVTAS | NEON2RegMiscUBit, + NEON_FSQRT = NEON2RegMiscFixed | 0x2081F000u, + NEON_SCVTF = NEON2RegMiscFixed | 0x0001D000u, + NEON_UCVTF = NEON_SCVTF | NEON2RegMiscUBit, + NEON_URSQRTE = NEON2RegMiscFixed | 0x2081C000u, + NEON_URECPE = NEON2RegMiscFixed | 0x0081C000u, + NEON_FRSQRTE = NEON2RegMiscFixed | 0x2081D000u, + NEON_FRECPE = NEON2RegMiscFixed | 0x0081D000u, + NEON_FCMGT_zero = NEON2RegMiscFixed | 0x0080C000u, + NEON_FCMGE_zero = NEON2RegMiscFixed | 0x2080C000u, + NEON_FCMEQ_zero = NEON2RegMiscFixed | 0x0080D000u, + NEON_FCMLE_zero = NEON2RegMiscFixed | 0x2080D000u, + NEON_FCMLT_zero = NEON2RegMiscFixed | 0x0080E000u, + + NEON_FCVTL_opcode = NEON_FCVTL & NEON2RegMiscOpcode, + NEON_FCVTN_opcode = NEON_FCVTN & NEON2RegMiscOpcode +}; + +// NEON instructions with two register operands (FP16). 
+enum NEON2RegMiscFP16Op : uint32_t { + NEON2RegMiscFP16Fixed = 0x0E780800u, + NEON2RegMiscFP16FMask = 0x9F7E0C00u, + NEON2RegMiscFP16Mask = 0xBFFFFC00u, + NEON_FRINTN_H = NEON2RegMiscFP16Fixed | 0x00018000u, + NEON_FRINTM_H = NEON2RegMiscFP16Fixed | 0x00019000u, + NEON_FCVTNS_H = NEON2RegMiscFP16Fixed | 0x0001A000u, + NEON_FCVTMS_H = NEON2RegMiscFP16Fixed | 0x0001B000u, + NEON_FCVTAS_H = NEON2RegMiscFP16Fixed | 0x0001C000u, + NEON_SCVTF_H = NEON2RegMiscFP16Fixed | 0x0001D000u, + NEON_FCMGT_H_zero = NEON2RegMiscFP16Fixed | 0x0080C000u, + NEON_FCMEQ_H_zero = NEON2RegMiscFP16Fixed | 0x0080D000u, + NEON_FCMLT_H_zero = NEON2RegMiscFP16Fixed | 0x0080E000u, + NEON_FABS_H = NEON2RegMiscFP16Fixed | 0x0080F000u, + NEON_FRINTP_H = NEON2RegMiscFP16Fixed | 0x00818000u, + NEON_FRINTZ_H = NEON2RegMiscFP16Fixed | 0x00819000u, + NEON_FCVTPS_H = NEON2RegMiscFP16Fixed | 0x0081A000u, + NEON_FCVTZS_H = NEON2RegMiscFP16Fixed | 0x0081B000u, + NEON_FRECPE_H = NEON2RegMiscFP16Fixed | 0x0081D000u, + NEON_FRINTA_H = NEON2RegMiscFP16Fixed | 0x20018000u, + NEON_FRINTX_H = NEON2RegMiscFP16Fixed | 0x20019000u, + NEON_FCVTNU_H = NEON2RegMiscFP16Fixed | 0x2001A000u, + NEON_FCVTMU_H = NEON2RegMiscFP16Fixed | 0x2001B000u, + NEON_FCVTAU_H = NEON2RegMiscFP16Fixed | 0x2001C000u, + NEON_UCVTF_H = NEON2RegMiscFP16Fixed | 0x2001D000u, + NEON_FCMGE_H_zero = NEON2RegMiscFP16Fixed | 0x2080C000u, + NEON_FCMLE_H_zero = NEON2RegMiscFP16Fixed | 0x2080D000u, + NEON_FNEG_H = NEON2RegMiscFP16Fixed | 0x2080F000u, + NEON_FRINTI_H = NEON2RegMiscFP16Fixed | 0x20819000u, + NEON_FCVTPU_H = NEON2RegMiscFP16Fixed | 0x2081A000u, + NEON_FCVTZU_H = NEON2RegMiscFP16Fixed | 0x2081B000u, + NEON_FRSQRTE_H = NEON2RegMiscFP16Fixed | 0x2081D000u, + NEON_FSQRT_H = NEON2RegMiscFP16Fixed | 0x2081F000u +}; + +// NEON instructions with three same-type operands. 
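In the three-same-operand group defined next, the floating point opcodes reuse bit 23, the high bit of the size field, which is why NEON3SameFPMask adds 0x00800000 on top of NEON3SameMask; FADD and FSUB, for instance, differ only in that bit. A small consistency check, using values copied from the enum below with illustrative k* names:

    #include <cassert>
    #include <cstdint>

    // Values copied from NEON3SameOp below.
    constexpr uint32_t kNEON3SameFixed  = 0x0E200400u;
    constexpr uint32_t kNEON3SameMask   = 0xBF20FC00u;
    constexpr uint32_t kNEON3SameFPMask = kNEON3SameMask | 0x00800000u;
    constexpr uint32_t kNEON_FADD       = kNEON3SameFixed | 0x0000D000u;
    constexpr uint32_t kNEON_FSUB       = kNEON3SameFixed | 0x0080D000u;

    int main() {
      // FADD and FSUB differ only in bit 23, so the integer-oriented mask
      // would conflate them, while the FP mask (which keeps bit 23) does not.
      assert((kNEON_FADD ^ kNEON_FSUB) == 0x00800000u);
      assert((kNEON_FADD & kNEON3SameMask) == (kNEON_FSUB & kNEON3SameMask));
      assert((kNEON_FADD & kNEON3SameFPMask) != (kNEON_FSUB & kNEON3SameFPMask));
      return 0;
    }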
+enum NEON3SameOp : uint32_t { + NEON3SameFixed = 0x0E200400u, + NEON3SameFMask = 0x9F200400u, + NEON3SameMask = 0xBF20FC00u, + NEON3SameUBit = 0x20000000u, + NEON_ADD = NEON3SameFixed | 0x00008000u, + NEON_ADDP = NEON3SameFixed | 0x0000B800u, + NEON_SHADD = NEON3SameFixed | 0x00000000u, + NEON_SHSUB = NEON3SameFixed | 0x00002000u, + NEON_SRHADD = NEON3SameFixed | 0x00001000u, + NEON_CMEQ = NEON3SameFixed | NEON3SameUBit | 0x00008800u, + NEON_CMGE = NEON3SameFixed | 0x00003800u, + NEON_CMGT = NEON3SameFixed | 0x00003000u, + NEON_CMHI = NEON3SameFixed | NEON3SameUBit | NEON_CMGT, + NEON_CMHS = NEON3SameFixed | NEON3SameUBit | NEON_CMGE, + NEON_CMTST = NEON3SameFixed | 0x00008800u, + NEON_MLA = NEON3SameFixed | 0x00009000u, + NEON_MLS = NEON3SameFixed | 0x20009000u, + NEON_MUL = NEON3SameFixed | 0x00009800u, + NEON_PMUL = NEON3SameFixed | 0x20009800u, + NEON_SRSHL = NEON3SameFixed | 0x00005000u, + NEON_SQSHL = NEON3SameFixed | 0x00004800u, + NEON_SQRSHL = NEON3SameFixed | 0x00005800u, + NEON_SSHL = NEON3SameFixed | 0x00004000u, + NEON_SMAX = NEON3SameFixed | 0x00006000u, + NEON_SMAXP = NEON3SameFixed | 0x0000A000u, + NEON_SMIN = NEON3SameFixed | 0x00006800u, + NEON_SMINP = NEON3SameFixed | 0x0000A800u, + NEON_SABD = NEON3SameFixed | 0x00007000u, + NEON_SABA = NEON3SameFixed | 0x00007800u, + NEON_UABD = NEON3SameFixed | NEON3SameUBit | NEON_SABD, + NEON_UABA = NEON3SameFixed | NEON3SameUBit | NEON_SABA, + NEON_SQADD = NEON3SameFixed | 0x00000800u, + NEON_SQSUB = NEON3SameFixed | 0x00002800u, + NEON_SUB = NEON3SameFixed | NEON3SameUBit | 0x00008000u, + NEON_UHADD = NEON3SameFixed | NEON3SameUBit | NEON_SHADD, + NEON_UHSUB = NEON3SameFixed | NEON3SameUBit | NEON_SHSUB, + NEON_URHADD = NEON3SameFixed | NEON3SameUBit | NEON_SRHADD, + NEON_UMAX = NEON3SameFixed | NEON3SameUBit | NEON_SMAX, + NEON_UMAXP = NEON3SameFixed | NEON3SameUBit | NEON_SMAXP, + NEON_UMIN = NEON3SameFixed | NEON3SameUBit | NEON_SMIN, + NEON_UMINP = NEON3SameFixed | NEON3SameUBit | NEON_SMINP, + NEON_URSHL = NEON3SameFixed | NEON3SameUBit | NEON_SRSHL, + NEON_UQADD = NEON3SameFixed | NEON3SameUBit | NEON_SQADD, + NEON_UQRSHL = NEON3SameFixed | NEON3SameUBit | NEON_SQRSHL, + NEON_UQSHL = NEON3SameFixed | NEON3SameUBit | NEON_SQSHL, + NEON_UQSUB = NEON3SameFixed | NEON3SameUBit | NEON_SQSUB, + NEON_USHL = NEON3SameFixed | NEON3SameUBit | NEON_SSHL, + NEON_SQDMULH = NEON3SameFixed | 0x0000B000u, + NEON_SQRDMULH = NEON3SameFixed | 0x2000B000u, + + // NEON floating point instructions with three same-type operands. 
+ NEON3SameFPFixed = NEON3SameFixed | 0x0000C000u, + NEON3SameFPFMask = NEON3SameFMask | 0x0000C000u, + NEON3SameFPMask = NEON3SameMask | 0x00800000u, + NEON_FADD = NEON3SameFixed | 0x0000D000u, + NEON_FSUB = NEON3SameFixed | 0x0080D000u, + NEON_FMUL = NEON3SameFixed | 0x2000D800u, + NEON_FDIV = NEON3SameFixed | 0x2000F800u, + NEON_FMAX = NEON3SameFixed | 0x0000F000u, + NEON_FMAXNM = NEON3SameFixed | 0x0000C000u, + NEON_FMAXP = NEON3SameFixed | 0x2000F000u, + NEON_FMAXNMP = NEON3SameFixed | 0x2000C000u, + NEON_FMIN = NEON3SameFixed | 0x0080F000u, + NEON_FMINNM = NEON3SameFixed | 0x0080C000u, + NEON_FMINP = NEON3SameFixed | 0x2080F000u, + NEON_FMINNMP = NEON3SameFixed | 0x2080C000u, + NEON_FMLA = NEON3SameFixed | 0x0000C800u, + NEON_FMLS = NEON3SameFixed | 0x0080C800u, + NEON_FMULX = NEON3SameFixed | 0x0000D800u, + NEON_FRECPS = NEON3SameFixed | 0x0000F800u, + NEON_FRSQRTS = NEON3SameFixed | 0x0080F800u, + NEON_FABD = NEON3SameFixed | 0x2080D000u, + NEON_FADDP = NEON3SameFixed | 0x2000D000u, + NEON_FCMEQ = NEON3SameFixed | 0x0000E000u, + NEON_FCMGE = NEON3SameFixed | 0x2000E000u, + NEON_FCMGT = NEON3SameFixed | 0x2080E000u, + NEON_FACGE = NEON3SameFixed | 0x2000E800u, + NEON_FACGT = NEON3SameFixed | 0x2080E800u, + + // NEON logical instructions with three same-type operands. + NEON3SameLogicalFixed = NEON3SameFixed | 0x00001800u, + NEON3SameLogicalFMask = NEON3SameFMask | 0x0000F800u, + NEON3SameLogicalMask = 0xBFE0FC00u, + NEON3SameLogicalFormatMask = NEON_Q, + NEON_AND = NEON3SameLogicalFixed | 0x00000000u, + NEON_ORR = NEON3SameLogicalFixed | 0x00A00000u, + NEON_ORN = NEON3SameLogicalFixed | 0x00C00000u, + NEON_EOR = NEON3SameLogicalFixed | 0x20000000u, + NEON_BIC = NEON3SameLogicalFixed | 0x00400000u, + NEON_BIF = NEON3SameLogicalFixed | 0x20C00000u, + NEON_BIT = NEON3SameLogicalFixed | 0x20800000u, + NEON_BSL = NEON3SameLogicalFixed | 0x20400000u, + + // FHM (FMLAL-like) instructions have an oddball encoding scheme under 3Same. 
+ NEON3SameFHMMask = 0xBFE0FC00u, // U size opcode + NEON_FMLAL = NEON3SameFixed | 0x0000E800u, // 0 00 11101 + NEON_FMLAL2 = NEON3SameFixed | 0x2000C800u, // 1 00 11001 + NEON_FMLSL = NEON3SameFixed | 0x0080E800u, // 0 10 11101 + NEON_FMLSL2 = NEON3SameFixed | 0x2080C800u // 1 10 11001 +}; + + +enum NEON3SameFP16 : uint32_t { + NEON3SameFP16Fixed = 0x0E400400u, + NEON3SameFP16FMask = 0x9F60C400u, + NEON3SameFP16Mask = 0xBFE0FC00u, + NEON_FMAXNM_H = NEON3SameFP16Fixed | 0x00000000u, + NEON_FMLA_H = NEON3SameFP16Fixed | 0x00000800u, + NEON_FADD_H = NEON3SameFP16Fixed | 0x00001000u, + NEON_FMULX_H = NEON3SameFP16Fixed | 0x00001800u, + NEON_FCMEQ_H = NEON3SameFP16Fixed | 0x00002000u, + NEON_FMAX_H = NEON3SameFP16Fixed | 0x00003000u, + NEON_FRECPS_H = NEON3SameFP16Fixed | 0x00003800u, + NEON_FMINNM_H = NEON3SameFP16Fixed | 0x00800000u, + NEON_FMLS_H = NEON3SameFP16Fixed | 0x00800800u, + NEON_FSUB_H = NEON3SameFP16Fixed | 0x00801000u, + NEON_FMIN_H = NEON3SameFP16Fixed | 0x00803000u, + NEON_FRSQRTS_H = NEON3SameFP16Fixed | 0x00803800u, + NEON_FMAXNMP_H = NEON3SameFP16Fixed | 0x20000000u, + NEON_FADDP_H = NEON3SameFP16Fixed | 0x20001000u, + NEON_FMUL_H = NEON3SameFP16Fixed | 0x20001800u, + NEON_FCMGE_H = NEON3SameFP16Fixed | 0x20002000u, + NEON_FACGE_H = NEON3SameFP16Fixed | 0x20002800u, + NEON_FMAXP_H = NEON3SameFP16Fixed | 0x20003000u, + NEON_FDIV_H = NEON3SameFP16Fixed | 0x20003800u, + NEON_FMINNMP_H = NEON3SameFP16Fixed | 0x20800000u, + NEON_FABD_H = NEON3SameFP16Fixed | 0x20801000u, + NEON_FCMGT_H = NEON3SameFP16Fixed | 0x20802000u, + NEON_FACGT_H = NEON3SameFP16Fixed | 0x20802800u, + NEON_FMINP_H = NEON3SameFP16Fixed | 0x20803000u +}; + + +// 'Extra' NEON instructions with three same-type operands. +enum NEON3SameExtraOp : uint32_t { + NEON3SameExtraFixed = 0x0E008400u, + NEON3SameExtraUBit = 0x20000000u, + NEON3SameExtraFMask = 0x9E208400u, + NEON3SameExtraMask = 0xBE20FC00u, + NEON_SQRDMLAH = NEON3SameExtraFixed | NEON3SameExtraUBit, + NEON_SQRDMLSH = NEON3SameExtraFixed | NEON3SameExtraUBit | 0x00000800u, + NEON_SDOT = NEON3SameExtraFixed | 0x00001000u, + NEON_UDOT = NEON3SameExtraFixed | NEON3SameExtraUBit | 0x00001000u, + + /* v8.3 Complex Numbers */ + NEON3SameExtraFCFixed = 0x2E00C400u, + NEON3SameExtraFCFMask = 0xBF20C400u, + // FCMLA fixes opcode<3:2>, and uses opcode<1:0> to encode <rotate>. + NEON3SameExtraFCMLAMask = NEON3SameExtraFCFMask | 0x00006000u, + NEON_FCMLA = NEON3SameExtraFCFixed, + // FCADD fixes opcode<3:2, 0>, and uses opcode<1> to encode <rotate>. + NEON3SameExtraFCADDMask = NEON3SameExtraFCFMask | 0x00006800u, + NEON_FCADD = NEON3SameExtraFCFixed | 0x00002000u + // Other encodings under NEON3SameExtraFCFMask are UNALLOCATED. +}; + +// NEON instructions with three different-type operands. 
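In the three-different group that follows, each second-half variant (the '2' form that reads the upper halves of its inputs) is the base opcode with the Q bit set, e.g. NEON_SMLAL2 = NEON_SMLAL | NEON_Q. NEON_Q itself is defined earlier in this header; the sketch below assumes it is the AdvSIMD Q bit, bit 30 (0x40000000), and uses illustrative k* names:

    #include <cassert>
    #include <cstdint>

    // kNEON_Q assumes NEON_Q is the AdvSIMD Q bit (bit 30), defined earlier in
    // this header; the other values are copied from NEON3DifferentOp below.
    constexpr uint32_t kNEON_Q         = 0x40000000u;
    constexpr uint32_t kNEON3DiffFixed = 0x0E200000u;
    constexpr uint32_t kNEON_SMLAL     = kNEON3DiffFixed | 0x00008000u;
    constexpr uint32_t kNEON_SMLAL2    = kNEON_SMLAL | kNEON_Q;

    int main() {
      // The '2' (upper-half) form is the base opcode with Q set, so a decoder
      // can mask Q away and treat SMLAL and SMLAL2 with the same handler.
      assert((kNEON_SMLAL2 & ~kNEON_Q) == kNEON_SMLAL);
      return 0;
    }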
+enum NEON3DifferentOp : uint32_t { + NEON3DifferentFixed = 0x0E200000u, + NEON3DifferentFMask = 0x9F200C00u, + NEON3DifferentMask = 0xFF20FC00u, + NEON_ADDHN = NEON3DifferentFixed | 0x00004000u, + NEON_ADDHN2 = NEON_ADDHN | NEON_Q, + NEON_PMULL = NEON3DifferentFixed | 0x0000E000u, + NEON_PMULL2 = NEON_PMULL | NEON_Q, + NEON_RADDHN = NEON3DifferentFixed | 0x20004000u, + NEON_RADDHN2 = NEON_RADDHN | NEON_Q, + NEON_RSUBHN = NEON3DifferentFixed | 0x20006000u, + NEON_RSUBHN2 = NEON_RSUBHN | NEON_Q, + NEON_SABAL = NEON3DifferentFixed | 0x00005000u, + NEON_SABAL2 = NEON_SABAL | NEON_Q, + NEON_SABDL = NEON3DifferentFixed | 0x00007000u, + NEON_SABDL2 = NEON_SABDL | NEON_Q, + NEON_SADDL = NEON3DifferentFixed | 0x00000000u, + NEON_SADDL2 = NEON_SADDL | NEON_Q, + NEON_SADDW = NEON3DifferentFixed | 0x00001000u, + NEON_SADDW2 = NEON_SADDW | NEON_Q, + NEON_SMLAL = NEON3DifferentFixed | 0x00008000u, + NEON_SMLAL2 = NEON_SMLAL | NEON_Q, + NEON_SMLSL = NEON3DifferentFixed | 0x0000A000u, + NEON_SMLSL2 = NEON_SMLSL | NEON_Q, + NEON_SMULL = NEON3DifferentFixed | 0x0000C000u, + NEON_SMULL2 = NEON_SMULL | NEON_Q, + NEON_SSUBL = NEON3DifferentFixed | 0x00002000u, + NEON_SSUBL2 = NEON_SSUBL | NEON_Q, + NEON_SSUBW = NEON3DifferentFixed | 0x00003000u, + NEON_SSUBW2 = NEON_SSUBW | NEON_Q, + NEON_SQDMLAL = NEON3DifferentFixed | 0x00009000u, + NEON_SQDMLAL2 = NEON_SQDMLAL | NEON_Q, + NEON_SQDMLSL = NEON3DifferentFixed | 0x0000B000u, + NEON_SQDMLSL2 = NEON_SQDMLSL | NEON_Q, + NEON_SQDMULL = NEON3DifferentFixed | 0x0000D000u, + NEON_SQDMULL2 = NEON_SQDMULL | NEON_Q, + NEON_SUBHN = NEON3DifferentFixed | 0x00006000u, + NEON_SUBHN2 = NEON_SUBHN | NEON_Q, + NEON_UABAL = NEON_SABAL | NEON3SameUBit, + NEON_UABAL2 = NEON_UABAL | NEON_Q, + NEON_UABDL = NEON_SABDL | NEON3SameUBit, + NEON_UABDL2 = NEON_UABDL | NEON_Q, + NEON_UADDL = NEON_SADDL | NEON3SameUBit, + NEON_UADDL2 = NEON_UADDL | NEON_Q, + NEON_UADDW = NEON_SADDW | NEON3SameUBit, + NEON_UADDW2 = NEON_UADDW | NEON_Q, + NEON_UMLAL = NEON_SMLAL | NEON3SameUBit, + NEON_UMLAL2 = NEON_UMLAL | NEON_Q, + NEON_UMLSL = NEON_SMLSL | NEON3SameUBit, + NEON_UMLSL2 = NEON_UMLSL | NEON_Q, + NEON_UMULL = NEON_SMULL | NEON3SameUBit, + NEON_UMULL2 = NEON_UMULL | NEON_Q, + NEON_USUBL = NEON_SSUBL | NEON3SameUBit, + NEON_USUBL2 = NEON_USUBL | NEON_Q, + NEON_USUBW = NEON_SSUBW | NEON3SameUBit, + NEON_USUBW2 = NEON_USUBW | NEON_Q +}; + +// NEON instructions operating across vectors. +enum NEONAcrossLanesOp : uint32_t { + NEONAcrossLanesFixed = 0x0E300800u, + NEONAcrossLanesFMask = 0x9F3E0C00u, + NEONAcrossLanesMask = 0xBF3FFC00u, + NEON_ADDV = NEONAcrossLanesFixed | 0x0001B000u, + NEON_SADDLV = NEONAcrossLanesFixed | 0x00003000u, + NEON_UADDLV = NEONAcrossLanesFixed | 0x20003000u, + NEON_SMAXV = NEONAcrossLanesFixed | 0x0000A000u, + NEON_SMINV = NEONAcrossLanesFixed | 0x0001A000u, + NEON_UMAXV = NEONAcrossLanesFixed | 0x2000A000u, + NEON_UMINV = NEONAcrossLanesFixed | 0x2001A000u, + + NEONAcrossLanesFP16Fixed = NEONAcrossLanesFixed | 0x0000C000u, + NEONAcrossLanesFP16FMask = NEONAcrossLanesFMask | 0x2000C000u, + NEONAcrossLanesFP16Mask = NEONAcrossLanesMask | 0x20800000u, + NEON_FMAXNMV_H = NEONAcrossLanesFP16Fixed | 0x00000000u, + NEON_FMAXV_H = NEONAcrossLanesFP16Fixed | 0x00003000u, + NEON_FMINNMV_H = NEONAcrossLanesFP16Fixed | 0x00800000u, + NEON_FMINV_H = NEONAcrossLanesFP16Fixed | 0x00803000u, + + // NEON floating point across instructions. 
+ NEONAcrossLanesFPFixed = NEONAcrossLanesFixed | 0x2000C000u, + NEONAcrossLanesFPFMask = NEONAcrossLanesFMask | 0x2000C000u, + NEONAcrossLanesFPMask = NEONAcrossLanesMask | 0x20800000u, + + NEON_FMAXV = NEONAcrossLanesFPFixed | 0x2000F000u, + NEON_FMINV = NEONAcrossLanesFPFixed | 0x2080F000u, + NEON_FMAXNMV = NEONAcrossLanesFPFixed | 0x2000C000u, + NEON_FMINNMV = NEONAcrossLanesFPFixed | 0x2080C000u +}; + +// NEON instructions with indexed element operand. +enum NEONByIndexedElementOp : uint32_t { + NEONByIndexedElementFixed = 0x0F000000u, + NEONByIndexedElementFMask = 0x9F000400u, + NEONByIndexedElementMask = 0xBF00F400u, + NEON_MUL_byelement = NEONByIndexedElementFixed | 0x00008000u, + NEON_MLA_byelement = NEONByIndexedElementFixed | 0x20000000u, + NEON_MLS_byelement = NEONByIndexedElementFixed | 0x20004000u, + NEON_SMULL_byelement = NEONByIndexedElementFixed | 0x0000A000u, + NEON_SMLAL_byelement = NEONByIndexedElementFixed | 0x00002000u, + NEON_SMLSL_byelement = NEONByIndexedElementFixed | 0x00006000u, + NEON_UMULL_byelement = NEONByIndexedElementFixed | 0x2000A000u, + NEON_UMLAL_byelement = NEONByIndexedElementFixed | 0x20002000u, + NEON_UMLSL_byelement = NEONByIndexedElementFixed | 0x20006000u, + NEON_SQDMULL_byelement = NEONByIndexedElementFixed | 0x0000B000u, + NEON_SQDMLAL_byelement = NEONByIndexedElementFixed | 0x00003000u, + NEON_SQDMLSL_byelement = NEONByIndexedElementFixed | 0x00007000u, + NEON_SQDMULH_byelement = NEONByIndexedElementFixed | 0x0000C000u, + NEON_SQRDMULH_byelement = NEONByIndexedElementFixed | 0x0000D000u, + NEON_SDOT_byelement = NEONByIndexedElementFixed | 0x0000E000u, + NEON_SQRDMLAH_byelement = NEONByIndexedElementFixed | 0x2000D000u, + NEON_UDOT_byelement = NEONByIndexedElementFixed | 0x2000E000u, + NEON_SQRDMLSH_byelement = NEONByIndexedElementFixed | 0x2000F000u, + + NEON_FMLA_H_byelement = NEONByIndexedElementFixed | 0x00001000u, + NEON_FMLS_H_byelement = NEONByIndexedElementFixed | 0x00005000u, + NEON_FMUL_H_byelement = NEONByIndexedElementFixed | 0x00009000u, + NEON_FMULX_H_byelement = NEONByIndexedElementFixed | 0x20009000u, + + // Floating point instructions. + NEONByIndexedElementFPFixed = NEONByIndexedElementFixed | 0x00800000u, + NEONByIndexedElementFPMask = NEONByIndexedElementMask | 0x00800000u, + NEON_FMLA_byelement = NEONByIndexedElementFPFixed | 0x00001000u, + NEON_FMLS_byelement = NEONByIndexedElementFPFixed | 0x00005000u, + NEON_FMUL_byelement = NEONByIndexedElementFPFixed | 0x00009000u, + NEON_FMULX_byelement = NEONByIndexedElementFPFixed | 0x20009000u, + + // FMLAL-like instructions. + // For all cases: U = x, size = 10, opcode = xx00 + NEONByIndexedElementFPLongFixed = NEONByIndexedElementFixed | 0x00800000u, + NEONByIndexedElementFPLongFMask = NEONByIndexedElementFMask | 0x00C03000u, + NEONByIndexedElementFPLongMask = 0xBFC0F400u, + NEON_FMLAL_H_byelement = NEONByIndexedElementFixed | 0x00800000u, + NEON_FMLAL2_H_byelement = NEONByIndexedElementFixed | 0x20808000u, + NEON_FMLSL_H_byelement = NEONByIndexedElementFixed | 0x00804000u, + NEON_FMLSL2_H_byelement = NEONByIndexedElementFixed | 0x2080C000u, + + // Complex instruction(s). + // This is necessary because the 'rot' encoding moves into the + // NEONByIndex..Mask space. + NEONByIndexedElementFPComplexMask = 0xBF009400u, + NEON_FCMLA_byelement = NEONByIndexedElementFixed | 0x20001000u +}; + +// NEON register copy. 
+enum NEONCopyOp : uint32_t { + NEONCopyFixed = 0x0E000400u, + NEONCopyFMask = 0x9FE08400u, + NEONCopyMask = 0x3FE08400u, + NEONCopyInsElementMask = NEONCopyMask | 0x40000000u, + NEONCopyInsGeneralMask = NEONCopyMask | 0x40007800u, + NEONCopyDupElementMask = NEONCopyMask | 0x20007800u, + NEONCopyDupGeneralMask = NEONCopyDupElementMask, + NEONCopyUmovMask = NEONCopyMask | 0x20007800u, + NEONCopySmovMask = NEONCopyMask | 0x20007800u, + NEON_INS_ELEMENT = NEONCopyFixed | 0x60000000u, + NEON_INS_GENERAL = NEONCopyFixed | 0x40001800u, + NEON_DUP_ELEMENT = NEONCopyFixed | 0x00000000u, + NEON_DUP_GENERAL = NEONCopyFixed | 0x00000800u, + NEON_SMOV = NEONCopyFixed | 0x00002800u, + NEON_UMOV = NEONCopyFixed | 0x00003800u +}; + +// NEON extract. +enum NEONExtractOp : uint32_t { + NEONExtractFixed = 0x2E000000u, + NEONExtractFMask = 0xBF208400u, + NEONExtractMask = 0xBFE08400u, + NEON_EXT = NEONExtractFixed | 0x00000000u +}; + +enum NEONLoadStoreMultiOp : uint32_t { + NEONLoadStoreMultiL = 0x00400000u, + NEONLoadStoreMulti1_1v = 0x00007000u, + NEONLoadStoreMulti1_2v = 0x0000A000u, + NEONLoadStoreMulti1_3v = 0x00006000u, + NEONLoadStoreMulti1_4v = 0x00002000u, + NEONLoadStoreMulti2 = 0x00008000u, + NEONLoadStoreMulti3 = 0x00004000u, + NEONLoadStoreMulti4 = 0x00000000u +}; + +// NEON load/store multiple structures. +enum NEONLoadStoreMultiStructOp : uint32_t { + NEONLoadStoreMultiStructFixed = 0x0C000000u, + NEONLoadStoreMultiStructFMask = 0xBFBF0000u, + NEONLoadStoreMultiStructMask = 0xBFFFF000u, + NEONLoadStoreMultiStructStore = NEONLoadStoreMultiStructFixed, + NEONLoadStoreMultiStructLoad = NEONLoadStoreMultiStructFixed | + NEONLoadStoreMultiL, + NEON_LD1_1v = NEONLoadStoreMultiStructLoad | NEONLoadStoreMulti1_1v, + NEON_LD1_2v = NEONLoadStoreMultiStructLoad | NEONLoadStoreMulti1_2v, + NEON_LD1_3v = NEONLoadStoreMultiStructLoad | NEONLoadStoreMulti1_3v, + NEON_LD1_4v = NEONLoadStoreMultiStructLoad | NEONLoadStoreMulti1_4v, + NEON_LD2 = NEONLoadStoreMultiStructLoad | NEONLoadStoreMulti2, + NEON_LD3 = NEONLoadStoreMultiStructLoad | NEONLoadStoreMulti3, + NEON_LD4 = NEONLoadStoreMultiStructLoad | NEONLoadStoreMulti4, + NEON_ST1_1v = NEONLoadStoreMultiStructStore | NEONLoadStoreMulti1_1v, + NEON_ST1_2v = NEONLoadStoreMultiStructStore | NEONLoadStoreMulti1_2v, + NEON_ST1_3v = NEONLoadStoreMultiStructStore | NEONLoadStoreMulti1_3v, + NEON_ST1_4v = NEONLoadStoreMultiStructStore | NEONLoadStoreMulti1_4v, + NEON_ST2 = NEONLoadStoreMultiStructStore | NEONLoadStoreMulti2, + NEON_ST3 = NEONLoadStoreMultiStructStore | NEONLoadStoreMulti3, + NEON_ST4 = NEONLoadStoreMultiStructStore | NEONLoadStoreMulti4 +}; + +// NEON load/store multiple structures with post-index addressing. 
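The post-index forms defined next are the plain multi-structure forms with bit 23 set (NEONLoadStoreMultiStructPostIndex, 0x00800000), which is also the only difference between the two groups' Fixed values (0x0C800000 versus 0x0C000000). A short sketch built from values copied out of the enums above and below, with illustrative k* names:

    #include <cassert>
    #include <cstdint>

    // Values copied from the load/store multiple-structure enums above/below.
    constexpr uint32_t kMultiStructFixed     = 0x0C000000u;
    constexpr uint32_t kMultiStructPostFixed = 0x0C800000u;
    constexpr uint32_t kPostIndexBit         = 0x00800000u;  // bit 23
    constexpr uint32_t kLoadBit              = 0x00400000u;  // NEONLoadStoreMultiL
    constexpr uint32_t kMulti2               = 0x00008000u;  // NEONLoadStoreMulti2

    int main() {
      uint32_t ld2      = kMultiStructFixed | kLoadBit | kMulti2;  // NEON_LD2
      uint32_t ld2_post = ld2 | kPostIndexBit;                     // NEON_LD2_post
      // The post-index opcode differs from the plain one only in bit 23, which
      // is exactly the difference between the two groups' Fixed patterns.
      assert(ld2_post - ld2 == kMultiStructPostFixed - kMultiStructFixed);
      return 0;
    }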
+enum NEONLoadStoreMultiStructPostIndexOp : uint32_t { + NEONLoadStoreMultiStructPostIndexFixed = 0x0C800000u, + NEONLoadStoreMultiStructPostIndexFMask = 0xBFA00000u, + NEONLoadStoreMultiStructPostIndexMask = 0xBFE0F000u, + NEONLoadStoreMultiStructPostIndex = 0x00800000u, + NEON_LD1_1v_post = NEON_LD1_1v | NEONLoadStoreMultiStructPostIndex, + NEON_LD1_2v_post = NEON_LD1_2v | NEONLoadStoreMultiStructPostIndex, + NEON_LD1_3v_post = NEON_LD1_3v | NEONLoadStoreMultiStructPostIndex, + NEON_LD1_4v_post = NEON_LD1_4v | NEONLoadStoreMultiStructPostIndex, + NEON_LD2_post = NEON_LD2 | NEONLoadStoreMultiStructPostIndex, + NEON_LD3_post = NEON_LD3 | NEONLoadStoreMultiStructPostIndex, + NEON_LD4_post = NEON_LD4 | NEONLoadStoreMultiStructPostIndex, + NEON_ST1_1v_post = NEON_ST1_1v | NEONLoadStoreMultiStructPostIndex, + NEON_ST1_2v_post = NEON_ST1_2v | NEONLoadStoreMultiStructPostIndex, + NEON_ST1_3v_post = NEON_ST1_3v | NEONLoadStoreMultiStructPostIndex, + NEON_ST1_4v_post = NEON_ST1_4v | NEONLoadStoreMultiStructPostIndex, + NEON_ST2_post = NEON_ST2 | NEONLoadStoreMultiStructPostIndex, + NEON_ST3_post = NEON_ST3 | NEONLoadStoreMultiStructPostIndex, + NEON_ST4_post = NEON_ST4 | NEONLoadStoreMultiStructPostIndex +}; + +enum NEONLoadStoreSingleOp : uint32_t { + NEONLoadStoreSingle1 = 0x00000000u, + NEONLoadStoreSingle2 = 0x00200000u, + NEONLoadStoreSingle3 = 0x00002000u, + NEONLoadStoreSingle4 = 0x00202000u, + NEONLoadStoreSingleL = 0x00400000u, + NEONLoadStoreSingle_b = 0x00000000u, + NEONLoadStoreSingle_h = 0x00004000u, + NEONLoadStoreSingle_s = 0x00008000u, + NEONLoadStoreSingle_d = 0x00008400u, + NEONLoadStoreSingleAllLanes = 0x0000C000u, + NEONLoadStoreSingleLenMask = 0x00202000u +}; + +// NEON load/store single structure. +enum NEONLoadStoreSingleStructOp : uint32_t { + NEONLoadStoreSingleStructFixed = 0x0D000000u, + NEONLoadStoreSingleStructFMask = 0xBF9F0000u, + NEONLoadStoreSingleStructMask = 0xBFFFE000u, + NEONLoadStoreSingleStructStore = NEONLoadStoreSingleStructFixed, + NEONLoadStoreSingleStructLoad = NEONLoadStoreSingleStructFixed | + NEONLoadStoreSingleL, + NEONLoadStoreSingleStructLoad1 = NEONLoadStoreSingle1 | + NEONLoadStoreSingleStructLoad, + NEONLoadStoreSingleStructLoad2 = NEONLoadStoreSingle2 | + NEONLoadStoreSingleStructLoad, + NEONLoadStoreSingleStructLoad3 = NEONLoadStoreSingle3 | + NEONLoadStoreSingleStructLoad, + NEONLoadStoreSingleStructLoad4 = NEONLoadStoreSingle4 | + NEONLoadStoreSingleStructLoad, + NEONLoadStoreSingleStructStore1 = NEONLoadStoreSingle1 | + NEONLoadStoreSingleStructFixed, + NEONLoadStoreSingleStructStore2 = NEONLoadStoreSingle2 | + NEONLoadStoreSingleStructFixed, + NEONLoadStoreSingleStructStore3 = NEONLoadStoreSingle3 | + NEONLoadStoreSingleStructFixed, + NEONLoadStoreSingleStructStore4 = NEONLoadStoreSingle4 | + NEONLoadStoreSingleStructFixed, + NEON_LD1_b = NEONLoadStoreSingleStructLoad1 | NEONLoadStoreSingle_b, + NEON_LD1_h = NEONLoadStoreSingleStructLoad1 | NEONLoadStoreSingle_h, + NEON_LD1_s = NEONLoadStoreSingleStructLoad1 | NEONLoadStoreSingle_s, + NEON_LD1_d = NEONLoadStoreSingleStructLoad1 | NEONLoadStoreSingle_d, + NEON_LD1R = NEONLoadStoreSingleStructLoad1 | NEONLoadStoreSingleAllLanes, + NEON_ST1_b = NEONLoadStoreSingleStructStore1 | NEONLoadStoreSingle_b, + NEON_ST1_h = NEONLoadStoreSingleStructStore1 | NEONLoadStoreSingle_h, + NEON_ST1_s = NEONLoadStoreSingleStructStore1 | NEONLoadStoreSingle_s, + NEON_ST1_d = NEONLoadStoreSingleStructStore1 | NEONLoadStoreSingle_d, + + NEON_LD2_b = NEONLoadStoreSingleStructLoad2 | NEONLoadStoreSingle_b, + 
NEON_LD2_h = NEONLoadStoreSingleStructLoad2 | NEONLoadStoreSingle_h, + NEON_LD2_s = NEONLoadStoreSingleStructLoad2 | NEONLoadStoreSingle_s, + NEON_LD2_d = NEONLoadStoreSingleStructLoad2 | NEONLoadStoreSingle_d, + NEON_LD2R = NEONLoadStoreSingleStructLoad2 | NEONLoadStoreSingleAllLanes, + NEON_ST2_b = NEONLoadStoreSingleStructStore2 | NEONLoadStoreSingle_b, + NEON_ST2_h = NEONLoadStoreSingleStructStore2 | NEONLoadStoreSingle_h, + NEON_ST2_s = NEONLoadStoreSingleStructStore2 | NEONLoadStoreSingle_s, + NEON_ST2_d = NEONLoadStoreSingleStructStore2 | NEONLoadStoreSingle_d, + + NEON_LD3_b = NEONLoadStoreSingleStructLoad3 | NEONLoadStoreSingle_b, + NEON_LD3_h = NEONLoadStoreSingleStructLoad3 | NEONLoadStoreSingle_h, + NEON_LD3_s = NEONLoadStoreSingleStructLoad3 | NEONLoadStoreSingle_s, + NEON_LD3_d = NEONLoadStoreSingleStructLoad3 | NEONLoadStoreSingle_d, + NEON_LD3R = NEONLoadStoreSingleStructLoad3 | NEONLoadStoreSingleAllLanes, + NEON_ST3_b = NEONLoadStoreSingleStructStore3 | NEONLoadStoreSingle_b, + NEON_ST3_h = NEONLoadStoreSingleStructStore3 | NEONLoadStoreSingle_h, + NEON_ST3_s = NEONLoadStoreSingleStructStore3 | NEONLoadStoreSingle_s, + NEON_ST3_d = NEONLoadStoreSingleStructStore3 | NEONLoadStoreSingle_d, + + NEON_LD4_b = NEONLoadStoreSingleStructLoad4 | NEONLoadStoreSingle_b, + NEON_LD4_h = NEONLoadStoreSingleStructLoad4 | NEONLoadStoreSingle_h, + NEON_LD4_s = NEONLoadStoreSingleStructLoad4 | NEONLoadStoreSingle_s, + NEON_LD4_d = NEONLoadStoreSingleStructLoad4 | NEONLoadStoreSingle_d, + NEON_LD4R = NEONLoadStoreSingleStructLoad4 | NEONLoadStoreSingleAllLanes, + NEON_ST4_b = NEONLoadStoreSingleStructStore4 | NEONLoadStoreSingle_b, + NEON_ST4_h = NEONLoadStoreSingleStructStore4 | NEONLoadStoreSingle_h, + NEON_ST4_s = NEONLoadStoreSingleStructStore4 | NEONLoadStoreSingle_s, + NEON_ST4_d = NEONLoadStoreSingleStructStore4 | NEONLoadStoreSingle_d +}; + +// NEON load/store single structure with post-index addressing. 
+enum NEONLoadStoreSingleStructPostIndexOp : uint32_t { + NEONLoadStoreSingleStructPostIndexFixed = 0x0D800000u, + NEONLoadStoreSingleStructPostIndexFMask = 0xBF800000u, + NEONLoadStoreSingleStructPostIndexMask = 0xBFE0E000u, + NEONLoadStoreSingleStructPostIndex = 0x00800000u, + NEON_LD1_b_post = NEON_LD1_b | NEONLoadStoreSingleStructPostIndex, + NEON_LD1_h_post = NEON_LD1_h | NEONLoadStoreSingleStructPostIndex, + NEON_LD1_s_post = NEON_LD1_s | NEONLoadStoreSingleStructPostIndex, + NEON_LD1_d_post = NEON_LD1_d | NEONLoadStoreSingleStructPostIndex, + NEON_LD1R_post = NEON_LD1R | NEONLoadStoreSingleStructPostIndex, + NEON_ST1_b_post = NEON_ST1_b | NEONLoadStoreSingleStructPostIndex, + NEON_ST1_h_post = NEON_ST1_h | NEONLoadStoreSingleStructPostIndex, + NEON_ST1_s_post = NEON_ST1_s | NEONLoadStoreSingleStructPostIndex, + NEON_ST1_d_post = NEON_ST1_d | NEONLoadStoreSingleStructPostIndex, + + NEON_LD2_b_post = NEON_LD2_b | NEONLoadStoreSingleStructPostIndex, + NEON_LD2_h_post = NEON_LD2_h | NEONLoadStoreSingleStructPostIndex, + NEON_LD2_s_post = NEON_LD2_s | NEONLoadStoreSingleStructPostIndex, + NEON_LD2_d_post = NEON_LD2_d | NEONLoadStoreSingleStructPostIndex, + NEON_LD2R_post = NEON_LD2R | NEONLoadStoreSingleStructPostIndex, + NEON_ST2_b_post = NEON_ST2_b | NEONLoadStoreSingleStructPostIndex, + NEON_ST2_h_post = NEON_ST2_h | NEONLoadStoreSingleStructPostIndex, + NEON_ST2_s_post = NEON_ST2_s | NEONLoadStoreSingleStructPostIndex, + NEON_ST2_d_post = NEON_ST2_d | NEONLoadStoreSingleStructPostIndex, + + NEON_LD3_b_post = NEON_LD3_b | NEONLoadStoreSingleStructPostIndex, + NEON_LD3_h_post = NEON_LD3_h | NEONLoadStoreSingleStructPostIndex, + NEON_LD3_s_post = NEON_LD3_s | NEONLoadStoreSingleStructPostIndex, + NEON_LD3_d_post = NEON_LD3_d | NEONLoadStoreSingleStructPostIndex, + NEON_LD3R_post = NEON_LD3R | NEONLoadStoreSingleStructPostIndex, + NEON_ST3_b_post = NEON_ST3_b | NEONLoadStoreSingleStructPostIndex, + NEON_ST3_h_post = NEON_ST3_h | NEONLoadStoreSingleStructPostIndex, + NEON_ST3_s_post = NEON_ST3_s | NEONLoadStoreSingleStructPostIndex, + NEON_ST3_d_post = NEON_ST3_d | NEONLoadStoreSingleStructPostIndex, + + NEON_LD4_b_post = NEON_LD4_b | NEONLoadStoreSingleStructPostIndex, + NEON_LD4_h_post = NEON_LD4_h | NEONLoadStoreSingleStructPostIndex, + NEON_LD4_s_post = NEON_LD4_s | NEONLoadStoreSingleStructPostIndex, + NEON_LD4_d_post = NEON_LD4_d | NEONLoadStoreSingleStructPostIndex, + NEON_LD4R_post = NEON_LD4R | NEONLoadStoreSingleStructPostIndex, + NEON_ST4_b_post = NEON_ST4_b | NEONLoadStoreSingleStructPostIndex, + NEON_ST4_h_post = NEON_ST4_h | NEONLoadStoreSingleStructPostIndex, + NEON_ST4_s_post = NEON_ST4_s | NEONLoadStoreSingleStructPostIndex, + NEON_ST4_d_post = NEON_ST4_d | NEONLoadStoreSingleStructPostIndex +}; + +// NEON modified immediate. +enum NEONModifiedImmediateOp : uint32_t { + NEONModifiedImmediateFixed = 0x0F000400u, + NEONModifiedImmediateFMask = 0x9FF80400u, + NEONModifiedImmediateOpBit = 0x20000000u, + NEONModifiedImmediate_FMOV = NEONModifiedImmediateFixed | 0x00000800u, + NEONModifiedImmediate_MOVI = NEONModifiedImmediateFixed | 0x00000000u, + NEONModifiedImmediate_MVNI = NEONModifiedImmediateFixed | 0x20000000u, + NEONModifiedImmediate_ORR = NEONModifiedImmediateFixed | 0x00001000u, + NEONModifiedImmediate_BIC = NEONModifiedImmediateFixed | 0x20001000u +}; + +// NEON shift immediate. 
+enum NEONShiftImmediateOp : uint32_t { + NEONShiftImmediateFixed = 0x0F000400u, + NEONShiftImmediateFMask = 0x9F800400u, + NEONShiftImmediateMask = 0xBF80FC00u, + NEONShiftImmediateUBit = 0x20000000u, + NEON_SHL = NEONShiftImmediateFixed | 0x00005000u, + NEON_SSHLL = NEONShiftImmediateFixed | 0x0000A000u, + NEON_USHLL = NEONShiftImmediateFixed | 0x2000A000u, + NEON_SLI = NEONShiftImmediateFixed | 0x20005000u, + NEON_SRI = NEONShiftImmediateFixed | 0x20004000u, + NEON_SHRN = NEONShiftImmediateFixed | 0x00008000u, + NEON_RSHRN = NEONShiftImmediateFixed | 0x00008800u, + NEON_UQSHRN = NEONShiftImmediateFixed | 0x20009000u, + NEON_UQRSHRN = NEONShiftImmediateFixed | 0x20009800u, + NEON_SQSHRN = NEONShiftImmediateFixed | 0x00009000u, + NEON_SQRSHRN = NEONShiftImmediateFixed | 0x00009800u, + NEON_SQSHRUN = NEONShiftImmediateFixed | 0x20008000u, + NEON_SQRSHRUN = NEONShiftImmediateFixed | 0x20008800u, + NEON_SSHR = NEONShiftImmediateFixed | 0x00000000u, + NEON_SRSHR = NEONShiftImmediateFixed | 0x00002000u, + NEON_USHR = NEONShiftImmediateFixed | 0x20000000u, + NEON_URSHR = NEONShiftImmediateFixed | 0x20002000u, + NEON_SSRA = NEONShiftImmediateFixed | 0x00001000u, + NEON_SRSRA = NEONShiftImmediateFixed | 0x00003000u, + NEON_USRA = NEONShiftImmediateFixed | 0x20001000u, + NEON_URSRA = NEONShiftImmediateFixed | 0x20003000u, + NEON_SQSHLU = NEONShiftImmediateFixed | 0x20006000u, + NEON_SCVTF_imm = NEONShiftImmediateFixed | 0x0000E000u, + NEON_UCVTF_imm = NEONShiftImmediateFixed | 0x2000E000u, + NEON_FCVTZS_imm = NEONShiftImmediateFixed | 0x0000F800u, + NEON_FCVTZU_imm = NEONShiftImmediateFixed | 0x2000F800u, + NEON_SQSHL_imm = NEONShiftImmediateFixed | 0x00007000u, + NEON_UQSHL_imm = NEONShiftImmediateFixed | 0x20007000u +}; + +// NEON table. +enum NEONTableOp : uint32_t { + NEONTableFixed = 0x0E000000u, + NEONTableFMask = 0xBF208C00u, + NEONTableExt = 0x00001000u, + NEONTableMask = 0xBF20FC00u, + NEON_TBL_1v = NEONTableFixed | 0x00000000u, + NEON_TBL_2v = NEONTableFixed | 0x00002000u, + NEON_TBL_3v = NEONTableFixed | 0x00004000u, + NEON_TBL_4v = NEONTableFixed | 0x00006000u, + NEON_TBX_1v = NEON_TBL_1v | NEONTableExt, + NEON_TBX_2v = NEON_TBL_2v | NEONTableExt, + NEON_TBX_3v = NEON_TBL_3v | NEONTableExt, + NEON_TBX_4v = NEON_TBL_4v | NEONTableExt +}; + +// NEON perm. +enum NEONPermOp : uint32_t { + NEONPermFixed = 0x0E000800u, + NEONPermFMask = 0xBF208C00u, + NEONPermMask = 0x3F20FC00u, + NEON_UZP1 = NEONPermFixed | 0x00001000u, + NEON_TRN1 = NEONPermFixed | 0x00002000u, + NEON_ZIP1 = NEONPermFixed | 0x00003000u, + NEON_UZP2 = NEONPermFixed | 0x00005000u, + NEON_TRN2 = NEONPermFixed | 0x00006000u, + NEON_ZIP2 = NEONPermFixed | 0x00007000u +}; + +// NEON scalar instructions with two register operands. 
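From here on, each scalar opcode is the corresponding vector opcode with NEON_Q and NEONScalar OR-ed in, e.g. NEON_ABS_scalar = NEON_Q | NEONScalar | NEON_ABS in the enum below. Both flags are defined earlier in this header; their combined value can be read off the difference between NEONScalar2RegMiscFixed (0x5E200800) and NEON2RegMiscFixed (0x0E200800). A small sketch with illustrative k* names:

    #include <cassert>
    #include <cstdint>

    // Values copied from NEON2RegMiscOp above and NEONScalar2RegMiscOp below.
    constexpr uint32_t kNEON2RegMiscFixed       = 0x0E200800u;
    constexpr uint32_t kNEONScalar2RegMiscFixed = 0x5E200800u;
    constexpr uint32_t kNEON_ABS                = kNEON2RegMiscFixed | 0x0000B000u;

    int main() {
      // NEON_Q | NEONScalar is whatever turns the vector class base into the
      // scalar class base; applying those bits to NEON_ABS gives the scalar op.
      uint32_t q_and_scalar = kNEONScalar2RegMiscFixed ^ kNEON2RegMiscFixed;
      uint32_t abs_scalar   = q_and_scalar | kNEON_ABS;  // NEON_ABS_scalar
      assert(q_and_scalar == 0x50000000u);
      assert((abs_scalar & kNEONScalar2RegMiscFixed) == kNEONScalar2RegMiscFixed);
      return 0;
    }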
+enum NEONScalar2RegMiscOp : uint32_t { + NEONScalar2RegMiscFixed = 0x5E200800u, + NEONScalar2RegMiscFMask = 0xDF3E0C00u, + NEONScalar2RegMiscMask = NEON_Q | NEONScalar | NEON2RegMiscMask, + NEON_CMGT_zero_scalar = NEON_Q | NEONScalar | NEON_CMGT_zero, + NEON_CMEQ_zero_scalar = NEON_Q | NEONScalar | NEON_CMEQ_zero, + NEON_CMLT_zero_scalar = NEON_Q | NEONScalar | NEON_CMLT_zero, + NEON_CMGE_zero_scalar = NEON_Q | NEONScalar | NEON_CMGE_zero, + NEON_CMLE_zero_scalar = NEON_Q | NEONScalar | NEON_CMLE_zero, + NEON_ABS_scalar = NEON_Q | NEONScalar | NEON_ABS, + NEON_SQABS_scalar = NEON_Q | NEONScalar | NEON_SQABS, + NEON_NEG_scalar = NEON_Q | NEONScalar | NEON_NEG, + NEON_SQNEG_scalar = NEON_Q | NEONScalar | NEON_SQNEG, + NEON_SQXTN_scalar = NEON_Q | NEONScalar | NEON_SQXTN, + NEON_UQXTN_scalar = NEON_Q | NEONScalar | NEON_UQXTN, + NEON_SQXTUN_scalar = NEON_Q | NEONScalar | NEON_SQXTUN, + NEON_SUQADD_scalar = NEON_Q | NEONScalar | NEON_SUQADD, + NEON_USQADD_scalar = NEON_Q | NEONScalar | NEON_USQADD, + + NEONScalar2RegMiscOpcode = NEON2RegMiscOpcode, + NEON_NEG_scalar_opcode = NEON_NEG_scalar & NEONScalar2RegMiscOpcode, + + NEONScalar2RegMiscFPMask = NEONScalar2RegMiscMask | 0x00800000u, + NEON_FRSQRTE_scalar = NEON_Q | NEONScalar | NEON_FRSQRTE, + NEON_FRECPE_scalar = NEON_Q | NEONScalar | NEON_FRECPE, + NEON_SCVTF_scalar = NEON_Q | NEONScalar | NEON_SCVTF, + NEON_UCVTF_scalar = NEON_Q | NEONScalar | NEON_UCVTF, + NEON_FCMGT_zero_scalar = NEON_Q | NEONScalar | NEON_FCMGT_zero, + NEON_FCMEQ_zero_scalar = NEON_Q | NEONScalar | NEON_FCMEQ_zero, + NEON_FCMLT_zero_scalar = NEON_Q | NEONScalar | NEON_FCMLT_zero, + NEON_FCMGE_zero_scalar = NEON_Q | NEONScalar | NEON_FCMGE_zero, + NEON_FCMLE_zero_scalar = NEON_Q | NEONScalar | NEON_FCMLE_zero, + NEON_FRECPX_scalar = NEONScalar2RegMiscFixed | 0x0081F000u, + NEON_FCVTNS_scalar = NEON_Q | NEONScalar | NEON_FCVTNS, + NEON_FCVTNU_scalar = NEON_Q | NEONScalar | NEON_FCVTNU, + NEON_FCVTPS_scalar = NEON_Q | NEONScalar | NEON_FCVTPS, + NEON_FCVTPU_scalar = NEON_Q | NEONScalar | NEON_FCVTPU, + NEON_FCVTMS_scalar = NEON_Q | NEONScalar | NEON_FCVTMS, + NEON_FCVTMU_scalar = NEON_Q | NEONScalar | NEON_FCVTMU, + NEON_FCVTZS_scalar = NEON_Q | NEONScalar | NEON_FCVTZS, + NEON_FCVTZU_scalar = NEON_Q | NEONScalar | NEON_FCVTZU, + NEON_FCVTAS_scalar = NEON_Q | NEONScalar | NEON_FCVTAS, + NEON_FCVTAU_scalar = NEON_Q | NEONScalar | NEON_FCVTAU, + NEON_FCVTXN_scalar = NEON_Q | NEONScalar | NEON_FCVTXN +}; + +// NEON instructions with two register operands (FP16). 
+enum NEONScalar2RegMiscFP16Op : uint32_t { + NEONScalar2RegMiscFP16Fixed = 0x5E780800u, + NEONScalar2RegMiscFP16FMask = 0xDF7E0C00u, + NEONScalar2RegMiscFP16Mask = 0xFFFFFC00u, + NEON_FCVTNS_H_scalar = NEON_Q | NEONScalar | NEON_FCVTNS_H, + NEON_FCVTMS_H_scalar = NEON_Q | NEONScalar | NEON_FCVTMS_H, + NEON_FCVTAS_H_scalar = NEON_Q | NEONScalar | NEON_FCVTAS_H, + NEON_SCVTF_H_scalar = NEON_Q | NEONScalar | NEON_SCVTF_H, + NEON_FCMGT_H_zero_scalar = NEON_Q | NEONScalar | NEON_FCMGT_H_zero, + NEON_FCMEQ_H_zero_scalar = NEON_Q | NEONScalar | NEON_FCMEQ_H_zero, + NEON_FCMLT_H_zero_scalar = NEON_Q | NEONScalar | NEON_FCMLT_H_zero, + NEON_FCVTPS_H_scalar = NEON_Q | NEONScalar | NEON_FCVTPS_H, + NEON_FCVTZS_H_scalar = NEON_Q | NEONScalar | NEON_FCVTZS_H, + NEON_FRECPE_H_scalar = NEON_Q | NEONScalar | NEON_FRECPE_H, + NEON_FRECPX_H_scalar = NEONScalar2RegMiscFP16Fixed | 0x0081F000u, + NEON_FCVTNU_H_scalar = NEON_Q | NEONScalar | NEON_FCVTNU_H, + NEON_FCVTMU_H_scalar = NEON_Q | NEONScalar | NEON_FCVTMU_H, + NEON_FCVTAU_H_scalar = NEON_Q | NEONScalar | NEON_FCVTAU_H, + NEON_UCVTF_H_scalar = NEON_Q | NEONScalar | NEON_UCVTF_H, + NEON_FCMGE_H_zero_scalar = NEON_Q | NEONScalar | NEON_FCMGE_H_zero, + NEON_FCMLE_H_zero_scalar = NEON_Q | NEONScalar | NEON_FCMLE_H_zero, + NEON_FCVTPU_H_scalar = NEON_Q | NEONScalar | NEON_FCVTPU_H, + NEON_FCVTZU_H_scalar = NEON_Q | NEONScalar | NEON_FCVTZU_H, + NEON_FRSQRTE_H_scalar = NEON_Q | NEONScalar | NEON_FRSQRTE_H +}; + +// NEON scalar instructions with three same-type operands. +enum NEONScalar3SameOp : uint32_t { + NEONScalar3SameFixed = 0x5E200400u, + NEONScalar3SameFMask = 0xDF200400u, + NEONScalar3SameMask = 0xFF20FC00u, + NEON_ADD_scalar = NEON_Q | NEONScalar | NEON_ADD, + NEON_CMEQ_scalar = NEON_Q | NEONScalar | NEON_CMEQ, + NEON_CMGE_scalar = NEON_Q | NEONScalar | NEON_CMGE, + NEON_CMGT_scalar = NEON_Q | NEONScalar | NEON_CMGT, + NEON_CMHI_scalar = NEON_Q | NEONScalar | NEON_CMHI, + NEON_CMHS_scalar = NEON_Q | NEONScalar | NEON_CMHS, + NEON_CMTST_scalar = NEON_Q | NEONScalar | NEON_CMTST, + NEON_SUB_scalar = NEON_Q | NEONScalar | NEON_SUB, + NEON_UQADD_scalar = NEON_Q | NEONScalar | NEON_UQADD, + NEON_SQADD_scalar = NEON_Q | NEONScalar | NEON_SQADD, + NEON_UQSUB_scalar = NEON_Q | NEONScalar | NEON_UQSUB, + NEON_SQSUB_scalar = NEON_Q | NEONScalar | NEON_SQSUB, + NEON_USHL_scalar = NEON_Q | NEONScalar | NEON_USHL, + NEON_SSHL_scalar = NEON_Q | NEONScalar | NEON_SSHL, + NEON_UQSHL_scalar = NEON_Q | NEONScalar | NEON_UQSHL, + NEON_SQSHL_scalar = NEON_Q | NEONScalar | NEON_SQSHL, + NEON_URSHL_scalar = NEON_Q | NEONScalar | NEON_URSHL, + NEON_SRSHL_scalar = NEON_Q | NEONScalar | NEON_SRSHL, + NEON_UQRSHL_scalar = NEON_Q | NEONScalar | NEON_UQRSHL, + NEON_SQRSHL_scalar = NEON_Q | NEONScalar | NEON_SQRSHL, + NEON_SQDMULH_scalar = NEON_Q | NEONScalar | NEON_SQDMULH, + NEON_SQRDMULH_scalar = NEON_Q | NEONScalar | NEON_SQRDMULH, + + // NEON floating point scalar instructions with three same-type operands. 
+ NEONScalar3SameFPFixed = NEONScalar3SameFixed | 0x0000C000u, + NEONScalar3SameFPFMask = NEONScalar3SameFMask | 0x0000C000u, + NEONScalar3SameFPMask = NEONScalar3SameMask | 0x00800000u, + NEON_FACGE_scalar = NEON_Q | NEONScalar | NEON_FACGE, + NEON_FACGT_scalar = NEON_Q | NEONScalar | NEON_FACGT, + NEON_FCMEQ_scalar = NEON_Q | NEONScalar | NEON_FCMEQ, + NEON_FCMGE_scalar = NEON_Q | NEONScalar | NEON_FCMGE, + NEON_FCMGT_scalar = NEON_Q | NEONScalar | NEON_FCMGT, + NEON_FMULX_scalar = NEON_Q | NEONScalar | NEON_FMULX, + NEON_FRECPS_scalar = NEON_Q | NEONScalar | NEON_FRECPS, + NEON_FRSQRTS_scalar = NEON_Q | NEONScalar | NEON_FRSQRTS, + NEON_FABD_scalar = NEON_Q | NEONScalar | NEON_FABD +}; + +// NEON scalar FP16 instructions with three same-type operands. +enum NEONScalar3SameFP16Op : uint32_t { + NEONScalar3SameFP16Fixed = 0x5E400400u, + NEONScalar3SameFP16FMask = 0xDF60C400u, + NEONScalar3SameFP16Mask = 0xFFE0FC00u, + NEON_FABD_H_scalar = NEON_Q | NEONScalar | NEON_FABD_H, + NEON_FMULX_H_scalar = NEON_Q | NEONScalar | NEON_FMULX_H, + NEON_FCMEQ_H_scalar = NEON_Q | NEONScalar | NEON_FCMEQ_H, + NEON_FCMGE_H_scalar = NEON_Q | NEONScalar | NEON_FCMGE_H, + NEON_FCMGT_H_scalar = NEON_Q | NEONScalar | NEON_FCMGT_H, + NEON_FACGE_H_scalar = NEON_Q | NEONScalar | NEON_FACGE_H, + NEON_FACGT_H_scalar = NEON_Q | NEONScalar | NEON_FACGT_H, + NEON_FRECPS_H_scalar = NEON_Q | NEONScalar | NEON_FRECPS_H, + NEON_FRSQRTS_H_scalar = NEON_Q | NEONScalar | NEON_FRSQRTS_H +}; + +// 'Extra' NEON scalar instructions with three same-type operands. +enum NEONScalar3SameExtraOp : uint32_t { + NEONScalar3SameExtraFixed = 0x5E008400u, + NEONScalar3SameExtraFMask = 0xDF208400u, + NEONScalar3SameExtraMask = 0xFF20FC00u, + NEON_SQRDMLAH_scalar = NEON_Q | NEONScalar | NEON_SQRDMLAH, + NEON_SQRDMLSH_scalar = NEON_Q | NEONScalar | NEON_SQRDMLSH +}; + +// NEON scalar instructions with three different-type operands. +enum NEONScalar3DiffOp : uint32_t { + NEONScalar3DiffFixed = 0x5E200000u, + NEONScalar3DiffFMask = 0xDF200C00u, + NEONScalar3DiffMask = NEON_Q | NEONScalar | NEON3DifferentMask, + NEON_SQDMLAL_scalar = NEON_Q | NEONScalar | NEON_SQDMLAL, + NEON_SQDMLSL_scalar = NEON_Q | NEONScalar | NEON_SQDMLSL, + NEON_SQDMULL_scalar = NEON_Q | NEONScalar | NEON_SQDMULL +}; + +// NEON scalar instructions with indexed element operand. +enum NEONScalarByIndexedElementOp : uint32_t { + NEONScalarByIndexedElementFixed = 0x5F000000u, + NEONScalarByIndexedElementFMask = 0xDF000400u, + NEONScalarByIndexedElementMask = 0xFF00F400u, + NEON_SQDMLAL_byelement_scalar = NEON_Q | NEONScalar | NEON_SQDMLAL_byelement, + NEON_SQDMLSL_byelement_scalar = NEON_Q | NEONScalar | NEON_SQDMLSL_byelement, + NEON_SQDMULL_byelement_scalar = NEON_Q | NEONScalar | NEON_SQDMULL_byelement, + NEON_SQDMULH_byelement_scalar = NEON_Q | NEONScalar | NEON_SQDMULH_byelement, + NEON_SQRDMULH_byelement_scalar + = NEON_Q | NEONScalar | NEON_SQRDMULH_byelement, + NEON_SQRDMLAH_byelement_scalar + = NEON_Q | NEONScalar | NEON_SQRDMLAH_byelement, + NEON_SQRDMLSH_byelement_scalar + = NEON_Q | NEONScalar | NEON_SQRDMLSH_byelement, + NEON_FMLA_H_byelement_scalar = NEON_Q | NEONScalar | NEON_FMLA_H_byelement, + NEON_FMLS_H_byelement_scalar = NEON_Q | NEONScalar | NEON_FMLS_H_byelement, + NEON_FMUL_H_byelement_scalar = NEON_Q | NEONScalar | NEON_FMUL_H_byelement, + NEON_FMULX_H_byelement_scalar = NEON_Q | NEONScalar | NEON_FMULX_H_byelement, + + // Floating point instructions. 
+ NEONScalarByIndexedElementFPFixed + = NEONScalarByIndexedElementFixed | 0x00800000u, + NEONScalarByIndexedElementFPMask + = NEONScalarByIndexedElementMask | 0x00800000u, + NEON_FMLA_byelement_scalar = NEON_Q | NEONScalar | NEON_FMLA_byelement, + NEON_FMLS_byelement_scalar = NEON_Q | NEONScalar | NEON_FMLS_byelement, + NEON_FMUL_byelement_scalar = NEON_Q | NEONScalar | NEON_FMUL_byelement, + NEON_FMULX_byelement_scalar = NEON_Q | NEONScalar | NEON_FMULX_byelement +}; + +// NEON scalar register copy. +enum NEONScalarCopyOp : uint32_t { + NEONScalarCopyFixed = 0x5E000400u, + NEONScalarCopyFMask = 0xDFE08400u, + NEONScalarCopyMask = 0xFFE0FC00u, + NEON_DUP_ELEMENT_scalar = NEON_Q | NEONScalar | NEON_DUP_ELEMENT +}; + +// NEON scalar pairwise instructions. +enum NEONScalarPairwiseOp : uint32_t { + NEONScalarPairwiseFixed = 0x5E300800u, + NEONScalarPairwiseFMask = 0xDF3E0C00u, + NEONScalarPairwiseMask = 0xFFB1F800u, + NEON_ADDP_scalar = NEONScalarPairwiseFixed | 0x0081B000u, + NEON_FMAXNMP_h_scalar = NEONScalarPairwiseFixed | 0x0000C000u, + NEON_FADDP_h_scalar = NEONScalarPairwiseFixed | 0x0000D000u, + NEON_FMAXP_h_scalar = NEONScalarPairwiseFixed | 0x0000F000u, + NEON_FMINNMP_h_scalar = NEONScalarPairwiseFixed | 0x0080C000u, + NEON_FMINP_h_scalar = NEONScalarPairwiseFixed | 0x0080F000u, + NEON_FMAXNMP_scalar = NEONScalarPairwiseFixed | 0x2000C000u, + NEON_FMINNMP_scalar = NEONScalarPairwiseFixed | 0x2080C000u, + NEON_FADDP_scalar = NEONScalarPairwiseFixed | 0x2000D000u, + NEON_FMAXP_scalar = NEONScalarPairwiseFixed | 0x2000F000u, + NEON_FMINP_scalar = NEONScalarPairwiseFixed | 0x2080F000u +}; + +// NEON scalar shift immediate. +enum NEONScalarShiftImmediateOp : uint32_t { + NEONScalarShiftImmediateFixed = 0x5F000400u, + NEONScalarShiftImmediateFMask = 0xDF800400u, + NEONScalarShiftImmediateMask = 0xFF80FC00u, + NEON_SHL_scalar = NEON_Q | NEONScalar | NEON_SHL, + NEON_SLI_scalar = NEON_Q | NEONScalar | NEON_SLI, + NEON_SRI_scalar = NEON_Q | NEONScalar | NEON_SRI, + NEON_SSHR_scalar = NEON_Q | NEONScalar | NEON_SSHR, + NEON_USHR_scalar = NEON_Q | NEONScalar | NEON_USHR, + NEON_SRSHR_scalar = NEON_Q | NEONScalar | NEON_SRSHR, + NEON_URSHR_scalar = NEON_Q | NEONScalar | NEON_URSHR, + NEON_SSRA_scalar = NEON_Q | NEONScalar | NEON_SSRA, + NEON_USRA_scalar = NEON_Q | NEONScalar | NEON_USRA, + NEON_SRSRA_scalar = NEON_Q | NEONScalar | NEON_SRSRA, + NEON_URSRA_scalar = NEON_Q | NEONScalar | NEON_URSRA, + NEON_UQSHRN_scalar = NEON_Q | NEONScalar | NEON_UQSHRN, + NEON_UQRSHRN_scalar = NEON_Q | NEONScalar | NEON_UQRSHRN, + NEON_SQSHRN_scalar = NEON_Q | NEONScalar | NEON_SQSHRN, + NEON_SQRSHRN_scalar = NEON_Q | NEONScalar | NEON_SQRSHRN, + NEON_SQSHRUN_scalar = NEON_Q | NEONScalar | NEON_SQSHRUN, + NEON_SQRSHRUN_scalar = NEON_Q | NEONScalar | NEON_SQRSHRUN, + NEON_SQSHLU_scalar = NEON_Q | NEONScalar | NEON_SQSHLU, + NEON_SQSHL_imm_scalar = NEON_Q | NEONScalar | NEON_SQSHL_imm, + NEON_UQSHL_imm_scalar = NEON_Q | NEONScalar | NEON_UQSHL_imm, + NEON_SCVTF_imm_scalar = NEON_Q | NEONScalar | NEON_SCVTF_imm, + NEON_UCVTF_imm_scalar = NEON_Q | NEONScalar | NEON_UCVTF_imm, + NEON_FCVTZS_imm_scalar = NEON_Q | NEONScalar | NEON_FCVTZS_imm, + NEON_FCVTZU_imm_scalar = NEON_Q | NEONScalar | NEON_FCVTZU_imm +}; + +enum SVE32BitGatherLoadHalfwords_ScalarPlus32BitScaledOffsetsOp : uint32_t { + SVE32BitGatherLoadHalfwords_ScalarPlus32BitScaledOffsetsFixed = 0x84A00000u, + SVE32BitGatherLoadHalfwords_ScalarPlus32BitScaledOffsetsFMask = 0xFFA08000u, + SVE32BitGatherLoadHalfwords_ScalarPlus32BitScaledOffsetsMask = 
0xFFA0E000u, + LD1SH_z_p_bz_s_x32_scaled = SVE32BitGatherLoadHalfwords_ScalarPlus32BitScaledOffsetsFixed, + LDFF1SH_z_p_bz_s_x32_scaled = SVE32BitGatherLoadHalfwords_ScalarPlus32BitScaledOffsetsFixed | 0x00002000u, + LD1H_z_p_bz_s_x32_scaled = SVE32BitGatherLoadHalfwords_ScalarPlus32BitScaledOffsetsFixed | 0x00004000u, + LDFF1H_z_p_bz_s_x32_scaled = SVE32BitGatherLoadHalfwords_ScalarPlus32BitScaledOffsetsFixed | 0x00006000u +}; + +enum SVE32BitGatherLoadWords_ScalarPlus32BitScaledOffsetsOp : uint32_t { + SVE32BitGatherLoadWords_ScalarPlus32BitScaledOffsetsFixed = 0x85200000u, + SVE32BitGatherLoadWords_ScalarPlus32BitScaledOffsetsFMask = 0xFFA08000u, + SVE32BitGatherLoadWords_ScalarPlus32BitScaledOffsetsMask = 0xFFA0E000u, + LD1W_z_p_bz_s_x32_scaled = SVE32BitGatherLoadWords_ScalarPlus32BitScaledOffsetsFixed | 0x00004000u, + LDFF1W_z_p_bz_s_x32_scaled = SVE32BitGatherLoadWords_ScalarPlus32BitScaledOffsetsFixed | 0x00006000u +}; + +enum SVE32BitGatherLoad_ScalarPlus32BitUnscaledOffsetsOp : uint32_t { + SVE32BitGatherLoad_ScalarPlus32BitUnscaledOffsetsFixed = 0x84000000u, + SVE32BitGatherLoad_ScalarPlus32BitUnscaledOffsetsFMask = 0xFE208000u, + SVE32BitGatherLoad_ScalarPlus32BitUnscaledOffsetsMask = 0xFFA0E000u, + LD1SB_z_p_bz_s_x32_unscaled = SVE32BitGatherLoad_ScalarPlus32BitUnscaledOffsetsFixed, + LDFF1SB_z_p_bz_s_x32_unscaled = SVE32BitGatherLoad_ScalarPlus32BitUnscaledOffsetsFixed | 0x00002000u, + LD1B_z_p_bz_s_x32_unscaled = SVE32BitGatherLoad_ScalarPlus32BitUnscaledOffsetsFixed | 0x00004000u, + LDFF1B_z_p_bz_s_x32_unscaled = SVE32BitGatherLoad_ScalarPlus32BitUnscaledOffsetsFixed | 0x00006000u, + LD1SH_z_p_bz_s_x32_unscaled = SVE32BitGatherLoad_ScalarPlus32BitUnscaledOffsetsFixed | 0x00800000u, + LDFF1SH_z_p_bz_s_x32_unscaled = SVE32BitGatherLoad_ScalarPlus32BitUnscaledOffsetsFixed | 0x00802000u, + LD1H_z_p_bz_s_x32_unscaled = SVE32BitGatherLoad_ScalarPlus32BitUnscaledOffsetsFixed | 0x00804000u, + LDFF1H_z_p_bz_s_x32_unscaled = SVE32BitGatherLoad_ScalarPlus32BitUnscaledOffsetsFixed | 0x00806000u, + LD1W_z_p_bz_s_x32_unscaled = SVE32BitGatherLoad_ScalarPlus32BitUnscaledOffsetsFixed | 0x01004000u, + LDFF1W_z_p_bz_s_x32_unscaled = SVE32BitGatherLoad_ScalarPlus32BitUnscaledOffsetsFixed | 0x01006000u +}; + +enum SVE32BitGatherLoad_VectorPlusImmOp : uint32_t { + SVE32BitGatherLoad_VectorPlusImmFixed = 0x84208000u, + SVE32BitGatherLoad_VectorPlusImmFMask = 0xFE608000u, + SVE32BitGatherLoad_VectorPlusImmMask = 0xFFE0E000u, + LD1SB_z_p_ai_s = SVE32BitGatherLoad_VectorPlusImmFixed, + LDFF1SB_z_p_ai_s = SVE32BitGatherLoad_VectorPlusImmFixed | 0x00002000u, + LD1B_z_p_ai_s = SVE32BitGatherLoad_VectorPlusImmFixed | 0x00004000u, + LDFF1B_z_p_ai_s = SVE32BitGatherLoad_VectorPlusImmFixed | 0x00006000u, + LD1SH_z_p_ai_s = SVE32BitGatherLoad_VectorPlusImmFixed | 0x00800000u, + LDFF1SH_z_p_ai_s = SVE32BitGatherLoad_VectorPlusImmFixed | 0x00802000u, + LD1H_z_p_ai_s = SVE32BitGatherLoad_VectorPlusImmFixed | 0x00804000u, + LDFF1H_z_p_ai_s = SVE32BitGatherLoad_VectorPlusImmFixed | 0x00806000u, + LD1W_z_p_ai_s = SVE32BitGatherLoad_VectorPlusImmFixed | 0x01004000u, + LDFF1W_z_p_ai_s = SVE32BitGatherLoad_VectorPlusImmFixed | 0x01006000u +}; + +enum SVE32BitGatherPrefetch_ScalarPlus32BitScaledOffsetsOp : uint32_t { + SVE32BitGatherPrefetch_ScalarPlus32BitScaledOffsetsFixed = 0x84200000u, + SVE32BitGatherPrefetch_ScalarPlus32BitScaledOffsetsFMask = 0xFFA08010u, + SVE32BitGatherPrefetch_ScalarPlus32BitScaledOffsetsMask = 0xFFA0E010u, + PRFB_i_p_bz_s_x32_scaled = 
SVE32BitGatherPrefetch_ScalarPlus32BitScaledOffsetsFixed, + PRFH_i_p_bz_s_x32_scaled = SVE32BitGatherPrefetch_ScalarPlus32BitScaledOffsetsFixed | 0x00002000u, + PRFW_i_p_bz_s_x32_scaled = SVE32BitGatherPrefetch_ScalarPlus32BitScaledOffsetsFixed | 0x00004000u, + PRFD_i_p_bz_s_x32_scaled = SVE32BitGatherPrefetch_ScalarPlus32BitScaledOffsetsFixed | 0x00006000u +}; + +enum SVE32BitGatherPrefetch_VectorPlusImmOp : uint32_t { + SVE32BitGatherPrefetch_VectorPlusImmFixed = 0x8400E000u, + SVE32BitGatherPrefetch_VectorPlusImmFMask = 0xFE60E010u, + SVE32BitGatherPrefetch_VectorPlusImmMask = 0xFFE0E010u, + PRFB_i_p_ai_s = SVE32BitGatherPrefetch_VectorPlusImmFixed, + PRFH_i_p_ai_s = SVE32BitGatherPrefetch_VectorPlusImmFixed | 0x00800000u, + PRFW_i_p_ai_s = SVE32BitGatherPrefetch_VectorPlusImmFixed | 0x01000000u, + PRFD_i_p_ai_s = SVE32BitGatherPrefetch_VectorPlusImmFixed | 0x01800000u +}; + +enum SVE32BitScatterStore_ScalarPlus32BitScaledOffsetsOp : uint32_t { + SVE32BitScatterStore_ScalarPlus32BitScaledOffsetsFixed = 0xE4608000u, + SVE32BitScatterStore_ScalarPlus32BitScaledOffsetsFMask = 0xFE60A000u, + SVE32BitScatterStore_ScalarPlus32BitScaledOffsetsMask = 0xFFE0A000u, + ST1H_z_p_bz_s_x32_scaled = SVE32BitScatterStore_ScalarPlus32BitScaledOffsetsFixed | 0x00800000u, + ST1W_z_p_bz_s_x32_scaled = SVE32BitScatterStore_ScalarPlus32BitScaledOffsetsFixed | 0x01000000u +}; + +enum SVE32BitScatterStore_ScalarPlus32BitUnscaledOffsetsOp : uint32_t { + SVE32BitScatterStore_ScalarPlus32BitUnscaledOffsetsFixed = 0xE4408000u, + SVE32BitScatterStore_ScalarPlus32BitUnscaledOffsetsFMask = 0xFE60A000u, + SVE32BitScatterStore_ScalarPlus32BitUnscaledOffsetsMask = 0xFFE0A000u, + ST1B_z_p_bz_s_x32_unscaled = SVE32BitScatterStore_ScalarPlus32BitUnscaledOffsetsFixed, + ST1H_z_p_bz_s_x32_unscaled = SVE32BitScatterStore_ScalarPlus32BitUnscaledOffsetsFixed | 0x00800000u, + ST1W_z_p_bz_s_x32_unscaled = SVE32BitScatterStore_ScalarPlus32BitUnscaledOffsetsFixed | 0x01000000u +}; + +enum SVE32BitScatterStore_VectorPlusImmOp : uint32_t { + SVE32BitScatterStore_VectorPlusImmFixed = 0xE460A000u, + SVE32BitScatterStore_VectorPlusImmFMask = 0xFE60E000u, + SVE32BitScatterStore_VectorPlusImmMask = 0xFFE0E000u, + ST1B_z_p_ai_s = SVE32BitScatterStore_VectorPlusImmFixed, + ST1H_z_p_ai_s = SVE32BitScatterStore_VectorPlusImmFixed | 0x00800000u, + ST1W_z_p_ai_s = SVE32BitScatterStore_VectorPlusImmFixed | 0x01000000u +}; + +enum SVE64BitGatherLoad_ScalarPlus32BitUnpackedScaledOffsetsOp : uint32_t { + SVE64BitGatherLoad_ScalarPlus32BitUnpackedScaledOffsetsFixed = 0xC4200000u, + SVE64BitGatherLoad_ScalarPlus32BitUnpackedScaledOffsetsFMask = 0xFE208000u, + SVE64BitGatherLoad_ScalarPlus32BitUnpackedScaledOffsetsMask = 0xFFA0E000u, + LD1SH_z_p_bz_d_x32_scaled = SVE64BitGatherLoad_ScalarPlus32BitUnpackedScaledOffsetsFixed | 0x00800000u, + LDFF1SH_z_p_bz_d_x32_scaled = SVE64BitGatherLoad_ScalarPlus32BitUnpackedScaledOffsetsFixed | 0x00802000u, + LD1H_z_p_bz_d_x32_scaled = SVE64BitGatherLoad_ScalarPlus32BitUnpackedScaledOffsetsFixed | 0x00804000u, + LDFF1H_z_p_bz_d_x32_scaled = SVE64BitGatherLoad_ScalarPlus32BitUnpackedScaledOffsetsFixed | 0x00806000u, + LD1SW_z_p_bz_d_x32_scaled = SVE64BitGatherLoad_ScalarPlus32BitUnpackedScaledOffsetsFixed | 0x01000000u, + LDFF1SW_z_p_bz_d_x32_scaled = SVE64BitGatherLoad_ScalarPlus32BitUnpackedScaledOffsetsFixed | 0x01002000u, + LD1W_z_p_bz_d_x32_scaled = SVE64BitGatherLoad_ScalarPlus32BitUnpackedScaledOffsetsFixed | 0x01004000u, + LDFF1W_z_p_bz_d_x32_scaled = 
SVE64BitGatherLoad_ScalarPlus32BitUnpackedScaledOffsetsFixed | 0x01006000u, + LD1D_z_p_bz_d_x32_scaled = SVE64BitGatherLoad_ScalarPlus32BitUnpackedScaledOffsetsFixed | 0x01804000u, + LDFF1D_z_p_bz_d_x32_scaled = SVE64BitGatherLoad_ScalarPlus32BitUnpackedScaledOffsetsFixed | 0x01806000u +}; + +enum SVE64BitGatherLoad_ScalarPlus64BitScaledOffsetsOp : uint32_t { + SVE64BitGatherLoad_ScalarPlus64BitScaledOffsetsFixed = 0xC4608000u, + SVE64BitGatherLoad_ScalarPlus64BitScaledOffsetsFMask = 0xFE608000u, + SVE64BitGatherLoad_ScalarPlus64BitScaledOffsetsMask = 0xFFE0E000u, + LD1SH_z_p_bz_d_64_scaled = SVE64BitGatherLoad_ScalarPlus64BitScaledOffsetsFixed | 0x00800000u, + LDFF1SH_z_p_bz_d_64_scaled = SVE64BitGatherLoad_ScalarPlus64BitScaledOffsetsFixed | 0x00802000u, + LD1H_z_p_bz_d_64_scaled = SVE64BitGatherLoad_ScalarPlus64BitScaledOffsetsFixed | 0x00804000u, + LDFF1H_z_p_bz_d_64_scaled = SVE64BitGatherLoad_ScalarPlus64BitScaledOffsetsFixed | 0x00806000u, + LD1SW_z_p_bz_d_64_scaled = SVE64BitGatherLoad_ScalarPlus64BitScaledOffsetsFixed | 0x01000000u, + LDFF1SW_z_p_bz_d_64_scaled = SVE64BitGatherLoad_ScalarPlus64BitScaledOffsetsFixed | 0x01002000u, + LD1W_z_p_bz_d_64_scaled = SVE64BitGatherLoad_ScalarPlus64BitScaledOffsetsFixed | 0x01004000u, + LDFF1W_z_p_bz_d_64_scaled = SVE64BitGatherLoad_ScalarPlus64BitScaledOffsetsFixed | 0x01006000u, + LD1D_z_p_bz_d_64_scaled = SVE64BitGatherLoad_ScalarPlus64BitScaledOffsetsFixed | 0x01804000u, + LDFF1D_z_p_bz_d_64_scaled = SVE64BitGatherLoad_ScalarPlus64BitScaledOffsetsFixed | 0x01806000u +}; + +enum SVE64BitGatherLoad_ScalarPlus64BitUnscaledOffsetsOp : uint32_t { + SVE64BitGatherLoad_ScalarPlus64BitUnscaledOffsetsFixed = 0xC4408000u, + SVE64BitGatherLoad_ScalarPlus64BitUnscaledOffsetsFMask = 0xFE608000u, + SVE64BitGatherLoad_ScalarPlus64BitUnscaledOffsetsMask = 0xFFE0E000u, + LD1SB_z_p_bz_d_64_unscaled = SVE64BitGatherLoad_ScalarPlus64BitUnscaledOffsetsFixed, + LDFF1SB_z_p_bz_d_64_unscaled = SVE64BitGatherLoad_ScalarPlus64BitUnscaledOffsetsFixed | 0x00002000u, + LD1B_z_p_bz_d_64_unscaled = SVE64BitGatherLoad_ScalarPlus64BitUnscaledOffsetsFixed | 0x00004000u, + LDFF1B_z_p_bz_d_64_unscaled = SVE64BitGatherLoad_ScalarPlus64BitUnscaledOffsetsFixed | 0x00006000u, + LD1SH_z_p_bz_d_64_unscaled = SVE64BitGatherLoad_ScalarPlus64BitUnscaledOffsetsFixed | 0x00800000u, + LDFF1SH_z_p_bz_d_64_unscaled = SVE64BitGatherLoad_ScalarPlus64BitUnscaledOffsetsFixed | 0x00802000u, + LD1H_z_p_bz_d_64_unscaled = SVE64BitGatherLoad_ScalarPlus64BitUnscaledOffsetsFixed | 0x00804000u, + LDFF1H_z_p_bz_d_64_unscaled = SVE64BitGatherLoad_ScalarPlus64BitUnscaledOffsetsFixed | 0x00806000u, + LD1SW_z_p_bz_d_64_unscaled = SVE64BitGatherLoad_ScalarPlus64BitUnscaledOffsetsFixed | 0x01000000u, + LDFF1SW_z_p_bz_d_64_unscaled = SVE64BitGatherLoad_ScalarPlus64BitUnscaledOffsetsFixed | 0x01002000u, + LD1W_z_p_bz_d_64_unscaled = SVE64BitGatherLoad_ScalarPlus64BitUnscaledOffsetsFixed | 0x01004000u, + LDFF1W_z_p_bz_d_64_unscaled = SVE64BitGatherLoad_ScalarPlus64BitUnscaledOffsetsFixed | 0x01006000u, + LD1D_z_p_bz_d_64_unscaled = SVE64BitGatherLoad_ScalarPlus64BitUnscaledOffsetsFixed | 0x01804000u, + LDFF1D_z_p_bz_d_64_unscaled = SVE64BitGatherLoad_ScalarPlus64BitUnscaledOffsetsFixed | 0x01806000u +}; + +enum SVE64BitGatherLoad_ScalarPlusUnpacked32BitUnscaledOffsetsOp : uint32_t { + SVE64BitGatherLoad_ScalarPlusUnpacked32BitUnscaledOffsetsFixed = 0xC4000000u, + SVE64BitGatherLoad_ScalarPlusUnpacked32BitUnscaledOffsetsFMask = 0xFE208000u, + SVE64BitGatherLoad_ScalarPlusUnpacked32BitUnscaledOffsetsMask = 
0xFFA0E000u, + LD1SB_z_p_bz_d_x32_unscaled = SVE64BitGatherLoad_ScalarPlusUnpacked32BitUnscaledOffsetsFixed, + LDFF1SB_z_p_bz_d_x32_unscaled = SVE64BitGatherLoad_ScalarPlusUnpacked32BitUnscaledOffsetsFixed | 0x00002000u, + LD1B_z_p_bz_d_x32_unscaled = SVE64BitGatherLoad_ScalarPlusUnpacked32BitUnscaledOffsetsFixed | 0x00004000u, + LDFF1B_z_p_bz_d_x32_unscaled = SVE64BitGatherLoad_ScalarPlusUnpacked32BitUnscaledOffsetsFixed | 0x00006000u, + LD1SH_z_p_bz_d_x32_unscaled = SVE64BitGatherLoad_ScalarPlusUnpacked32BitUnscaledOffsetsFixed | 0x00800000u, + LDFF1SH_z_p_bz_d_x32_unscaled = SVE64BitGatherLoad_ScalarPlusUnpacked32BitUnscaledOffsetsFixed | 0x00802000u, + LD1H_z_p_bz_d_x32_unscaled = SVE64BitGatherLoad_ScalarPlusUnpacked32BitUnscaledOffsetsFixed | 0x00804000u, + LDFF1H_z_p_bz_d_x32_unscaled = SVE64BitGatherLoad_ScalarPlusUnpacked32BitUnscaledOffsetsFixed | 0x00806000u, + LD1SW_z_p_bz_d_x32_unscaled = SVE64BitGatherLoad_ScalarPlusUnpacked32BitUnscaledOffsetsFixed | 0x01000000u, + LDFF1SW_z_p_bz_d_x32_unscaled = SVE64BitGatherLoad_ScalarPlusUnpacked32BitUnscaledOffsetsFixed | 0x01002000u, + LD1W_z_p_bz_d_x32_unscaled = SVE64BitGatherLoad_ScalarPlusUnpacked32BitUnscaledOffsetsFixed | 0x01004000u, + LDFF1W_z_p_bz_d_x32_unscaled = SVE64BitGatherLoad_ScalarPlusUnpacked32BitUnscaledOffsetsFixed | 0x01006000u, + LD1D_z_p_bz_d_x32_unscaled = SVE64BitGatherLoad_ScalarPlusUnpacked32BitUnscaledOffsetsFixed | 0x01804000u, + LDFF1D_z_p_bz_d_x32_unscaled = SVE64BitGatherLoad_ScalarPlusUnpacked32BitUnscaledOffsetsFixed | 0x01806000u +}; + +enum SVE64BitGatherLoad_VectorPlusImmOp : uint32_t { + SVE64BitGatherLoad_VectorPlusImmFixed = 0xC4208000u, + SVE64BitGatherLoad_VectorPlusImmFMask = 0xFE608000u, + SVE64BitGatherLoad_VectorPlusImmMask = 0xFFE0E000u, + LD1SB_z_p_ai_d = SVE64BitGatherLoad_VectorPlusImmFixed, + LDFF1SB_z_p_ai_d = SVE64BitGatherLoad_VectorPlusImmFixed | 0x00002000u, + LD1B_z_p_ai_d = SVE64BitGatherLoad_VectorPlusImmFixed | 0x00004000u, + LDFF1B_z_p_ai_d = SVE64BitGatherLoad_VectorPlusImmFixed | 0x00006000u, + LD1SH_z_p_ai_d = SVE64BitGatherLoad_VectorPlusImmFixed | 0x00800000u, + LDFF1SH_z_p_ai_d = SVE64BitGatherLoad_VectorPlusImmFixed | 0x00802000u, + LD1H_z_p_ai_d = SVE64BitGatherLoad_VectorPlusImmFixed | 0x00804000u, + LDFF1H_z_p_ai_d = SVE64BitGatherLoad_VectorPlusImmFixed | 0x00806000u, + LD1SW_z_p_ai_d = SVE64BitGatherLoad_VectorPlusImmFixed | 0x01000000u, + LDFF1SW_z_p_ai_d = SVE64BitGatherLoad_VectorPlusImmFixed | 0x01002000u, + LD1W_z_p_ai_d = SVE64BitGatherLoad_VectorPlusImmFixed | 0x01004000u, + LDFF1W_z_p_ai_d = SVE64BitGatherLoad_VectorPlusImmFixed | 0x01006000u, + LD1D_z_p_ai_d = SVE64BitGatherLoad_VectorPlusImmFixed | 0x01804000u, + LDFF1D_z_p_ai_d = SVE64BitGatherLoad_VectorPlusImmFixed | 0x01806000u +}; + +enum SVE64BitGatherPrefetch_ScalarPlus64BitScaledOffsetsOp : uint32_t { + SVE64BitGatherPrefetch_ScalarPlus64BitScaledOffsetsFixed = 0xC4608000u, + SVE64BitGatherPrefetch_ScalarPlus64BitScaledOffsetsFMask = 0xFFE08010u, + SVE64BitGatherPrefetch_ScalarPlus64BitScaledOffsetsMask = 0xFFE0E010u, + PRFB_i_p_bz_d_64_scaled = SVE64BitGatherPrefetch_ScalarPlus64BitScaledOffsetsFixed, + PRFH_i_p_bz_d_64_scaled = SVE64BitGatherPrefetch_ScalarPlus64BitScaledOffsetsFixed | 0x00002000u, + PRFW_i_p_bz_d_64_scaled = SVE64BitGatherPrefetch_ScalarPlus64BitScaledOffsetsFixed | 0x00004000u, + PRFD_i_p_bz_d_64_scaled = SVE64BitGatherPrefetch_ScalarPlus64BitScaledOffsetsFixed | 0x00006000u +}; + +enum SVE64BitGatherPrefetch_ScalarPlusUnpacked32BitScaledOffsetsOp : uint32_t { + 
SVE64BitGatherPrefetch_ScalarPlusUnpacked32BitScaledOffsetsFixed = 0xC4200000u, + SVE64BitGatherPrefetch_ScalarPlusUnpacked32BitScaledOffsetsFMask = 0xFFA08010u, + SVE64BitGatherPrefetch_ScalarPlusUnpacked32BitScaledOffsetsMask = 0xFFA0E010u, + PRFB_i_p_bz_d_x32_scaled = SVE64BitGatherPrefetch_ScalarPlusUnpacked32BitScaledOffsetsFixed, + PRFH_i_p_bz_d_x32_scaled = SVE64BitGatherPrefetch_ScalarPlusUnpacked32BitScaledOffsetsFixed | 0x00002000u, + PRFW_i_p_bz_d_x32_scaled = SVE64BitGatherPrefetch_ScalarPlusUnpacked32BitScaledOffsetsFixed | 0x00004000u, + PRFD_i_p_bz_d_x32_scaled = SVE64BitGatherPrefetch_ScalarPlusUnpacked32BitScaledOffsetsFixed | 0x00006000u +}; + +enum SVE64BitGatherPrefetch_VectorPlusImmOp : uint32_t { + SVE64BitGatherPrefetch_VectorPlusImmFixed = 0xC400E000u, + SVE64BitGatherPrefetch_VectorPlusImmFMask = 0xFE60E010u, + SVE64BitGatherPrefetch_VectorPlusImmMask = 0xFFE0E010u, + PRFB_i_p_ai_d = SVE64BitGatherPrefetch_VectorPlusImmFixed, + PRFH_i_p_ai_d = SVE64BitGatherPrefetch_VectorPlusImmFixed | 0x00800000u, + PRFW_i_p_ai_d = SVE64BitGatherPrefetch_VectorPlusImmFixed | 0x01000000u, + PRFD_i_p_ai_d = SVE64BitGatherPrefetch_VectorPlusImmFixed | 0x01800000u +}; + +enum SVE64BitScatterStore_ScalarPlus64BitScaledOffsetsOp : uint32_t { + SVE64BitScatterStore_ScalarPlus64BitScaledOffsetsFixed = 0xE420A000u, + SVE64BitScatterStore_ScalarPlus64BitScaledOffsetsFMask = 0xFE60E000u, + SVE64BitScatterStore_ScalarPlus64BitScaledOffsetsMask = 0xFFE0E000u, + ST1H_z_p_bz_d_64_scaled = SVE64BitScatterStore_ScalarPlus64BitScaledOffsetsFixed | 0x00800000u, + ST1W_z_p_bz_d_64_scaled = SVE64BitScatterStore_ScalarPlus64BitScaledOffsetsFixed | 0x01000000u, + ST1D_z_p_bz_d_64_scaled = SVE64BitScatterStore_ScalarPlus64BitScaledOffsetsFixed | 0x01800000u +}; + +enum SVE64BitScatterStore_ScalarPlus64BitUnscaledOffsetsOp : uint32_t { + SVE64BitScatterStore_ScalarPlus64BitUnscaledOffsetsFixed = 0xE400A000u, + SVE64BitScatterStore_ScalarPlus64BitUnscaledOffsetsFMask = 0xFE60E000u, + SVE64BitScatterStore_ScalarPlus64BitUnscaledOffsetsMask = 0xFFE0E000u, + ST1B_z_p_bz_d_64_unscaled = SVE64BitScatterStore_ScalarPlus64BitUnscaledOffsetsFixed, + ST1H_z_p_bz_d_64_unscaled = SVE64BitScatterStore_ScalarPlus64BitUnscaledOffsetsFixed | 0x00800000u, + ST1W_z_p_bz_d_64_unscaled = SVE64BitScatterStore_ScalarPlus64BitUnscaledOffsetsFixed | 0x01000000u, + ST1D_z_p_bz_d_64_unscaled = SVE64BitScatterStore_ScalarPlus64BitUnscaledOffsetsFixed | 0x01800000u +}; + +enum SVE64BitScatterStore_ScalarPlusUnpacked32BitScaledOffsetsOp : uint32_t { + SVE64BitScatterStore_ScalarPlusUnpacked32BitScaledOffsetsFixed = 0xE4208000u, + SVE64BitScatterStore_ScalarPlusUnpacked32BitScaledOffsetsFMask = 0xFE60A000u, + SVE64BitScatterStore_ScalarPlusUnpacked32BitScaledOffsetsMask = 0xFFE0A000u, + ST1H_z_p_bz_d_x32_scaled = SVE64BitScatterStore_ScalarPlusUnpacked32BitScaledOffsetsFixed | 0x00800000u, + ST1W_z_p_bz_d_x32_scaled = SVE64BitScatterStore_ScalarPlusUnpacked32BitScaledOffsetsFixed | 0x01000000u, + ST1D_z_p_bz_d_x32_scaled = SVE64BitScatterStore_ScalarPlusUnpacked32BitScaledOffsetsFixed | 0x01800000u +}; + +enum SVE64BitScatterStore_ScalarPlusUnpacked32BitUnscaledOffsetsOp : uint32_t { + SVE64BitScatterStore_ScalarPlusUnpacked32BitUnscaledOffsetsFixed = 0xE4008000u, + SVE64BitScatterStore_ScalarPlusUnpacked32BitUnscaledOffsetsFMask = 0xFE60A000u, + SVE64BitScatterStore_ScalarPlusUnpacked32BitUnscaledOffsetsMask = 0xFFE0A000u, + ST1B_z_p_bz_d_x32_unscaled = SVE64BitScatterStore_ScalarPlusUnpacked32BitUnscaledOffsetsFixed, + 
ST1H_z_p_bz_d_x32_unscaled = SVE64BitScatterStore_ScalarPlusUnpacked32BitUnscaledOffsetsFixed | 0x00800000u, + ST1W_z_p_bz_d_x32_unscaled = SVE64BitScatterStore_ScalarPlusUnpacked32BitUnscaledOffsetsFixed | 0x01000000u, + ST1D_z_p_bz_d_x32_unscaled = SVE64BitScatterStore_ScalarPlusUnpacked32BitUnscaledOffsetsFixed | 0x01800000u +}; + +enum SVE64BitScatterStore_VectorPlusImmOp : uint32_t { + SVE64BitScatterStore_VectorPlusImmFixed = 0xE440A000u, + SVE64BitScatterStore_VectorPlusImmFMask = 0xFE60E000u, + SVE64BitScatterStore_VectorPlusImmMask = 0xFFE0E000u, + ST1B_z_p_ai_d = SVE64BitScatterStore_VectorPlusImmFixed, + ST1H_z_p_ai_d = SVE64BitScatterStore_VectorPlusImmFixed | 0x00800000u, + ST1W_z_p_ai_d = SVE64BitScatterStore_VectorPlusImmFixed | 0x01000000u, + ST1D_z_p_ai_d = SVE64BitScatterStore_VectorPlusImmFixed | 0x01800000u +}; + +enum SVEAddressGenerationOp : uint32_t { + SVEAddressGenerationFixed = 0x0420A000u, + SVEAddressGenerationFMask = 0xFF20F000u, + SVEAddressGenerationMask = 0xFFE0F000u, + ADR_z_az_d_s32_scaled = SVEAddressGenerationFixed, + ADR_z_az_d_u32_scaled = SVEAddressGenerationFixed | 0x00400000u, + ADR_z_az_s_same_scaled = SVEAddressGenerationFixed | 0x00800000u, + ADR_z_az_d_same_scaled = SVEAddressGenerationFixed | 0x00C00000u +}; + +enum SVEBitwiseLogicalUnpredicatedOp : uint32_t { + SVEBitwiseLogicalUnpredicatedFixed = 0x04202000u, + SVEBitwiseLogicalUnpredicatedFMask = 0xFF20E000u, + SVEBitwiseLogicalUnpredicatedMask = 0xFFE0FC00u, + AND_z_zz = SVEBitwiseLogicalUnpredicatedFixed | 0x00001000u, + ORR_z_zz = SVEBitwiseLogicalUnpredicatedFixed | 0x00401000u, + EOR_z_zz = SVEBitwiseLogicalUnpredicatedFixed | 0x00801000u, + BIC_z_zz = SVEBitwiseLogicalUnpredicatedFixed | 0x00C01000u +}; + +enum SVEBitwiseLogicalWithImm_UnpredicatedOp : uint32_t { + SVEBitwiseLogicalWithImm_UnpredicatedFixed = 0x05000000u, + SVEBitwiseLogicalWithImm_UnpredicatedFMask = 0xFF3C0000u, + SVEBitwiseLogicalWithImm_UnpredicatedMask = 0xFFFC0000u, + ORR_z_zi = SVEBitwiseLogicalWithImm_UnpredicatedFixed, + EOR_z_zi = SVEBitwiseLogicalWithImm_UnpredicatedFixed | 0x00400000u, + AND_z_zi = SVEBitwiseLogicalWithImm_UnpredicatedFixed | 0x00800000u +}; + +enum SVEBitwiseLogical_PredicatedOp : uint32_t { + SVEBitwiseLogical_PredicatedFixed = 0x04180000u, + SVEBitwiseLogical_PredicatedFMask = 0xFF38E000u, + SVEBitwiseLogical_PredicatedMask = 0xFF3FE000u, + ORR_z_p_zz = SVEBitwiseLogical_PredicatedFixed, + EOR_z_p_zz = SVEBitwiseLogical_PredicatedFixed | 0x00010000u, + AND_z_p_zz = SVEBitwiseLogical_PredicatedFixed | 0x00020000u, + BIC_z_p_zz = SVEBitwiseLogical_PredicatedFixed | 0x00030000u +}; + +enum SVEBitwiseShiftByImm_PredicatedOp : uint32_t { + SVEBitwiseShiftByImm_PredicatedFixed = 0x04008000u, + SVEBitwiseShiftByImm_PredicatedFMask = 0xFF30E000u, + SVEBitwiseShiftByImm_PredicatedMask = 0xFF3FE000u, + ASR_z_p_zi = SVEBitwiseShiftByImm_PredicatedFixed, + LSR_z_p_zi = SVEBitwiseShiftByImm_PredicatedFixed | 0x00010000u, + LSL_z_p_zi = SVEBitwiseShiftByImm_PredicatedFixed | 0x00030000u, + ASRD_z_p_zi = SVEBitwiseShiftByImm_PredicatedFixed | 0x00040000u +}; + +enum SVEBitwiseShiftByVector_PredicatedOp : uint32_t { + SVEBitwiseShiftByVector_PredicatedFixed = 0x04108000u, + SVEBitwiseShiftByVector_PredicatedFMask = 0xFF38E000u, + SVEBitwiseShiftByVector_PredicatedMask = 0xFF3FE000u, + ASR_z_p_zz = SVEBitwiseShiftByVector_PredicatedFixed, + LSR_z_p_zz = SVEBitwiseShiftByVector_PredicatedFixed | 0x00010000u, + LSL_z_p_zz = SVEBitwiseShiftByVector_PredicatedFixed | 0x00030000u, + ASRR_z_p_zz = 
SVEBitwiseShiftByVector_PredicatedFixed | 0x00040000u, + LSRR_z_p_zz = SVEBitwiseShiftByVector_PredicatedFixed | 0x00050000u, + LSLR_z_p_zz = SVEBitwiseShiftByVector_PredicatedFixed | 0x00070000u +}; + +enum SVEBitwiseShiftByWideElements_PredicatedOp : uint32_t { + SVEBitwiseShiftByWideElements_PredicatedFixed = 0x04188000u, + SVEBitwiseShiftByWideElements_PredicatedFMask = 0xFF38E000u, + SVEBitwiseShiftByWideElements_PredicatedMask = 0xFF3FE000u, + ASR_z_p_zw = SVEBitwiseShiftByWideElements_PredicatedFixed, + LSR_z_p_zw = SVEBitwiseShiftByWideElements_PredicatedFixed | 0x00010000u, + LSL_z_p_zw = SVEBitwiseShiftByWideElements_PredicatedFixed | 0x00030000u +}; + +enum SVEBitwiseShiftUnpredicatedOp : uint32_t { + SVEBitwiseShiftUnpredicatedFixed = 0x04208000u, + SVEBitwiseShiftUnpredicatedFMask = 0xFF20E000u, + SVEBitwiseShiftUnpredicatedMask = 0xFF20FC00u, + ASR_z_zw = SVEBitwiseShiftUnpredicatedFixed, + LSR_z_zw = SVEBitwiseShiftUnpredicatedFixed | 0x00000400u, + LSL_z_zw = SVEBitwiseShiftUnpredicatedFixed | 0x00000C00u, + ASR_z_zi = SVEBitwiseShiftUnpredicatedFixed | 0x00001000u, + LSR_z_zi = SVEBitwiseShiftUnpredicatedFixed | 0x00001400u, + LSL_z_zi = SVEBitwiseShiftUnpredicatedFixed | 0x00001C00u +}; + +enum SVEBroadcastBitmaskImmOp : uint32_t { + SVEBroadcastBitmaskImmFixed = 0x05C00000u, + SVEBroadcastBitmaskImmFMask = 0xFFFC0000u, + SVEBroadcastBitmaskImmMask = 0xFFFC0000u, + DUPM_z_i = SVEBroadcastBitmaskImmFixed +}; + +enum SVEBroadcastFPImm_UnpredicatedOp : uint32_t { + SVEBroadcastFPImm_UnpredicatedFixed = 0x2539C000u, + SVEBroadcastFPImm_UnpredicatedFMask = 0xFF39C000u, + SVEBroadcastFPImm_UnpredicatedMask = 0xFF3FE000u, + FDUP_z_i = SVEBroadcastFPImm_UnpredicatedFixed +}; + +enum SVEBroadcastGeneralRegisterOp : uint32_t { + SVEBroadcastGeneralRegisterFixed = 0x05203800u, + SVEBroadcastGeneralRegisterFMask = 0xFF3FFC00u, + SVEBroadcastGeneralRegisterMask = 0xFF3FFC00u, + DUP_z_r = SVEBroadcastGeneralRegisterFixed +}; + +enum SVEBroadcastIndexElementOp : uint32_t { + SVEBroadcastIndexElementFixed = 0x05202000u, + SVEBroadcastIndexElementFMask = 0xFF20FC00u, + SVEBroadcastIndexElementMask = 0xFF20FC00u, + DUP_z_zi = SVEBroadcastIndexElementFixed +}; + +enum SVEBroadcastIntImm_UnpredicatedOp : uint32_t { + SVEBroadcastIntImm_UnpredicatedFixed = 0x2538C000u, + SVEBroadcastIntImm_UnpredicatedFMask = 0xFF39C000u, + SVEBroadcastIntImm_UnpredicatedMask = 0xFF3FC000u, + DUP_z_i = SVEBroadcastIntImm_UnpredicatedFixed +}; + +enum SVECompressActiveElementsOp : uint32_t { + SVECompressActiveElementsFixed = 0x05A18000u, + SVECompressActiveElementsFMask = 0xFFBFE000u, + SVECompressActiveElementsMask = 0xFFBFE000u, + COMPACT_z_p_z = SVECompressActiveElementsFixed +}; + +enum SVEConditionallyBroadcastElementToVectorOp : uint32_t { + SVEConditionallyBroadcastElementToVectorFixed = 0x05288000u, + SVEConditionallyBroadcastElementToVectorFMask = 0xFF3EE000u, + SVEConditionallyBroadcastElementToVectorMask = 0xFF3FE000u, + CLASTA_z_p_zz = SVEConditionallyBroadcastElementToVectorFixed, + CLASTB_z_p_zz = SVEConditionallyBroadcastElementToVectorFixed | 0x00010000u +}; + +enum SVEConditionallyExtractElementToGeneralRegisterOp : uint32_t { + SVEConditionallyExtractElementToGeneralRegisterFixed = 0x0530A000u, + SVEConditionallyExtractElementToGeneralRegisterFMask = 0xFF3EE000u, + SVEConditionallyExtractElementToGeneralRegisterMask = 0xFF3FE000u, + CLASTA_r_p_z = SVEConditionallyExtractElementToGeneralRegisterFixed, + CLASTB_r_p_z = SVEConditionallyExtractElementToGeneralRegisterFixed | 0x00010000u +}; + 
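+// Note on the encoding constants above and below (editorial comment, not part
+// of the upstream header): each instruction-class enum defines three values.
+// "<Class>Fixed" holds the bit pattern common to every instruction in the
+// class, "<Class>FMask" selects the bits that are fixed for that class, and
+// "<Class>Mask" selects the bits that identify one specific instruction.
+// A minimal sketch of how a decoder might use them, assuming `instr` is a raw
+// 32-bit instruction word (the helper name is illustrative only):
+//
+//   bool IsConditionallyExtractToGpr(uint32_t instr) {
+//     return (instr & SVEConditionallyExtractElementToGeneralRegisterFMask) ==
+//            SVEConditionallyExtractElementToGeneralRegisterFixed;
+//   }
+//   // Within a matching class, the exact opcode is recovered with Mask, e.g.
+//   //   (instr & SVEConditionallyExtractElementToGeneralRegisterMask) ==
+//   //       CLASTB_r_p_z
+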
+enum SVEConditionallyExtractElementToSIMDFPScalarOp : uint32_t { + SVEConditionallyExtractElementToSIMDFPScalarFixed = 0x052A8000u, + SVEConditionallyExtractElementToSIMDFPScalarFMask = 0xFF3EE000u, + SVEConditionallyExtractElementToSIMDFPScalarMask = 0xFF3FE000u, + CLASTA_v_p_z = SVEConditionallyExtractElementToSIMDFPScalarFixed, + CLASTB_v_p_z = SVEConditionallyExtractElementToSIMDFPScalarFixed | 0x00010000u +}; + +enum SVEConditionallyTerminateScalarsOp : uint32_t { + SVEConditionallyTerminateScalarsFixed = 0x25202000u, + SVEConditionallyTerminateScalarsFMask = 0xFF20FC0Fu, + SVEConditionallyTerminateScalarsMask = 0xFFA0FC1Fu, + CTERMEQ_rr = SVEConditionallyTerminateScalarsFixed | 0x00800000u, + CTERMNE_rr = SVEConditionallyTerminateScalarsFixed | 0x00800010u +}; + +enum SVEConstructivePrefix_UnpredicatedOp : uint32_t { + SVEConstructivePrefix_UnpredicatedFixed = 0x0420BC00u, + SVEConstructivePrefix_UnpredicatedFMask = 0xFF20FC00u, + SVEConstructivePrefix_UnpredicatedMask = 0xFFFFFC00u, + MOVPRFX_z_z = SVEConstructivePrefix_UnpredicatedFixed +}; + +enum SVEContiguousFirstFaultLoad_ScalarPlusScalarOp : uint32_t { + SVEContiguousFirstFaultLoad_ScalarPlusScalarFixed = 0xA4006000u, + SVEContiguousFirstFaultLoad_ScalarPlusScalarFMask = 0xFE00E000u, + SVEContiguousFirstFaultLoad_ScalarPlusScalarMask = 0xFFE0E000u, + LDFF1B_z_p_br_u8 = SVEContiguousFirstFaultLoad_ScalarPlusScalarFixed, + LDFF1B_z_p_br_u16 = SVEContiguousFirstFaultLoad_ScalarPlusScalarFixed | 0x00200000u, + LDFF1B_z_p_br_u32 = SVEContiguousFirstFaultLoad_ScalarPlusScalarFixed | 0x00400000u, + LDFF1B_z_p_br_u64 = SVEContiguousFirstFaultLoad_ScalarPlusScalarFixed | 0x00600000u, + LDFF1SW_z_p_br_s64 = SVEContiguousFirstFaultLoad_ScalarPlusScalarFixed | 0x00800000u, + LDFF1H_z_p_br_u16 = SVEContiguousFirstFaultLoad_ScalarPlusScalarFixed | 0x00A00000u, + LDFF1H_z_p_br_u32 = SVEContiguousFirstFaultLoad_ScalarPlusScalarFixed | 0x00C00000u, + LDFF1H_z_p_br_u64 = SVEContiguousFirstFaultLoad_ScalarPlusScalarFixed | 0x00E00000u, + LDFF1SH_z_p_br_s64 = SVEContiguousFirstFaultLoad_ScalarPlusScalarFixed | 0x01000000u, + LDFF1SH_z_p_br_s32 = SVEContiguousFirstFaultLoad_ScalarPlusScalarFixed | 0x01200000u, + LDFF1W_z_p_br_u32 = SVEContiguousFirstFaultLoad_ScalarPlusScalarFixed | 0x01400000u, + LDFF1W_z_p_br_u64 = SVEContiguousFirstFaultLoad_ScalarPlusScalarFixed | 0x01600000u, + LDFF1SB_z_p_br_s64 = SVEContiguousFirstFaultLoad_ScalarPlusScalarFixed | 0x01800000u, + LDFF1SB_z_p_br_s32 = SVEContiguousFirstFaultLoad_ScalarPlusScalarFixed | 0x01A00000u, + LDFF1SB_z_p_br_s16 = SVEContiguousFirstFaultLoad_ScalarPlusScalarFixed | 0x01C00000u, + LDFF1D_z_p_br_u64 = SVEContiguousFirstFaultLoad_ScalarPlusScalarFixed | 0x01E00000u +}; + +enum SVEContiguousLoad_ScalarPlusImmOp : uint32_t { + SVEContiguousLoad_ScalarPlusImmFixed = 0xA400A000u, + SVEContiguousLoad_ScalarPlusImmFMask = 0xFE10E000u, + SVEContiguousLoad_ScalarPlusImmMask = 0xFFF0E000u, + LD1B_z_p_bi_u8 = SVEContiguousLoad_ScalarPlusImmFixed, + LD1B_z_p_bi_u16 = SVEContiguousLoad_ScalarPlusImmFixed | 0x00200000u, + LD1B_z_p_bi_u32 = SVEContiguousLoad_ScalarPlusImmFixed | 0x00400000u, + LD1B_z_p_bi_u64 = SVEContiguousLoad_ScalarPlusImmFixed | 0x00600000u, + LD1SW_z_p_bi_s64 = SVEContiguousLoad_ScalarPlusImmFixed | 0x00800000u, + LD1H_z_p_bi_u16 = SVEContiguousLoad_ScalarPlusImmFixed | 0x00A00000u, + LD1H_z_p_bi_u32 = SVEContiguousLoad_ScalarPlusImmFixed | 0x00C00000u, + LD1H_z_p_bi_u64 = SVEContiguousLoad_ScalarPlusImmFixed | 0x00E00000u, + LD1SH_z_p_bi_s64 = 
SVEContiguousLoad_ScalarPlusImmFixed | 0x01000000u, + LD1SH_z_p_bi_s32 = SVEContiguousLoad_ScalarPlusImmFixed | 0x01200000u, + LD1W_z_p_bi_u32 = SVEContiguousLoad_ScalarPlusImmFixed | 0x01400000u, + LD1W_z_p_bi_u64 = SVEContiguousLoad_ScalarPlusImmFixed | 0x01600000u, + LD1SB_z_p_bi_s64 = SVEContiguousLoad_ScalarPlusImmFixed | 0x01800000u, + LD1SB_z_p_bi_s32 = SVEContiguousLoad_ScalarPlusImmFixed | 0x01A00000u, + LD1SB_z_p_bi_s16 = SVEContiguousLoad_ScalarPlusImmFixed | 0x01C00000u, + LD1D_z_p_bi_u64 = SVEContiguousLoad_ScalarPlusImmFixed | 0x01E00000u +}; + +enum SVEContiguousLoad_ScalarPlusScalarOp : uint32_t { + SVEContiguousLoad_ScalarPlusScalarFixed = 0xA4004000u, + SVEContiguousLoad_ScalarPlusScalarFMask = 0xFE00E000u, + SVEContiguousLoad_ScalarPlusScalarMask = 0xFFE0E000u, + LD1B_z_p_br_u8 = SVEContiguousLoad_ScalarPlusScalarFixed, + LD1B_z_p_br_u16 = SVEContiguousLoad_ScalarPlusScalarFixed | 0x00200000u, + LD1B_z_p_br_u32 = SVEContiguousLoad_ScalarPlusScalarFixed | 0x00400000u, + LD1B_z_p_br_u64 = SVEContiguousLoad_ScalarPlusScalarFixed | 0x00600000u, + LD1SW_z_p_br_s64 = SVEContiguousLoad_ScalarPlusScalarFixed | 0x00800000u, + LD1H_z_p_br_u16 = SVEContiguousLoad_ScalarPlusScalarFixed | 0x00A00000u, + LD1H_z_p_br_u32 = SVEContiguousLoad_ScalarPlusScalarFixed | 0x00C00000u, + LD1H_z_p_br_u64 = SVEContiguousLoad_ScalarPlusScalarFixed | 0x00E00000u, + LD1SH_z_p_br_s64 = SVEContiguousLoad_ScalarPlusScalarFixed | 0x01000000u, + LD1SH_z_p_br_s32 = SVEContiguousLoad_ScalarPlusScalarFixed | 0x01200000u, + LD1W_z_p_br_u32 = SVEContiguousLoad_ScalarPlusScalarFixed | 0x01400000u, + LD1W_z_p_br_u64 = SVEContiguousLoad_ScalarPlusScalarFixed | 0x01600000u, + LD1SB_z_p_br_s64 = SVEContiguousLoad_ScalarPlusScalarFixed | 0x01800000u, + LD1SB_z_p_br_s32 = SVEContiguousLoad_ScalarPlusScalarFixed | 0x01A00000u, + LD1SB_z_p_br_s16 = SVEContiguousLoad_ScalarPlusScalarFixed | 0x01C00000u, + LD1D_z_p_br_u64 = SVEContiguousLoad_ScalarPlusScalarFixed | 0x01E00000u +}; + +enum SVEContiguousNonFaultLoad_ScalarPlusImmOp : uint32_t { + SVEContiguousNonFaultLoad_ScalarPlusImmFixed = 0xA410A000u, + SVEContiguousNonFaultLoad_ScalarPlusImmFMask = 0xFE10E000u, + SVEContiguousNonFaultLoad_ScalarPlusImmMask = 0xFFF0E000u, + LDNF1B_z_p_bi_u8 = SVEContiguousNonFaultLoad_ScalarPlusImmFixed, + LDNF1B_z_p_bi_u16 = SVEContiguousNonFaultLoad_ScalarPlusImmFixed | 0x00200000u, + LDNF1B_z_p_bi_u32 = SVEContiguousNonFaultLoad_ScalarPlusImmFixed | 0x00400000u, + LDNF1B_z_p_bi_u64 = SVEContiguousNonFaultLoad_ScalarPlusImmFixed | 0x00600000u, + LDNF1SW_z_p_bi_s64 = SVEContiguousNonFaultLoad_ScalarPlusImmFixed | 0x00800000u, + LDNF1H_z_p_bi_u16 = SVEContiguousNonFaultLoad_ScalarPlusImmFixed | 0x00A00000u, + LDNF1H_z_p_bi_u32 = SVEContiguousNonFaultLoad_ScalarPlusImmFixed | 0x00C00000u, + LDNF1H_z_p_bi_u64 = SVEContiguousNonFaultLoad_ScalarPlusImmFixed | 0x00E00000u, + LDNF1SH_z_p_bi_s64 = SVEContiguousNonFaultLoad_ScalarPlusImmFixed | 0x01000000u, + LDNF1SH_z_p_bi_s32 = SVEContiguousNonFaultLoad_ScalarPlusImmFixed | 0x01200000u, + LDNF1W_z_p_bi_u32 = SVEContiguousNonFaultLoad_ScalarPlusImmFixed | 0x01400000u, + LDNF1W_z_p_bi_u64 = SVEContiguousNonFaultLoad_ScalarPlusImmFixed | 0x01600000u, + LDNF1SB_z_p_bi_s64 = SVEContiguousNonFaultLoad_ScalarPlusImmFixed | 0x01800000u, + LDNF1SB_z_p_bi_s32 = SVEContiguousNonFaultLoad_ScalarPlusImmFixed | 0x01A00000u, + LDNF1SB_z_p_bi_s16 = SVEContiguousNonFaultLoad_ScalarPlusImmFixed | 0x01C00000u, + LDNF1D_z_p_bi_u64 = SVEContiguousNonFaultLoad_ScalarPlusImmFixed | 0x01E00000u +}; + +enum 
SVEContiguousNonTemporalLoad_ScalarPlusImmOp : uint32_t { + SVEContiguousNonTemporalLoad_ScalarPlusImmFixed = 0xA400E000u, + SVEContiguousNonTemporalLoad_ScalarPlusImmFMask = 0xFE70E000u, + SVEContiguousNonTemporalLoad_ScalarPlusImmMask = 0xFFF0E000u, + LDNT1B_z_p_bi_contiguous = SVEContiguousNonTemporalLoad_ScalarPlusImmFixed, + LDNT1H_z_p_bi_contiguous = SVEContiguousNonTemporalLoad_ScalarPlusImmFixed | 0x00800000u, + LDNT1W_z_p_bi_contiguous = SVEContiguousNonTemporalLoad_ScalarPlusImmFixed | 0x01000000u, + LDNT1D_z_p_bi_contiguous = SVEContiguousNonTemporalLoad_ScalarPlusImmFixed | 0x01800000u +}; + +enum SVEContiguousNonTemporalLoad_ScalarPlusScalarOp : uint32_t { + SVEContiguousNonTemporalLoad_ScalarPlusScalarFixed = 0xA400C000u, + SVEContiguousNonTemporalLoad_ScalarPlusScalarFMask = 0xFE60E000u, + SVEContiguousNonTemporalLoad_ScalarPlusScalarMask = 0xFFE0E000u, + LDNT1B_z_p_br_contiguous = SVEContiguousNonTemporalLoad_ScalarPlusScalarFixed, + LDNT1H_z_p_br_contiguous = SVEContiguousNonTemporalLoad_ScalarPlusScalarFixed | 0x00800000u, + LDNT1W_z_p_br_contiguous = SVEContiguousNonTemporalLoad_ScalarPlusScalarFixed | 0x01000000u, + LDNT1D_z_p_br_contiguous = SVEContiguousNonTemporalLoad_ScalarPlusScalarFixed | 0x01800000u +}; + +enum SVEContiguousNonTemporalStore_ScalarPlusImmOp : uint32_t { + SVEContiguousNonTemporalStore_ScalarPlusImmFixed = 0xE410E000u, + SVEContiguousNonTemporalStore_ScalarPlusImmFMask = 0xFE70E000u, + SVEContiguousNonTemporalStore_ScalarPlusImmMask = 0xFFF0E000u, + STNT1B_z_p_bi_contiguous = SVEContiguousNonTemporalStore_ScalarPlusImmFixed, + STNT1H_z_p_bi_contiguous = SVEContiguousNonTemporalStore_ScalarPlusImmFixed | 0x00800000u, + STNT1W_z_p_bi_contiguous = SVEContiguousNonTemporalStore_ScalarPlusImmFixed | 0x01000000u, + STNT1D_z_p_bi_contiguous = SVEContiguousNonTemporalStore_ScalarPlusImmFixed | 0x01800000u +}; + +enum SVEContiguousNonTemporalStore_ScalarPlusScalarOp : uint32_t { + SVEContiguousNonTemporalStore_ScalarPlusScalarFixed = 0xE4006000u, + SVEContiguousNonTemporalStore_ScalarPlusScalarFMask = 0xFE60E000u, + SVEContiguousNonTemporalStore_ScalarPlusScalarMask = 0xFFE0E000u, + STNT1B_z_p_br_contiguous = SVEContiguousNonTemporalStore_ScalarPlusScalarFixed, + STNT1H_z_p_br_contiguous = SVEContiguousNonTemporalStore_ScalarPlusScalarFixed | 0x00800000u, + STNT1W_z_p_br_contiguous = SVEContiguousNonTemporalStore_ScalarPlusScalarFixed | 0x01000000u, + STNT1D_z_p_br_contiguous = SVEContiguousNonTemporalStore_ScalarPlusScalarFixed | 0x01800000u +}; + +enum SVEContiguousPrefetch_ScalarPlusImmOp : uint32_t { + SVEContiguousPrefetch_ScalarPlusImmFixed = 0x85C00000u, + SVEContiguousPrefetch_ScalarPlusImmFMask = 0xFFC08010u, + SVEContiguousPrefetch_ScalarPlusImmMask = 0xFFC0E010u, + PRFB_i_p_bi_s = SVEContiguousPrefetch_ScalarPlusImmFixed, + PRFH_i_p_bi_s = SVEContiguousPrefetch_ScalarPlusImmFixed | 0x00002000u, + PRFW_i_p_bi_s = SVEContiguousPrefetch_ScalarPlusImmFixed | 0x00004000u, + PRFD_i_p_bi_s = SVEContiguousPrefetch_ScalarPlusImmFixed | 0x00006000u +}; + +enum SVEContiguousPrefetch_ScalarPlusScalarOp : uint32_t { + SVEContiguousPrefetch_ScalarPlusScalarFixed = 0x8400C000u, + SVEContiguousPrefetch_ScalarPlusScalarFMask = 0xFE60E010u, + SVEContiguousPrefetch_ScalarPlusScalarMask = 0xFFE0E010u, + PRFB_i_p_br_s = SVEContiguousPrefetch_ScalarPlusScalarFixed, + PRFH_i_p_br_s = SVEContiguousPrefetch_ScalarPlusScalarFixed | 0x00800000u, + PRFW_i_p_br_s = SVEContiguousPrefetch_ScalarPlusScalarFixed | 0x01000000u, + PRFD_i_p_br_s = 
SVEContiguousPrefetch_ScalarPlusScalarFixed | 0x01800000u +}; + +enum SVEContiguousStore_ScalarPlusImmOp : uint32_t { + SVEContiguousStore_ScalarPlusImmFixed = 0xE400E000u, + SVEContiguousStore_ScalarPlusImmFMask = 0xFE10E000u, + SVEContiguousStore_ScalarPlusImmMask = 0xFF90E000u, + ST1B_z_p_bi = SVEContiguousStore_ScalarPlusImmFixed, + ST1H_z_p_bi = SVEContiguousStore_ScalarPlusImmFixed | 0x00800000u, + ST1W_z_p_bi = SVEContiguousStore_ScalarPlusImmFixed | 0x01000000u, + ST1D_z_p_bi = SVEContiguousStore_ScalarPlusImmFixed | 0x01800000u +}; + +enum SVEContiguousStore_ScalarPlusScalarOp : uint32_t { + SVEContiguousStore_ScalarPlusScalarFixed = 0xE4004000u, + SVEContiguousStore_ScalarPlusScalarFMask = 0xFE00E000u, + SVEContiguousStore_ScalarPlusScalarMask = 0xFF80E000u, + ST1B_z_p_br = SVEContiguousStore_ScalarPlusScalarFixed, + ST1H_z_p_br = SVEContiguousStore_ScalarPlusScalarFixed | 0x00800000u, + ST1W_z_p_br = SVEContiguousStore_ScalarPlusScalarFixed | 0x01000000u, + ST1D_z_p_br = SVEContiguousStore_ScalarPlusScalarFixed | 0x01800000u +}; + +enum SVECopyFPImm_PredicatedOp : uint32_t { + SVECopyFPImm_PredicatedFixed = 0x0510C000u, + SVECopyFPImm_PredicatedFMask = 0xFF30E000u, + SVECopyFPImm_PredicatedMask = 0xFF30E000u, + FCPY_z_p_i = SVECopyFPImm_PredicatedFixed +}; + +enum SVECopyGeneralRegisterToVector_PredicatedOp : uint32_t { + SVECopyGeneralRegisterToVector_PredicatedFixed = 0x0528A000u, + SVECopyGeneralRegisterToVector_PredicatedFMask = 0xFF3FE000u, + SVECopyGeneralRegisterToVector_PredicatedMask = 0xFF3FE000u, + CPY_z_p_r = SVECopyGeneralRegisterToVector_PredicatedFixed +}; + +enum SVECopyIntImm_PredicatedOp : uint32_t { + SVECopyIntImm_PredicatedFixed = 0x05100000u, + SVECopyIntImm_PredicatedFMask = 0xFF308000u, + SVECopyIntImm_PredicatedMask = 0xFF308000u, + CPY_z_p_i = SVECopyIntImm_PredicatedFixed +}; + +enum SVECopySIMDFPScalarRegisterToVector_PredicatedOp : uint32_t { + SVECopySIMDFPScalarRegisterToVector_PredicatedFixed = 0x05208000u, + SVECopySIMDFPScalarRegisterToVector_PredicatedFMask = 0xFF3FE000u, + SVECopySIMDFPScalarRegisterToVector_PredicatedMask = 0xFF3FE000u, + CPY_z_p_v = SVECopySIMDFPScalarRegisterToVector_PredicatedFixed +}; + +enum SVEElementCountOp : uint32_t { + SVEElementCountFixed = 0x0420E000u, + SVEElementCountFMask = 0xFF30F800u, + SVEElementCountMask = 0xFFF0FC00u, + CNTB_r_s = SVEElementCountFixed, + CNTH_r_s = SVEElementCountFixed | 0x00400000u, + CNTW_r_s = SVEElementCountFixed | 0x00800000u, + CNTD_r_s = SVEElementCountFixed | 0x00C00000u +}; + +enum SVEExtractElementToGeneralRegisterOp : uint32_t { + SVEExtractElementToGeneralRegisterFixed = 0x0520A000u, + SVEExtractElementToGeneralRegisterFMask = 0xFF3EE000u, + SVEExtractElementToGeneralRegisterMask = 0xFF3FE000u, + LASTA_r_p_z = SVEExtractElementToGeneralRegisterFixed, + LASTB_r_p_z = SVEExtractElementToGeneralRegisterFixed | 0x00010000u +}; + +enum SVEExtractElementToSIMDFPScalarRegisterOp : uint32_t { + SVEExtractElementToSIMDFPScalarRegisterFixed = 0x05228000u, + SVEExtractElementToSIMDFPScalarRegisterFMask = 0xFF3EE000u, + SVEExtractElementToSIMDFPScalarRegisterMask = 0xFF3FE000u, + LASTA_v_p_z = SVEExtractElementToSIMDFPScalarRegisterFixed, + LASTB_v_p_z = SVEExtractElementToSIMDFPScalarRegisterFixed | 0x00010000u +}; + +enum SVEFFRInitialiseOp : uint32_t { + SVEFFRInitialiseFixed = 0x252C9000u, + SVEFFRInitialiseFMask = 0xFF3FFFFFu, + SVEFFRInitialiseMask = 0xFFFFFFFFu, + SETFFR_f = SVEFFRInitialiseFixed +}; + +enum SVEFFRWriteFromPredicateOp : uint32_t { + SVEFFRWriteFromPredicateFixed = 
0x25289000u, + SVEFFRWriteFromPredicateFMask = 0xFF3FFE1Fu, + SVEFFRWriteFromPredicateMask = 0xFFFFFE1Fu, + WRFFR_f_p = SVEFFRWriteFromPredicateFixed +}; + +enum SVEFPAccumulatingReductionOp : uint32_t { + SVEFPAccumulatingReductionFixed = 0x65182000u, + SVEFPAccumulatingReductionFMask = 0xFF38E000u, + SVEFPAccumulatingReductionMask = 0xFF3FE000u, + FADDA_v_p_z = SVEFPAccumulatingReductionFixed +}; + +enum SVEFPArithmeticUnpredicatedOp : uint32_t { + SVEFPArithmeticUnpredicatedFixed = 0x65000000u, + SVEFPArithmeticUnpredicatedFMask = 0xFF20E000u, + SVEFPArithmeticUnpredicatedMask = 0xFF20FC00u, + FADD_z_zz = SVEFPArithmeticUnpredicatedFixed, + FSUB_z_zz = SVEFPArithmeticUnpredicatedFixed | 0x00000400u, + FMUL_z_zz = SVEFPArithmeticUnpredicatedFixed | 0x00000800u, + FTSMUL_z_zz = SVEFPArithmeticUnpredicatedFixed | 0x00000C00u, + FRECPS_z_zz = SVEFPArithmeticUnpredicatedFixed | 0x00001800u, + FRSQRTS_z_zz = SVEFPArithmeticUnpredicatedFixed | 0x00001C00u +}; + +enum SVEFPArithmeticWithImm_PredicatedOp : uint32_t { + SVEFPArithmeticWithImm_PredicatedFixed = 0x65188000u, + SVEFPArithmeticWithImm_PredicatedFMask = 0xFF38E3C0u, + SVEFPArithmeticWithImm_PredicatedMask = 0xFF3FE3C0u, + FADD_z_p_zs = SVEFPArithmeticWithImm_PredicatedFixed, + FSUB_z_p_zs = SVEFPArithmeticWithImm_PredicatedFixed | 0x00010000u, + FMUL_z_p_zs = SVEFPArithmeticWithImm_PredicatedFixed | 0x00020000u, + FSUBR_z_p_zs = SVEFPArithmeticWithImm_PredicatedFixed | 0x00030000u, + FMAXNM_z_p_zs = SVEFPArithmeticWithImm_PredicatedFixed | 0x00040000u, + FMINNM_z_p_zs = SVEFPArithmeticWithImm_PredicatedFixed | 0x00050000u, + FMAX_z_p_zs = SVEFPArithmeticWithImm_PredicatedFixed | 0x00060000u, + FMIN_z_p_zs = SVEFPArithmeticWithImm_PredicatedFixed | 0x00070000u +}; + +enum SVEFPArithmetic_PredicatedOp : uint32_t { + SVEFPArithmetic_PredicatedFixed = 0x65008000u, + SVEFPArithmetic_PredicatedFMask = 0xFF30E000u, + SVEFPArithmetic_PredicatedMask = 0xFF3FE000u, + FADD_z_p_zz = SVEFPArithmetic_PredicatedFixed, + FSUB_z_p_zz = SVEFPArithmetic_PredicatedFixed | 0x00010000u, + FMUL_z_p_zz = SVEFPArithmetic_PredicatedFixed | 0x00020000u, + FSUBR_z_p_zz = SVEFPArithmetic_PredicatedFixed | 0x00030000u, + FMAXNM_z_p_zz = SVEFPArithmetic_PredicatedFixed | 0x00040000u, + FMINNM_z_p_zz = SVEFPArithmetic_PredicatedFixed | 0x00050000u, + FMAX_z_p_zz = SVEFPArithmetic_PredicatedFixed | 0x00060000u, + FMIN_z_p_zz = SVEFPArithmetic_PredicatedFixed | 0x00070000u, + FABD_z_p_zz = SVEFPArithmetic_PredicatedFixed | 0x00080000u, + FSCALE_z_p_zz = SVEFPArithmetic_PredicatedFixed | 0x00090000u, + FMULX_z_p_zz = SVEFPArithmetic_PredicatedFixed | 0x000A0000u, + FDIVR_z_p_zz = SVEFPArithmetic_PredicatedFixed | 0x000C0000u, + FDIV_z_p_zz = SVEFPArithmetic_PredicatedFixed | 0x000D0000u +}; + +enum SVEFPCompareVectorsOp : uint32_t { + SVEFPCompareVectorsFixed = 0x65004000u, + SVEFPCompareVectorsFMask = 0xFF204000u, + SVEFPCompareVectorsMask = 0xFF20E010u, + FCMGE_p_p_zz = SVEFPCompareVectorsFixed, + FCMGT_p_p_zz = SVEFPCompareVectorsFixed | 0x00000010u, + FCMEQ_p_p_zz = SVEFPCompareVectorsFixed | 0x00002000u, + FCMNE_p_p_zz = SVEFPCompareVectorsFixed | 0x00002010u, + FCMUO_p_p_zz = SVEFPCompareVectorsFixed | 0x00008000u, + FACGE_p_p_zz = SVEFPCompareVectorsFixed | 0x00008010u, + FACGT_p_p_zz = SVEFPCompareVectorsFixed | 0x0000A010u +}; + +enum SVEFPCompareWithZeroOp : uint32_t { + SVEFPCompareWithZeroFixed = 0x65102000u, + SVEFPCompareWithZeroFMask = 0xFF38E000u, + SVEFPCompareWithZeroMask = 0xFF3FE010u, + FCMGE_p_p_z0 = SVEFPCompareWithZeroFixed, + FCMGT_p_p_z0 = 
SVEFPCompareWithZeroFixed | 0x00000010u, + FCMLT_p_p_z0 = SVEFPCompareWithZeroFixed | 0x00010000u, + FCMLE_p_p_z0 = SVEFPCompareWithZeroFixed | 0x00010010u, + FCMEQ_p_p_z0 = SVEFPCompareWithZeroFixed | 0x00020000u, + FCMNE_p_p_z0 = SVEFPCompareWithZeroFixed | 0x00030000u +}; + +enum SVEFPComplexAdditionOp : uint32_t { + SVEFPComplexAdditionFixed = 0x64008000u, + SVEFPComplexAdditionFMask = 0xFF3EE000u, + SVEFPComplexAdditionMask = 0xFF3EE000u, + FCADD_z_p_zz = SVEFPComplexAdditionFixed +}; + +enum SVEFPComplexMulAddOp : uint32_t { + SVEFPComplexMulAddFixed = 0x64000000u, + SVEFPComplexMulAddFMask = 0xFF208000u, + SVEFPComplexMulAddMask = 0xFF208000u, + FCMLA_z_p_zzz = SVEFPComplexMulAddFixed +}; + +enum SVEFPComplexMulAddIndexOp : uint32_t { + SVEFPComplexMulAddIndexFixed = 0x64201000u, + SVEFPComplexMulAddIndexFMask = 0xFF20F000u, + SVEFPComplexMulAddIndexMask = 0xFFE0F000u, + FCMLA_z_zzzi_h = SVEFPComplexMulAddIndexFixed | 0x00800000u, + FCMLA_z_zzzi_s = SVEFPComplexMulAddIndexFixed | 0x00C00000u +}; + +enum SVEFPConvertPrecisionOp : uint32_t { + SVEFPConvertPrecisionFixed = 0x6508A000u, + SVEFPConvertPrecisionFMask = 0xFF3CE000u, + SVEFPConvertPrecisionMask = 0xFFFFE000u, + FCVT_z_p_z_s2h = SVEFPConvertPrecisionFixed | 0x00800000u, + FCVT_z_p_z_h2s = SVEFPConvertPrecisionFixed | 0x00810000u, + FCVT_z_p_z_d2h = SVEFPConvertPrecisionFixed | 0x00C00000u, + FCVT_z_p_z_h2d = SVEFPConvertPrecisionFixed | 0x00C10000u, + FCVT_z_p_z_d2s = SVEFPConvertPrecisionFixed | 0x00C20000u, + FCVT_z_p_z_s2d = SVEFPConvertPrecisionFixed | 0x00C30000u +}; + +enum SVEFPConvertToIntOp : uint32_t { + SVEFPConvertToIntFixed = 0x6518A000u, + SVEFPConvertToIntFMask = 0xFF38E000u, + SVEFPConvertToIntMask = 0xFFFFE000u, + FCVTZS_z_p_z_fp162h = SVEFPConvertToIntFixed | 0x00420000u, + FCVTZU_z_p_z_fp162h = SVEFPConvertToIntFixed | 0x00430000u, + FCVTZS_z_p_z_fp162w = SVEFPConvertToIntFixed | 0x00440000u, + FCVTZU_z_p_z_fp162w = SVEFPConvertToIntFixed | 0x00450000u, + FCVTZS_z_p_z_fp162x = SVEFPConvertToIntFixed | 0x00460000u, + FCVTZU_z_p_z_fp162x = SVEFPConvertToIntFixed | 0x00470000u, + FCVTZS_z_p_z_s2w = SVEFPConvertToIntFixed | 0x00840000u, + FCVTZU_z_p_z_s2w = SVEFPConvertToIntFixed | 0x00850000u, + FCVTZS_z_p_z_d2w = SVEFPConvertToIntFixed | 0x00C00000u, + FCVTZU_z_p_z_d2w = SVEFPConvertToIntFixed | 0x00C10000u, + FCVTZS_z_p_z_s2x = SVEFPConvertToIntFixed | 0x00C40000u, + FCVTZU_z_p_z_s2x = SVEFPConvertToIntFixed | 0x00C50000u, + FCVTZS_z_p_z_d2x = SVEFPConvertToIntFixed | 0x00C60000u, + FCVTZU_z_p_z_d2x = SVEFPConvertToIntFixed | 0x00C70000u +}; + +enum SVEFPExponentialAcceleratorOp : uint32_t { + SVEFPExponentialAcceleratorFixed = 0x0420B800u, + SVEFPExponentialAcceleratorFMask = 0xFF20FC00u, + SVEFPExponentialAcceleratorMask = 0xFF3FFC00u, + FEXPA_z_z = SVEFPExponentialAcceleratorFixed +}; + +enum SVEFPFastReductionOp : uint32_t { + SVEFPFastReductionFixed = 0x65002000u, + SVEFPFastReductionFMask = 0xFF38E000u, + SVEFPFastReductionMask = 0xFF3FE000u, + FADDV_v_p_z = SVEFPFastReductionFixed, + FMAXNMV_v_p_z = SVEFPFastReductionFixed | 0x00040000u, + FMINNMV_v_p_z = SVEFPFastReductionFixed | 0x00050000u, + FMAXV_v_p_z = SVEFPFastReductionFixed | 0x00060000u, + FMINV_v_p_z = SVEFPFastReductionFixed | 0x00070000u +}; + +enum SVEFPMulAddOp : uint32_t { + SVEFPMulAddFixed = 0x65200000u, + SVEFPMulAddFMask = 0xFF200000u, + SVEFPMulAddMask = 0xFF20E000u, + FMLA_z_p_zzz = SVEFPMulAddFixed, + FMLS_z_p_zzz = SVEFPMulAddFixed | 0x00002000u, + FNMLA_z_p_zzz = SVEFPMulAddFixed | 0x00004000u, + FNMLS_z_p_zzz = 
SVEFPMulAddFixed | 0x00006000u, + FMAD_z_p_zzz = SVEFPMulAddFixed | 0x00008000u, + FMSB_z_p_zzz = SVEFPMulAddFixed | 0x0000A000u, + FNMAD_z_p_zzz = SVEFPMulAddFixed | 0x0000C000u, + FNMSB_z_p_zzz = SVEFPMulAddFixed | 0x0000E000u +}; + +enum SVEFPMulAddIndexOp : uint32_t { + SVEFPMulAddIndexFixed = 0x64200000u, + SVEFPMulAddIndexFMask = 0xFF20F800u, + SVEFPMulAddIndexMask = 0xFFE0FC00u, + FMLA_z_zzzi_h = SVEFPMulAddIndexFixed, + FMLA_z_zzzi_h_i3h = FMLA_z_zzzi_h | 0x00400000u, + FMLS_z_zzzi_h = SVEFPMulAddIndexFixed | 0x00000400u, + FMLS_z_zzzi_h_i3h = FMLS_z_zzzi_h | 0x00400000u, + FMLA_z_zzzi_s = SVEFPMulAddIndexFixed | 0x00800000u, + FMLS_z_zzzi_s = SVEFPMulAddIndexFixed | 0x00800400u, + FMLA_z_zzzi_d = SVEFPMulAddIndexFixed | 0x00C00000u, + FMLS_z_zzzi_d = SVEFPMulAddIndexFixed | 0x00C00400u +}; + +enum SVEFPMulIndexOp : uint32_t { + SVEFPMulIndexFixed = 0x64202000u, + SVEFPMulIndexFMask = 0xFF20FC00u, + SVEFPMulIndexMask = 0xFFE0FC00u, + FMUL_z_zzi_h = SVEFPMulIndexFixed, + FMUL_z_zzi_h_i3h = FMUL_z_zzi_h | 0x00400000u, + FMUL_z_zzi_s = SVEFPMulIndexFixed | 0x00800000u, + FMUL_z_zzi_d = SVEFPMulIndexFixed | 0x00C00000u +}; + +enum SVEFPRoundToIntegralValueOp : uint32_t { + SVEFPRoundToIntegralValueFixed = 0x6500A000u, + SVEFPRoundToIntegralValueFMask = 0xFF38E000u, + SVEFPRoundToIntegralValueMask = 0xFF3FE000u, + FRINTN_z_p_z = SVEFPRoundToIntegralValueFixed, + FRINTP_z_p_z = SVEFPRoundToIntegralValueFixed | 0x00010000u, + FRINTM_z_p_z = SVEFPRoundToIntegralValueFixed | 0x00020000u, + FRINTZ_z_p_z = SVEFPRoundToIntegralValueFixed | 0x00030000u, + FRINTA_z_p_z = SVEFPRoundToIntegralValueFixed | 0x00040000u, + FRINTX_z_p_z = SVEFPRoundToIntegralValueFixed | 0x00060000u, + FRINTI_z_p_z = SVEFPRoundToIntegralValueFixed | 0x00070000u +}; + +enum SVEFPTrigMulAddCoefficientOp : uint32_t { + SVEFPTrigMulAddCoefficientFixed = 0x65108000u, + SVEFPTrigMulAddCoefficientFMask = 0xFF38FC00u, + SVEFPTrigMulAddCoefficientMask = 0xFF38FC00u, + FTMAD_z_zzi = SVEFPTrigMulAddCoefficientFixed +}; + +enum SVEFPTrigSelectCoefficientOp : uint32_t { + SVEFPTrigSelectCoefficientFixed = 0x0420B000u, + SVEFPTrigSelectCoefficientFMask = 0xFF20F800u, + SVEFPTrigSelectCoefficientMask = 0xFF20FC00u, + FTSSEL_z_zz = SVEFPTrigSelectCoefficientFixed +}; + +enum SVEFPUnaryOpOp : uint32_t { + SVEFPUnaryOpFixed = 0x650CA000u, + SVEFPUnaryOpFMask = 0xFF3CE000u, + SVEFPUnaryOpMask = 0xFF3FE000u, + FRECPX_z_p_z = SVEFPUnaryOpFixed, + FSQRT_z_p_z = SVEFPUnaryOpFixed | 0x00010000u +}; + +enum SVEFPUnaryOpUnpredicatedOp : uint32_t { + SVEFPUnaryOpUnpredicatedFixed = 0x65083000u, + SVEFPUnaryOpUnpredicatedFMask = 0xFF38F000u, + SVEFPUnaryOpUnpredicatedMask = 0xFF3FFC00u, + FRECPE_z_z = SVEFPUnaryOpUnpredicatedFixed | 0x00060000u, + FRSQRTE_z_z = SVEFPUnaryOpUnpredicatedFixed | 0x00070000u +}; + +enum SVEIncDecByPredicateCountOp : uint32_t { + SVEIncDecByPredicateCountFixed = 0x25288000u, + SVEIncDecByPredicateCountFMask = 0xFF38F000u, + SVEIncDecByPredicateCountMask = 0xFF3FFE00u, + SQINCP_z_p_z = SVEIncDecByPredicateCountFixed, + SQINCP_r_p_r_sx = SVEIncDecByPredicateCountFixed | 0x00000800u, + SQINCP_r_p_r_x = SVEIncDecByPredicateCountFixed | 0x00000C00u, + UQINCP_z_p_z = SVEIncDecByPredicateCountFixed | 0x00010000u, + UQINCP_r_p_r_uw = SVEIncDecByPredicateCountFixed | 0x00010800u, + UQINCP_r_p_r_x = SVEIncDecByPredicateCountFixed | 0x00010C00u, + SQDECP_z_p_z = SVEIncDecByPredicateCountFixed | 0x00020000u, + SQDECP_r_p_r_sx = SVEIncDecByPredicateCountFixed | 0x00020800u, + SQDECP_r_p_r_x = SVEIncDecByPredicateCountFixed | 
0x00020C00u, + UQDECP_z_p_z = SVEIncDecByPredicateCountFixed | 0x00030000u, + UQDECP_r_p_r_uw = SVEIncDecByPredicateCountFixed | 0x00030800u, + UQDECP_r_p_r_x = SVEIncDecByPredicateCountFixed | 0x00030C00u, + INCP_z_p_z = SVEIncDecByPredicateCountFixed | 0x00040000u, + INCP_r_p_r = SVEIncDecByPredicateCountFixed | 0x00040800u, + DECP_z_p_z = SVEIncDecByPredicateCountFixed | 0x00050000u, + DECP_r_p_r = SVEIncDecByPredicateCountFixed | 0x00050800u +}; + +enum SVEIncDecRegisterByElementCountOp : uint32_t { + SVEIncDecRegisterByElementCountFixed = 0x0430E000u, + SVEIncDecRegisterByElementCountFMask = 0xFF30F800u, + SVEIncDecRegisterByElementCountMask = 0xFFF0FC00u, + INCB_r_rs = SVEIncDecRegisterByElementCountFixed, + DECB_r_rs = SVEIncDecRegisterByElementCountFixed | 0x00000400u, + INCH_r_rs = SVEIncDecRegisterByElementCountFixed | 0x00400000u, + DECH_r_rs = SVEIncDecRegisterByElementCountFixed | 0x00400400u, + INCW_r_rs = SVEIncDecRegisterByElementCountFixed | 0x00800000u, + DECW_r_rs = SVEIncDecRegisterByElementCountFixed | 0x00800400u, + INCD_r_rs = SVEIncDecRegisterByElementCountFixed | 0x00C00000u, + DECD_r_rs = SVEIncDecRegisterByElementCountFixed | 0x00C00400u +}; + +enum SVEIncDecVectorByElementCountOp : uint32_t { + SVEIncDecVectorByElementCountFixed = 0x0430C000u, + SVEIncDecVectorByElementCountFMask = 0xFF30F800u, + SVEIncDecVectorByElementCountMask = 0xFFF0FC00u, + INCH_z_zs = SVEIncDecVectorByElementCountFixed | 0x00400000u, + DECH_z_zs = SVEIncDecVectorByElementCountFixed | 0x00400400u, + INCW_z_zs = SVEIncDecVectorByElementCountFixed | 0x00800000u, + DECW_z_zs = SVEIncDecVectorByElementCountFixed | 0x00800400u, + INCD_z_zs = SVEIncDecVectorByElementCountFixed | 0x00C00000u, + DECD_z_zs = SVEIncDecVectorByElementCountFixed | 0x00C00400u +}; + +enum SVEIndexGenerationOp : uint32_t { + SVEIndexGenerationFixed = 0x04204000u, + SVEIndexGenerationFMask = 0xFF20F000u, + SVEIndexGenerationMask = 0xFF20FC00u, + INDEX_z_ii = SVEIndexGenerationFixed, + INDEX_z_ri = SVEIndexGenerationFixed | 0x00000400u, + INDEX_z_ir = SVEIndexGenerationFixed | 0x00000800u, + INDEX_z_rr = SVEIndexGenerationFixed | 0x00000C00u +}; + +enum SVEInsertGeneralRegisterOp : uint32_t { + SVEInsertGeneralRegisterFixed = 0x05243800u, + SVEInsertGeneralRegisterFMask = 0xFF3FFC00u, + SVEInsertGeneralRegisterMask = 0xFF3FFC00u, + INSR_z_r = SVEInsertGeneralRegisterFixed +}; + +enum SVEInsertSIMDFPScalarRegisterOp : uint32_t { + SVEInsertSIMDFPScalarRegisterFixed = 0x05343800u, + SVEInsertSIMDFPScalarRegisterFMask = 0xFF3FFC00u, + SVEInsertSIMDFPScalarRegisterMask = 0xFF3FFC00u, + INSR_z_v = SVEInsertSIMDFPScalarRegisterFixed +}; + +enum SVEIntAddSubtractImm_UnpredicatedOp : uint32_t { + SVEIntAddSubtractImm_UnpredicatedFixed = 0x2520C000u, + SVEIntAddSubtractImm_UnpredicatedFMask = 0xFF38C000u, + SVEIntAddSubtractImm_UnpredicatedMask = 0xFF3FC000u, + ADD_z_zi = SVEIntAddSubtractImm_UnpredicatedFixed, + SUB_z_zi = SVEIntAddSubtractImm_UnpredicatedFixed | 0x00010000u, + SUBR_z_zi = SVEIntAddSubtractImm_UnpredicatedFixed | 0x00030000u, + SQADD_z_zi = SVEIntAddSubtractImm_UnpredicatedFixed | 0x00040000u, + UQADD_z_zi = SVEIntAddSubtractImm_UnpredicatedFixed | 0x00050000u, + SQSUB_z_zi = SVEIntAddSubtractImm_UnpredicatedFixed | 0x00060000u, + UQSUB_z_zi = SVEIntAddSubtractImm_UnpredicatedFixed | 0x00070000u +}; + +enum SVEIntAddSubtractVectors_PredicatedOp : uint32_t { + SVEIntAddSubtractVectors_PredicatedFixed = 0x04000000u, + SVEIntAddSubtractVectors_PredicatedFMask = 0xFF38E000u, + SVEIntAddSubtractVectors_PredicatedMask 
= 0xFF3FE000u, + ADD_z_p_zz = SVEIntAddSubtractVectors_PredicatedFixed, + SUB_z_p_zz = SVEIntAddSubtractVectors_PredicatedFixed | 0x00010000u, + SUBR_z_p_zz = SVEIntAddSubtractVectors_PredicatedFixed | 0x00030000u +}; + +enum SVEIntArithmeticUnpredicatedOp : uint32_t { + SVEIntArithmeticUnpredicatedFixed = 0x04200000u, + SVEIntArithmeticUnpredicatedFMask = 0xFF20E000u, + SVEIntArithmeticUnpredicatedMask = 0xFF20FC00u, + ADD_z_zz = SVEIntArithmeticUnpredicatedFixed, + SUB_z_zz = SVEIntArithmeticUnpredicatedFixed | 0x00000400u, + SQADD_z_zz = SVEIntArithmeticUnpredicatedFixed | 0x00001000u, + UQADD_z_zz = SVEIntArithmeticUnpredicatedFixed | 0x00001400u, + SQSUB_z_zz = SVEIntArithmeticUnpredicatedFixed | 0x00001800u, + UQSUB_z_zz = SVEIntArithmeticUnpredicatedFixed | 0x00001C00u +}; + +enum SVEIntCompareScalarCountAndLimitOp : uint32_t { + SVEIntCompareScalarCountAndLimitFixed = 0x25200000u, + SVEIntCompareScalarCountAndLimitFMask = 0xFF20E000u, + SVEIntCompareScalarCountAndLimitMask = 0xFF20EC10u, + WHILELT_p_p_rr = SVEIntCompareScalarCountAndLimitFixed | 0x00000400u, + WHILELE_p_p_rr = SVEIntCompareScalarCountAndLimitFixed | 0x00000410u, + WHILELO_p_p_rr = SVEIntCompareScalarCountAndLimitFixed | 0x00000C00u, + WHILELS_p_p_rr = SVEIntCompareScalarCountAndLimitFixed | 0x00000C10u +}; + +enum SVEIntCompareSignedImmOp : uint32_t { + SVEIntCompareSignedImmFixed = 0x25000000u, + SVEIntCompareSignedImmFMask = 0xFF204000u, + SVEIntCompareSignedImmMask = 0xFF20E010u, + CMPGE_p_p_zi = SVEIntCompareSignedImmFixed, + CMPGT_p_p_zi = SVEIntCompareSignedImmFixed | 0x00000010u, + CMPLT_p_p_zi = SVEIntCompareSignedImmFixed | 0x00002000u, + CMPLE_p_p_zi = SVEIntCompareSignedImmFixed | 0x00002010u, + CMPEQ_p_p_zi = SVEIntCompareSignedImmFixed | 0x00008000u, + CMPNE_p_p_zi = SVEIntCompareSignedImmFixed | 0x00008010u +}; + +enum SVEIntCompareUnsignedImmOp : uint32_t { + SVEIntCompareUnsignedImmFixed = 0x24200000u, + SVEIntCompareUnsignedImmFMask = 0xFF200000u, + SVEIntCompareUnsignedImmMask = 0xFF202010u, + CMPHS_p_p_zi = SVEIntCompareUnsignedImmFixed, + CMPHI_p_p_zi = SVEIntCompareUnsignedImmFixed | 0x00000010u, + CMPLO_p_p_zi = SVEIntCompareUnsignedImmFixed | 0x00002000u, + CMPLS_p_p_zi = SVEIntCompareUnsignedImmFixed | 0x00002010u +}; + +enum SVEIntCompareVectorsOp : uint32_t { + SVEIntCompareVectorsFixed = 0x24000000u, + SVEIntCompareVectorsFMask = 0xFF200000u, + SVEIntCompareVectorsMask = 0xFF20E010u, + CMPHS_p_p_zz = SVEIntCompareVectorsFixed, + CMPHI_p_p_zz = SVEIntCompareVectorsFixed | 0x00000010u, + CMPEQ_p_p_zw = SVEIntCompareVectorsFixed | 0x00002000u, + CMPNE_p_p_zw = SVEIntCompareVectorsFixed | 0x00002010u, + CMPGE_p_p_zw = SVEIntCompareVectorsFixed | 0x00004000u, + CMPGT_p_p_zw = SVEIntCompareVectorsFixed | 0x00004010u, + CMPLT_p_p_zw = SVEIntCompareVectorsFixed | 0x00006000u, + CMPLE_p_p_zw = SVEIntCompareVectorsFixed | 0x00006010u, + CMPGE_p_p_zz = SVEIntCompareVectorsFixed | 0x00008000u, + CMPGT_p_p_zz = SVEIntCompareVectorsFixed | 0x00008010u, + CMPEQ_p_p_zz = SVEIntCompareVectorsFixed | 0x0000A000u, + CMPNE_p_p_zz = SVEIntCompareVectorsFixed | 0x0000A010u, + CMPHS_p_p_zw = SVEIntCompareVectorsFixed | 0x0000C000u, + CMPHI_p_p_zw = SVEIntCompareVectorsFixed | 0x0000C010u, + CMPLO_p_p_zw = SVEIntCompareVectorsFixed | 0x0000E000u, + CMPLS_p_p_zw = SVEIntCompareVectorsFixed | 0x0000E010u +}; + +enum SVEIntConvertToFPOp : uint32_t { + SVEIntConvertToFPFixed = 0x6510A000u, + SVEIntConvertToFPFMask = 0xFF38E000u, + SVEIntConvertToFPMask = 0xFFFFE000u, + SCVTF_z_p_z_h2fp16 = SVEIntConvertToFPFixed | 
0x00420000u, + UCVTF_z_p_z_h2fp16 = SVEIntConvertToFPFixed | 0x00430000u, + SCVTF_z_p_z_w2fp16 = SVEIntConvertToFPFixed | 0x00440000u, + UCVTF_z_p_z_w2fp16 = SVEIntConvertToFPFixed | 0x00450000u, + SCVTF_z_p_z_x2fp16 = SVEIntConvertToFPFixed | 0x00460000u, + UCVTF_z_p_z_x2fp16 = SVEIntConvertToFPFixed | 0x00470000u, + SCVTF_z_p_z_w2s = SVEIntConvertToFPFixed | 0x00840000u, + UCVTF_z_p_z_w2s = SVEIntConvertToFPFixed | 0x00850000u, + SCVTF_z_p_z_w2d = SVEIntConvertToFPFixed | 0x00C00000u, + UCVTF_z_p_z_w2d = SVEIntConvertToFPFixed | 0x00C10000u, + SCVTF_z_p_z_x2s = SVEIntConvertToFPFixed | 0x00C40000u, + UCVTF_z_p_z_x2s = SVEIntConvertToFPFixed | 0x00C50000u, + SCVTF_z_p_z_x2d = SVEIntConvertToFPFixed | 0x00C60000u, + UCVTF_z_p_z_x2d = SVEIntConvertToFPFixed | 0x00C70000u +}; + +enum SVEIntDivideVectors_PredicatedOp : uint32_t { + SVEIntDivideVectors_PredicatedFixed = 0x04140000u, + SVEIntDivideVectors_PredicatedFMask = 0xFF3CE000u, + SVEIntDivideVectors_PredicatedMask = 0xFF3FE000u, + SDIV_z_p_zz = SVEIntDivideVectors_PredicatedFixed, + UDIV_z_p_zz = SVEIntDivideVectors_PredicatedFixed | 0x00010000u, + SDIVR_z_p_zz = SVEIntDivideVectors_PredicatedFixed | 0x00020000u, + UDIVR_z_p_zz = SVEIntDivideVectors_PredicatedFixed | 0x00030000u +}; + +enum SVEIntMinMaxDifference_PredicatedOp : uint32_t { + SVEIntMinMaxDifference_PredicatedFixed = 0x04080000u, + SVEIntMinMaxDifference_PredicatedFMask = 0xFF38E000u, + SVEIntMinMaxDifference_PredicatedMask = 0xFF3FE000u, + SMAX_z_p_zz = SVEIntMinMaxDifference_PredicatedFixed, + UMAX_z_p_zz = SVEIntMinMaxDifference_PredicatedFixed | 0x00010000u, + SMIN_z_p_zz = SVEIntMinMaxDifference_PredicatedFixed | 0x00020000u, + UMIN_z_p_zz = SVEIntMinMaxDifference_PredicatedFixed | 0x00030000u, + SABD_z_p_zz = SVEIntMinMaxDifference_PredicatedFixed | 0x00040000u, + UABD_z_p_zz = SVEIntMinMaxDifference_PredicatedFixed | 0x00050000u +}; + +enum SVEIntMinMaxImm_UnpredicatedOp : uint32_t { + SVEIntMinMaxImm_UnpredicatedFixed = 0x2528C000u, + SVEIntMinMaxImm_UnpredicatedFMask = 0xFF38C000u, + SVEIntMinMaxImm_UnpredicatedMask = 0xFF3FE000u, + SMAX_z_zi = SVEIntMinMaxImm_UnpredicatedFixed, + UMAX_z_zi = SVEIntMinMaxImm_UnpredicatedFixed | 0x00010000u, + SMIN_z_zi = SVEIntMinMaxImm_UnpredicatedFixed | 0x00020000u, + UMIN_z_zi = SVEIntMinMaxImm_UnpredicatedFixed | 0x00030000u +}; + +enum SVEIntMulAddPredicatedOp : uint32_t { + SVEIntMulAddPredicatedFixed = 0x04004000u, + SVEIntMulAddPredicatedFMask = 0xFF204000u, + SVEIntMulAddPredicatedMask = 0xFF20E000u, + MLA_z_p_zzz = SVEIntMulAddPredicatedFixed, + MLS_z_p_zzz = SVEIntMulAddPredicatedFixed | 0x00002000u, + MAD_z_p_zzz = SVEIntMulAddPredicatedFixed | 0x00008000u, + MSB_z_p_zzz = SVEIntMulAddPredicatedFixed | 0x0000A000u +}; + +enum SVEIntMulAddUnpredicatedOp : uint32_t { + SVEIntMulAddUnpredicatedFixed = 0x44000000u, + SVEIntMulAddUnpredicatedFMask = 0xFF208000u, + SVEIntMulAddUnpredicatedMask = 0xFF20FC00u, + SDOT_z_zzz = SVEIntMulAddUnpredicatedFixed, + UDOT_z_zzz = SVEIntMulAddUnpredicatedFixed | 0x00000400u +}; + +enum SVEIntMulImm_UnpredicatedOp : uint32_t { + SVEIntMulImm_UnpredicatedFixed = 0x2530C000u, + SVEIntMulImm_UnpredicatedFMask = 0xFF38C000u, + SVEIntMulImm_UnpredicatedMask = 0xFF3FE000u, + MUL_z_zi = SVEIntMulImm_UnpredicatedFixed +}; + +enum SVEIntMulVectors_PredicatedOp : uint32_t { + SVEIntMulVectors_PredicatedFixed = 0x04100000u, + SVEIntMulVectors_PredicatedFMask = 0xFF3CE000u, + SVEIntMulVectors_PredicatedMask = 0xFF3FE000u, + MUL_z_p_zz = SVEIntMulVectors_PredicatedFixed, + SMULH_z_p_zz = 
SVEIntMulVectors_PredicatedFixed | 0x00020000u, + UMULH_z_p_zz = SVEIntMulVectors_PredicatedFixed | 0x00030000u +}; + +enum SVEMovprfxOp : uint32_t { + SVEMovprfxFixed = 0x04002000u, + SVEMovprfxFMask = 0xFF20E000u, + SVEMovprfxMask = 0xFF3EE000u, + MOVPRFX_z_p_z = SVEMovprfxFixed | 0x00100000u +}; + +enum SVEIntReductionOp : uint32_t { + SVEIntReductionFixed = 0x04002000u, + SVEIntReductionFMask = 0xFF20E000u, + SVEIntReductionMask = 0xFF3FE000u, + SADDV_r_p_z = SVEIntReductionFixed, + UADDV_r_p_z = SVEIntReductionFixed | 0x00010000u, + SMAXV_r_p_z = SVEIntReductionFixed | 0x00080000u, + UMAXV_r_p_z = SVEIntReductionFixed | 0x00090000u, + SMINV_r_p_z = SVEIntReductionFixed | 0x000A0000u, + UMINV_r_p_z = SVEIntReductionFixed | 0x000B0000u +}; + +enum SVEIntReductionLogicalOp : uint32_t { + SVEIntReductionLogicalFixed = 0x04182000u, + SVEIntReductionLogicalFMask = 0xFF38E000u, + SVEIntReductionLogicalMask = 0xFF3FE000u, + ORV_r_p_z = SVEIntReductionLogicalFixed | 0x00180000u, + EORV_r_p_z = SVEIntReductionLogicalFixed | 0x00190000u, + ANDV_r_p_z = SVEIntReductionLogicalFixed | 0x001A0000u +}; + +enum SVEIntUnaryArithmeticPredicatedOp : uint32_t { + SVEIntUnaryArithmeticPredicatedFixed = 0x0400A000u, + SVEIntUnaryArithmeticPredicatedFMask = 0xFF20E000u, + SVEIntUnaryArithmeticPredicatedMask = 0xFF3FE000u, + SXTB_z_p_z = SVEIntUnaryArithmeticPredicatedFixed | 0x00100000u, + UXTB_z_p_z = SVEIntUnaryArithmeticPredicatedFixed | 0x00110000u, + SXTH_z_p_z = SVEIntUnaryArithmeticPredicatedFixed | 0x00120000u, + UXTH_z_p_z = SVEIntUnaryArithmeticPredicatedFixed | 0x00130000u, + SXTW_z_p_z = SVEIntUnaryArithmeticPredicatedFixed | 0x00140000u, + UXTW_z_p_z = SVEIntUnaryArithmeticPredicatedFixed | 0x00150000u, + ABS_z_p_z = SVEIntUnaryArithmeticPredicatedFixed | 0x00160000u, + NEG_z_p_z = SVEIntUnaryArithmeticPredicatedFixed | 0x00170000u, + CLS_z_p_z = SVEIntUnaryArithmeticPredicatedFixed | 0x00180000u, + CLZ_z_p_z = SVEIntUnaryArithmeticPredicatedFixed | 0x00190000u, + CNT_z_p_z = SVEIntUnaryArithmeticPredicatedFixed | 0x001A0000u, + CNOT_z_p_z = SVEIntUnaryArithmeticPredicatedFixed | 0x001B0000u, + FABS_z_p_z = SVEIntUnaryArithmeticPredicatedFixed | 0x001C0000u, + FNEG_z_p_z = SVEIntUnaryArithmeticPredicatedFixed | 0x001D0000u, + NOT_z_p_z = SVEIntUnaryArithmeticPredicatedFixed | 0x001E0000u +}; + +enum SVELoadAndBroadcastElementOp : uint32_t { + SVELoadAndBroadcastElementFixed = 0x84408000u, + SVELoadAndBroadcastElementFMask = 0xFE408000u, + SVELoadAndBroadcastElementMask = 0xFFC0E000u, + LD1RB_z_p_bi_u8 = SVELoadAndBroadcastElementFixed, + LD1RB_z_p_bi_u16 = SVELoadAndBroadcastElementFixed | 0x00002000u, + LD1RB_z_p_bi_u32 = SVELoadAndBroadcastElementFixed | 0x00004000u, + LD1RB_z_p_bi_u64 = SVELoadAndBroadcastElementFixed | 0x00006000u, + LD1RSW_z_p_bi_s64 = SVELoadAndBroadcastElementFixed | 0x00800000u, + LD1RH_z_p_bi_u16 = SVELoadAndBroadcastElementFixed | 0x00802000u, + LD1RH_z_p_bi_u32 = SVELoadAndBroadcastElementFixed | 0x00804000u, + LD1RH_z_p_bi_u64 = SVELoadAndBroadcastElementFixed | 0x00806000u, + LD1RSH_z_p_bi_s64 = SVELoadAndBroadcastElementFixed | 0x01000000u, + LD1RSH_z_p_bi_s32 = SVELoadAndBroadcastElementFixed | 0x01002000u, + LD1RW_z_p_bi_u32 = SVELoadAndBroadcastElementFixed | 0x01004000u, + LD1RW_z_p_bi_u64 = SVELoadAndBroadcastElementFixed | 0x01006000u, + LD1RSB_z_p_bi_s64 = SVELoadAndBroadcastElementFixed | 0x01800000u, + LD1RSB_z_p_bi_s32 = SVELoadAndBroadcastElementFixed | 0x01802000u, + LD1RSB_z_p_bi_s16 = SVELoadAndBroadcastElementFixed | 0x01804000u, + LD1RD_z_p_bi_u64 = 
SVELoadAndBroadcastElementFixed | 0x01806000u +}; + +enum SVELoadAndBroadcastQuadword_ScalarPlusImmOp : uint32_t { + SVELoadAndBroadcastQuadword_ScalarPlusImmFixed = 0xA4002000u, + SVELoadAndBroadcastQuadword_ScalarPlusImmFMask = 0xFE10E000u, + SVELoadAndBroadcastQuadword_ScalarPlusImmMask = 0xFFF0E000u, + LD1RQB_z_p_bi_u8 = SVELoadAndBroadcastQuadword_ScalarPlusImmFixed, + LD1RQH_z_p_bi_u16 = SVELoadAndBroadcastQuadword_ScalarPlusImmFixed | 0x00800000u, + LD1RQW_z_p_bi_u32 = SVELoadAndBroadcastQuadword_ScalarPlusImmFixed | 0x01000000u, + LD1RQD_z_p_bi_u64 = SVELoadAndBroadcastQuadword_ScalarPlusImmFixed | 0x01800000u +}; + +enum SVELoadAndBroadcastQuadword_ScalarPlusScalarOp : uint32_t { + SVELoadAndBroadcastQuadword_ScalarPlusScalarFixed = 0xA4000000u, + SVELoadAndBroadcastQuadword_ScalarPlusScalarFMask = 0xFE00E000u, + SVELoadAndBroadcastQuadword_ScalarPlusScalarMask = 0xFFE0E000u, + LD1RQB_z_p_br_contiguous = SVELoadAndBroadcastQuadword_ScalarPlusScalarFixed, + LD1RQH_z_p_br_contiguous = SVELoadAndBroadcastQuadword_ScalarPlusScalarFixed | 0x00800000u, + LD1RQW_z_p_br_contiguous = SVELoadAndBroadcastQuadword_ScalarPlusScalarFixed | 0x01000000u, + LD1RQD_z_p_br_contiguous = SVELoadAndBroadcastQuadword_ScalarPlusScalarFixed | 0x01800000u +}; + +enum SVELoadMultipleStructures_ScalarPlusImmOp : uint32_t { + SVELoadMultipleStructures_ScalarPlusImmFixed = 0xA400E000u, + SVELoadMultipleStructures_ScalarPlusImmFMask = 0xFE10E000u, + SVELoadMultipleStructures_ScalarPlusImmMask = 0xFFF0E000u, + LD2B_z_p_bi_contiguous = SVELoadMultipleStructures_ScalarPlusImmFixed | 0x00200000u, + LD3B_z_p_bi_contiguous = SVELoadMultipleStructures_ScalarPlusImmFixed | 0x00400000u, + LD4B_z_p_bi_contiguous = SVELoadMultipleStructures_ScalarPlusImmFixed | 0x00600000u, + LD2H_z_p_bi_contiguous = SVELoadMultipleStructures_ScalarPlusImmFixed | 0x00A00000u, + LD3H_z_p_bi_contiguous = SVELoadMultipleStructures_ScalarPlusImmFixed | 0x00C00000u, + LD4H_z_p_bi_contiguous = SVELoadMultipleStructures_ScalarPlusImmFixed | 0x00E00000u, + LD2W_z_p_bi_contiguous = SVELoadMultipleStructures_ScalarPlusImmFixed | 0x01200000u, + LD3W_z_p_bi_contiguous = SVELoadMultipleStructures_ScalarPlusImmFixed | 0x01400000u, + LD4W_z_p_bi_contiguous = SVELoadMultipleStructures_ScalarPlusImmFixed | 0x01600000u, + LD2D_z_p_bi_contiguous = SVELoadMultipleStructures_ScalarPlusImmFixed | 0x01A00000u, + LD3D_z_p_bi_contiguous = SVELoadMultipleStructures_ScalarPlusImmFixed | 0x01C00000u, + LD4D_z_p_bi_contiguous = SVELoadMultipleStructures_ScalarPlusImmFixed | 0x01E00000u +}; + +enum SVELoadMultipleStructures_ScalarPlusScalarOp : uint32_t { + SVELoadMultipleStructures_ScalarPlusScalarFixed = 0xA400C000u, + SVELoadMultipleStructures_ScalarPlusScalarFMask = 0xFE00E000u, + SVELoadMultipleStructures_ScalarPlusScalarMask = 0xFFE0E000u, + LD2B_z_p_br_contiguous = SVELoadMultipleStructures_ScalarPlusScalarFixed | 0x00200000u, + LD3B_z_p_br_contiguous = SVELoadMultipleStructures_ScalarPlusScalarFixed | 0x00400000u, + LD4B_z_p_br_contiguous = SVELoadMultipleStructures_ScalarPlusScalarFixed | 0x00600000u, + LD2H_z_p_br_contiguous = SVELoadMultipleStructures_ScalarPlusScalarFixed | 0x00A00000u, + LD3H_z_p_br_contiguous = SVELoadMultipleStructures_ScalarPlusScalarFixed | 0x00C00000u, + LD4H_z_p_br_contiguous = SVELoadMultipleStructures_ScalarPlusScalarFixed | 0x00E00000u, + LD2W_z_p_br_contiguous = SVELoadMultipleStructures_ScalarPlusScalarFixed | 0x01200000u, + LD3W_z_p_br_contiguous = SVELoadMultipleStructures_ScalarPlusScalarFixed | 0x01400000u, + 
LD4W_z_p_br_contiguous = SVELoadMultipleStructures_ScalarPlusScalarFixed | 0x01600000u, + LD2D_z_p_br_contiguous = SVELoadMultipleStructures_ScalarPlusScalarFixed | 0x01A00000u, + LD3D_z_p_br_contiguous = SVELoadMultipleStructures_ScalarPlusScalarFixed | 0x01C00000u, + LD4D_z_p_br_contiguous = SVELoadMultipleStructures_ScalarPlusScalarFixed | 0x01E00000u +}; + +enum SVELoadPredicateRegisterOp : uint32_t { + SVELoadPredicateRegisterFixed = 0x85800000u, + SVELoadPredicateRegisterFMask = 0xFFC0E010u, + SVELoadPredicateRegisterMask = 0xFFC0E010u, + LDR_p_bi = SVELoadPredicateRegisterFixed +}; + +enum SVELoadVectorRegisterOp : uint32_t { + SVELoadVectorRegisterFixed = 0x85804000u, + SVELoadVectorRegisterFMask = 0xFFC0E000u, + SVELoadVectorRegisterMask = 0xFFC0E000u, + LDR_z_bi = SVELoadVectorRegisterFixed +}; + +enum SVEMulIndexOp : uint32_t { + SVEMulIndexFixed = 0x44200000u, + SVEMulIndexFMask = 0xFF200000u, + SVEMulIndexMask = 0xFFE0FC00u, + SDOT_z_zzzi_s = SVEMulIndexFixed | 0x00800000u, + UDOT_z_zzzi_s = SVEMulIndexFixed | 0x00800400u, + SDOT_z_zzzi_d = SVEMulIndexFixed | 0x00C00000u, + UDOT_z_zzzi_d = SVEMulIndexFixed | 0x00C00400u +}; + +enum SVEPartitionBreakConditionOp : uint32_t { + SVEPartitionBreakConditionFixed = 0x25104000u, + SVEPartitionBreakConditionFMask = 0xFF3FC200u, + SVEPartitionBreakConditionMask = 0xFFFFC200u, + BRKA_p_p_p = SVEPartitionBreakConditionFixed, + BRKAS_p_p_p_z = SVEPartitionBreakConditionFixed | 0x00400000u, + BRKB_p_p_p = SVEPartitionBreakConditionFixed | 0x00800000u, + BRKBS_p_p_p_z = SVEPartitionBreakConditionFixed | 0x00C00000u +}; + +enum SVEPermutePredicateElementsOp : uint32_t { + SVEPermutePredicateElementsFixed = 0x05204000u, + SVEPermutePredicateElementsFMask = 0xFF30E210u, + SVEPermutePredicateElementsMask = 0xFF30FE10u, + ZIP1_p_pp = SVEPermutePredicateElementsFixed, + ZIP2_p_pp = SVEPermutePredicateElementsFixed | 0x00000400u, + UZP1_p_pp = SVEPermutePredicateElementsFixed | 0x00000800u, + UZP2_p_pp = SVEPermutePredicateElementsFixed | 0x00000C00u, + TRN1_p_pp = SVEPermutePredicateElementsFixed | 0x00001000u, + TRN2_p_pp = SVEPermutePredicateElementsFixed | 0x00001400u +}; + +enum SVEPermuteVectorExtractOp : uint32_t { + SVEPermuteVectorExtractFixed = 0x05200000u, + SVEPermuteVectorExtractFMask = 0xFF20E000u, + SVEPermuteVectorExtractMask = 0xFFE0E000u, + EXT_z_zi_des = SVEPermuteVectorExtractFixed +}; + +enum SVEPermuteVectorInterleavingOp : uint32_t { + SVEPermuteVectorInterleavingFixed = 0x05206000u, + SVEPermuteVectorInterleavingFMask = 0xFF20E000u, + SVEPermuteVectorInterleavingMask = 0xFF20FC00u, + ZIP1_z_zz = SVEPermuteVectorInterleavingFixed, + ZIP2_z_zz = SVEPermuteVectorInterleavingFixed | 0x00000400u, + UZP1_z_zz = SVEPermuteVectorInterleavingFixed | 0x00000800u, + UZP2_z_zz = SVEPermuteVectorInterleavingFixed | 0x00000C00u, + TRN1_z_zz = SVEPermuteVectorInterleavingFixed | 0x00001000u, + TRN2_z_zz = SVEPermuteVectorInterleavingFixed | 0x00001400u +}; + +enum SVEPredicateCountOp : uint32_t { + SVEPredicateCountFixed = 0x25208000u, + SVEPredicateCountFMask = 0xFF38C000u, + SVEPredicateCountMask = 0xFF3FC200u, + CNTP_r_p_p = SVEPredicateCountFixed +}; + +enum SVEPredicateFirstActiveOp : uint32_t { + SVEPredicateFirstActiveFixed = 0x2518C000u, + SVEPredicateFirstActiveFMask = 0xFF3FFE10u, + SVEPredicateFirstActiveMask = 0xFFFFFE10u, + PFIRST_p_p_p = SVEPredicateFirstActiveFixed | 0x00400000u +}; + +enum SVEPredicateInitializeOp : uint32_t { + SVEPredicateInitializeFixed = 0x2518E000u, + SVEPredicateInitializeFMask = 0xFF3EFC10u, + 
SVEPredicateInitializeMask = 0xFF3FFC10u, + SVEPredicateInitializeSetFlagsBit = 0x00010000u, + PTRUE_p_s = SVEPredicateInitializeFixed | 0x00000000u, + PTRUES_p_s = SVEPredicateInitializeFixed | SVEPredicateInitializeSetFlagsBit +}; + +enum SVEPredicateLogicalOp : uint32_t { + SVEPredicateLogicalFixed = 0x25004000u, + SVEPredicateLogicalFMask = 0xFF30C000u, + SVEPredicateLogicalMask = 0xFFF0C210u, + SVEPredicateLogicalSetFlagsBit = 0x00400000u, + AND_p_p_pp_z = SVEPredicateLogicalFixed, + ANDS_p_p_pp_z = AND_p_p_pp_z | SVEPredicateLogicalSetFlagsBit, + BIC_p_p_pp_z = SVEPredicateLogicalFixed | 0x00000010u, + BICS_p_p_pp_z = BIC_p_p_pp_z | SVEPredicateLogicalSetFlagsBit, + EOR_p_p_pp_z = SVEPredicateLogicalFixed | 0x00000200u, + EORS_p_p_pp_z = EOR_p_p_pp_z | SVEPredicateLogicalSetFlagsBit, + ORR_p_p_pp_z = SVEPredicateLogicalFixed | 0x00800000u, + ORRS_p_p_pp_z = ORR_p_p_pp_z | SVEPredicateLogicalSetFlagsBit, + ORN_p_p_pp_z = SVEPredicateLogicalFixed | 0x00800010u, + ORNS_p_p_pp_z = ORN_p_p_pp_z | SVEPredicateLogicalSetFlagsBit, + NAND_p_p_pp_z = SVEPredicateLogicalFixed | 0x00800210u, + NANDS_p_p_pp_z = NAND_p_p_pp_z | SVEPredicateLogicalSetFlagsBit, + NOR_p_p_pp_z = SVEPredicateLogicalFixed | 0x00800200u, + NORS_p_p_pp_z = NOR_p_p_pp_z | SVEPredicateLogicalSetFlagsBit, + SEL_p_p_pp = SVEPredicateLogicalFixed | 0x00000210u +}; + +enum SVEPredicateNextActiveOp : uint32_t { + SVEPredicateNextActiveFixed = 0x2519C400u, + SVEPredicateNextActiveFMask = 0xFF3FFE10u, + SVEPredicateNextActiveMask = 0xFF3FFE10u, + PNEXT_p_p_p = SVEPredicateNextActiveFixed +}; + +enum SVEPredicateReadFromFFR_PredicatedOp : uint32_t { + SVEPredicateReadFromFFR_PredicatedFixed = 0x2518F000u, + SVEPredicateReadFromFFR_PredicatedFMask = 0xFF3FFE10u, + SVEPredicateReadFromFFR_PredicatedMask = 0xFFFFFE10u, + RDFFR_p_p_f = SVEPredicateReadFromFFR_PredicatedFixed, + RDFFRS_p_p_f = SVEPredicateReadFromFFR_PredicatedFixed | 0x00400000u +}; + +enum SVEPredicateReadFromFFR_UnpredicatedOp : uint32_t { + SVEPredicateReadFromFFR_UnpredicatedFixed = 0x2519F000u, + SVEPredicateReadFromFFR_UnpredicatedFMask = 0xFF3FFFF0u, + SVEPredicateReadFromFFR_UnpredicatedMask = 0xFFFFFFF0u, + RDFFR_p_f = SVEPredicateReadFromFFR_UnpredicatedFixed +}; + +enum SVEPredicateTestOp : uint32_t { + SVEPredicateTestFixed = 0x2510C000u, + SVEPredicateTestFMask = 0xFF3FC210u, + SVEPredicateTestMask = 0xFFFFC21Fu, + PTEST_p_p = SVEPredicateTestFixed | 0x00400000u +}; + +enum SVEPredicateZeroOp : uint32_t { + SVEPredicateZeroFixed = 0x2518E400u, + SVEPredicateZeroFMask = 0xFF3FFFF0u, + SVEPredicateZeroMask = 0xFFFFFFF0u, + PFALSE_p = SVEPredicateZeroFixed +}; + +enum SVEPropagateBreakOp : uint32_t { + SVEPropagateBreakFixed = 0x2500C000u, + SVEPropagateBreakFMask = 0xFF30C000u, + SVEPropagateBreakMask = 0xFFF0C210u, + BRKPA_p_p_pp = SVEPropagateBreakFixed, + BRKPB_p_p_pp = SVEPropagateBreakFixed | 0x00000010u, + BRKPAS_p_p_pp = SVEPropagateBreakFixed | 0x00400000u, + BRKPBS_p_p_pp = SVEPropagateBreakFixed | 0x00400010u +}; + +enum SVEPropagateBreakToNextPartitionOp : uint32_t { + SVEPropagateBreakToNextPartitionFixed = 0x25184000u, + SVEPropagateBreakToNextPartitionFMask = 0xFFBFC210u, + SVEPropagateBreakToNextPartitionMask = 0xFFFFC210u, + BRKN_p_p_pp = SVEPropagateBreakToNextPartitionFixed, + BRKNS_p_p_pp = SVEPropagateBreakToNextPartitionFixed | 0x00400000u +}; + +enum SVEReversePredicateElementsOp : uint32_t { + SVEReversePredicateElementsFixed = 0x05344000u, + SVEReversePredicateElementsFMask = 0xFF3FFE10u, + SVEReversePredicateElementsMask = 
0xFF3FFE10u, + REV_p_p = SVEReversePredicateElementsFixed +}; + +enum SVEReverseVectorElementsOp : uint32_t { + SVEReverseVectorElementsFixed = 0x05383800u, + SVEReverseVectorElementsFMask = 0xFF3FFC00u, + SVEReverseVectorElementsMask = 0xFF3FFC00u, + REV_z_z = SVEReverseVectorElementsFixed +}; + +enum SVEReverseWithinElementsOp : uint32_t { + SVEReverseWithinElementsFixed = 0x05248000u, + SVEReverseWithinElementsFMask = 0xFF3CE000u, + SVEReverseWithinElementsMask = 0xFF3FE000u, + REVB_z_z = SVEReverseWithinElementsFixed, + REVH_z_z = SVEReverseWithinElementsFixed | 0x00010000u, + REVW_z_z = SVEReverseWithinElementsFixed | 0x00020000u, + RBIT_z_p_z = SVEReverseWithinElementsFixed | 0x00030000u +}; + +enum SVESaturatingIncDecRegisterByElementCountOp : uint32_t { + SVESaturatingIncDecRegisterByElementCountFixed = 0x0420F000u, + SVESaturatingIncDecRegisterByElementCountFMask = 0xFF20F000u, + SVESaturatingIncDecRegisterByElementCountMask = 0xFFF0FC00u, + SQINCB_r_rs_sx = SVESaturatingIncDecRegisterByElementCountFixed, + UQINCB_r_rs_uw = SVESaturatingIncDecRegisterByElementCountFixed | 0x00000400u, + SQDECB_r_rs_sx = SVESaturatingIncDecRegisterByElementCountFixed | 0x00000800u, + UQDECB_r_rs_uw = SVESaturatingIncDecRegisterByElementCountFixed | 0x00000C00u, + SQINCB_r_rs_x = SVESaturatingIncDecRegisterByElementCountFixed | 0x00100000u, + UQINCB_r_rs_x = SVESaturatingIncDecRegisterByElementCountFixed | 0x00100400u, + SQDECB_r_rs_x = SVESaturatingIncDecRegisterByElementCountFixed | 0x00100800u, + UQDECB_r_rs_x = SVESaturatingIncDecRegisterByElementCountFixed | 0x00100C00u, + SQINCH_r_rs_sx = SVESaturatingIncDecRegisterByElementCountFixed | 0x00400000u, + UQINCH_r_rs_uw = SVESaturatingIncDecRegisterByElementCountFixed | 0x00400400u, + SQDECH_r_rs_sx = SVESaturatingIncDecRegisterByElementCountFixed | 0x00400800u, + UQDECH_r_rs_uw = SVESaturatingIncDecRegisterByElementCountFixed | 0x00400C00u, + SQINCH_r_rs_x = SVESaturatingIncDecRegisterByElementCountFixed | 0x00500000u, + UQINCH_r_rs_x = SVESaturatingIncDecRegisterByElementCountFixed | 0x00500400u, + SQDECH_r_rs_x = SVESaturatingIncDecRegisterByElementCountFixed | 0x00500800u, + UQDECH_r_rs_x = SVESaturatingIncDecRegisterByElementCountFixed | 0x00500C00u, + SQINCW_r_rs_sx = SVESaturatingIncDecRegisterByElementCountFixed | 0x00800000u, + UQINCW_r_rs_uw = SVESaturatingIncDecRegisterByElementCountFixed | 0x00800400u, + SQDECW_r_rs_sx = SVESaturatingIncDecRegisterByElementCountFixed | 0x00800800u, + UQDECW_r_rs_uw = SVESaturatingIncDecRegisterByElementCountFixed | 0x00800C00u, + SQINCW_r_rs_x = SVESaturatingIncDecRegisterByElementCountFixed | 0x00900000u, + UQINCW_r_rs_x = SVESaturatingIncDecRegisterByElementCountFixed | 0x00900400u, + SQDECW_r_rs_x = SVESaturatingIncDecRegisterByElementCountFixed | 0x00900800u, + UQDECW_r_rs_x = SVESaturatingIncDecRegisterByElementCountFixed | 0x00900C00u, + SQINCD_r_rs_sx = SVESaturatingIncDecRegisterByElementCountFixed | 0x00C00000u, + UQINCD_r_rs_uw = SVESaturatingIncDecRegisterByElementCountFixed | 0x00C00400u, + SQDECD_r_rs_sx = SVESaturatingIncDecRegisterByElementCountFixed | 0x00C00800u, + UQDECD_r_rs_uw = SVESaturatingIncDecRegisterByElementCountFixed | 0x00C00C00u, + SQINCD_r_rs_x = SVESaturatingIncDecRegisterByElementCountFixed | 0x00D00000u, + UQINCD_r_rs_x = SVESaturatingIncDecRegisterByElementCountFixed | 0x00D00400u, + SQDECD_r_rs_x = SVESaturatingIncDecRegisterByElementCountFixed | 0x00D00800u, + UQDECD_r_rs_x = SVESaturatingIncDecRegisterByElementCountFixed | 0x00D00C00u +}; + +enum 
SVESaturatingIncDecVectorByElementCountOp : uint32_t { + SVESaturatingIncDecVectorByElementCountFixed = 0x0420C000u, + SVESaturatingIncDecVectorByElementCountFMask = 0xFF30F000u, + SVESaturatingIncDecVectorByElementCountMask = 0xFFF0FC00u, + SQINCH_z_zs = SVESaturatingIncDecVectorByElementCountFixed | 0x00400000u, + UQINCH_z_zs = SVESaturatingIncDecVectorByElementCountFixed | 0x00400400u, + SQDECH_z_zs = SVESaturatingIncDecVectorByElementCountFixed | 0x00400800u, + UQDECH_z_zs = SVESaturatingIncDecVectorByElementCountFixed | 0x00400C00u, + SQINCW_z_zs = SVESaturatingIncDecVectorByElementCountFixed | 0x00800000u, + UQINCW_z_zs = SVESaturatingIncDecVectorByElementCountFixed | 0x00800400u, + SQDECW_z_zs = SVESaturatingIncDecVectorByElementCountFixed | 0x00800800u, + UQDECW_z_zs = SVESaturatingIncDecVectorByElementCountFixed | 0x00800C00u, + SQINCD_z_zs = SVESaturatingIncDecVectorByElementCountFixed | 0x00C00000u, + UQINCD_z_zs = SVESaturatingIncDecVectorByElementCountFixed | 0x00C00400u, + SQDECD_z_zs = SVESaturatingIncDecVectorByElementCountFixed | 0x00C00800u, + UQDECD_z_zs = SVESaturatingIncDecVectorByElementCountFixed | 0x00C00C00u +}; + +enum SVEStackFrameAdjustmentOp { + SVEStackFrameAdjustmentFixed = 0x04205000u, + SVEStackFrameAdjustmentFMask = 0xFFA0F800u, + SVEStackFrameAdjustmentMask = 0xFFE0F800u, + ADDVL_r_ri = SVEStackFrameAdjustmentFixed, + ADDPL_r_ri = SVEStackFrameAdjustmentFixed | 0x00400000u +}; + +enum SVEStackFrameSizeOp : uint32_t { + SVEStackFrameSizeFixed = 0x04BF5000u, + SVEStackFrameSizeFMask = 0xFFFFF800u, + SVEStackFrameSizeMask = 0xFFFFF800u, + RDVL_r_i = SVEStackFrameSizeFixed +}; + +enum SVEStoreMultipleStructures_ScalarPlusImmOp : uint32_t { + SVEStoreMultipleStructures_ScalarPlusImmFixed = 0xE410E000u, + SVEStoreMultipleStructures_ScalarPlusImmFMask = 0xFE10E000u, + SVEStoreMultipleStructures_ScalarPlusImmMask = 0xFFF0E000u, + ST2B_z_p_bi_contiguous = SVEStoreMultipleStructures_ScalarPlusImmFixed | 0x00200000u, + ST3B_z_p_bi_contiguous = SVEStoreMultipleStructures_ScalarPlusImmFixed | 0x00400000u, + ST4B_z_p_bi_contiguous = SVEStoreMultipleStructures_ScalarPlusImmFixed | 0x00600000u, + ST2H_z_p_bi_contiguous = SVEStoreMultipleStructures_ScalarPlusImmFixed | 0x00A00000u, + ST3H_z_p_bi_contiguous = SVEStoreMultipleStructures_ScalarPlusImmFixed | 0x00C00000u, + ST4H_z_p_bi_contiguous = SVEStoreMultipleStructures_ScalarPlusImmFixed | 0x00E00000u, + ST2W_z_p_bi_contiguous = SVEStoreMultipleStructures_ScalarPlusImmFixed | 0x01200000u, + ST3W_z_p_bi_contiguous = SVEStoreMultipleStructures_ScalarPlusImmFixed | 0x01400000u, + ST4W_z_p_bi_contiguous = SVEStoreMultipleStructures_ScalarPlusImmFixed | 0x01600000u, + ST2D_z_p_bi_contiguous = SVEStoreMultipleStructures_ScalarPlusImmFixed | 0x01A00000u, + ST3D_z_p_bi_contiguous = SVEStoreMultipleStructures_ScalarPlusImmFixed | 0x01C00000u, + ST4D_z_p_bi_contiguous = SVEStoreMultipleStructures_ScalarPlusImmFixed | 0x01E00000u +}; + +enum SVEStoreMultipleStructures_ScalarPlusScalarOp : uint32_t { + SVEStoreMultipleStructures_ScalarPlusScalarFixed = 0xE4006000u, + SVEStoreMultipleStructures_ScalarPlusScalarFMask = 0xFE00E000u, + SVEStoreMultipleStructures_ScalarPlusScalarMask = 0xFFE0E000u, + ST2B_z_p_br_contiguous = SVEStoreMultipleStructures_ScalarPlusScalarFixed | 0x00200000u, + ST3B_z_p_br_contiguous = SVEStoreMultipleStructures_ScalarPlusScalarFixed | 0x00400000u, + ST4B_z_p_br_contiguous = SVEStoreMultipleStructures_ScalarPlusScalarFixed | 0x00600000u, + ST2H_z_p_br_contiguous = 
SVEStoreMultipleStructures_ScalarPlusScalarFixed | 0x00A00000u, + ST3H_z_p_br_contiguous = SVEStoreMultipleStructures_ScalarPlusScalarFixed | 0x00C00000u, + ST4H_z_p_br_contiguous = SVEStoreMultipleStructures_ScalarPlusScalarFixed | 0x00E00000u, + ST2W_z_p_br_contiguous = SVEStoreMultipleStructures_ScalarPlusScalarFixed | 0x01200000u, + ST3W_z_p_br_contiguous = SVEStoreMultipleStructures_ScalarPlusScalarFixed | 0x01400000u, + ST4W_z_p_br_contiguous = SVEStoreMultipleStructures_ScalarPlusScalarFixed | 0x01600000u, + ST2D_z_p_br_contiguous = SVEStoreMultipleStructures_ScalarPlusScalarFixed | 0x01A00000u, + ST3D_z_p_br_contiguous = SVEStoreMultipleStructures_ScalarPlusScalarFixed | 0x01C00000u, + ST4D_z_p_br_contiguous = SVEStoreMultipleStructures_ScalarPlusScalarFixed | 0x01E00000u +}; + +enum SVEStorePredicateRegisterOp : uint32_t { + SVEStorePredicateRegisterFixed = 0xE5800000u, + SVEStorePredicateRegisterFMask = 0xFFC0E010u, + SVEStorePredicateRegisterMask = 0xFFC0E010u, + STR_p_bi = SVEStorePredicateRegisterFixed +}; + +enum SVEStoreVectorRegisterOp : uint32_t { + SVEStoreVectorRegisterFixed = 0xE5804000u, + SVEStoreVectorRegisterFMask = 0xFFC0E000u, + SVEStoreVectorRegisterMask = 0xFFC0E000u, + STR_z_bi = SVEStoreVectorRegisterFixed +}; + +enum SVETableLookupOp : uint32_t { + SVETableLookupFixed = 0x05203000u, + SVETableLookupFMask = 0xFF20FC00u, + SVETableLookupMask = 0xFF20FC00u, + TBL_z_zz_1 = SVETableLookupFixed +}; + +enum SVEUnpackPredicateElementsOp : uint32_t { + SVEUnpackPredicateElementsFixed = 0x05304000u, + SVEUnpackPredicateElementsFMask = 0xFFFEFE10u, + SVEUnpackPredicateElementsMask = 0xFFFFFE10u, + PUNPKLO_p_p = SVEUnpackPredicateElementsFixed, + PUNPKHI_p_p = SVEUnpackPredicateElementsFixed | 0x00010000u +}; + +enum SVEUnpackVectorElementsOp : uint32_t { + SVEUnpackVectorElementsFixed = 0x05303800u, + SVEUnpackVectorElementsFMask = 0xFF3CFC00u, + SVEUnpackVectorElementsMask = 0xFF3FFC00u, + SUNPKLO_z_z = SVEUnpackVectorElementsFixed, + SUNPKHI_z_z = SVEUnpackVectorElementsFixed | 0x00010000u, + UUNPKLO_z_z = SVEUnpackVectorElementsFixed | 0x00020000u, + UUNPKHI_z_z = SVEUnpackVectorElementsFixed | 0x00030000u +}; + +enum SVEVectorSelectOp : uint32_t { + SVEVectorSelectFixed = 0x0520C000u, + SVEVectorSelectFMask = 0xFF20C000u, + SVEVectorSelectMask = 0xFF20C000u, + SEL_z_p_zz = SVEVectorSelectFixed +}; + +enum SVEVectorSpliceOp : uint32_t { + SVEVectorSpliceFixed = 0x052C8000u, + SVEVectorSpliceFMask = 0xFF3FE000u, + SVEVectorSpliceMask = 0xFF3FE000u, + SPLICE_z_p_zz_des = SVEVectorSpliceFixed +}; + +enum ReservedOp : uint32_t { + ReservedFixed = 0x00000000u, + ReservedFMask = 0x1E000000u, + ReservedMask = 0xFFFF0000u, + UDF = ReservedFixed | 0x00000000u +}; + +// Unimplemented and unallocated instructions. These are defined to make fixed +// bit assertion easier. +enum UnimplementedOp : uint32_t { + UnimplementedFixed = 0x00000000u, + UnimplementedFMask = 0x00000000u +}; + +enum UnallocatedOp : uint32_t { + UnallocatedFixed = 0x00000000u, + UnallocatedFMask = 0x00000000u +}; + +// Re-enable `clang-format` after the `enum`s. 
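The Fixed/FMask/Mask triplets in the tables above follow a consistent idiom: FMask selects the bits that identify an instruction group and Fixed is the value those bits take, while Mask also covers the bits that pick out the individual forms listed inside the group. Below is a minimal sketch of how a raw 32-bit encoding could be classified with these constants (the helper names are hypothetical, and the enum values from this header are assumed to be in scope):

    #include <cstdint>

    // True if `encoding` belongs to the SVEMulIndex group.
    static bool IsSVEMulIndexGroup(uint32_t encoding) {
      return (encoding & SVEMulIndexFMask) == SVEMulIndexFixed;
    }

    // True if `encoding` is specifically UDOT_z_zzzi_s (indexed dot product,
    // S-sized elements), regardless of its register and index operands.
    static bool IsUDOTIndexedS(uint32_t encoding) {
      return (encoding & SVEMulIndexMask) == UDOT_z_zzzi_s;
    }

    // For example, 0x44A00400u (UDOT_z_zzzi_s with all operand fields zero)
    // satisfies both predicates.

The enums themselves only define the constants; how they are consumed is up to the decoder and disassembler sources, so the helpers above are purely illustrative.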
+// clang-format on + +} // namespace aarch64 +} // namespace vixl + +#ifdef __clang__ +#pragma clang diagnostic pop +#endif + +#endif // VIXL_AARCH64_CONSTANTS_AARCH64_H_ diff --git a/3rdparty/vixl/include/vixl/aarch64/cpu-aarch64.h b/3rdparty/vixl/include/vixl/aarch64/cpu-aarch64.h new file mode 100644 index 0000000000..d5a5f8c82e --- /dev/null +++ b/3rdparty/vixl/include/vixl/aarch64/cpu-aarch64.h @@ -0,0 +1,332 @@ +// Copyright 2014, VIXL authors +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// * Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// * Neither the name of ARM Limited nor the names of its contributors may be +// used to endorse or promote products derived from this software without +// specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS CONTRIBUTORS "AS IS" AND +// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE +// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +#ifndef VIXL_CPU_AARCH64_H +#define VIXL_CPU_AARCH64_H + +#include "../cpu-features.h" +#include "../globals-vixl.h" + +#include "instructions-aarch64.h" +#include "simulator-aarch64.h" + +#ifndef VIXL_INCLUDE_TARGET_AARCH64 +// The supporting .cc file is only compiled when the A64 target is selected. +// Throw an explicit error now to avoid a harder-to-debug linker error later. +// +// These helpers _could_ work on any AArch64 host, even when generating AArch32 +// code, but we don't support this because the available features may differ +// between AArch32 and AArch64 on the same platform, so basing AArch32 code +// generation on aarch64::CPU features is probably broken. +#error cpu-aarch64.h requires VIXL_INCLUDE_TARGET_AARCH64 (scons target=a64). +#endif + +namespace vixl { +namespace aarch64 { + +// A CPU ID register, for use with CPUFeatures::kIDRegisterEmulation. Fields +// specific to each register are described in relevant subclasses. +class IDRegister { + protected: + explicit IDRegister(uint64_t value = 0) : value_(value) {} + + class Field { + public: + enum Type { kUnsigned, kSigned }; + + static const int kMaxWidthInBits = 4; + + // This needs to be constexpr so that fields have "constant initialisation". + // This avoids initialisation order problems when these values are used to + // (dynamically) initialise static variables, etc. 
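As a concrete illustration of that constant-initialisation point, the static Field members declared in the register classes further down can then be defined as plain namespace-scope constants without risking static initialisation order problems. The definitions shown here are a sketch only (the real ones live in cpu-aarch64.cc and may use different widths or signedness); the bit positions quoted are the architectural field positions:

    // ID_AA64PFR0_EL1.SVE occupies bits [35:32], so lsb == 32 with the
    // default four-bit width.
    const IDRegister::Field AA64PFR0::kSVE(32);

    // Fields where 0b1111 means "not implemented" can be declared signed, so
    // that such a value reads back as -1 from Get() and simple ordered
    // comparisons behave as the Arm ARM recommends.
    const IDRegister::Field AA64PFR0::kFP(16, 4, IDRegister::Field::kSigned);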
+ explicit constexpr Field(int lsb, + int bitWidth = kMaxWidthInBits, + Type type = kUnsigned) + : lsb_(lsb), bitWidth_(bitWidth), type_(type) {} + + int GetWidthInBits() const { return bitWidth_; } + int GetLsb() const { return lsb_; } + int GetMsb() const { return lsb_ + GetWidthInBits() - 1; } + Type GetType() const { return type_; } + + private: + int lsb_; + int bitWidth_; + Type type_; + }; + + public: + // Extract the specified field, performing sign-extension for signed fields. + // This allows us to implement the 'value >= number' detection mechanism + // recommended by the Arm ARM, for both signed and unsigned fields. + int Get(Field field) const; + + private: + uint64_t value_; +}; + +class AA64PFR0 : public IDRegister { + public: + explicit AA64PFR0(uint64_t value) : IDRegister(value) {} + + CPUFeatures GetCPUFeatures() const; + + private: + static const Field kFP; + static const Field kAdvSIMD; + static const Field kRAS; + static const Field kSVE; + static const Field kDIT; + static const Field kCSV2; + static const Field kCSV3; +}; + +class AA64PFR1 : public IDRegister { + public: + explicit AA64PFR1(uint64_t value) : IDRegister(value) {} + + CPUFeatures GetCPUFeatures() const; + + private: + static const Field kBT; + static const Field kSSBS; + static const Field kMTE; + static const Field kSME; +}; + +class AA64ISAR0 : public IDRegister { + public: + explicit AA64ISAR0(uint64_t value) : IDRegister(value) {} + + CPUFeatures GetCPUFeatures() const; + + private: + static const Field kAES; + static const Field kSHA1; + static const Field kSHA2; + static const Field kCRC32; + static const Field kAtomic; + static const Field kRDM; + static const Field kSHA3; + static const Field kSM3; + static const Field kSM4; + static const Field kDP; + static const Field kFHM; + static const Field kTS; + static const Field kRNDR; +}; + +class AA64ISAR1 : public IDRegister { + public: + explicit AA64ISAR1(uint64_t value) : IDRegister(value) {} + + CPUFeatures GetCPUFeatures() const; + + private: + static const Field kDPB; + static const Field kAPA; + static const Field kAPI; + static const Field kJSCVT; + static const Field kFCMA; + static const Field kLRCPC; + static const Field kGPA; + static const Field kGPI; + static const Field kFRINTTS; + static const Field kSB; + static const Field kSPECRES; + static const Field kBF16; + static const Field kDGH; + static const Field kI8MM; +}; + +class AA64ISAR2 : public IDRegister { + public: + explicit AA64ISAR2(uint64_t value) : IDRegister(value) {} + + CPUFeatures GetCPUFeatures() const; + + private: + static const Field kWFXT; + static const Field kRPRES; + static const Field kMOPS; + static const Field kCSSC; +}; + +class AA64MMFR0 : public IDRegister { + public: + explicit AA64MMFR0(uint64_t value) : IDRegister(value) {} + + CPUFeatures GetCPUFeatures() const; + + private: + static const Field kECV; +}; + +class AA64MMFR1 : public IDRegister { + public: + explicit AA64MMFR1(uint64_t value) : IDRegister(value) {} + + CPUFeatures GetCPUFeatures() const; + + private: + static const Field kLO; + static const Field kAFP; +}; + +class AA64MMFR2 : public IDRegister { + public: + explicit AA64MMFR2(uint64_t value) : IDRegister(value) {} + + CPUFeatures GetCPUFeatures() const; + + private: + static const Field kAT; +}; + +class AA64ZFR0 : public IDRegister { + public: + explicit AA64ZFR0(uint64_t value) : IDRegister(value) {} + + CPUFeatures GetCPUFeatures() const; + + private: + static const Field kSVEver; + static const Field kAES; + static const Field 
kBitPerm; + static const Field kBF16; + static const Field kSHA3; + static const Field kSM4; + static const Field kI8MM; + static const Field kF32MM; + static const Field kF64MM; +}; + +class AA64SMFR0 : public IDRegister { + public: + explicit AA64SMFR0(uint64_t value) : IDRegister(value) {} + + CPUFeatures GetCPUFeatures() const; + + private: + static const Field kSMEf32f32; + static const Field kSMEb16f32; + static const Field kSMEf16f32; + static const Field kSMEi8i32; + static const Field kSMEf64f64; + static const Field kSMEi16i64; + static const Field kSMEfa64; +}; + +class CPU { + public: + // Initialise CPU support. + static void SetUp(); + + // Ensures the data at a given address and with a given size is the same for + // the I and D caches. I and D caches are not automatically coherent on ARM + // so this operation is required before any dynamically generated code can + // safely run. + static void EnsureIAndDCacheCoherency(void *address, size_t length); + + // Read and interpret the ID registers. This requires + // CPUFeatures::kIDRegisterEmulation, and therefore cannot be called on + // non-AArch64 platforms. + static CPUFeatures InferCPUFeaturesFromIDRegisters(); + + // Read and interpret CPUFeatures reported by the OS. Failed queries (or + // unsupported platforms) return an empty list. Note that this is + // indistinguishable from a successful query on a platform that advertises no + // features. + // + // Non-AArch64 hosts are considered to be unsupported platforms, and this + // function returns an empty list. + static CPUFeatures InferCPUFeaturesFromOS( + CPUFeatures::QueryIDRegistersOption option = + CPUFeatures::kQueryIDRegistersIfAvailable); + + // Query the SVE vector length. This requires CPUFeatures::kSVE. + static int ReadSVEVectorLengthInBits(); + + // Handle tagged pointers. + template + static T SetPointerTag(T pointer, uint64_t tag) { + VIXL_ASSERT(IsUintN(kAddressTagWidth, tag)); + + // Use C-style casts to get static_cast behaviour for integral types (T), + // and reinterpret_cast behaviour for other types. + + uint64_t raw = (uint64_t)pointer; + VIXL_STATIC_ASSERT(sizeof(pointer) == sizeof(raw)); + + raw = (raw & ~kAddressTagMask) | (tag << kAddressTagOffset); + return (T)raw; + } + + template + static uint64_t GetPointerTag(T pointer) { + // Use C-style casts to get static_cast behaviour for integral types (T), + // and reinterpret_cast behaviour for other types. + + uint64_t raw = (uint64_t)pointer; + VIXL_STATIC_ASSERT(sizeof(pointer) == sizeof(raw)); + + return (raw & kAddressTagMask) >> kAddressTagOffset; + } + + private: +#define VIXL_AARCH64_ID_REG_LIST(V) \ + V(AA64PFR0, "ID_AA64PFR0_EL1") \ + V(AA64PFR1, "ID_AA64PFR1_EL1") \ + V(AA64ISAR0, "ID_AA64ISAR0_EL1") \ + V(AA64ISAR1, "ID_AA64ISAR1_EL1") \ + V(AA64MMFR0, "ID_AA64MMFR0_EL1") \ + V(AA64MMFR1, "ID_AA64MMFR1_EL1") \ + /* These registers are RES0 in the baseline Arm8.0. We can always safely */ \ + /* read them, but some compilers don't accept the symbolic names. */ \ + V(AA64SMFR0, "S3_0_C0_C4_5") \ + V(AA64ISAR2, "S3_0_C0_C6_2") \ + V(AA64MMFR2, "S3_0_C0_C7_2") \ + V(AA64ZFR0, "S3_0_C0_C4_4") + +#define VIXL_READ_ID_REG(NAME, MRS_ARG) static NAME Read##NAME(); + // On native AArch64 platforms, read the named CPU ID registers. These require + // CPUFeatures::kIDRegisterEmulation, and should not be called on non-AArch64 + // platforms. + VIXL_AARCH64_ID_REG_LIST(VIXL_READ_ID_REG) +#undef VIXL_READ_ID_REG + + // Return the content of the cache type register. 
+ static uint32_t GetCacheType(); + + // I and D cache line size in bytes. + static unsigned icache_line_size_; + static unsigned dcache_line_size_; +}; + +} // namespace aarch64 +} // namespace vixl + +#endif // VIXL_CPU_AARCH64_H diff --git a/3rdparty/vixl/include/vixl/aarch64/cpu-features-auditor-aarch64.h b/3rdparty/vixl/include/vixl/aarch64/cpu-features-auditor-aarch64.h new file mode 100644 index 0000000000..0d87cf283c --- /dev/null +++ b/3rdparty/vixl/include/vixl/aarch64/cpu-features-auditor-aarch64.h @@ -0,0 +1,134 @@ +// Copyright 2018, VIXL authors +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// * Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// * Neither the name of Arm Limited nor the names of its contributors may be +// used to endorse or promote products derived from this software without +// specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS CONTRIBUTORS "AS IS" AND +// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE +// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +#ifndef VIXL_AARCH64_CPU_FEATURES_AUDITOR_AARCH64_H_ +#define VIXL_AARCH64_CPU_FEATURES_AUDITOR_AARCH64_H_ + +#include +#include +#include + +#include "../cpu-features.h" +#include "decoder-aarch64.h" +#include "decoder-visitor-map-aarch64.h" + +namespace vixl { +namespace aarch64 { + +// This visitor records the CPU features that each decoded instruction requires. +// It provides: +// - the set of CPU features required by the most recently decoded instruction, +// - a cumulative set of encountered CPU features, +// - an optional list of 'available' CPU features. +// +// Primarily, this allows the Disassembler and Simulator to share the same CPU +// features logic. However, it can be used standalone to scan code blocks for +// CPU features. +class CPUFeaturesAuditor : public DecoderVisitor { + public: + // Construction arguments: + // - If a decoder is specified, the CPUFeaturesAuditor automatically + // registers itself as a visitor. Otherwise, this can be done manually. + // + // - If an `available` features list is provided, it is used as a hint in + // cases where instructions may be provided by multiple separate features. + // An example of this is FP&SIMD loads and stores: some of these are used + // in both FP and integer SIMD code. If exactly one of those features is + // in `available` when one of these instructions is encountered, then the + // auditor will record that feature. Otherwise, it will record _both_ + // features. 
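A minimal usage sketch, assuming a block of already-generated A64 code: attach an auditor to a Decoder, decode the region, then read back the accumulated feature set. The function name, variable names and include paths here are illustrative rather than taken from VIXL:

    #include "aarch64/cpu-features-auditor-aarch64.h"
    #include "aarch64/decoder-aarch64.h"

    using namespace vixl::aarch64;

    // `start` and `end` are assumed to delimit readable, already-generated code.
    vixl::CPUFeatures RequiredFeatures(const Instruction* start,
                                       const Instruction* end) {
      Decoder decoder;
      CPUFeaturesAuditor auditor(&decoder);  // Registers itself as a visitor.
      decoder.Decode(start, end);            // Visits every instruction in range.
      return auditor.GetSeenFeatures();      // Union of per-instruction requirements.
    }

Passing an `available` set to the constructor acts only as the disambiguation hint described above.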
+ explicit CPUFeaturesAuditor( + Decoder* decoder, const CPUFeatures& available = CPUFeatures::None()) + : available_(available), decoder_(decoder) { + if (decoder_ != NULL) decoder_->AppendVisitor(this); + } + + explicit CPUFeaturesAuditor( + const CPUFeatures& available = CPUFeatures::None()) + : available_(available), decoder_(NULL) {} + + virtual ~CPUFeaturesAuditor() { + if (decoder_ != NULL) decoder_->RemoveVisitor(this); + } + + void ResetSeenFeatures() { + seen_ = CPUFeatures::None(); + last_instruction_ = CPUFeatures::None(); + } + + // Query or set available CPUFeatures. + const CPUFeatures& GetAvailableFeatures() const { return available_; } + void SetAvailableFeatures(const CPUFeatures& available) { + available_ = available; + } + + // Query CPUFeatures seen since construction (or the last call to `Reset()`). + const CPUFeatures& GetSeenFeatures() const { return seen_; } + + // Query CPUFeatures from the last instruction visited by this auditor. + const CPUFeatures& GetInstructionFeatures() const { + return last_instruction_; + } + + bool InstructionIsAvailable() const { + return available_.Has(last_instruction_); + } + + // The common CPUFeatures interface operates on the available_ list. + CPUFeatures* GetCPUFeatures() { return &available_; } + void SetCPUFeatures(const CPUFeatures& available) { + SetAvailableFeatures(available); + } + + virtual void Visit(Metadata* metadata, + const Instruction* instr) VIXL_OVERRIDE; + + private: + class RecordInstructionFeaturesScope; + +#define DECLARE(A) virtual void Visit##A(const Instruction* instr); + VISITOR_LIST(DECLARE) +#undef DECLARE + + void LoadStoreHelper(const Instruction* instr); + void LoadStorePairHelper(const Instruction* instr); + + CPUFeatures seen_; + CPUFeatures last_instruction_; + CPUFeatures available_; + + Decoder* decoder_; + + using FormToVisitorFnMap = std::unordered_map< + uint32_t, + std::function>; + static const FormToVisitorFnMap* GetFormToVisitorFnMap(); +}; + +} // namespace aarch64 +} // namespace vixl + +#endif // VIXL_AARCH64_CPU_FEATURES_AUDITOR_AARCH64_H_ diff --git a/3rdparty/vixl/include/vixl/aarch64/decoder-aarch64.h b/3rdparty/vixl/include/vixl/aarch64/decoder-aarch64.h new file mode 100644 index 0000000000..22c66e82a4 --- /dev/null +++ b/3rdparty/vixl/include/vixl/aarch64/decoder-aarch64.h @@ -0,0 +1,695 @@ +// Copyright 2019, VIXL authors +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// * Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// * Neither the name of ARM Limited nor the names of its contributors may be +// used to endorse or promote products derived from this software without +// specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS CONTRIBUTORS "AS IS" AND +// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +// DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE +// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +#ifndef VIXL_AARCH64_DECODER_AARCH64_H_ +#define VIXL_AARCH64_DECODER_AARCH64_H_ + +#include +#include +#include + +#include "../globals-vixl.h" + +#include "instructions-aarch64.h" + +// List macro containing all visitors needed by the decoder class. +#define VISITOR_LIST_THAT_RETURN(V) \ + V(AddSubExtended) \ + V(AddSubImmediate) \ + V(AddSubShifted) \ + V(AddSubWithCarry) \ + V(AtomicMemory) \ + V(Bitfield) \ + V(CompareBranch) \ + V(ConditionalBranch) \ + V(ConditionalCompareImmediate) \ + V(ConditionalCompareRegister) \ + V(ConditionalSelect) \ + V(Crypto2RegSHA) \ + V(Crypto3RegSHA) \ + V(CryptoAES) \ + V(DataProcessing1Source) \ + V(DataProcessing2Source) \ + V(DataProcessing3Source) \ + V(EvaluateIntoFlags) \ + V(Exception) \ + V(Extract) \ + V(FPCompare) \ + V(FPConditionalCompare) \ + V(FPConditionalSelect) \ + V(FPDataProcessing1Source) \ + V(FPDataProcessing2Source) \ + V(FPDataProcessing3Source) \ + V(FPFixedPointConvert) \ + V(FPImmediate) \ + V(FPIntegerConvert) \ + V(LoadLiteral) \ + V(LoadStoreExclusive) \ + V(LoadStorePAC) \ + V(LoadStorePairNonTemporal) \ + V(LoadStorePairOffset) \ + V(LoadStorePairPostIndex) \ + V(LoadStorePairPreIndex) \ + V(LoadStorePostIndex) \ + V(LoadStorePreIndex) \ + V(LoadStoreRCpcUnscaledOffset) \ + V(LoadStoreRegisterOffset) \ + V(LoadStoreUnscaledOffset) \ + V(LoadStoreUnsignedOffset) \ + V(LogicalImmediate) \ + V(LogicalShifted) \ + V(MoveWideImmediate) \ + V(NEON2RegMisc) \ + V(NEON2RegMiscFP16) \ + V(NEON3Different) \ + V(NEON3Same) \ + V(NEON3SameExtra) \ + V(NEON3SameFP16) \ + V(NEONAcrossLanes) \ + V(NEONByIndexedElement) \ + V(NEONCopy) \ + V(NEONExtract) \ + V(NEONLoadStoreMultiStruct) \ + V(NEONLoadStoreMultiStructPostIndex) \ + V(NEONLoadStoreSingleStruct) \ + V(NEONLoadStoreSingleStructPostIndex) \ + V(NEONModifiedImmediate) \ + V(NEONPerm) \ + V(NEONScalar2RegMisc) \ + V(NEONScalar2RegMiscFP16) \ + V(NEONScalar3Diff) \ + V(NEONScalar3Same) \ + V(NEONScalar3SameExtra) \ + V(NEONScalar3SameFP16) \ + V(NEONScalarByIndexedElement) \ + V(NEONScalarCopy) \ + V(NEONScalarPairwise) \ + V(NEONScalarShiftImmediate) \ + V(NEONShiftImmediate) \ + V(NEONTable) \ + V(PCRelAddressing) \ + V(RotateRightIntoFlags) \ + V(SVE32BitGatherLoad_ScalarPlus32BitUnscaledOffsets) \ + V(SVE32BitGatherLoad_VectorPlusImm) \ + V(SVE32BitGatherLoadHalfwords_ScalarPlus32BitScaledOffsets) \ + V(SVE32BitGatherLoadWords_ScalarPlus32BitScaledOffsets) \ + V(SVE32BitGatherPrefetch_ScalarPlus32BitScaledOffsets) \ + V(SVE32BitGatherPrefetch_VectorPlusImm) \ + V(SVE32BitScatterStore_ScalarPlus32BitScaledOffsets) \ + V(SVE32BitScatterStore_ScalarPlus32BitUnscaledOffsets) \ + V(SVE32BitScatterStore_VectorPlusImm) \ + V(SVE64BitGatherLoad_ScalarPlus32BitUnpackedScaledOffsets) \ + V(SVE64BitGatherLoad_ScalarPlus64BitScaledOffsets) \ + V(SVE64BitGatherLoad_ScalarPlus64BitUnscaledOffsets) \ + V(SVE64BitGatherLoad_ScalarPlusUnpacked32BitUnscaledOffsets) \ + V(SVE64BitGatherLoad_VectorPlusImm) \ + 
V(SVE64BitGatherPrefetch_ScalarPlus64BitScaledOffsets) \ + V(SVE64BitGatherPrefetch_ScalarPlusUnpacked32BitScaledOffsets) \ + V(SVE64BitGatherPrefetch_VectorPlusImm) \ + V(SVE64BitScatterStore_ScalarPlus64BitScaledOffsets) \ + V(SVE64BitScatterStore_ScalarPlus64BitUnscaledOffsets) \ + V(SVE64BitScatterStore_ScalarPlusUnpacked32BitScaledOffsets) \ + V(SVE64BitScatterStore_ScalarPlusUnpacked32BitUnscaledOffsets) \ + V(SVE64BitScatterStore_VectorPlusImm) \ + V(SVEAddressGeneration) \ + V(SVEBitwiseLogicalUnpredicated) \ + V(SVEBitwiseShiftUnpredicated) \ + V(SVEFFRInitialise) \ + V(SVEFFRWriteFromPredicate) \ + V(SVEFPAccumulatingReduction) \ + V(SVEFPArithmeticUnpredicated) \ + V(SVEFPCompareVectors) \ + V(SVEFPCompareWithZero) \ + V(SVEFPComplexAddition) \ + V(SVEFPComplexMulAdd) \ + V(SVEFPComplexMulAddIndex) \ + V(SVEFPFastReduction) \ + V(SVEFPMulIndex) \ + V(SVEFPMulAdd) \ + V(SVEFPMulAddIndex) \ + V(SVEFPUnaryOpUnpredicated) \ + V(SVEIncDecByPredicateCount) \ + V(SVEIndexGeneration) \ + V(SVEIntArithmeticUnpredicated) \ + V(SVEIntCompareSignedImm) \ + V(SVEIntCompareUnsignedImm) \ + V(SVEIntCompareVectors) \ + V(SVEIntMulAddPredicated) \ + V(SVEIntMulAddUnpredicated) \ + V(SVEIntReduction) \ + V(SVEIntUnaryArithmeticPredicated) \ + V(SVEMovprfx) \ + V(SVEMulIndex) \ + V(SVEPermuteVectorExtract) \ + V(SVEPermuteVectorInterleaving) \ + V(SVEPredicateCount) \ + V(SVEPredicateLogical) \ + V(SVEPropagateBreak) \ + V(SVEStackFrameAdjustment) \ + V(SVEStackFrameSize) \ + V(SVEVectorSelect) \ + V(SVEBitwiseLogical_Predicated) \ + V(SVEBitwiseLogicalWithImm_Unpredicated) \ + V(SVEBitwiseShiftByImm_Predicated) \ + V(SVEBitwiseShiftByVector_Predicated) \ + V(SVEBitwiseShiftByWideElements_Predicated) \ + V(SVEBroadcastBitmaskImm) \ + V(SVEBroadcastFPImm_Unpredicated) \ + V(SVEBroadcastGeneralRegister) \ + V(SVEBroadcastIndexElement) \ + V(SVEBroadcastIntImm_Unpredicated) \ + V(SVECompressActiveElements) \ + V(SVEConditionallyBroadcastElementToVector) \ + V(SVEConditionallyExtractElementToSIMDFPScalar) \ + V(SVEConditionallyExtractElementToGeneralRegister) \ + V(SVEConditionallyTerminateScalars) \ + V(SVEConstructivePrefix_Unpredicated) \ + V(SVEContiguousFirstFaultLoad_ScalarPlusScalar) \ + V(SVEContiguousLoad_ScalarPlusImm) \ + V(SVEContiguousLoad_ScalarPlusScalar) \ + V(SVEContiguousNonFaultLoad_ScalarPlusImm) \ + V(SVEContiguousNonTemporalLoad_ScalarPlusImm) \ + V(SVEContiguousNonTemporalLoad_ScalarPlusScalar) \ + V(SVEContiguousNonTemporalStore_ScalarPlusImm) \ + V(SVEContiguousNonTemporalStore_ScalarPlusScalar) \ + V(SVEContiguousPrefetch_ScalarPlusImm) \ + V(SVEContiguousPrefetch_ScalarPlusScalar) \ + V(SVEContiguousStore_ScalarPlusImm) \ + V(SVEContiguousStore_ScalarPlusScalar) \ + V(SVECopySIMDFPScalarRegisterToVector_Predicated) \ + V(SVECopyFPImm_Predicated) \ + V(SVECopyGeneralRegisterToVector_Predicated) \ + V(SVECopyIntImm_Predicated) \ + V(SVEElementCount) \ + V(SVEExtractElementToSIMDFPScalarRegister) \ + V(SVEExtractElementToGeneralRegister) \ + V(SVEFPArithmetic_Predicated) \ + V(SVEFPArithmeticWithImm_Predicated) \ + V(SVEFPConvertPrecision) \ + V(SVEFPConvertToInt) \ + V(SVEFPExponentialAccelerator) \ + V(SVEFPRoundToIntegralValue) \ + V(SVEFPTrigMulAddCoefficient) \ + V(SVEFPTrigSelectCoefficient) \ + V(SVEFPUnaryOp) \ + V(SVEIncDecRegisterByElementCount) \ + V(SVEIncDecVectorByElementCount) \ + V(SVEInsertSIMDFPScalarRegister) \ + V(SVEInsertGeneralRegister) \ + V(SVEIntAddSubtractImm_Unpredicated) \ + V(SVEIntAddSubtractVectors_Predicated) \ + 
V(SVEIntCompareScalarCountAndLimit) \ + V(SVEIntConvertToFP) \ + V(SVEIntDivideVectors_Predicated) \ + V(SVEIntMinMaxImm_Unpredicated) \ + V(SVEIntMinMaxDifference_Predicated) \ + V(SVEIntMulImm_Unpredicated) \ + V(SVEIntMulVectors_Predicated) \ + V(SVELoadAndBroadcastElement) \ + V(SVELoadAndBroadcastQOWord_ScalarPlusImm) \ + V(SVELoadAndBroadcastQOWord_ScalarPlusScalar) \ + V(SVELoadMultipleStructures_ScalarPlusImm) \ + V(SVELoadMultipleStructures_ScalarPlusScalar) \ + V(SVELoadPredicateRegister) \ + V(SVELoadVectorRegister) \ + V(SVEPartitionBreakCondition) \ + V(SVEPermutePredicateElements) \ + V(SVEPredicateFirstActive) \ + V(SVEPredicateInitialize) \ + V(SVEPredicateNextActive) \ + V(SVEPredicateReadFromFFR_Predicated) \ + V(SVEPredicateReadFromFFR_Unpredicated) \ + V(SVEPredicateTest) \ + V(SVEPredicateZero) \ + V(SVEPropagateBreakToNextPartition) \ + V(SVEReversePredicateElements) \ + V(SVEReverseVectorElements) \ + V(SVEReverseWithinElements) \ + V(SVESaturatingIncDecRegisterByElementCount) \ + V(SVESaturatingIncDecVectorByElementCount) \ + V(SVEStoreMultipleStructures_ScalarPlusImm) \ + V(SVEStoreMultipleStructures_ScalarPlusScalar) \ + V(SVEStorePredicateRegister) \ + V(SVEStoreVectorRegister) \ + V(SVETableLookup) \ + V(SVEUnpackPredicateElements) \ + V(SVEUnpackVectorElements) \ + V(SVEVectorSplice) \ + V(System) \ + V(TestBranch) \ + V(Unallocated) \ + V(UnconditionalBranch) \ + V(UnconditionalBranchToRegister) \ + V(Unimplemented) + +#define VISITOR_LIST_THAT_DONT_RETURN(V) V(Reserved) + +#define VISITOR_LIST(V) \ + VISITOR_LIST_THAT_RETURN(V) \ + VISITOR_LIST_THAT_DONT_RETURN(V) + +namespace vixl { +namespace aarch64 { + +using Metadata = std::map; + +// The Visitor interface consists only of the Visit() method. User classes +// that inherit from this one must provide an implementation of the method. +// Information about the instruction encountered by the Decoder is available +// via the metadata pointer. +class DecoderVisitor { + public: + enum VisitorConstness { kConstVisitor, kNonConstVisitor }; + explicit DecoderVisitor(VisitorConstness constness = kConstVisitor) + : constness_(constness) {} + + virtual ~DecoderVisitor() {} + + virtual void Visit(Metadata* metadata, const Instruction* instr) = 0; + + bool IsConstVisitor() const { return constness_ == kConstVisitor; } + Instruction* MutableInstruction(const Instruction* instr) { + VIXL_ASSERT(!IsConstVisitor()); + return const_cast(instr); + } + + private: + const VisitorConstness constness_; +}; + +class DecodeNode; +class CompiledDecodeNode; + +// The instruction decoder is constructed from a graph of decode nodes. At each +// node, a number of bits are sampled from the instruction being decoded. The +// resulting value is used to look up the next node in the graph, which then +// samples other bits, and moves to other decode nodes. Eventually, a visitor +// node is reached, and the corresponding visitor function is called, which +// handles the instruction. +class Decoder { + public: + Decoder() { ConstructDecodeGraph(); } + + // Top-level wrappers around the actual decoding function. + void Decode(const Instruction* instr); + void Decode(Instruction* instr); + + // Decode all instructions from start (inclusive) to end (exclusive). + template + void Decode(T start, T end) { + for (T instr = start; instr < end; instr = instr->GetNextInstruction()) { + Decode(instr); + } + } + + // Register a new visitor class with the decoder. 
+ // Decode() will call the corresponding visitor method from all registered + // visitor classes when decoding reaches the leaf node of the instruction + // decode tree. + // Visitors are called in order. + // A visitor can be registered multiple times. + // + // d.AppendVisitor(V1); + // d.AppendVisitor(V2); + // d.PrependVisitor(V2); + // d.AppendVisitor(V3); + // + // d.Decode(i); + // + // will call in order visitor methods in V2, V1, V2, V3. + void AppendVisitor(DecoderVisitor* visitor); + void PrependVisitor(DecoderVisitor* visitor); + // These helpers register `new_visitor` before or after the first instance of + // `registered_visiter` in the list. + // So if + // V1, V2, V1, V2 + // are registered in this order in the decoder, calls to + // d.InsertVisitorAfter(V3, V1); + // d.InsertVisitorBefore(V4, V2); + // will yield the order + // V1, V3, V4, V2, V1, V2 + // + // For more complex modifications of the order of registered visitors, one can + // directly access and modify the list of visitors via the `visitors()' + // accessor. + void InsertVisitorBefore(DecoderVisitor* new_visitor, + DecoderVisitor* registered_visitor); + void InsertVisitorAfter(DecoderVisitor* new_visitor, + DecoderVisitor* registered_visitor); + + // Remove all instances of a previously registered visitor class from the list + // of visitors stored by the decoder. + void RemoveVisitor(DecoderVisitor* visitor); + + void VisitNamedInstruction(const Instruction* instr, const std::string& name); + + std::list* visitors() { return &visitors_; } + + // Get a DecodeNode by name from the Decoder's map. + DecodeNode* GetDecodeNode(std::string name); + + private: + // Decodes an instruction and calls the visitor functions registered with the + // Decoder class. + void DecodeInstruction(const Instruction* instr); + + // Add an initialised DecodeNode to the decode_node_ map. + void AddDecodeNode(const DecodeNode& node); + + // Visitors are registered in a list. + std::list visitors_; + + // Compile the dynamically generated decode graph based on the static + // information in kDecodeMapping and kVisitorNodes. + void ConstructDecodeGraph(); + + // Root node for the compiled decoder graph, stored here to avoid a map lookup + // for every instruction decoded. + CompiledDecodeNode* compiled_decoder_root_; + + // Map of node names to DecodeNodes. + std::map decode_nodes_; +}; + +typedef void (Decoder::*DecodeFnPtr)(const Instruction*); +typedef uint32_t (Instruction::*BitExtractFn)(void) const; + +// A Visitor node maps the name of a visitor to the function that handles it. +struct VisitorNode { + const char* name; + const DecodeFnPtr visitor_fn; +}; + +// DecodePattern and DecodeMapping represent the input data to the decoder +// compilation stage. After compilation, the decoder is embodied in the graph +// of CompiledDecodeNodes pointer to by compiled_decoder_root_. + +// A DecodePattern maps a pattern of set/unset/don't care (1, 0, x) bits encoded +// as uint32_t to its handler. +// The encoding uses two bits per symbol: 0 => 0b00, 1 => 0b01, x => 0b10. +// 0b11 marks the edge of the most-significant bits of the pattern, which is +// required to determine the length. For example, the pattern "1x01"_b is +// encoded in a uint32_t as 0b11_01_10_00_01. 
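A worked check of that encoding, written as a sketch that assumes the constexpr "..."_b literal defined in decoder-constants-aarch64.h is in scope:

    // "x10": the symbols x, 1, 0 become 10, 01, 00, preceded by the 11 marker.
    static_assert("x10"_b == 0b11'10'01'00u, "three-symbol pattern");

    // "1x01": the example from the comment above.
    static_assert("1x01"_b == 0b11'01'10'00'01u, "four-symbol pattern");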
+struct DecodePattern { + uint32_t pattern; + const char* handler; +}; + +// A DecodeMapping consists of the name of a handler, the bits sampled in the +// instruction by that handler, and a mapping from the pattern that those +// sampled bits match to the corresponding name of a node. +struct DecodeMapping { + const char* name; + const std::vector sampled_bits; + const std::vector mapping; +}; + +// For speed, before nodes can be used for decoding instructions, they must +// be compiled. This converts the mapping "bit pattern strings to decoder name +// string" stored in DecodeNodes to an array look up for the pointer to the next +// node, stored in CompiledDecodeNodes. Compilation may also apply other +// optimisations for simple decode patterns. +class CompiledDecodeNode { + public: + // Constructor for decode node, containing a decode table and pointer to a + // function that extracts the bits to be sampled. + CompiledDecodeNode(BitExtractFn bit_extract_fn, size_t decode_table_size) + : bit_extract_fn_(bit_extract_fn), + instruction_name_("node"), + decode_table_size_(decode_table_size), + decoder_(NULL) { + decode_table_ = new CompiledDecodeNode*[decode_table_size_]; + memset(decode_table_, 0, decode_table_size_ * sizeof(decode_table_[0])); + } + + // Constructor for wrappers around visitor functions. These require no + // decoding, so no bit extraction function or decode table is assigned. + explicit CompiledDecodeNode(std::string iname, Decoder* decoder) + : bit_extract_fn_(NULL), + instruction_name_(iname), + decode_table_(NULL), + decode_table_size_(0), + decoder_(decoder) {} + + ~CompiledDecodeNode() VIXL_NEGATIVE_TESTING_ALLOW_EXCEPTION { + // Free the decode table, if this is a compiled, non-leaf node. + if (decode_table_ != NULL) { + VIXL_ASSERT(!IsLeafNode()); + delete[] decode_table_; + } + } + + // Decode the instruction by either sampling the bits using the bit extract + // function to find the next node, or, if we're at a leaf, calling the visitor + // function. + void Decode(const Instruction* instr) const; + + // A leaf node is a wrapper for a visitor function. + bool IsLeafNode() const { + VIXL_ASSERT(((instruction_name_ == "node") && (bit_extract_fn_ != NULL)) || + ((instruction_name_ != "node") && (bit_extract_fn_ == NULL))); + return instruction_name_ != "node"; + } + + // Get a pointer to the next node required in the decode process, based on the + // bits sampled by the current node. + CompiledDecodeNode* GetNodeForBits(uint32_t bits) const { + VIXL_ASSERT(bits < decode_table_size_); + return decode_table_[bits]; + } + + // Set the next node in the decode process for the pattern of sampled bits in + // the current node. + void SetNodeForBits(uint32_t bits, CompiledDecodeNode* n) { + VIXL_ASSERT(bits < decode_table_size_); + VIXL_ASSERT(n != NULL); + decode_table_[bits] = n; + } + + private: + // Pointer to an instantiated template function for extracting the bits + // sampled by this node. Set to NULL for leaf nodes. + const BitExtractFn bit_extract_fn_; + + // Visitor function that handles the instruction identified. Set only for + // leaf nodes, where no extra decoding is required, otherwise NULL. + std::string instruction_name_; + + // Mapping table from instruction bits to next decode stage. + CompiledDecodeNode** decode_table_; + const size_t decode_table_size_; + + // Pointer to the decoder containing this node, used to call its visitor + // function for leaf nodes. Set to NULL for non-leaf nodes. 
+ Decoder* decoder_; +}; + +class DecodeNode { + public: + // Default constructor needed for map initialisation. + DecodeNode() + : sampled_bits_(DecodeNode::kEmptySampledBits), + pattern_table_(DecodeNode::kEmptyPatternTable), + compiled_node_(NULL) {} + + // Constructor for DecodeNode wrappers around visitor functions. These are + // marked as "compiled", as there is no decoding left to do. + explicit DecodeNode(const std::string& iname, Decoder* decoder) + : name_(iname), + sampled_bits_(DecodeNode::kEmptySampledBits), + instruction_name_(iname), + pattern_table_(DecodeNode::kEmptyPatternTable), + decoder_(decoder), + compiled_node_(NULL) {} + + // Constructor for DecodeNodes that map bit patterns to other DecodeNodes. + explicit DecodeNode(const DecodeMapping& map, Decoder* decoder = NULL) + : name_(map.name), + sampled_bits_(map.sampled_bits), + instruction_name_("node"), + pattern_table_(map.mapping), + decoder_(decoder), + compiled_node_(NULL) { + // With the current two bits per symbol encoding scheme, the maximum pattern + // length is (32 - 2) / 2 = 15 bits. + VIXL_CHECK(GetPatternLength(map.mapping[0].pattern) <= 15); + for (const DecodePattern& p : map.mapping) { + VIXL_CHECK(GetPatternLength(p.pattern) == map.sampled_bits.size()); + } + } + + ~DecodeNode() { + // Delete the compiled version of this node, if one was created. + if (compiled_node_ != NULL) { + delete compiled_node_; + } + } + + // Get the bits sampled from the instruction by this node. + const std::vector& GetSampledBits() const { return sampled_bits_; } + + // Get the number of bits sampled from the instruction by this node. + size_t GetSampledBitsCount() const { return sampled_bits_.size(); } + + // A leaf node is a DecodeNode that wraps the visitor function for the + // identified instruction class. + bool IsLeafNode() const { return instruction_name_ != "node"; } + + std::string GetName() const { return name_; } + + // Create a CompiledDecodeNode of specified table size that uses + // bit_extract_fn to sample bits from the instruction. + void CreateCompiledNode(BitExtractFn bit_extract_fn, size_t table_size) { + VIXL_ASSERT(bit_extract_fn != NULL); + VIXL_ASSERT(table_size > 0); + compiled_node_ = new CompiledDecodeNode(bit_extract_fn, table_size); + } + + // Create a CompiledDecodeNode wrapping a visitor function. No decoding is + // required for this node; the visitor function is called instead. + void CreateVisitorNode() { + compiled_node_ = new CompiledDecodeNode(instruction_name_, decoder_); + } + + // Find and compile the DecodeNode named "name", and set it as the node for + // the pattern "bits". + void CompileNodeForBits(Decoder* decoder, std::string name, uint32_t bits); + + // Get a pointer to an instruction method that extracts the instruction bits + // specified by the mask argument, and returns those sampled bits as a + // contiguous sequence, suitable for indexing an array. + // For example, a mask of 0b1010 returns a function that, given an instruction + // 0bXYZW, will return 0bXZ. + BitExtractFn GetBitExtractFunction(uint32_t mask) { + return GetBitExtractFunctionHelper(mask, 0); + } + + // Get a pointer to an Instruction method that applies a mask to the + // instruction bits, and tests if the result is equal to value. The returned + // function gives a 1 result if (inst & mask == value), 0 otherwise. 
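A behavioural sketch of the (mask, value) form using small numbers; this is a plain lambda standing in for the member-function-pointer machinery actually returned here:

    // Equivalent of the function generated for mask == 0b1100, value == 0b0100.
    auto matches = [](uint32_t inst) -> uint32_t {
      return ((inst & 0b1100u) == 0b0100u) ? 1 : 0;
    };
    // matches(0b0111) == 1   (bits 3:2 are 01)
    // matches(0b1111) == 0   (bits 3:2 are 11)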
+ BitExtractFn GetBitExtractFunction(uint32_t mask, uint32_t value) { + return GetBitExtractFunctionHelper(value, mask); + } + + // Compile this DecodeNode into a new CompiledDecodeNode and returns a pointer + // to it. This pointer is also stored inside the DecodeNode itself. Destroying + // a DecodeNode frees its associated CompiledDecodeNode. + CompiledDecodeNode* Compile(Decoder* decoder); + + // Get a pointer to the CompiledDecodeNode associated with this DecodeNode. + // Returns NULL if the node has not been compiled yet. + CompiledDecodeNode* GetCompiledNode() const { return compiled_node_; } + bool IsCompiled() const { return GetCompiledNode() != NULL; } + + enum class PatternSymbol { kSymbol0 = 0, kSymbol1 = 1, kSymbolX = 2 }; + static const uint32_t kEndOfPattern = 3; + static const uint32_t kPatternSymbolMask = 3; + + size_t GetPatternLength(uint32_t pattern) const { + uint32_t hsb = HighestSetBitPosition(pattern); + // The pattern length is signified by two set bits in a two bit-aligned + // position. Ensure that the pattern has a highest set bit, it's at an odd + // bit position, and that the bit to the right of the hsb is also set. + VIXL_ASSERT(((hsb % 2) == 1) && (pattern >> (hsb - 1)) == kEndOfPattern); + return hsb / 2; + } + + bool PatternContainsSymbol(uint32_t pattern, PatternSymbol symbol) const { + while ((pattern & kPatternSymbolMask) != kEndOfPattern) { + if (static_cast(pattern & kPatternSymbolMask) == symbol) + return true; + pattern >>= 2; + } + return false; + } + + PatternSymbol GetSymbolAt(uint32_t pattern, size_t pos) const { + size_t len = GetPatternLength(pattern); + VIXL_ASSERT((pos < 15) && (pos < len)); + uint32_t shift = static_cast(2 * (len - pos - 1)); + uint32_t sym = (pattern >> shift) & kPatternSymbolMask; + return static_cast(sym); + } + + private: + // Generate a mask and value pair from a pattern constructed from 0, 1 and x + // (don't care) 2-bit symbols. + // For example "10x1"_b should return mask = 0b1101, value = 0b1001. + typedef std::pair MaskValuePair; + MaskValuePair GenerateMaskValuePair(uint32_t pattern) const; + + // Generate a pattern ordered by the bit positions sampled by this node. + // The symbol corresponding to the lowest sample position is placed in the + // least-significant bits of the result pattern. + // For example, a pattern of "1x0"_b expected when sampling bits 31, 1 and 30 + // returns the pattern "x01"_b; bit 1 should be 'x', bit 30 '0' and bit 31 + // '1'. + // This output makes comparisons easier between the pattern and bits sampled + // from an instruction using the fast "compress" algorithm. See + // Instruction::Compress(). + uint32_t GenerateOrderedPattern(uint32_t pattern) const; + + // Generate a mask with a bit set at each sample position. + uint32_t GenerateSampledBitsMask() const; + + // Try to compile a more optimised decode operation for this node, returning + // true if successful. + bool TryCompileOptimisedDecodeTable(Decoder* decoder); + + // Helper function that returns a bit extracting function. If y is zero, + // x is a bit extraction mask. Otherwise, y is the mask, and x is the value + // to match after masking. + BitExtractFn GetBitExtractFunctionHelper(uint32_t x, uint32_t y); + + // Name of this decoder node, used to construct edges in the decode graph. + std::string name_; + + // Vector of bits sampled from an instruction to determine which node to look + // up next in the decode process. 
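For illustration (the numbers are only an example), a node that samples four bits, say {23, 22, 20, 19}, pairs with four-symbol patterns, and the DecodeNode constructor above checks this via GetPatternLength:

    // "1x01"_b == 0b11'01'10'00'01: the highest set bit is bit 9, the two
    // bits at the top are the 11 end marker, and 9 / 2 == 4, which must equal
    // sampled_bits.size() for the node.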
+ const std::vector<uint8_t>& sampled_bits_; + static const std::vector<uint8_t> kEmptySampledBits; + + // For leaf nodes, this is the name of the instruction form that the node + // represents. For other nodes, this is always set to "node". + std::string instruction_name_; + + // Source mapping from bit pattern to name of next decode stage. + const std::vector<DecodePattern>& pattern_table_; + static const std::vector<DecodePattern> kEmptyPatternTable; + + // Pointer to the decoder containing this node, used to call its visitor + // function for leaf nodes. + Decoder* decoder_; + + // Pointer to the compiled version of this node. If this node hasn't been + // compiled yet, this pointer is NULL. + CompiledDecodeNode* compiled_node_; +}; + +} // namespace aarch64 +} // namespace vixl + +#endif // VIXL_AARCH64_DECODER_AARCH64_H_ diff --git a/3rdparty/vixl/include/vixl/aarch64/decoder-constants-aarch64.h b/3rdparty/vixl/include/vixl/aarch64/decoder-constants-aarch64.h new file mode 100644 index 0000000000..70e01a103f --- /dev/null +++ b/3rdparty/vixl/include/vixl/aarch64/decoder-constants-aarch64.h @@ -0,0 +1,9963 @@ +// Copyright 2019, VIXL authors +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// * Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// * Neither the name of ARM Limited nor the names of its contributors may be +// used to endorse or promote products derived from this software without +// specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE +// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +namespace vixl { +namespace aarch64 { + +// Recursively construct a uint32_t encoded bit pattern from a string literal. +// The string characters are mapped as two-bit symbols '0'=>0, '1'=>1, 'x'=>2. +// The remaining symbol, 3, is used to mark the end of the pattern, allowing +// its length to be found. For example, the pattern "1x01"_b is encoded in a +// uint32_t as 0b11_01_00_01. The maximum pattern string length is 15 +// characters, encoded as 3 in the most significant bits, followed by 15 2-bit +// symbols. +constexpr uint32_t str_to_two_bit_pattern(const char* x, size_t s, uint32_t a) { + if (s == 0) return a; + uint32_t r = (x[0] == 'x') ?
2 : (x[0] - '0'); + return str_to_two_bit_pattern(x + 1, s - 1, (a << 2) | r); +} + +constexpr uint32_t operator"" _b(const char* x, size_t s) { + return str_to_two_bit_pattern(x, s, DecodeNode::kEndOfPattern); +} + +// This decode table is derived from the AArch64 ISA XML specification, +// available from https://developer.arm.com/products/architecture/a-profile/ + +// clang-format off +static const DecodeMapping kDecodeMapping[] = { + { "_ggvlym", + {13, 12}, + { {"00"_b, "adc_32_addsub_carry"}, + }, + }, + + { "_ghmtnl", + {18, 17}, + { {"0x"_b, "ld1_asisdlsep_r3_r3"}, + {"10"_b, "ld1_asisdlsep_r3_r3"}, + {"11"_b, "ld1_asisdlsep_i3_i3"}, + }, + }, + + { "_ghpxms", + {23, 22}, + { {"01"_b, "fmla_z_p_zzz"}, + {"1x"_b, "fmla_z_p_zzz"}, + }, + }, + + { "_ghqqzy", + {11}, + { {"0"_b, "_qrsxzp"}, + }, + }, + + { "_ghrnmz", + {20, 19, 18, 17, 16, 13, 12, 9, 8, 7, 6, 5}, + { {"000010011111"_b, "xpacd_64z_dp_1src"}, + }, + }, + + { "_gjprgr", + {22, 13, 12}, + { {"000"_b, "ldsmax_64_memop"}, + {"001"_b, "ldsmin_64_memop"}, + {"010"_b, "ldumax_64_memop"}, + {"011"_b, "ldumin_64_memop"}, + {"100"_b, "ldsmaxl_64_memop"}, + {"101"_b, "ldsminl_64_memop"}, + {"110"_b, "ldumaxl_64_memop"}, + {"111"_b, "lduminl_64_memop"}, + }, + }, + + { "_gjprmg", + {11}, + { {"0"_b, "_llpsqq"}, + }, + }, + + { "_gjtmjg", + {23, 22, 20, 19, 13, 11}, + { {"0000x0"_b, "bic_asimdimm_l_hl"}, + {"00x100"_b, "uqshrn_asimdshf_n"}, + {"00x101"_b, "uqrshrn_asimdshf_n"}, + {"010x00"_b, "uqshrn_asimdshf_n"}, + {"010x01"_b, "uqrshrn_asimdshf_n"}, + {"011100"_b, "uqshrn_asimdshf_n"}, + {"011101"_b, "uqrshrn_asimdshf_n"}, + {"0x1000"_b, "uqshrn_asimdshf_n"}, + {"0x1001"_b, "uqrshrn_asimdshf_n"}, + }, + }, + + { "_gjxsrn", + {2, 1, 0}, + { {"000"_b, "_sqttsv"}, + }, + }, + + { "_gknljg", + {11, 10, 9, 8, 7, 6}, + { {"000000"_b, "wfet_only_systeminstrswithreg"}, + }, + }, + + { "_gkqhyz", + {23, 22}, + { {"00"_b, "fmsub_s_floatdp3"}, + {"01"_b, "fmsub_d_floatdp3"}, + {"11"_b, "fmsub_h_floatdp3"}, + }, + }, + + { "_glgznt", + {20, 19, 18, 17, 16, 4, 3}, + { {"0000001"_b, "fcmp_dz_floatcmp"}, + {"0000011"_b, "fcmpe_dz_floatcmp"}, + {"xxxxx00"_b, "fcmp_d_floatcmp"}, + {"xxxxx10"_b, "fcmpe_d_floatcmp"}, + }, + }, + + { "_gljqng", + {22, 13, 12}, + { {"000"_b, "ldsmaxa_32_memop"}, + {"001"_b, "ldsmina_32_memop"}, + {"010"_b, "ldumaxa_32_memop"}, + {"011"_b, "ldumina_32_memop"}, + {"100"_b, "ldsmaxal_32_memop"}, + {"101"_b, "ldsminal_32_memop"}, + {"110"_b, "ldumaxal_32_memop"}, + {"111"_b, "lduminal_32_memop"}, + }, + }, + + { "_glkvkr", + {30}, + { {"0"_b, "adds_32_addsub_shift"}, + {"1"_b, "subs_32_addsub_shift"}, + }, + }, + + { "_glpxty", + {20, 19, 18, 17, 16}, + { {"00010"_b, "scvtf_s32_float2fix"}, + {"00011"_b, "ucvtf_s32_float2fix"}, + {"11000"_b, "fcvtzs_32s_float2fix"}, + {"11001"_b, "fcvtzu_32s_float2fix"}, + }, + }, + + { "_gmqyjv", + {30, 20, 19, 18, 17, 16, 13}, + { {"1111110"_b, "_nvkxzs"}, + }, + }, + + { "_gmsmls", + {13}, + { {"0"_b, "mls_asimdelem_r"}, + {"1"_b, "umlsl_asimdelem_l"}, + }, + }, + + { "_gmsqqz", + {23}, + { {"0"_b, "facge_asimdsame_only"}, + {"1"_b, "facgt_asimdsame_only"}, + }, + }, + + { "_gmtjvr", + {16, 13, 12}, + { {"000"_b, "rev_64_dp_1src"}, + {"001"_b, "cnt_64_dp_1src"}, + {"100"_b, "pacdb_64p_dp_1src"}, + {"101"_b, "autdb_64p_dp_1src"}, + {"110"_b, "_rlxhxz"}, + {"111"_b, "_phjkhr"}, + }, + }, + + { "_gngjxr", + {20, 19, 18, 17, 16}, + { {"00000"_b, "cadd_z_zz"}, + {"00001"_b, "sqcadd_z_zz"}, + }, + }, + + { "_gnhjkl", + {16, 13, 12}, + { {"000"_b, "rbit_64_dp_1src"}, + {"001"_b, 
"clz_64_dp_1src"}, + {"010"_b, "abs_64_dp_1src"}, + {"100"_b, "pacia_64p_dp_1src"}, + {"101"_b, "autia_64p_dp_1src"}, + {"110"_b, "_yzxjnk"}, + {"111"_b, "_prxyhr"}, + }, + }, + + { "_gnpgsg", + {22}, + { {"0"_b, "str_64_ldst_regoff"}, + {"1"_b, "ldr_64_ldst_regoff"}, + }, + }, + + { "_gnqhsl", + {23, 22, 20, 19, 18, 17, 16}, + { {"0010000"_b, "punpklo_p_p"}, + {"0010001"_b, "punpkhi_p_p"}, + {"xx0xxxx"_b, "zip1_p_pp"}, + {"xx10100"_b, "rev_p_p"}, + }, + }, + + { "_gnxrlr", + {23, 22, 13, 12, 11, 10}, + { {"0011x0"_b, "sudot_asimdelem_d"}, + {"0111x0"_b, "bfdot_asimdelem_e"}, + {"0x1001"_b, "scvtf_asimdshf_c"}, + {"0x1111"_b, "fcvtzs_asimdshf_c"}, + {"1011x0"_b, "usdot_asimdelem_d"}, + {"1111x0"_b, "bfmlal_asimdelem_f"}, + {"xx00x0"_b, "sqdmulh_asimdelem_r"}, + {"xx01x0"_b, "sqrdmulh_asimdelem_r"}, + {"xx10x0"_b, "sdot_asimdelem_d"}, + }, + }, + + { "_gplkxy", + {20, 19, 18, 17, 16}, + { {"00000"_b, "sqneg_asimdmisc_r"}, + }, + }, + + { "_gpxltv", + {23, 18, 17, 16}, + { {"0000"_b, "uqxtnt_z_zz"}, + }, + }, + + { "_gqmjys", + {18, 17}, + { {"0x"_b, "st1_asisdlsop_sx1_r1s"}, + {"10"_b, "st1_asisdlsop_sx1_r1s"}, + {"11"_b, "st1_asisdlsop_s1_i1s"}, + }, + }, + + { "_grgrpt", + {18}, + { {"1"_b, "fmaxv_v_p_z"}, + }, + }, + + { "_grjzyl", + {30}, + { {"0"_b, "bl_only_branch_imm"}, + {"1"_b, "_hjtsgj"}, + }, + }, + + { "_grktgm", + {30, 23, 22, 19}, + { {"1001"_b, "aesd_b_cryptoaes"}, + {"xxx0"_b, "cnt_asimdmisc_r"}, + }, + }, + + { "_grmpht", + {20, 18, 17}, + { {"000"_b, "_mjjhqj"}, + }, + }, + + { "_grprpj", + {20, 19, 18, 17, 16}, + { {"11111"_b, "ldarb_lr32_ldstexcl"}, + }, + }, + + { "_grqsgp", + {23, 22, 4, 3, 2, 1, 0}, + { {"0000001"_b, "svc_ex_exception"}, + {"0000010"_b, "hvc_ex_exception"}, + {"0000011"_b, "smc_ex_exception"}, + {"0100000"_b, "hlt_ex_exception"}, + }, + }, + + { "_grsnms", + {20, 19, 18, 17, 16}, + { {"00010"_b, "scvtf_h32_float2fix"}, + {"00011"_b, "ucvtf_h32_float2fix"}, + {"11000"_b, "fcvtzs_32h_float2fix"}, + {"11001"_b, "fcvtzu_32h_float2fix"}, + }, + }, + + { "_grsslr", + {30, 23, 22, 11, 10, 4}, + { {"001000"_b, "ccmn_32_condcmp_reg"}, + {"001100"_b, "ccmn_32_condcmp_imm"}, + {"101000"_b, "ccmp_32_condcmp_reg"}, + {"101100"_b, "ccmp_32_condcmp_imm"}, + }, + }, + + { "_grvxrm", + {12}, + { {"0"_b, "st4_asisdlsop_dx4_r4d"}, + }, + }, + + { "_gshlgj", + {30, 23, 22, 13, 12, 11, 10}, + { {"100xxx1"_b, "ins_asimdins_iv_v"}, + {"x00xxx0"_b, "ext_asimdext_only"}, + {"x010001"_b, "fmaxnmp_asimdsamefp16_only"}, + {"x010101"_b, "faddp_asimdsamefp16_only"}, + {"x010111"_b, "fmul_asimdsamefp16_only"}, + {"x011001"_b, "fcmge_asimdsamefp16_only"}, + {"x011011"_b, "facge_asimdsamefp16_only"}, + {"x011101"_b, "fmaxp_asimdsamefp16_only"}, + {"x011111"_b, "fdiv_asimdsamefp16_only"}, + {"x110001"_b, "fminnmp_asimdsamefp16_only"}, + {"x110101"_b, "fabd_asimdsamefp16_only"}, + {"x111001"_b, "fcmgt_asimdsamefp16_only"}, + {"x111011"_b, "facgt_asimdsamefp16_only"}, + {"x111101"_b, "fminp_asimdsamefp16_only"}, + }, + }, + + { "_gshrzq", + {22, 20, 11}, + { {"010"_b, "decb_r_rs"}, + {"110"_b, "dech_r_rs"}, + }, + }, + + { "_gsjvmx", + {12}, + { {"0"_b, "st3_asisdlsop_dx3_r3d"}, + }, + }, + + { "_gslmjl", + {23, 22}, + { {"00"_b, "fcsel_s_floatsel"}, + {"01"_b, "fcsel_d_floatsel"}, + {"11"_b, "fcsel_h_floatsel"}, + }, + }, + + { "_gsnnnt", + {30, 23, 22, 13, 12, 11, 10}, + { {"000xx00"_b, "stlurb_32_ldapstl_unscaled"}, + {"001xx00"_b, "ldapurb_32_ldapstl_unscaled"}, + {"010xx00"_b, "ldapursb_64_ldapstl_unscaled"}, + {"011xx00"_b, "ldapursb_32_ldapstl_unscaled"}, + {"100xx00"_b, 
"stlurh_32_ldapstl_unscaled"}, + {"101xx00"_b, "ldapurh_32_ldapstl_unscaled"}, + {"110xx00"_b, "ldapursh_64_ldapstl_unscaled"}, + {"111xx00"_b, "ldapursh_32_ldapstl_unscaled"}, + {"x000001"_b, "cpyfprn_cpy_memcms"}, + {"x000101"_b, "cpyfpwtrn_cpy_memcms"}, + {"x001001"_b, "cpyfprtrn_cpy_memcms"}, + {"x001101"_b, "cpyfptrn_cpy_memcms"}, + {"x010001"_b, "cpyfmrn_cpy_memcms"}, + {"x010101"_b, "cpyfmwtrn_cpy_memcms"}, + {"x011001"_b, "cpyfmrtrn_cpy_memcms"}, + {"x011101"_b, "cpyfmtrn_cpy_memcms"}, + {"x100001"_b, "cpyfern_cpy_memcms"}, + {"x100101"_b, "cpyfewtrn_cpy_memcms"}, + {"x101001"_b, "cpyfertrn_cpy_memcms"}, + {"x101101"_b, "cpyfetrn_cpy_memcms"}, + {"x110001"_b, "sete_set_memcms"}, + {"x110101"_b, "setet_set_memcms"}, + {"x111001"_b, "seten_set_memcms"}, + {"x111101"_b, "setetn_set_memcms"}, + }, + }, + + { "_gsvlph", + {22, 4, 3}, + { {"00x"_b, "prfm_p_ldst_regoff"}, + {"010"_b, "prfm_p_ldst_regoff"}, + {"011"_b, "rprfm_r_ldst_regoff"}, + }, + }, + + { "_gtqnvr", + {30, 23, 22}, + { {"000"_b, "msub_32a_dp_3src"}, + }, + }, + + { "_gtsglj", + {11, 10, 9, 8, 7, 6}, + { {"000001"_b, "tcommit_only_barriers"}, + {"000011"_b, "sb_only_barriers"}, + {"xx1000"_b, "dsb_bon_barriers"}, + {"xxxx10"_b, "dmb_bo_barriers"}, + }, + }, + + { "_gtxpgx", + {30, 23, 13, 4}, + { {"0000"_b, "prfw_i_p_bz_s_x32_scaled"}, + {"0010"_b, "prfd_i_p_bz_s_x32_scaled"}, + {"010x"_b, "ld1h_z_p_bz_s_x32_scaled"}, + {"011x"_b, "ldff1h_z_p_bz_s_x32_scaled"}, + {"1000"_b, "prfw_i_p_bz_d_x32_scaled"}, + {"1010"_b, "prfd_i_p_bz_d_x32_scaled"}, + {"110x"_b, "ld1h_z_p_bz_d_x32_scaled"}, + {"111x"_b, "ldff1h_z_p_bz_d_x32_scaled"}, + }, + }, + + { "_gvpvjn", + {20, 19, 18, 17, 16, 13, 12}, + { {"0000000"_b, "rev_32_dp_1src"}, + {"0000001"_b, "ctz_32_dp_1src"}, + }, + }, + + { "_gvxjvz", + {23, 22, 12}, + { {"000"_b, "_tgvkhm"}, + {"001"_b, "_ktyrgy"}, + {"010"_b, "_gxzgtk"}, + {"011"_b, "_vlxrps"}, + {"110"_b, "_jqrmyp"}, + {"111"_b, "_ssypmm"}, + }, + }, + + { "_gxmnkl", + {23, 22}, + { {"10"_b, "cdot_z_zzzi_s"}, + {"11"_b, "cdot_z_zzzi_d"}, + }, + }, + + { "_gxqnph", + {23, 22, 13, 12, 11, 10}, + { {"0x1001"_b, "ucvtf_asimdshf_c"}, + {"0x1111"_b, "fcvtzu_asimdshf_c"}, + {"1000x0"_b, "fmlsl2_asimdelem_lh"}, + {"xx01x0"_b, "sqrdmlah_asimdelem_r"}, + {"xx10x0"_b, "udot_asimdelem_d"}, + {"xx11x0"_b, "sqrdmlsh_asimdelem_r"}, + }, + }, + + { "_gxzgtk", + {20, 19, 18, 17, 16, 13}, + { {"000000"_b, "fabs_d_floatdp1"}, + {"000010"_b, "fsqrt_d_floatdp1"}, + {"000110"_b, "fcvt_hd_floatdp1"}, + {"001000"_b, "frintp_d_floatdp1"}, + {"001010"_b, "frintz_d_floatdp1"}, + {"001110"_b, "frinti_d_floatdp1"}, + {"010000"_b, "frint32x_d_floatdp1"}, + {"010010"_b, "frint64x_d_floatdp1"}, + }, + }, + + { "_gyjphh", + {30, 23, 22, 11, 10}, + { {"00000"_b, "_plgrmv"}, + {"00001"_b, "_xmxhhg"}, + {"00100"_b, "_lmmjvx"}, + {"00110"_b, "_tmtgqm"}, + {"01100"_b, "_hvmyjz"}, + {"10000"_b, "_mgtxyt"}, + {"10100"_b, "_rkzlpp"}, + {"10110"_b, "_xqrgjj"}, + }, + }, + + { "_gyllxt", + {23}, + { {"0"_b, "_hzkxht"}, + }, + }, + + { "_gylmmr", + {30, 23, 22, 11, 10}, + { {"00010"_b, "str_b_ldst_regoff"}, + {"00110"_b, "ldr_b_ldst_regoff"}, + {"01010"_b, "str_q_ldst_regoff"}, + {"01110"_b, "ldr_q_ldst_regoff"}, + {"10010"_b, "str_h_ldst_regoff"}, + {"10110"_b, "ldr_h_ldst_regoff"}, + }, + }, + + { "_gyrjrm", + {20, 19, 18, 17, 16}, + { {"00000"_b, "cpy_z_p_v"}, + {"00001"_b, "compact_z_p_z"}, + {"00010"_b, "lasta_v_p_z"}, + {"00011"_b, "lastb_v_p_z"}, + {"00100"_b, "revb_z_z"}, + {"00101"_b, "revh_z_z"}, + {"00110"_b, "revw_z_z"}, + {"00111"_b, "rbit_z_p_z"}, + 
{"01000"_b, "clasta_z_p_zz"}, + {"01001"_b, "clastb_z_p_zz"}, + {"01010"_b, "clasta_v_p_z"}, + {"01011"_b, "clastb_v_p_z"}, + {"01100"_b, "splice_z_p_zz_des"}, + {"01101"_b, "splice_z_p_zz_con"}, + }, + }, + + { "_gyrkkz", + {30, 22, 11}, + { {"000"_b, "_nqjvmr"}, + {"001"_b, "_jjnvrv"}, + {"010"_b, "_yptgjg"}, + {"011"_b, "_vsyjql"}, + {"100"_b, "_lzqxgt"}, + {"110"_b, "_xvrvhv"}, + {"111"_b, "_ptstkz"}, + }, + }, + + { "_gyymmx", + {30, 13, 12}, + { {"000"_b, "stilp_32se_ldiappstilp"}, + {"001"_b, "stilp_32s_ldiappstilp"}, + {"100"_b, "stilp_64ss_ldiappstilp"}, + {"101"_b, "stilp_64s_ldiappstilp"}, + }, + }, + + { "_gzgpjp", + {23}, + { {"0"_b, "fmaxp_asimdsame_only"}, + {"1"_b, "fminp_asimdsame_only"}, + }, + }, + + { "_gznrjv", + {30, 23, 22, 19, 16}, + { {"10010"_b, "aese_b_cryptoaes"}, + {"xxx00"_b, "cls_asimdmisc_r"}, + {"xxx01"_b, "sqxtn_asimdmisc_n"}, + }, + }, + + { "_gzpkvm", + {30, 23, 22, 13, 12, 11, 10}, + { {"0000000"_b, "swpb_32_memop"}, + {"0000100"_b, "rcwclr_64_memop"}, + {"0001000"_b, "rcwswp_64_memop"}, + {"0001100"_b, "rcwset_64_memop"}, + {"000xx10"_b, "strb_32b_ldst_regoff"}, + {"0010000"_b, "swplb_32_memop"}, + {"0010100"_b, "rcwclrl_64_memop"}, + {"0011000"_b, "rcwswpl_64_memop"}, + {"0011100"_b, "rcwsetl_64_memop"}, + {"001xx10"_b, "ldrb_32b_ldst_regoff"}, + {"0100000"_b, "swpab_32_memop"}, + {"0100100"_b, "rcwclra_64_memop"}, + {"0101000"_b, "rcwswpa_64_memop"}, + {"0101100"_b, "rcwseta_64_memop"}, + {"010xx10"_b, "ldrsb_64b_ldst_regoff"}, + {"0110000"_b, "swpalb_32_memop"}, + {"0110100"_b, "rcwclral_64_memop"}, + {"0111000"_b, "rcwswpal_64_memop"}, + {"0111100"_b, "rcwsetal_64_memop"}, + {"011xx10"_b, "ldrsb_32b_ldst_regoff"}, + {"1000000"_b, "swph_32_memop"}, + {"1000100"_b, "rcwsclr_64_memop"}, + {"1001000"_b, "rcwsswp_64_memop"}, + {"1001100"_b, "rcwsset_64_memop"}, + {"100xx10"_b, "strh_32_ldst_regoff"}, + {"1010000"_b, "swplh_32_memop"}, + {"1010100"_b, "rcwsclrl_64_memop"}, + {"1011000"_b, "rcwsswpl_64_memop"}, + {"1011100"_b, "rcwssetl_64_memop"}, + {"101xx10"_b, "ldrh_32_ldst_regoff"}, + {"1100000"_b, "swpah_32_memop"}, + {"1100100"_b, "rcwsclra_64_memop"}, + {"1101000"_b, "rcwsswpa_64_memop"}, + {"1101100"_b, "rcwsseta_64_memop"}, + {"110xx10"_b, "ldrsh_64_ldst_regoff"}, + {"1110000"_b, "swpalh_32_memop"}, + {"1110100"_b, "rcwsclral_64_memop"}, + {"1111000"_b, "rcwsswpal_64_memop"}, + {"1111100"_b, "rcwssetal_64_memop"}, + {"111xx10"_b, "ldrsh_32_ldst_regoff"}, + }, + }, + + { "_gzqvnk", + {23, 12, 4, 3, 2, 1, 0}, + { {"1000000"_b, "ctermeq_rr"}, + {"1010000"_b, "ctermne_rr"}, + {"x10xxxx"_b, "whilewr_p_rr"}, + {"x11xxxx"_b, "whilerw_p_rr"}, + }, + }, + + { "_gzrtkk", + {18, 17}, + { {"0x"_b, "ld1_asisdlsep_r1_r1"}, + {"10"_b, "ld1_asisdlsep_r1_r1"}, + {"11"_b, "ld1_asisdlsep_i1_i1"}, + }, + }, + + { "_gzvylr", + {30, 13}, + { {"00"_b, "_rjyrnt"}, + {"01"_b, "_mzhsrq"}, + {"10"_b, "_prtvjm"}, + {"11"_b, "_zspprz"}, + }, + }, + + { "_gzzsgh", + {18}, + { {"0"_b, "ld3_asisdlso_b3_3b"}, + }, + }, + + { "_hgjgpm", + {30}, + { {"0"_b, "bic_64_log_shift"}, + {"1"_b, "eon_64_log_shift"}, + }, + }, + + { "_hgxtqy", + {30, 23, 22, 13}, + { {"0001"_b, "ldnt1w_z_p_ar_s_x32_unscaled"}, + {"0010"_b, "ld1rsh_z_p_bi_s64"}, + {"0011"_b, "ld1rsh_z_p_bi_s32"}, + {"0110"_b, "ld1rsb_z_p_bi_s64"}, + {"0111"_b, "ld1rsb_z_p_bi_s32"}, + {"1000"_b, "ldnt1sw_z_p_ar_d_64_unscaled"}, + {"1010"_b, "ld1sw_z_p_bz_d_64_unscaled"}, + {"1011"_b, "ldff1sw_z_p_bz_d_64_unscaled"}, + }, + }, + + { "_hhkqtn", + {20, 19, 18, 17, 16}, + { {"00000"_b, "lasta_r_p_z"}, + {"00001"_b, 
"lastb_r_p_z"}, + {"01000"_b, "cpy_z_p_r"}, + {"10000"_b, "clasta_r_p_z"}, + {"10001"_b, "clastb_r_p_z"}, + }, + }, + + { "_hhlmrg", + {23, 20, 19, 18, 17, 16, 13}, + { {"0000000"_b, "ld2r_asisdlso_r2"}, + {"0000001"_b, "ld4r_asisdlso_r4"}, + {"10xxxx0"_b, "ld2r_asisdlsop_rx2_r"}, + {"10xxxx1"_b, "ld4r_asisdlsop_rx4_r"}, + {"110xxx0"_b, "ld2r_asisdlsop_rx2_r"}, + {"110xxx1"_b, "ld4r_asisdlsop_rx4_r"}, + {"1110xx0"_b, "ld2r_asisdlsop_rx2_r"}, + {"1110xx1"_b, "ld4r_asisdlsop_rx4_r"}, + {"11110x0"_b, "ld2r_asisdlsop_rx2_r"}, + {"11110x1"_b, "ld4r_asisdlsop_rx4_r"}, + {"1111100"_b, "ld2r_asisdlsop_rx2_r"}, + {"1111101"_b, "ld4r_asisdlsop_rx4_r"}, + {"1111110"_b, "ld2r_asisdlsop_r2_i"}, + {"1111111"_b, "ld4r_asisdlsop_r4_i"}, + }, + }, + + { "_hhxpjz", + {18}, + { {"0"_b, "ld2_asisdlso_b2_2b"}, + }, + }, + + { "_hhxpyt", + {30, 23, 22, 13, 12, 11, 10}, + { {"000xx10"_b, "stlur_b_ldapstl_simd"}, + {"001xx10"_b, "ldapur_b_ldapstl_simd"}, + {"010xx10"_b, "stlur_q_ldapstl_simd"}, + {"011xx10"_b, "ldapur_q_ldapstl_simd"}, + {"100xx10"_b, "stlur_h_ldapstl_simd"}, + {"101xx10"_b, "ldapur_h_ldapstl_simd"}, + {"x000001"_b, "cpyp_cpy_memcms"}, + {"x000101"_b, "cpypwt_cpy_memcms"}, + {"x001001"_b, "cpyprt_cpy_memcms"}, + {"x001101"_b, "cpypt_cpy_memcms"}, + {"x010001"_b, "cpym_cpy_memcms"}, + {"x010101"_b, "cpymwt_cpy_memcms"}, + {"x011001"_b, "cpymrt_cpy_memcms"}, + {"x011101"_b, "cpymt_cpy_memcms"}, + {"x100001"_b, "cpye_cpy_memcms"}, + {"x100101"_b, "cpyewt_cpy_memcms"}, + {"x101001"_b, "cpyert_cpy_memcms"}, + {"x101101"_b, "cpyet_cpy_memcms"}, + {"x110001"_b, "setgp_set_memcms"}, + {"x110101"_b, "setgpt_set_memcms"}, + {"x111001"_b, "setgpn_set_memcms"}, + {"x111101"_b, "setgptn_set_memcms"}, + }, + }, + + { "_hjplhs", + {20, 19, 18, 17, 16, 13, 12}, + { {"1111100"_b, "ldaprb_32l_memop"}, + }, + }, + + { "_hjqryy", + {11, 10, 9, 8, 7, 6}, + { {"000000"_b, "wfit_only_systeminstrswithreg"}, + }, + }, + + { "_hjtsgj", + {23}, + { {"0"_b, "_pnkxsr"}, + }, + }, + + { "_hjvkkq", + {18}, + { {"0"_b, "ld4_asisdlsep_r4_r"}, + {"1"_b, "ld4_asisdlsep_i4_i"}, + }, + }, + + { "_hkgzsh", + {13, 12, 11, 10}, + { {"1111"_b, "_qvzvmq"}, + }, + }, + + { "_hkjjsr", + {12}, + { {"0"_b, "ld1_asisdlsop_dx1_r1d"}, + }, + }, + + { "_hkpjqm", + {30}, + { {"1"_b, "_qgyppr"}, + }, + }, + + { "_hkxlsm", + {18}, + { {"0"_b, "st4_asisdlsop_hx4_r4h"}, + {"1"_b, "st4_asisdlsop_h4_i4h"}, + }, + }, + + { "_hkxzqg", + {2, 1}, + { {"00"_b, "br_64_branch_reg"}, + }, + }, + + { "_hljttg", + {12}, + { {"0"_b, "ld2_asisdlsop_dx2_r2d"}, + }, + }, + + { "_hlljqz", + {30, 23, 22, 11, 10}, + { {"00000"_b, "stur_s_ldst_unscaled"}, + {"00001"_b, "str_s_ldst_immpost"}, + {"00011"_b, "str_s_ldst_immpre"}, + {"00100"_b, "ldur_s_ldst_unscaled"}, + {"00101"_b, "ldr_s_ldst_immpost"}, + {"00111"_b, "ldr_s_ldst_immpre"}, + {"10000"_b, "stur_d_ldst_unscaled"}, + {"10001"_b, "str_d_ldst_immpost"}, + {"10011"_b, "str_d_ldst_immpre"}, + {"10100"_b, "ldur_d_ldst_unscaled"}, + {"10101"_b, "ldr_d_ldst_immpost"}, + {"10111"_b, "ldr_d_ldst_immpre"}, + }, + }, + + { "_hlqvmm", + {20, 19, 18, 17, 16, 13, 12, 9, 8, 7, 6, 5}, + { {"000010011111"_b, "xpaci_64z_dp_1src"}, + }, + }, + + { "_hlxmpy", + {13, 12, 11, 10}, + { {"0000"_b, "umlal_asimddiff_l"}, + {"0001"_b, "sub_asimdsame_only"}, + {"0010"_b, "_hytrnv"}, + {"0011"_b, "cmeq_asimdsame_only"}, + {"0101"_b, "mls_asimdsame_only"}, + {"0110"_b, "_vjhrzl"}, + {"0111"_b, "pmul_asimdsame_only"}, + {"1000"_b, "umlsl_asimddiff_l"}, + {"1001"_b, "umaxp_asimdsame_only"}, + {"1010"_b, "_zpjzst"}, + {"1011"_b, 
"uminp_asimdsame_only"}, + {"1101"_b, "sqrdmulh_asimdsame_only"}, + {"1110"_b, "_jztlrz"}, + }, + }, + + { "_hlypvy", + {30, 23, 22}, + { {"000"_b, "smaddl_64wa_dp_3src"}, + {"010"_b, "umaddl_64wa_dp_3src"}, + }, + }, + + { "_hmgzjl", + {18}, + { {"0"_b, "st3_asisdlso_h3_3h"}, + }, + }, + + { "_hmjrmm", + {30, 23, 22, 20, 19, 18}, + { {"00xxxx"_b, "add_32_addsub_imm"}, + {"011000"_b, "smax_32_minmax_imm"}, + {"011001"_b, "umax_32u_minmax_imm"}, + {"011010"_b, "smin_32_minmax_imm"}, + {"011011"_b, "umin_32u_minmax_imm"}, + {"10xxxx"_b, "sub_32_addsub_imm"}, + }, + }, + + { "_hmpzzg", + {22, 20, 19, 18, 17, 16}, + { {"111000"_b, "fcmle_asisdmiscfp16_fz"}, + {"111001"_b, "frsqrte_asisdmiscfp16_r"}, + {"x00000"_b, "fcmle_asisdmisc_fz"}, + {"x00001"_b, "frsqrte_asisdmisc_r"}, + }, + }, + + { "_hmsgpj", + {13, 12, 10}, + { {"000"_b, "_hthxvr"}, + {"100"_b, "ptrue_p_s"}, + {"101"_b, "_kkvrzq"}, + {"110"_b, "_xxjrsy"}, + }, + }, + + { "_hmtmlq", + {4}, + { {"0"_b, "nor_p_p_pp_z"}, + {"1"_b, "nand_p_p_pp_z"}, + }, + }, + + { "_hnjrmp", + {4}, + { {"0"_b, "cmplo_p_p_zi"}, + {"1"_b, "cmpls_p_p_zi"}, + }, + }, + + { "_hnkyxy", + {18, 17, 16}, + { {"011"_b, "_ykpgyh"}, + }, + }, + + { "_hnsvjh", + {19}, + { {"0"_b, "_ntjrlg"}, + {"1"_b, "sysl_rc_systeminstrs"}, + }, + }, + + { "_hpmvzr", + {11, 10, 9, 8, 7, 6}, + { {"000000"_b, "yield_hi_hints"}, + {"000001"_b, "wfi_hi_hints"}, + {"000010"_b, "sevl_hi_hints"}, + {"000011"_b, "xpaclri_hi_hints"}, + {"001000"_b, "psb_hc_hints"}, + {"001001"_b, "gcsb_hd_hints"}, + {"001100"_b, "paciasp_hi_hints"}, + {"001101"_b, "pacibsp_hi_hints"}, + {"001110"_b, "autiasp_hi_hints"}, + {"001111"_b, "autibsp_hi_hints"}, + {"0x01xx"_b, "hint_hm_hints"}, + {"0x101x"_b, "hint_hm_hints"}, + {"10x0xx"_b, "hint_hm_hints"}, + {"10x1xx"_b, "hint_hm_hints"}, + {"1101xx"_b, "hint_hm_hints"}, + {"11101x"_b, "hint_hm_hints"}, + {"x100xx"_b, "hint_hm_hints"}, + {"x1100x"_b, "hint_hm_hints"}, + {"x111xx"_b, "hint_hm_hints"}, + }, + }, + + { "_hpqkhv", + {20, 19, 18, 17, 16}, + { {"11111"_b, "ldaxp_lp64_ldstexcl"}, + }, + }, + + { "_hptkrj", + {30, 22, 13, 12, 11, 10}, + { {"000001"_b, "rmif_only_rmif"}, + {"01xx00"_b, "ccmn_64_condcmp_reg"}, + {"01xx10"_b, "ccmn_64_condcmp_imm"}, + {"11xx00"_b, "ccmp_64_condcmp_reg"}, + {"11xx10"_b, "ccmp_64_condcmp_imm"}, + }, + }, + + { "_hqkhsy", + {12}, + { {"0"_b, "st3_asisdlsop_dx3_r3d"}, + }, + }, + + { "_hqkljv", + {30, 23, 22}, + { {"000"_b, "and_32_log_imm"}, + {"010"_b, "movn_32_movewide"}, + {"100"_b, "eor_32_log_imm"}, + {"110"_b, "movz_32_movewide"}, + }, + }, + + { "_hqnsvg", + {30}, + { {"0"_b, "add_64_addsub_shift"}, + {"1"_b, "sub_64_addsub_shift"}, + }, + }, + + { "_hqvhjp", + {22}, + { {"0"_b, "str_32_ldst_regoff"}, + {"1"_b, "ldr_32_ldst_regoff"}, + }, + }, + + { "_hrmsnk", + {9, 8, 7, 6, 5, 2, 1}, + { {"1111111"_b, "eretaa_64e_branch_reg"}, + }, + }, + + { "_hrpkqg", + {18, 17, 12}, + { {"000"_b, "st4_asisdlso_d4_4d"}, + }, + }, + + { "_hrxtnj", + {30, 23, 22, 13, 12, 11, 10}, + { {"1010000"_b, "sm3partw1_vvv4_cryptosha512_3"}, + {"1010001"_b, "sm3partw2_vvv4_cryptosha512_3"}, + {"1010010"_b, "sm4ekey_vvv4_cryptosha512_3"}, + }, + }, + + { "_hrxyts", + {23, 22, 20, 19, 18, 13}, + { {"00000x"_b, "orr_z_zi"}, + {"01000x"_b, "eor_z_zi"}, + {"10000x"_b, "and_z_zi"}, + {"11000x"_b, "dupm_z_i"}, + {"xx1xx0"_b, "fcpy_z_p_i"}, + }, + }, + + { "_hrymnk", + {18}, + { {"0"_b, "st1_asisdlso_h1_1h"}, + }, + }, + + { "_hspyhv", + {13, 12}, + { {"10"_b, "umax_64_dp_2src"}, + }, + }, + + { "_hsrkqt", + {13, 12, 11, 10}, + { {"0000"_b, 
"addhn_asimddiff_n"}, + {"0001"_b, "sshl_asimdsame_only"}, + {"0010"_b, "_qtgrzv"}, + {"0011"_b, "sqshl_asimdsame_only"}, + {"0100"_b, "sabal_asimddiff_l"}, + {"0101"_b, "srshl_asimdsame_only"}, + {"0110"_b, "_vhkpvn"}, + {"0111"_b, "sqrshl_asimdsame_only"}, + {"1000"_b, "subhn_asimddiff_n"}, + {"1001"_b, "smax_asimdsame_only"}, + {"1010"_b, "_rgztgm"}, + {"1011"_b, "smin_asimdsame_only"}, + {"1100"_b, "sabdl_asimddiff_l"}, + {"1101"_b, "sabd_asimdsame_only"}, + {"1110"_b, "_grmpht"}, + {"1111"_b, "saba_asimdsame_only"}, + }, + }, + + { "_hthxvr", + {23, 22, 9}, + { {"010"_b, "pfirst_p_p_p"}, + }, + }, + + { "_htjmmx", + {30}, + { {"0"_b, "tbnz_only_testbranch"}, + }, + }, + + { "_htkpks", + {30, 23, 22}, + { {"000"_b, "add_32_addsub_ext"}, + {"100"_b, "sub_32_addsub_ext"}, + }, + }, + + { "_htplsj", + {4}, + { {"0"_b, "cmpeq_p_p_zz"}, + {"1"_b, "cmpne_p_p_zz"}, + }, + }, + + { "_htqpks", + {30, 20, 19, 18, 17, 16, 13}, + { {"000000x"_b, "add_z_zi"}, + {"000001x"_b, "sub_z_zi"}, + {"000011x"_b, "subr_z_zi"}, + {"000100x"_b, "sqadd_z_zi"}, + {"000101x"_b, "uqadd_z_zi"}, + {"000110x"_b, "sqsub_z_zi"}, + {"000111x"_b, "uqsub_z_zi"}, + {"0010000"_b, "smax_z_zi"}, + {"0010010"_b, "umax_z_zi"}, + {"0010100"_b, "smin_z_zi"}, + {"0010110"_b, "umin_z_zi"}, + {"0100000"_b, "mul_z_zi"}, + {"011000x"_b, "dup_z_i"}, + {"0110010"_b, "fdup_z_i"}, + {"1xxxxx0"_b, "fnmad_z_p_zzz"}, + {"1xxxxx1"_b, "fnmsb_z_p_zzz"}, + }, + }, + + { "_htrtzz", + {30, 23, 22, 13, 12, 11, 10}, + { {"000xx10"_b, "stlur_b_ldapstl_simd"}, + {"001xx10"_b, "ldapur_b_ldapstl_simd"}, + {"010xx10"_b, "stlur_q_ldapstl_simd"}, + {"011xx10"_b, "ldapur_q_ldapstl_simd"}, + {"100xx10"_b, "stlur_h_ldapstl_simd"}, + {"101xx10"_b, "ldapur_h_ldapstl_simd"}, + {"x000001"_b, "cpypwn_cpy_memcms"}, + {"x000101"_b, "cpypwtwn_cpy_memcms"}, + {"x001001"_b, "cpyprtwn_cpy_memcms"}, + {"x001101"_b, "cpyptwn_cpy_memcms"}, + {"x010001"_b, "cpymwn_cpy_memcms"}, + {"x010101"_b, "cpymwtwn_cpy_memcms"}, + {"x011001"_b, "cpymrtwn_cpy_memcms"}, + {"x011101"_b, "cpymtwn_cpy_memcms"}, + {"x100001"_b, "cpyewn_cpy_memcms"}, + {"x100101"_b, "cpyewtwn_cpy_memcms"}, + {"x101001"_b, "cpyertwn_cpy_memcms"}, + {"x101101"_b, "cpyetwn_cpy_memcms"}, + {"x110001"_b, "setgm_set_memcms"}, + {"x110101"_b, "setgmt_set_memcms"}, + {"x111001"_b, "setgmn_set_memcms"}, + {"x111101"_b, "setgmtn_set_memcms"}, + }, + }, + + { "_htsjxj", + {23, 22, 13, 12, 11, 10}, + { {"001010"_b, "pmullb_z_zz_q"}, + {"001011"_b, "pmullt_z_zz_q"}, + {"101010"_b, "pmullb_z_zz"}, + {"101011"_b, "pmullt_z_zz"}, + {"x11010"_b, "pmullb_z_zz"}, + {"x11011"_b, "pmullt_z_zz"}, + {"xx0000"_b, "saddwb_z_zz"}, + {"xx0001"_b, "saddwt_z_zz"}, + {"xx0010"_b, "uaddwb_z_zz"}, + {"xx0011"_b, "uaddwt_z_zz"}, + {"xx0100"_b, "ssubwb_z_zz"}, + {"xx0101"_b, "ssubwt_z_zz"}, + {"xx0110"_b, "usubwb_z_zz"}, + {"xx0111"_b, "usubwt_z_zz"}, + {"xx1000"_b, "sqdmullb_z_zz"}, + {"xx1001"_b, "sqdmullt_z_zz"}, + {"xx1100"_b, "smullb_z_zz"}, + {"xx1101"_b, "smullt_z_zz"}, + {"xx1110"_b, "umullb_z_zz"}, + {"xx1111"_b, "umullt_z_zz"}, + }, + }, + + { "_hvhrsq", + {30, 23, 22}, + { {"000"_b, "str_32_ldst_pos"}, + {"001"_b, "ldr_32_ldst_pos"}, + {"010"_b, "ldrsw_64_ldst_pos"}, + {"100"_b, "str_64_ldst_pos"}, + {"101"_b, "ldr_64_ldst_pos"}, + {"110"_b, "prfm_p_ldst_pos"}, + }, + }, + + { "_hvmyjz", + {13, 12}, + { {"00"_b, "subps_64s_dp_2src"}, + }, + }, + + { "_hvnhmh", + {30, 23, 22, 13, 12, 11, 10}, + { {"000xx00"_b, "stlurb_32_ldapstl_unscaled"}, + {"001xx00"_b, "ldapurb_32_ldapstl_unscaled"}, + {"010xx00"_b, 
"ldapursb_64_ldapstl_unscaled"}, + {"011xx00"_b, "ldapursb_32_ldapstl_unscaled"}, + {"100xx00"_b, "stlurh_32_ldapstl_unscaled"}, + {"101xx00"_b, "ldapurh_32_ldapstl_unscaled"}, + {"110xx00"_b, "ldapursh_64_ldapstl_unscaled"}, + {"111xx00"_b, "ldapursh_32_ldapstl_unscaled"}, + {"x000001"_b, "cpyfpwn_cpy_memcms"}, + {"x000101"_b, "cpyfpwtwn_cpy_memcms"}, + {"x001001"_b, "cpyfprtwn_cpy_memcms"}, + {"x001101"_b, "cpyfptwn_cpy_memcms"}, + {"x010001"_b, "cpyfmwn_cpy_memcms"}, + {"x010101"_b, "cpyfmwtwn_cpy_memcms"}, + {"x011001"_b, "cpyfmrtwn_cpy_memcms"}, + {"x011101"_b, "cpyfmtwn_cpy_memcms"}, + {"x100001"_b, "cpyfewn_cpy_memcms"}, + {"x100101"_b, "cpyfewtwn_cpy_memcms"}, + {"x101001"_b, "cpyfertwn_cpy_memcms"}, + {"x101101"_b, "cpyfetwn_cpy_memcms"}, + {"x110001"_b, "setm_set_memcms"}, + {"x110101"_b, "setmt_set_memcms"}, + {"x111001"_b, "setmn_set_memcms"}, + {"x111101"_b, "setmtn_set_memcms"}, + }, + }, + + { "_hvrjyt", + {30, 23, 22}, + { {"000"_b, "sbfm_32m_bitfield"}, + {"010"_b, "extr_32_extract"}, + {"100"_b, "ubfm_32m_bitfield"}, + }, + }, + + { "_hvyjnk", + {11}, + { {"0"_b, "sqrdmulh_z_zzi_h"}, + }, + }, + + { "_hxgngr", + {23, 22, 13}, + { {"100"_b, "fmlsl_asimdelem_lh"}, + {"xx1"_b, "smlsl_asimdelem_l"}, + }, + }, + + { "_hxlznn", + {30, 23, 22, 13}, + { {"0000"_b, "ld1sh_z_p_br_s32"}, + {"0001"_b, "ldff1sh_z_p_br_s32"}, + {"0010"_b, "ld1w_z_p_br_u64"}, + {"0011"_b, "ldff1w_z_p_br_u64"}, + {"0100"_b, "ld1sb_z_p_br_s32"}, + {"0101"_b, "ldff1sb_z_p_br_s32"}, + {"0110"_b, "ld1d_z_p_br_u64"}, + {"0111"_b, "ldff1d_z_p_br_u64"}, + {"1001"_b, "st2w_z_p_br_contiguous"}, + {"1010"_b, "st1w_z_p_br"}, + {"1011"_b, "st4w_z_p_br_contiguous"}, + {"1100"_b, "str_z_bi"}, + {"1101"_b, "st2d_z_p_br_contiguous"}, + {"1110"_b, "st1d_z_p_br"}, + {"1111"_b, "st4d_z_p_br_contiguous"}, + }, + }, + + { "_hxrnns", + {23, 22, 13, 12}, + { {"0000"_b, "fmul_s_floatdp2"}, + {"0001"_b, "fdiv_s_floatdp2"}, + {"0010"_b, "fadd_s_floatdp2"}, + {"0011"_b, "fsub_s_floatdp2"}, + {"0100"_b, "fmul_d_floatdp2"}, + {"0101"_b, "fdiv_d_floatdp2"}, + {"0110"_b, "fadd_d_floatdp2"}, + {"0111"_b, "fsub_d_floatdp2"}, + {"1100"_b, "fmul_h_floatdp2"}, + {"1101"_b, "fdiv_h_floatdp2"}, + {"1110"_b, "fadd_h_floatdp2"}, + {"1111"_b, "fsub_h_floatdp2"}, + }, + }, + + { "_hxxqks", + {23}, + { {"0"_b, "fmla_asimdsame_only"}, + {"1"_b, "fmls_asimdsame_only"}, + }, + }, + + { "_hxxxyy", + {13, 12}, + { {"00"_b, "cpyfm_cpy_memcms"}, + {"01"_b, "cpyfmwt_cpy_memcms"}, + {"10"_b, "cpyfmrt_cpy_memcms"}, + {"11"_b, "cpyfmt_cpy_memcms"}, + }, + }, + + { "_hykhmt", + {20, 19, 18, 17, 16}, + { {"00000"_b, "saddv_r_p_z"}, + {"00001"_b, "uaddv_r_p_z"}, + {"01000"_b, "smaxv_r_p_z"}, + {"01001"_b, "umaxv_r_p_z"}, + {"01010"_b, "sminv_r_p_z"}, + {"01011"_b, "uminv_r_p_z"}, + {"1000x"_b, "movprfx_z_p_z"}, + {"11000"_b, "orv_r_p_z"}, + {"11001"_b, "eorv_r_p_z"}, + {"11010"_b, "andv_r_p_z"}, + }, + }, + + { "_hynprk", + {18}, + { {"0"_b, "st2_asisdlso_h2_2h"}, + }, + }, + + { "_hyskth", + {22}, + { {"0"_b, "str_64_ldst_regoff"}, + {"1"_b, "ldr_64_ldst_regoff"}, + }, + }, + + { "_hytrnv", + {23, 22, 20, 19, 18, 17, 16}, + { {"0111001"_b, "frinta_asimdmiscfp16_r"}, + {"0x00001"_b, "frinta_asimdmisc_r"}, + {"xx00000"_b, "cmge_asimdmisc_z"}, + }, + }, + + { "_hzkglv", + {30, 23, 22, 13}, + { {"0000"_b, "ld1b_z_p_br_u8"}, + {"0001"_b, "ldff1b_z_p_br_u8"}, + {"0010"_b, "ld1b_z_p_br_u32"}, + {"0011"_b, "ldff1b_z_p_br_u32"}, + {"0100"_b, "ld1sw_z_p_br_s64"}, + {"0101"_b, "ldff1sw_z_p_br_s64"}, + {"0110"_b, "ld1h_z_p_br_u32"}, + {"0111"_b, "ldff1h_z_p_br_u32"}, + 
{"1001"_b, "stnt1b_z_p_br_contiguous"}, + {"1011"_b, "st3b_z_p_br_contiguous"}, + {"10x0"_b, "st1b_z_p_br"}, + {"1101"_b, "stnt1h_z_p_br_contiguous"}, + {"1111"_b, "st3h_z_p_br_contiguous"}, + {"11x0"_b, "st1h_z_p_br"}, + }, + }, + + { "_hzkxht", + {22, 20}, + { {"00"_b, "_zrxhzq"}, + {"01"_b, "msr_sr_systemmove"}, + {"10"_b, "_krllsy"}, + {"11"_b, "msrr_sr_systemmovepr"}, + }, + }, + + { "_hzsxkp", + {30, 13}, + { {"00"_b, "_jlrrlt"}, + {"01"_b, "_jrlynj"}, + {"10"_b, "_ghpxms"}, + {"11"_b, "_nyjtng"}, + }, + }, + + { "_jggxjz", + {13, 12}, + { {"00"_b, "cmtst_asisdsame_only"}, + }, + }, + + { "_jgklkt", + {30}, + { {"0"_b, "ldrsw_64_loadlit"}, + {"1"_b, "prfm_p_loadlit"}, + }, + }, + + { "_jgmlpk", + {4}, + { {"0"_b, "match_p_p_zz"}, + {"1"_b, "nmatch_p_p_zz"}, + }, + }, + + { "_jgsryt", + {20, 19, 18, 17, 16}, + { {"11111"_b, "ldaxrh_lr32_ldstexcl"}, + }, + }, + + { "_jgxqzr", + {13, 12, 11, 10}, + { {"0000"_b, "_xzjvkv"}, + {"0001"_b, "_nqjtqn"}, + {"0011"_b, "_qzmrnj"}, + {"0100"_b, "_xptsns"}, + {"0101"_b, "_qpgxxr"}, + {"0110"_b, "uzp1_asimdperm_only"}, + {"0111"_b, "_rsnvnr"}, + {"1000"_b, "_yszlqj"}, + {"1001"_b, "_lzvxxj"}, + {"1010"_b, "trn1_asimdperm_only"}, + {"1011"_b, "_zmrhxx"}, + {"1100"_b, "_skytvx"}, + {"1101"_b, "_smptxh"}, + {"1110"_b, "zip1_asimdperm_only"}, + {"1111"_b, "_rjvgkl"}, + }, + }, + + { "_jgyhrh", + {4}, + { {"0"_b, "cmplo_p_p_zi"}, + {"1"_b, "cmpls_p_p_zi"}, + }, + }, + + { "_jhkkgv", + {10}, + { {"0"_b, "_qvgtlh"}, + }, + }, + + { "_jhllmn", + {4}, + { {"0"_b, "cmpge_p_p_zz"}, + {"1"_b, "cmpgt_p_p_zz"}, + }, + }, + + { "_jhltlz", + {20, 19, 18, 17, 16}, + { {"11111"_b, "ldxr_lr64_ldstexcl"}, + }, + }, + + { "_jjgpxz", + {9, 8, 7, 6, 5}, + { {"00000"_b, "fmov_h_floatimm"}, + }, + }, + + { "_jjnvrv", + {20, 19, 18, 17, 16, 13, 12, 4, 3, 2, 1, 0}, + { {"000000001101"_b, "setf8_only_setf"}, + }, + }, + + { "_jkkqvy", + {22, 20, 11}, + { {"100"_b, "uqinch_z_zs"}, + {"101"_b, "uqdech_z_zs"}, + {"110"_b, "dech_z_zs"}, + }, + }, + + { "_jkvsxy", + {30, 23, 22, 13, 12, 11, 10}, + { {"0000010"_b, "rcwcas_c64_rcwcomswap"}, + {"0000011"_b, "rcwcasp_c64_rcwcomswappr"}, + {"0000100"_b, "ldclrp_128_memop_128"}, + {"0001100"_b, "ldsetp_128_memop_128"}, + {"0010010"_b, "rcwcasl_c64_rcwcomswap"}, + {"0010011"_b, "rcwcaspl_c64_rcwcomswappr"}, + {"0010100"_b, "ldclrpl_128_memop_128"}, + {"0011100"_b, "ldsetpl_128_memop_128"}, + {"0100010"_b, "rcwcasa_c64_rcwcomswap"}, + {"0100011"_b, "rcwcaspa_c64_rcwcomswappr"}, + {"0100100"_b, "ldclrpa_128_memop_128"}, + {"0101100"_b, "ldsetpa_128_memop_128"}, + {"0110010"_b, "rcwcasal_c64_rcwcomswap"}, + {"0110011"_b, "rcwcaspal_c64_rcwcomswappr"}, + {"0110100"_b, "ldclrpal_128_memop_128"}, + {"0111100"_b, "ldsetpal_128_memop_128"}, + {"1000010"_b, "rcwscas_c64_rcwcomswap"}, + {"1000011"_b, "rcwscasp_c64_rcwcomswappr"}, + {"1010010"_b, "rcwscasl_c64_rcwcomswap"}, + {"1010011"_b, "rcwscaspl_c64_rcwcomswappr"}, + {"1100010"_b, "rcwscasa_c64_rcwcomswap"}, + {"1100011"_b, "rcwscaspa_c64_rcwcomswappr"}, + {"1110010"_b, "rcwscasal_c64_rcwcomswap"}, + {"1110011"_b, "rcwscaspal_c64_rcwcomswappr"}, + }, + }, + + { "_jkvvtp", + {30, 23, 22}, + { {"100"_b, "bcax_vvv16_crypto4"}, + }, + }, + + { "_jkxyvn", + {23}, + { {"0"_b, "fadd_asimdsame_only"}, + {"1"_b, "fsub_asimdsame_only"}, + }, + }, + + { "_jlnjsy", + {23, 22, 20, 19, 18, 17, 16, 13, 12, 11}, + { {"0011111001"_b, "_ssjnph"}, + }, + }, + + { "_jlrrlt", + {11, 10, 4}, + { {"000"_b, "whilege_p_p_rr"}, + {"001"_b, "whilegt_p_p_rr"}, + {"010"_b, "whilelt_p_p_rr"}, + {"011"_b, "whilele_p_p_rr"}, 
+ {"100"_b, "whilehs_p_p_rr"}, + {"101"_b, "whilehi_p_p_rr"}, + {"110"_b, "whilelo_p_p_rr"}, + {"111"_b, "whilels_p_p_rr"}, + }, + }, + + { "_jmvgsp", + {22, 20, 11}, + { {"100"_b, "sqinch_z_zs"}, + {"101"_b, "sqdech_z_zs"}, + {"110"_b, "inch_z_zs"}, + }, + }, + + { "_jmxstz", + {13, 12, 11, 10}, + { {"0000"_b, "sqdecp_z_p_z"}, + {"0010"_b, "sqdecp_r_p_r_sx"}, + {"0011"_b, "sqdecp_r_p_r_x"}, + }, + }, + + { "_jnktqs", + {18, 17}, + { {"00"_b, "ld1_asisdlso_s1_1s"}, + }, + }, + + { "_jnnmjk", + {23, 22, 20, 19, 16, 13, 12}, + { {"0111100"_b, "fcvtas_asisdmiscfp16_r"}, + {"0111101"_b, "scvtf_asisdmiscfp16_r"}, + {"0x00100"_b, "fcvtas_asisdmisc_r"}, + {"0x00101"_b, "scvtf_asisdmisc_r"}, + {"0x10000"_b, "fmaxnmp_asisdpair_only_h"}, + {"0x10001"_b, "faddp_asisdpair_only_h"}, + {"0x10011"_b, "fmaxp_asisdpair_only_h"}, + {"1111000"_b, "fcmgt_asisdmiscfp16_fz"}, + {"1111001"_b, "fcmeq_asisdmiscfp16_fz"}, + {"1111010"_b, "fcmlt_asisdmiscfp16_fz"}, + {"1111101"_b, "frecpe_asisdmiscfp16_r"}, + {"1111111"_b, "frecpx_asisdmiscfp16_r"}, + {"1x00000"_b, "fcmgt_asisdmisc_fz"}, + {"1x00001"_b, "fcmeq_asisdmisc_fz"}, + {"1x00010"_b, "fcmlt_asisdmisc_fz"}, + {"1x00101"_b, "frecpe_asisdmisc_r"}, + {"1x00111"_b, "frecpx_asisdmisc_r"}, + {"1x10000"_b, "fminnmp_asisdpair_only_h"}, + {"1x10011"_b, "fminp_asisdpair_only_h"}, + }, + }, + + { "_jpvmkz", + {18, 17}, + { {"00"_b, "_jnnmjk"}, + }, + }, + + { "_jqhvhn", + {30, 23, 11, 10}, + { {"0000"_b, "_ygtpyl"}, + {"0010"_b, "_hqvhjp"}, + {"0100"_b, "_xkylhh"}, + {"0110"_b, "_mnxgml"}, + {"1000"_b, "_qyyrqq"}, + {"1001"_b, "ldraa_64_ldst_pac"}, + {"1010"_b, "_kpsnsk"}, + {"1011"_b, "ldraa_64w_ldst_pac"}, + {"1100"_b, "_tyzpxk"}, + {"1101"_b, "ldrab_64_ldst_pac"}, + {"1111"_b, "ldrab_64w_ldst_pac"}, + }, + }, + + { "_jqlgts", + {30, 23, 22}, + { {"000"_b, "str_s_ldst_pos"}, + {"001"_b, "ldr_s_ldst_pos"}, + {"100"_b, "str_d_ldst_pos"}, + {"101"_b, "ldr_d_ldst_pos"}, + }, + }, + + { "_jqrmyp", + {20, 19, 18, 17, 16, 13}, + { {"000000"_b, "fabs_h_floatdp1"}, + {"000010"_b, "fsqrt_h_floatdp1"}, + {"000100"_b, "fcvt_dh_floatdp1"}, + {"001000"_b, "frintp_h_floatdp1"}, + {"001010"_b, "frintz_h_floatdp1"}, + {"001110"_b, "frinti_h_floatdp1"}, + }, + }, + + { "_jqsjtj", + {18}, + { {"0"_b, "st2_asisdlse_r2"}, + }, + }, + + { "_jqtksx", + {30, 23, 22, 13, 12, 11, 10}, + { {"0000000"_b, "ldsmaxb_32_memop"}, + {"0000100"_b, "ldsminb_32_memop"}, + {"0000x10"_b, "strb_32b_ldst_regoff"}, + {"0001000"_b, "ldumaxb_32_memop"}, + {"0001100"_b, "lduminb_32_memop"}, + {"0001x10"_b, "strb_32bl_ldst_regoff"}, + {"0010000"_b, "ldsmaxlb_32_memop"}, + {"0010100"_b, "ldsminlb_32_memop"}, + {"0010x10"_b, "ldrb_32b_ldst_regoff"}, + {"0011000"_b, "ldumaxlb_32_memop"}, + {"0011100"_b, "lduminlb_32_memop"}, + {"0011x10"_b, "ldrb_32bl_ldst_regoff"}, + {"0100000"_b, "ldsmaxab_32_memop"}, + {"0100100"_b, "ldsminab_32_memop"}, + {"0100x10"_b, "ldrsb_64b_ldst_regoff"}, + {"0101000"_b, "ldumaxab_32_memop"}, + {"0101100"_b, "lduminab_32_memop"}, + {"0101x10"_b, "ldrsb_64bl_ldst_regoff"}, + {"0110000"_b, "ldsmaxalb_32_memop"}, + {"0110100"_b, "ldsminalb_32_memop"}, + {"0110x10"_b, "ldrsb_32b_ldst_regoff"}, + {"0111000"_b, "ldumaxalb_32_memop"}, + {"0111100"_b, "lduminalb_32_memop"}, + {"0111x10"_b, "ldrsb_32bl_ldst_regoff"}, + {"1000000"_b, "ldsmaxh_32_memop"}, + {"1000100"_b, "ldsminh_32_memop"}, + {"1001000"_b, "ldumaxh_32_memop"}, + {"1001100"_b, "lduminh_32_memop"}, + {"100xx10"_b, "strh_32_ldst_regoff"}, + {"1010000"_b, "ldsmaxlh_32_memop"}, + {"1010100"_b, "ldsminlh_32_memop"}, + {"1011000"_b, 
"ldumaxlh_32_memop"}, + {"1011100"_b, "lduminlh_32_memop"}, + {"101xx10"_b, "ldrh_32_ldst_regoff"}, + {"1100000"_b, "ldsmaxah_32_memop"}, + {"1100100"_b, "ldsminah_32_memop"}, + {"1101000"_b, "ldumaxah_32_memop"}, + {"1101100"_b, "lduminah_32_memop"}, + {"110xx10"_b, "ldrsh_64_ldst_regoff"}, + {"1110000"_b, "ldsmaxalh_32_memop"}, + {"1110100"_b, "ldsminalh_32_memop"}, + {"1111000"_b, "ldumaxalh_32_memop"}, + {"1111100"_b, "lduminalh_32_memop"}, + {"111xx10"_b, "ldrsh_32_ldst_regoff"}, + }, + }, + + { "_jqvpqx", + {23, 22}, + { {"00"_b, "fmlal_asimdsame_f"}, + {"10"_b, "fmlsl_asimdsame_f"}, + }, + }, + + { "_jqxqql", + {22, 20, 11}, + { {"000"_b, "uqincw_z_zs"}, + {"001"_b, "uqdecw_z_zs"}, + {"010"_b, "decw_z_zs"}, + {"100"_b, "uqincd_z_zs"}, + {"101"_b, "uqdecd_z_zs"}, + {"110"_b, "decd_z_zs"}, + }, + }, + + { "_jrlynj", + {11, 10}, + { {"00"_b, "_gzqvnk"}, + }, + }, + + { "_jrnxzh", + {12}, + { {"0"_b, "cmla_z_zzz"}, + {"1"_b, "sqrdcmlah_z_zzz"}, + }, + }, + + { "_jrqxvn", + {23, 22, 13, 12, 11, 10}, + { {"000000"_b, "tbl_asimdtbl_l3_3"}, + {"000100"_b, "tbx_asimdtbl_l3_3"}, + {"001000"_b, "tbl_asimdtbl_l4_4"}, + {"001100"_b, "tbx_asimdtbl_l4_4"}, + {"xx0110"_b, "uzp2_asimdperm_only"}, + {"xx1010"_b, "trn2_asimdperm_only"}, + {"xx1110"_b, "zip2_asimdperm_only"}, + }, + }, + + { "_jrxtzg", + {30, 23, 22, 11, 10}, + { {"10001"_b, "stg_64spost_ldsttags"}, + {"10010"_b, "stg_64soffset_ldsttags"}, + {"10011"_b, "stg_64spre_ldsttags"}, + {"10100"_b, "ldg_64loffset_ldsttags"}, + {"10101"_b, "stzg_64spost_ldsttags"}, + {"10110"_b, "stzg_64soffset_ldsttags"}, + {"10111"_b, "stzg_64spre_ldsttags"}, + {"11001"_b, "st2g_64spost_ldsttags"}, + {"11010"_b, "st2g_64soffset_ldsttags"}, + {"11011"_b, "st2g_64spre_ldsttags"}, + {"11101"_b, "stz2g_64spost_ldsttags"}, + {"11110"_b, "stz2g_64soffset_ldsttags"}, + {"11111"_b, "stz2g_64spre_ldsttags"}, + }, + }, + + { "_jsqvtn", + {23, 22, 11, 10}, + { {"0000"_b, "_lnsjqy"}, + {"0001"_b, "stg_64spost_ldsttags"}, + {"0010"_b, "stg_64soffset_ldsttags"}, + {"0011"_b, "stg_64spre_ldsttags"}, + {"0100"_b, "ldg_64loffset_ldsttags"}, + {"0101"_b, "stzg_64spost_ldsttags"}, + {"0110"_b, "stzg_64soffset_ldsttags"}, + {"0111"_b, "stzg_64spre_ldsttags"}, + {"1000"_b, "_myzhml"}, + {"1001"_b, "st2g_64spost_ldsttags"}, + {"1010"_b, "st2g_64soffset_ldsttags"}, + {"1011"_b, "st2g_64spre_ldsttags"}, + {"1100"_b, "_mjstgz"}, + {"1101"_b, "stz2g_64spost_ldsttags"}, + {"1110"_b, "stz2g_64soffset_ldsttags"}, + {"1111"_b, "stz2g_64spre_ldsttags"}, + }, + }, + + { "_jvkxtj", + {30, 23, 22}, + { {"000"_b, "stnp_q_ldstnapair_offs"}, + {"001"_b, "ldnp_q_ldstnapair_offs"}, + {"010"_b, "stp_q_ldstpair_post"}, + {"011"_b, "ldp_q_ldstpair_post"}, + }, + }, + + { "_jvnsgt", + {18}, + { {"0"_b, "ld4_asisdlsop_bx4_r4b"}, + {"1"_b, "ld4_asisdlsop_b4_i4b"}, + }, + }, + + { "_jvpjsm", + {20, 19, 18, 17, 16, 13, 12}, + { {"0000000"_b, "_xrnqyn"}, + }, + }, + + { "_jxgpgg", + {13, 12}, + { {"00"_b, "udiv_64_dp_2src"}, + {"10"_b, "asrv_64_dp_2src"}, + }, + }, + + { "_jxgqqz", + {30}, + { {"0"_b, "cbz_64_compbranch"}, + }, + }, + + { "_jxltqm", + {13, 12}, + { {"01"_b, "sqdmull_asisddiff_only"}, + }, + }, + + { "_jxszhy", + {23, 22, 11}, + { {"000"_b, "_rqhryp"}, + }, + }, + + { "_jxyskn", + {13, 12, 11, 10}, + { {"0000"_b, "uqincp_z_p_z"}, + {"0010"_b, "uqincp_r_p_r_uw"}, + {"0011"_b, "uqincp_r_p_r_x"}, + }, + }, + + { "_jymnkk", + {23, 22, 12, 11, 10}, + { {"01000"_b, "bfdot_z_zzzi"}, + {"100x0"_b, "fmlalb_z_zzzi_s"}, + {"100x1"_b, "fmlalt_z_zzzi_s"}, + {"110x0"_b, "bfmlalb_z_zzzi"}, + {"110x1"_b, 
"bfmlalt_z_zzzi"}, + }, + }, + + { "_jyzhnh", + {18}, + { {"0"_b, "st1_asisdlsop_hx1_r1h"}, + {"1"_b, "st1_asisdlsop_h1_i1h"}, + }, + }, + + { "_jzjvtv", + {19, 18, 17, 16, 4}, + { {"00000"_b, "brkbs_p_p_p_z"}, + }, + }, + + { "_jzkqhn", + {23, 22, 12, 11, 10}, + { {"10000"_b, "fmlslb_z_zzz"}, + {"10001"_b, "fmlslt_z_zzz"}, + }, + }, + + { "_jztlrz", + {23, 22, 20, 19, 18, 17, 16}, + { {"0111001"_b, "fcvtmu_asimdmiscfp16_r"}, + {"0x00001"_b, "fcvtmu_asimdmisc_r"}, + {"1111001"_b, "fcvtzu_asimdmiscfp16_r"}, + {"1x00001"_b, "fcvtzu_asimdmisc_r"}, + {"xx00000"_b, "neg_asimdmisc_r"}, + }, + }, + + { "_jztspt", + {18, 17}, + { {"00"_b, "st4_asisdlso_s4_4s"}, + }, + }, + + { "_kgmqkh", + {30, 23, 22, 13}, + { {"0000"_b, "ld1w_z_p_ai_s"}, + {"0001"_b, "ldff1w_z_p_ai_s"}, + {"0010"_b, "ld1rw_z_p_bi_u32"}, + {"0011"_b, "ld1rw_z_p_bi_u64"}, + {"0110"_b, "ld1rsb_z_p_bi_s16"}, + {"0111"_b, "ld1rd_z_p_bi_u64"}, + {"1000"_b, "ld1w_z_p_ai_d"}, + {"1001"_b, "ldff1w_z_p_ai_d"}, + {"1010"_b, "ld1w_z_p_bz_d_64_scaled"}, + {"1011"_b, "ldff1w_z_p_bz_d_64_scaled"}, + {"1100"_b, "ld1d_z_p_ai_d"}, + {"1101"_b, "ldff1d_z_p_ai_d"}, + {"1110"_b, "ld1d_z_p_bz_d_64_scaled"}, + {"1111"_b, "ldff1d_z_p_bz_d_64_scaled"}, + }, + }, + + { "_kgpgly", + {23, 22, 10}, + { {"100"_b, "smlslb_z_zzzi_s"}, + {"101"_b, "smlslt_z_zzzi_s"}, + {"110"_b, "smlslb_z_zzzi_d"}, + {"111"_b, "smlslt_z_zzzi_d"}, + }, + }, + + { "_kgpsjz", + {13, 12, 11, 10}, + { {"0000"_b, "saddl_asimddiff_l"}, + {"0001"_b, "shadd_asimdsame_only"}, + {"0010"_b, "_rkrlsy"}, + {"0011"_b, "sqadd_asimdsame_only"}, + {"0100"_b, "saddw_asimddiff_w"}, + {"0101"_b, "srhadd_asimdsame_only"}, + {"0110"_b, "_vypgrt"}, + {"0111"_b, "_xygvjp"}, + {"1000"_b, "ssubl_asimddiff_l"}, + {"1001"_b, "shsub_asimdsame_only"}, + {"1010"_b, "_pjhmvy"}, + {"1011"_b, "sqsub_asimdsame_only"}, + {"1100"_b, "ssubw_asimddiff_w"}, + {"1101"_b, "cmgt_asimdsame_only"}, + {"1110"_b, "_ygghnn"}, + {"1111"_b, "cmge_asimdsame_only"}, + }, + }, + + { "_kgygky", + {30, 23, 22}, + { {"000"_b, "sbfm_32m_bitfield"}, + {"100"_b, "ubfm_32m_bitfield"}, + }, + }, + + { "_khjvqq", + {22, 11}, + { {"00"_b, "sqrdmulh_z_zzi_s"}, + {"10"_b, "sqrdmulh_z_zzi_d"}, + }, + }, + + { "_khrsgv", + {22, 20, 19, 13, 12}, + { {"0x100"_b, "sri_asisdshf_r"}, + {"0x101"_b, "sli_asisdshf_r"}, + {"0x110"_b, "sqshlu_asisdshf_r"}, + {"0x111"_b, "uqshl_asisdshf_r"}, + {"10x00"_b, "sri_asisdshf_r"}, + {"10x01"_b, "sli_asisdshf_r"}, + {"10x10"_b, "sqshlu_asisdshf_r"}, + {"10x11"_b, "uqshl_asisdshf_r"}, + {"11100"_b, "sri_asisdshf_r"}, + {"11101"_b, "sli_asisdshf_r"}, + {"11110"_b, "sqshlu_asisdshf_r"}, + {"11111"_b, "uqshl_asisdshf_r"}, + {"x1000"_b, "sri_asisdshf_r"}, + {"x1001"_b, "sli_asisdshf_r"}, + {"x1010"_b, "sqshlu_asisdshf_r"}, + {"x1011"_b, "uqshl_asisdshf_r"}, + }, + }, + + { "_khtsmx", + {18}, + { {"0"_b, "ld4_asisdlsop_hx4_r4h"}, + {"1"_b, "ld4_asisdlsop_h4_i4h"}, + }, + }, + + { "_khvvtr", + {20, 19, 18, 17, 16, 13, 12}, + { {"0000000"_b, "rev16_32_dp_1src"}, + {"0000001"_b, "cls_32_dp_1src"}, + }, + }, + + { "_kjpxvh", + {20, 19, 18}, + { {"000"_b, "_yyrkmn"}, + }, + }, + + { "_kjqynn", + {4}, + { {"0"_b, "cmphs_p_p_zi"}, + {"1"_b, "cmphi_p_p_zi"}, + }, + }, + + { "_kjsrkm", + {18, 17, 16, 13, 12, 11, 10, 9, 8, 7, 4, 3, 2, 1, 0}, + { {"000000000011111"_b, "_zztypv"}, + }, + }, + + { "_kkkltp", + {30}, + { {"1"_b, "_sqkkqy"}, + }, + }, + + { "_kkpxth", + {18}, + { {"0"_b, "ld1_asisdlsop_bx1_r1b"}, + {"1"_b, "ld1_asisdlsop_b1_i1b"}, + }, + }, + + { "_kktglv", + {30, 13, 12}, + { {"000"_b, "_njvkjq"}, + {"001"_b, 
"_rpzykx"}, + {"010"_b, "_zzvxvh"}, + {"011"_b, "_yqxnzl"}, + {"100"_b, "_gxmnkl"}, + {"110"_b, "_lkxgjy"}, + {"111"_b, "_vjmklj"}, + }, + }, + + { "_kktzst", + {13, 12, 11, 10}, + { {"1111"_b, "frsqrts_asisdsamefp16_only"}, + }, + }, + + { "_kkvrzq", + {23, 22, 9, 8, 7, 6, 5}, + { {"0000000"_b, "pfalse_p"}, + }, + }, + + { "_klrksl", + {30, 23, 22, 19, 16}, + { {"10010"_b, "aesmc_b_cryptoaes"}, + {"x0x01"_b, "fcvtn_asimdmisc_n"}, + {"x1001"_b, "bfcvtn_asimdmisc_4s"}, + {"xxx00"_b, "sadalp_asimdmisc_p"}, + }, + }, + + { "_klsmsv", + {30, 23, 22, 10}, + { {"1001"_b, "ins_asimdins_iv_v"}, + {"x000"_b, "ext_asimdext_only"}, + }, + }, + + { "_kltlmp", + {22, 20, 19, 13, 12}, + { {"0x100"_b, "ushr_asisdshf_r"}, + {"0x101"_b, "usra_asisdshf_r"}, + {"0x110"_b, "urshr_asisdshf_r"}, + {"0x111"_b, "ursra_asisdshf_r"}, + {"10x00"_b, "ushr_asisdshf_r"}, + {"10x01"_b, "usra_asisdshf_r"}, + {"10x10"_b, "urshr_asisdshf_r"}, + {"10x11"_b, "ursra_asisdshf_r"}, + {"11100"_b, "ushr_asisdshf_r"}, + {"11101"_b, "usra_asisdshf_r"}, + {"11110"_b, "urshr_asisdshf_r"}, + {"11111"_b, "ursra_asisdshf_r"}, + {"x1000"_b, "ushr_asisdshf_r"}, + {"x1001"_b, "usra_asisdshf_r"}, + {"x1010"_b, "urshr_asisdshf_r"}, + {"x1011"_b, "ursra_asisdshf_r"}, + }, + }, + + { "_klxxgx", + {20, 19, 18, 17, 16, 13}, + { {"000000"_b, "fmov_s_floatdp1"}, + {"000010"_b, "fneg_s_floatdp1"}, + {"001000"_b, "frintn_s_floatdp1"}, + {"001010"_b, "frintm_s_floatdp1"}, + {"001100"_b, "frinta_s_floatdp1"}, + {"001110"_b, "frintx_s_floatdp1"}, + {"010000"_b, "frint32z_s_floatdp1"}, + {"010010"_b, "frint64z_s_floatdp1"}, + }, + }, + + { "_kmqlmz", + {18}, + { {"0"_b, "st1_asisdlso_b1_1b"}, + }, + }, + + { "_knkjnz", + {30, 23, 22, 20, 13}, + { {"00001"_b, "ld1sh_z_p_bi_s32"}, + {"00011"_b, "ldnf1sh_z_p_bi_s32"}, + {"00101"_b, "ld1w_z_p_bi_u64"}, + {"00111"_b, "ldnf1w_z_p_bi_u64"}, + {"01001"_b, "ld1sb_z_p_bi_s32"}, + {"01011"_b, "ldnf1sb_z_p_bi_s32"}, + {"01101"_b, "ld1d_z_p_bi_u64"}, + {"01111"_b, "ldnf1d_z_p_bi_u64"}, + {"100x0"_b, "st1w_z_p_bz_d_x32_scaled"}, + {"100x1"_b, "st1w_z_p_bz_d_64_scaled"}, + {"101x0"_b, "st1w_z_p_bz_s_x32_scaled"}, + {"101x1"_b, "st1w_z_p_ai_s"}, + {"110x0"_b, "st1d_z_p_bz_d_x32_scaled"}, + {"110x1"_b, "st1d_z_p_bz_d_64_scaled"}, + }, + }, + + { "_knpjtt", + {20, 19, 18, 17, 16}, + { {"11111"_b, "ldxrh_lr32_ldstexcl"}, + }, + }, + + { "_kpgghm", + {22, 20, 19, 18, 17, 16, 13, 12}, + { {"01111100"_b, "ldapr_64l_memop"}, + }, + }, + + { "_kpnlmr", + {20, 19, 18, 17, 16}, + { {"00000"_b, "clz_asimdmisc_r"}, + {"00001"_b, "uqxtn_asimdmisc_n"}, + }, + }, + + { "_kppzvh", + {30, 23, 22, 13, 12, 11, 10}, + { {"000xx10"_b, "stlur_s_ldapstl_simd"}, + {"001xx10"_b, "ldapur_s_ldapstl_simd"}, + {"100xx10"_b, "stlur_d_ldapstl_simd"}, + {"101xx10"_b, "ldapur_d_ldapstl_simd"}, + {"x000001"_b, "cpyprn_cpy_memcms"}, + {"x000101"_b, "cpypwtrn_cpy_memcms"}, + {"x001001"_b, "cpyprtrn_cpy_memcms"}, + {"x001101"_b, "cpyptrn_cpy_memcms"}, + {"x010001"_b, "cpymrn_cpy_memcms"}, + {"x010101"_b, "cpymwtrn_cpy_memcms"}, + {"x011001"_b, "cpymrtrn_cpy_memcms"}, + {"x011101"_b, "cpymtrn_cpy_memcms"}, + {"x100001"_b, "cpyern_cpy_memcms"}, + {"x100101"_b, "cpyewtrn_cpy_memcms"}, + {"x101001"_b, "cpyertrn_cpy_memcms"}, + {"x101101"_b, "cpyetrn_cpy_memcms"}, + {"x110001"_b, "setge_set_memcms"}, + {"x110101"_b, "setget_set_memcms"}, + {"x111001"_b, "setgen_set_memcms"}, + {"x111101"_b, "setgetn_set_memcms"}, + }, + }, + + { "_kpsnsk", + {22}, + { {"0"_b, "str_64_ldst_regoff"}, + {"1"_b, "ldr_64_ldst_regoff"}, + }, + }, + + { "_kqsqly", + {18}, + { {"0"_b, 
"st1_asisdlsep_r2_r2"}, + {"1"_b, "st1_asisdlsep_i2_i2"}, + }, + }, + + { "_kqstrr", + {18, 17, 12}, + { {"000"_b, "st3_asisdlso_d3_3d"}, + }, + }, + + { "_kqvljp", + {18, 17, 16}, + { {"000"_b, "fabd_z_p_zz"}, + {"001"_b, "fscale_z_p_zz"}, + {"010"_b, "fmulx_z_p_zz"}, + {"100"_b, "fdivr_z_p_zz"}, + {"101"_b, "fdiv_z_p_zz"}, + }, + }, + + { "_kqzmtr", + {30, 23, 22, 20, 13}, + { {"00001"_b, "ld1b_z_p_bi_u16"}, + {"00011"_b, "ldnf1b_z_p_bi_u16"}, + {"00101"_b, "ld1b_z_p_bi_u64"}, + {"00111"_b, "ldnf1b_z_p_bi_u64"}, + {"01001"_b, "ld1h_z_p_bi_u16"}, + {"01011"_b, "ldnf1h_z_p_bi_u16"}, + {"01101"_b, "ld1h_z_p_bi_u64"}, + {"01111"_b, "ldnf1h_z_p_bi_u64"}, + {"101x1"_b, "st1b_z_p_ai_s"}, + {"110x0"_b, "st1h_z_p_bz_d_x32_scaled"}, + {"110x1"_b, "st1h_z_p_bz_d_64_scaled"}, + {"111x0"_b, "st1h_z_p_bz_s_x32_scaled"}, + {"111x1"_b, "st1h_z_p_ai_s"}, + }, + }, + + { "_krllsy", + {19}, + { {"1"_b, "sysp_cr_syspairinstrs"}, + }, + }, + + { "_krtvhr", + {12, 10}, + { {"00"_b, "_xvmxrg"}, + {"01"_b, "_mvvngm"}, + {"10"_b, "_mkyyng"}, + {"11"_b, "_vvzsmg"}, + }, + }, + + { "_krvxxx", + {12, 9, 8, 7, 6, 5}, + { {"100000"_b, "_skjqrx"}, + }, + }, + + { "_ksgpqz", + {30}, + { {"1"_b, "_trjmmn"}, + }, + }, + + { "_kshtnj", + {23, 22, 13, 12, 11, 10}, + { {"01x1x0"_b, "fcmla_asimdelem_c_h"}, + {"0x0001"_b, "sri_asimdshf_r"}, + {"0x0101"_b, "sli_asimdshf_r"}, + {"0x1001"_b, "sqshlu_asimdshf_r"}, + {"0x1101"_b, "uqshl_asimdshf_r"}, + {"10x1x0"_b, "fcmla_asimdelem_c_s"}, + {"xx00x0"_b, "mls_asimdelem_r"}, + {"xx10x0"_b, "umlsl_asimdelem_l"}, + }, + }, + + { "_kskqmz", + {20, 19, 18, 17, 16, 13, 12}, + { {"0000011"_b, "sqabs_asisdmisc_r"}, + {"0000100"_b, "sqxtn_asisdmisc_n"}, + }, + }, + + { "_ksrkkn", + {22}, + { {"0"_b, "str_32_ldst_regoff"}, + {"1"_b, "ldr_32_ldst_regoff"}, + }, + }, + + { "_kssltr", + {13, 12, 11, 10}, + { {"0000"_b, "smull_asimddiff_l"}, + {"0001"_b, "_pstgvl"}, + {"0010"_b, "_ztlysk"}, + {"0011"_b, "_hxxqks"}, + {"0100"_b, "sqdmull_asimddiff_l"}, + {"0101"_b, "_jkxyvn"}, + {"0110"_b, "_lvsrnj"}, + {"0111"_b, "_vvgnhm"}, + {"1000"_b, "pmull_asimddiff_l"}, + {"1001"_b, "_skqzyg"}, + {"1010"_b, "_szqlsn"}, + {"1011"_b, "_jqvpqx"}, + {"1101"_b, "_yyvjqv"}, + {"1110"_b, "_xlyppq"}, + {"1111"_b, "_mhljkp"}, + }, + }, + + { "_ktngnm", + {12, 10}, + { {"00"_b, "_hxgngr"}, + {"01"_b, "_ngkgsg"}, + {"10"_b, "_plrggq"}, + {"11"_b, "_kxztps"}, + }, + }, + + { "_ktpxrr", + {30, 23, 22, 13, 12, 11, 10}, + { {"0001111"_b, "casp_cp32_ldstexcl"}, + {"0011111"_b, "caspa_cp32_ldstexcl"}, + {"0101111"_b, "casb_c32_ldstexcl"}, + {"0111111"_b, "casab_c32_ldstexcl"}, + {"1001111"_b, "casp_cp64_ldstexcl"}, + {"1011111"_b, "caspa_cp64_ldstexcl"}, + {"1101111"_b, "cash_c32_ldstexcl"}, + {"1111111"_b, "casah_c32_ldstexcl"}, + }, + }, + + { "_ktsgth", + {23, 22}, + { {"00"_b, "fcsel_s_floatsel"}, + {"01"_b, "fcsel_d_floatsel"}, + {"11"_b, "fcsel_h_floatsel"}, + }, + }, + + { "_ktyppm", + {11, 10}, + { {"00"_b, "asr_z_zw"}, + {"01"_b, "lsr_z_zw"}, + {"11"_b, "lsl_z_zw"}, + }, + }, + + { "_ktyrgy", + {9, 8, 7, 6, 5}, + { {"00000"_b, "fmov_s_floatimm"}, + }, + }, + + { "_kvnqhn", + {22, 20, 11}, + { {"000"_b, "sqincw_r_rs_sx"}, + {"001"_b, "sqdecw_r_rs_sx"}, + {"010"_b, "sqincw_r_rs_x"}, + {"011"_b, "sqdecw_r_rs_x"}, + {"100"_b, "sqincd_r_rs_sx"}, + {"101"_b, "sqdecd_r_rs_sx"}, + {"110"_b, "sqincd_r_rs_x"}, + {"111"_b, "sqdecd_r_rs_x"}, + }, + }, + + { "_kxhmlx", + {23, 22, 20, 19, 18, 17, 16}, + { {"0111001"_b, "fcvtnu_asisdmiscfp16_r"}, + {"0x00001"_b, "fcvtnu_asisdmisc_r"}, + {"1111001"_b, "fcvtpu_asisdmiscfp16_r"}, + 
{"1x00001"_b, "fcvtpu_asisdmisc_r"}, + }, + }, + + { "_kxmjsh", + {20, 19, 18, 17, 16}, + { {"10000"_b, "fmaxp_asisdpair_only_sd"}, + }, + }, + + { "_kxmxxm", + {23}, + { {"0"_b, "fcmge_asimdsame_only"}, + {"1"_b, "fcmgt_asimdsame_only"}, + }, + }, + + { "_kxpqhv", + {30, 23, 22, 11, 10}, + { {"10001"_b, "stg_64spost_ldsttags"}, + {"10010"_b, "stg_64soffset_ldsttags"}, + {"10011"_b, "stg_64spre_ldsttags"}, + {"10100"_b, "ldg_64loffset_ldsttags"}, + {"10101"_b, "stzg_64spost_ldsttags"}, + {"10110"_b, "stzg_64soffset_ldsttags"}, + {"10111"_b, "stzg_64spre_ldsttags"}, + {"11001"_b, "st2g_64spost_ldsttags"}, + {"11010"_b, "st2g_64soffset_ldsttags"}, + {"11011"_b, "st2g_64spre_ldsttags"}, + {"11101"_b, "stz2g_64spost_ldsttags"}, + {"11110"_b, "stz2g_64soffset_ldsttags"}, + {"11111"_b, "stz2g_64spre_ldsttags"}, + }, + }, + + { "_kxtqjh", + {23, 22}, + { {"01"_b, "_mhnlsy"}, + {"10"_b, "xar_vvv2_crypto3_imm6"}, + {"11"_b, "_spxvlt"}, + }, + }, + + { "_kxvvkq", + {30, 23, 13}, + { {"000"_b, "ld1b_z_p_bz_s_x32_unscaled"}, + {"001"_b, "ldff1b_z_p_bz_s_x32_unscaled"}, + {"010"_b, "ld1h_z_p_bz_s_x32_unscaled"}, + {"011"_b, "ldff1h_z_p_bz_s_x32_unscaled"}, + {"100"_b, "ld1b_z_p_bz_d_x32_unscaled"}, + {"101"_b, "ldff1b_z_p_bz_d_x32_unscaled"}, + {"110"_b, "ld1h_z_p_bz_d_x32_unscaled"}, + {"111"_b, "ldff1h_z_p_bz_d_x32_unscaled"}, + }, + }, + + { "_kxztps", + {23, 22, 20, 19, 13, 11}, + { {"0000x0"_b, "orr_asimdimm_l_sl"}, + {"00x100"_b, "shl_asimdshf_r"}, + {"00x110"_b, "sqshl_asimdshf_r"}, + {"010x00"_b, "shl_asimdshf_r"}, + {"010x10"_b, "sqshl_asimdshf_r"}, + {"011100"_b, "shl_asimdshf_r"}, + {"011110"_b, "sqshl_asimdshf_r"}, + {"0x1000"_b, "shl_asimdshf_r"}, + {"0x1010"_b, "sqshl_asimdshf_r"}, + }, + }, + + { "_kyhhqt", + {23, 20, 19, 18, 17, 16, 13}, + { {"0000000"_b, "ld1r_asisdlso_r1"}, + {"0000001"_b, "ld3r_asisdlso_r3"}, + {"10xxxx0"_b, "ld1r_asisdlsop_rx1_r"}, + {"10xxxx1"_b, "ld3r_asisdlsop_rx3_r"}, + {"110xxx0"_b, "ld1r_asisdlsop_rx1_r"}, + {"110xxx1"_b, "ld3r_asisdlsop_rx3_r"}, + {"1110xx0"_b, "ld1r_asisdlsop_rx1_r"}, + {"1110xx1"_b, "ld3r_asisdlsop_rx3_r"}, + {"11110x0"_b, "ld1r_asisdlsop_rx1_r"}, + {"11110x1"_b, "ld3r_asisdlsop_rx3_r"}, + {"1111100"_b, "ld1r_asisdlsop_rx1_r"}, + {"1111101"_b, "ld3r_asisdlsop_rx3_r"}, + {"1111110"_b, "ld1r_asisdlsop_r1_i"}, + {"1111111"_b, "ld3r_asisdlsop_r3_i"}, + }, + }, + + { "_kyjxrr", + {30, 13}, + { {"00"_b, "_qtxpky"}, + {"01"_b, "_hnjrmp"}, + {"11"_b, "_vzjvtv"}, + }, + }, + + { "_kynxnz", + {30, 23, 22, 20, 19}, + { {"0xxxx"_b, "bl_only_branch_imm"}, + {"10001"_b, "sysl_rc_systeminstrs"}, + {"1001x"_b, "mrs_rs_systemmove"}, + {"1011x"_b, "mrrs_rs_systemmovepr"}, + }, + }, + + { "_kyspnn", + {22}, + { {"0"_b, "sqdmullb_z_zzi_s"}, + {"1"_b, "sqdmullb_z_zzi_d"}, + }, + }, + + { "_kyxrqg", + {10}, + { {"0"_b, "uabalb_z_zzz"}, + {"1"_b, "uabalt_z_zzz"}, + }, + }, + + { "_kzjxxk", + {20, 19, 18, 17, 16, 13, 12}, + { {"0000000"_b, "rbit_32_dp_1src"}, + {"0000001"_b, "clz_32_dp_1src"}, + {"0000010"_b, "abs_32_dp_1src"}, + }, + }, + + { "_kzksnv", + {13, 12}, + { {"00"_b, "sqshl_asisdsame_only"}, + {"01"_b, "sqrshl_asisdsame_only"}, + }, + }, + + { "_kzmvpk", + {23, 22, 10}, + { {"100"_b, "smlalb_z_zzzi_s"}, + {"101"_b, "smlalt_z_zzzi_s"}, + {"110"_b, "smlalb_z_zzzi_d"}, + {"111"_b, "smlalt_z_zzzi_d"}, + }, + }, + + { "_kzprzt", + {9, 8, 7, 6, 5, 2, 1}, + { {"1111111"_b, "retaa_64e_branch_reg"}, + }, + }, + + { "_kzpyzy", + {30, 23, 22, 13}, + { {"0000"_b, "ld1sh_z_p_br_s64"}, + {"0001"_b, "ldff1sh_z_p_br_s64"}, + {"0010"_b, "ld1w_z_p_br_u32"}, + {"0011"_b, 
"ldff1w_z_p_br_u32"}, + {"0100"_b, "ld1sb_z_p_br_s64"}, + {"0101"_b, "ldff1sb_z_p_br_s64"}, + {"0110"_b, "ld1sb_z_p_br_s16"}, + {"0111"_b, "ldff1sb_z_p_br_s16"}, + {"1001"_b, "stnt1w_z_p_br_contiguous"}, + {"1010"_b, "st1w_z_p_br"}, + {"1011"_b, "st3w_z_p_br_contiguous"}, + {"1100"_b, "str_z_bi"}, + {"1101"_b, "stnt1d_z_p_br_contiguous"}, + {"1111"_b, "st3d_z_p_br_contiguous"}, + }, + }, + + { "_kzyzrh", + {16, 13, 12}, + { {"000"_b, "rev16_64_dp_1src"}, + {"001"_b, "cls_64_dp_1src"}, + {"100"_b, "pacib_64p_dp_1src"}, + {"101"_b, "autib_64p_dp_1src"}, + {"110"_b, "_vpyvjr"}, + {"111"_b, "_sntnsm"}, + }, + }, + + { "_lgmlmt", + {18, 17}, + { {"00"_b, "ld3_asisdlse_r3"}, + }, + }, + + { "_lgyqpk", + {18, 17}, + { {"0x"_b, "st2_asisdlsop_sx2_r2s"}, + {"10"_b, "st2_asisdlsop_sx2_r2s"}, + {"11"_b, "st2_asisdlsop_s2_i2s"}, + }, + }, + + { "_lgzlyq", + {30, 23, 11, 10}, + { {"1001"_b, "_kltlmp"}, + }, + }, + + { "_ljljkv", + {30, 23, 22, 13, 12, 11, 10}, + { {"0001100"_b, "and_z_zz"}, + {"0001110"_b, "eor3_z_zzz"}, + {"0001111"_b, "bsl_z_zzz"}, + {"0011100"_b, "orr_z_zz"}, + {"0011110"_b, "bcax_z_zzz"}, + {"0011111"_b, "bsl1n_z_zzz"}, + {"0101100"_b, "eor_z_zz"}, + {"0101111"_b, "bsl2n_z_zzz"}, + {"0111100"_b, "bic_z_zz"}, + {"0111111"_b, "nbsl_z_zzz"}, + {"0xx0000"_b, "add_z_zz"}, + {"0xx0001"_b, "sub_z_zz"}, + {"0xx0100"_b, "sqadd_z_zz"}, + {"0xx0101"_b, "uqadd_z_zz"}, + {"0xx0110"_b, "sqsub_z_zz"}, + {"0xx0111"_b, "uqsub_z_zz"}, + {"0xx1101"_b, "xar_z_zzi"}, + {"10x0010"_b, "mla_z_zzzi_h"}, + {"10x0011"_b, "mls_z_zzzi_h"}, + {"10x0100"_b, "sqrdmlah_z_zzzi_h"}, + {"10x0101"_b, "sqrdmlsh_z_zzzi_h"}, + {"1100000"_b, "sdot_z_zzzi_s"}, + {"1100001"_b, "udot_z_zzzi_s"}, + {"1100010"_b, "mla_z_zzzi_s"}, + {"1100011"_b, "mls_z_zzzi_s"}, + {"1100100"_b, "sqrdmlah_z_zzzi_s"}, + {"1100101"_b, "sqrdmlsh_z_zzzi_s"}, + {"1100110"_b, "usdot_z_zzzi_s"}, + {"1100111"_b, "sudot_z_zzzi_s"}, + {"11010x0"_b, "sqdmlalb_z_zzzi_s"}, + {"11010x1"_b, "sqdmlalt_z_zzzi_s"}, + {"11011x0"_b, "sqdmlslb_z_zzzi_s"}, + {"11011x1"_b, "sqdmlslt_z_zzzi_s"}, + {"1110000"_b, "sdot_z_zzzi_d"}, + {"1110001"_b, "udot_z_zzzi_d"}, + {"1110010"_b, "mla_z_zzzi_d"}, + {"1110011"_b, "mls_z_zzzi_d"}, + {"1110100"_b, "sqrdmlah_z_zzzi_d"}, + {"1110101"_b, "sqrdmlsh_z_zzzi_d"}, + {"11110x0"_b, "sqdmlalb_z_zzzi_d"}, + {"11110x1"_b, "sqdmlalt_z_zzzi_d"}, + {"11111x0"_b, "sqdmlslb_z_zzzi_d"}, + {"11111x1"_b, "sqdmlslt_z_zzzi_d"}, + }, + }, + + { "_ljtvgz", + {23, 22, 20, 19, 18, 17, 16}, + { {"0111001"_b, "ucvtf_asimdmiscfp16_r"}, + {"0x00001"_b, "ucvtf_asimdmisc_r"}, + {"1111000"_b, "fcmle_asimdmiscfp16_fz"}, + {"1111001"_b, "frsqrte_asimdmiscfp16_r"}, + {"1x00000"_b, "fcmle_asimdmisc_fz"}, + {"1x00001"_b, "frsqrte_asimdmisc_r"}, + }, + }, + + { "_lkpprr", + {30, 23, 22}, + { {"000"_b, "sbfm_32m_bitfield"}, + {"100"_b, "ubfm_32m_bitfield"}, + }, + }, + + { "_lkttgy", + {10}, + { {"0"_b, "saba_z_zzz"}, + {"1"_b, "uaba_z_zzz"}, + }, + }, + + { "_lkxgjy", + {23, 22}, + { {"10"_b, "cmla_z_zzzi_h"}, + {"11"_b, "cmla_z_zzzi_s"}, + }, + }, + + { "_lkzyzv", + {30}, + { {"0"_b, "bl_only_branch_imm"}, + {"1"_b, "_vgxtvy"}, + }, + }, + + { "_lljxgp", + {1}, + { {"1"_b, "blrabz_64_branch_reg"}, + }, + }, + + { "_llpsqq", + {13, 12, 10}, + { {"001"_b, "_zjjxjl"}, + {"100"_b, "ptrues_p_s"}, + {"110"_b, "_njngkk"}, + }, + }, + + { "_llqtkj", + {18, 17}, + { {"00"_b, "ld2_asisdlso_s2_2s"}, + }, + }, + + { "_lltzjg", + {18, 17, 12}, + { {"0x0"_b, "ld2_asisdlsop_dx2_r2d"}, + {"100"_b, "ld2_asisdlsop_dx2_r2d"}, + {"110"_b, "ld2_asisdlsop_d2_i2d"}, + }, + }, + + { 
"_llvrrk", + {23, 18, 17, 16}, + { {"0000"_b, "sqxtnb_z_zz"}, + }, + }, + + { "_lmmjvx", + {4}, + { {"0"_b, "ccmn_64_condcmp_reg"}, + }, + }, + + { "_lmmkzh", + {4, 3, 2, 1, 0}, + { {"11111"_b, "_nntvzj"}, + }, + }, + + { "_lmyxhr", + {9, 4}, + { {"00"_b, "_gnqhsl"}, + }, + }, + + { "_lnkrzt", + {18, 4}, + { {"00"_b, "fcmne_p_p_z0"}, + }, + }, + + { "_lnmhqq", + {22, 13, 12}, + { {"000"_b, "ldsmaxa_64_memop"}, + {"001"_b, "ldsmina_64_memop"}, + {"010"_b, "ldumaxa_64_memop"}, + {"011"_b, "ldumina_64_memop"}, + {"100"_b, "ldsmaxal_64_memop"}, + {"101"_b, "ldsminal_64_memop"}, + {"110"_b, "ldumaxal_64_memop"}, + {"111"_b, "lduminal_64_memop"}, + }, + }, + + { "_lnntps", + {30, 11, 10}, + { {"000"_b, "_gvxjvz"}, + {"001"_b, "_ypzllm"}, + {"011"_b, "_gslmjl"}, + {"100"_b, "_jxltqm"}, + {"101"_b, "_shqyqv"}, + {"110"_b, "_jpvmkz"}, + {"111"_b, "_pxnyvl"}, + }, + }, + + { "_lnsjqy", + {20, 19, 18, 17, 16, 13, 12}, + { {"0000000"_b, "stzgm_64bulk_ldsttags"}, + }, + }, + + { "_lplpkk", + {30, 23, 22, 13, 12, 11, 10}, + { {"1101001"_b, "smmla_asimdsame2_g"}, + {"1101011"_b, "usmmla_asimdsame2_g"}, + {"x100111"_b, "usdot_asimdsame2_d"}, + {"xxx0101"_b, "sdot_asimdsame2_d"}, + }, + }, + + { "_lplzxv", + {13, 12, 11, 10}, + { {"0000"_b, "umull_asimddiff_l"}, + {"0001"_b, "_yxgmrs"}, + {"0010"_b, "_vyqxyz"}, + {"0011"_b, "_snzvtt"}, + {"0101"_b, "_svgvjm"}, + {"0110"_b, "_ljtvgz"}, + {"0111"_b, "_snhmgn"}, + {"1001"_b, "_kxmxxm"}, + {"1010"_b, "_nkpyjg"}, + {"1011"_b, "_gmsqqz"}, + {"1101"_b, "_gzgpjp"}, + {"1110"_b, "_nzmqhv"}, + {"1111"_b, "_xgxtlr"}, + }, + }, + + { "_lptrlg", + {13, 12}, + { {"00"_b, "sqadd_asisdsame_only"}, + {"10"_b, "sqsub_asisdsame_only"}, + {"11"_b, "cmge_asisdsame_only"}, + }, + }, + + { "_lpzgvs", + {20, 19, 18, 17, 16}, + { {"11111"_b, "stllr_sl32_ldstexcl"}, + }, + }, + + { "_lqjlkj", + {13, 12}, + { {"00"_b, "cpyfp_cpy_memcms"}, + {"01"_b, "cpyfpwt_cpy_memcms"}, + {"10"_b, "cpyfprt_cpy_memcms"}, + {"11"_b, "cpyfpt_cpy_memcms"}, + }, + }, + + { "_lqknkn", + {18, 17}, + { {"0x"_b, "st4_asisdlsop_sx4_r4s"}, + {"10"_b, "st4_asisdlsop_sx4_r4s"}, + {"11"_b, "st4_asisdlsop_s4_i4s"}, + }, + }, + + { "_lqlrxp", + {20, 19, 18, 17, 16}, + { {"11111"_b, "stlrb_sl32_ldstexcl"}, + }, + }, + + { "_lqmksm", + {30, 23, 22, 20, 13, 4}, + { {"00001x"_b, "ld1row_z_p_bi_u32"}, + {"000x0x"_b, "ld1row_z_p_br_contiguous"}, + {"01001x"_b, "ld1rod_z_p_bi_u64"}, + {"010x0x"_b, "ld1rod_z_p_br_contiguous"}, + {"110x00"_b, "str_p_bi"}, + }, + }, + + { "_lrmgmq", + {30, 23, 22}, + { {"00x"_b, "add_64_addsub_imm"}, + {"010"_b, "addg_64_addsub_immtags"}, + {"10x"_b, "sub_64_addsub_imm"}, + {"110"_b, "subg_64_addsub_immtags"}, + }, + }, + + { "_lrntmz", + {13, 12, 11, 10}, + { {"0000"_b, "saddlb_z_zz"}, + {"0001"_b, "saddlt_z_zz"}, + {"0010"_b, "uaddlb_z_zz"}, + {"0011"_b, "uaddlt_z_zz"}, + {"0100"_b, "ssublb_z_zz"}, + {"0101"_b, "ssublt_z_zz"}, + {"0110"_b, "usublb_z_zz"}, + {"0111"_b, "usublt_z_zz"}, + {"1100"_b, "sabdlb_z_zz"}, + {"1101"_b, "sabdlt_z_zz"}, + {"1110"_b, "uabdlb_z_zz"}, + {"1111"_b, "uabdlt_z_zz"}, + }, + }, + + { "_lrptrn", + {30, 23, 13, 12, 11, 10}, + { {"100001"_b, "sri_asisdshf_r"}, + {"100101"_b, "sli_asisdshf_r"}, + {"101001"_b, "sqshlu_asisdshf_r"}, + {"101101"_b, "uqshl_asisdshf_r"}, + }, + }, + + { "_lrqlrg", + {30}, + { {"1"_b, "_ylhgrh"}, + }, + }, + + { "_lspzrv", + {30, 23, 13}, + { {"000"_b, "ld1sb_z_p_bz_s_x32_unscaled"}, + {"001"_b, "ldff1sb_z_p_bz_s_x32_unscaled"}, + {"010"_b, "ld1sh_z_p_bz_s_x32_unscaled"}, + {"011"_b, "ldff1sh_z_p_bz_s_x32_unscaled"}, + {"100"_b, 
"ld1sb_z_p_bz_d_x32_unscaled"}, + {"101"_b, "ldff1sb_z_p_bz_d_x32_unscaled"}, + {"110"_b, "ld1sh_z_p_bz_d_x32_unscaled"}, + {"111"_b, "ldff1sh_z_p_bz_d_x32_unscaled"}, + }, + }, + + { "_lsqgkk", + {30}, + { {"1"_b, "_jsqvtn"}, + }, + }, + + { "_lssjyz", + {30}, + { {"1"_b, "_kxtqjh"}, + }, + }, + + { "_lszlkq", + {22, 20, 19, 18, 17, 16, 13, 12}, + { {"01111100"_b, "_xtgmvr"}, + }, + }, + + { "_ltrntg", + {12}, + { {"0"_b, "udot_asimdelem_d"}, + {"1"_b, "sqrdmlsh_asimdelem_r"}, + }, + }, + + { "_lvjtlg", + {30, 11, 10}, + { {"000"_b, "_krvxxx"}, + {"001"_b, "_rpjrhs"}, + {"010"_b, "_tsypsz"}, + {"011"_b, "_ktsgth"}, + {"100"_b, "_yhnqyy"}, + {"101"_b, "_xzqmkv"}, + {"110"_b, "_vxqtkl"}, + {"111"_b, "_jggxjz"}, + }, + }, + + { "_lvryvp", + {30}, + { {"0"_b, "_gkqhyz"}, + {"1"_b, "_nzqxrj"}, + }, + }, + + { "_lvsrnj", + {23, 22, 20, 19, 18, 17, 16}, + { {"0111001"_b, "scvtf_asimdmiscfp16_r"}, + {"0x00001"_b, "scvtf_asimdmisc_r"}, + {"1111000"_b, "fcmeq_asimdmiscfp16_fz"}, + {"1111001"_b, "frecpe_asimdmiscfp16_r"}, + {"1x00000"_b, "fcmeq_asimdmisc_fz"}, + {"1x00001"_b, "frecpe_asimdmisc_r"}, + }, + }, + + { "_lvszgj", + {2, 1}, + { {"11"_b, "brabz_64_branch_reg"}, + }, + }, + + { "_lxggmz", + {30}, + { {"0"_b, "b_only_branch_imm"}, + }, + }, + + { "_lxhlkx", + {12, 11, 10}, + { {"000"_b, "ftmad_z_zzi"}, + }, + }, + + { "_lxlqks", + {19}, + { {"1"_b, "sysp_cr_syspairinstrs"}, + }, + }, + + { "_lylpyx", + {10}, + { {"0"_b, "sabalb_z_zzz"}, + {"1"_b, "sabalt_z_zzz"}, + }, + }, + + { "_lymhlk", + {30}, + { {"0"_b, "bl_only_branch_imm"}, + {"1"_b, "_vpgxgk"}, + }, + }, + + { "_lynsgm", + {13}, + { {"0"_b, "_ttplgp"}, + }, + }, + + { "_lytkrx", + {12, 11, 10}, + { {"000"_b, "dup_z_zi"}, + {"010"_b, "tbl_z_zz_2"}, + {"011"_b, "tbx_z_zz"}, + {"100"_b, "tbl_z_zz_1"}, + {"110"_b, "_ylnsvy"}, + }, + }, + + { "_lyzhrq", + {23, 22, 20, 19, 18, 17, 16}, + { {"0111001"_b, "fcvtms_asimdmiscfp16_r"}, + {"0x00001"_b, "fcvtms_asimdmisc_r"}, + {"1111001"_b, "fcvtzs_asimdmiscfp16_r"}, + {"1x00001"_b, "fcvtzs_asimdmisc_r"}, + {"xx00000"_b, "abs_asimdmisc_r"}, + {"xx10001"_b, "addv_asimdall_only"}, + }, + }, + + { "_lzjyhm", + {30}, + { {"0"_b, "ldapursw_64_ldapstl_unscaled"}, + }, + }, + + { "_lzqxgt", + {13, 12}, + { {"00"_b, "sbcs_32_addsub_carry"}, + }, + }, + + { "_lzvxxj", + {23, 22}, + { {"01"_b, "fcmeq_asimdsamefp16_only"}, + }, + }, + + { "_lzzsyj", + {18, 17}, + { {"0x"_b, "st3_asisdlsep_r3_r"}, + {"10"_b, "st3_asisdlsep_r3_r"}, + {"11"_b, "st3_asisdlsep_i3_i"}, + }, + }, + + { "_mgjhts", + {13, 12, 10}, + { {"001"_b, "_rvtxys"}, + {"010"_b, "_ppyynh"}, + {"011"_b, "_vvyjmh"}, + {"101"_b, "_rpplns"}, + {"110"_b, "sqdmlal_asisdelem_l"}, + {"111"_b, "_ymmhtq"}, + }, + }, + + { "_mgspnm", + {30, 23}, + { {"00"_b, "orr_64_log_imm"}, + {"10"_b, "ands_64s_log_imm"}, + {"11"_b, "movk_64_movewide"}, + }, + }, + + { "_mgtxyt", + {13, 12}, + { {"00"_b, "sbcs_64_addsub_carry"}, + }, + }, + + { "_mhksnq", + {23, 22, 20, 19, 11}, + { {"00010"_b, "ucvtf_asisdshf_c"}, + {"001x0"_b, "ucvtf_asisdshf_c"}, + {"01xx0"_b, "ucvtf_asisdshf_c"}, + }, + }, + + { "_mhljkp", + {23}, + { {"0"_b, "frecps_asimdsame_only"}, + {"1"_b, "frsqrts_asimdsame_only"}, + }, + }, + + { "_mhnlsy", + {11, 10}, + { {"00"_b, "sm3tt1a_vvv4_crypto3_imm2"}, + {"01"_b, "sm3tt1b_vvv4_crypto3_imm2"}, + {"10"_b, "sm3tt2a_vvv4_crypto3_imm2"}, + {"11"_b, "sm3tt2b_vvv_crypto3_imm2"}, + }, + }, + + { "_mhpgjx", + {20, 19, 18, 17, 16}, + { {"11111"_b, "stlr_sl64_ldstexcl"}, + }, + }, + + { "_mhrjvp", + {30, 13}, + { {"00"_b, "_vxhgzz"}, + {"01"_b, "_lytkrx"}, + 
{"10"_b, "_rlyvpn"}, + {"11"_b, "_yvptvx"}, + }, + }, + + { "_mjjhqj", + {30, 23, 22, 19, 16}, + { {"10010"_b, "aesimc_b_cryptoaes"}, + {"x0x01"_b, "fcvtl_asimdmisc_l"}, + {"xxx00"_b, "sqabs_asimdmisc_r"}, + }, + }, + + { "_mjrlkp", + {23, 22, 20, 19, 13, 11}, + { {"0000x0"_b, "movi_asimdimm_l_hl"}, + {"00x100"_b, "shrn_asimdshf_n"}, + {"00x101"_b, "rshrn_asimdshf_n"}, + {"00x110"_b, "sshll_asimdshf_l"}, + {"010x00"_b, "shrn_asimdshf_n"}, + {"010x01"_b, "rshrn_asimdshf_n"}, + {"010x10"_b, "sshll_asimdshf_l"}, + {"011100"_b, "shrn_asimdshf_n"}, + {"011101"_b, "rshrn_asimdshf_n"}, + {"011110"_b, "sshll_asimdshf_l"}, + {"0x1000"_b, "shrn_asimdshf_n"}, + {"0x1001"_b, "rshrn_asimdshf_n"}, + {"0x1010"_b, "sshll_asimdshf_l"}, + }, + }, + + { "_mjrqhl", + {18, 17}, + { {"0x"_b, "st3_asisdlsop_sx3_r3s"}, + {"10"_b, "st3_asisdlsop_sx3_r3s"}, + {"11"_b, "st3_asisdlsop_s3_i3s"}, + }, + }, + + { "_mjstgz", + {20, 19, 18, 17, 16, 13, 12}, + { {"0000000"_b, "ldgm_64bulk_ldsttags"}, + }, + }, + + { "_mjyhsl", + {20, 19, 18, 17, 16}, + { {"11111"_b, "ldxp_lp32_ldstexcl"}, + }, + }, + + { "_mkgsly", + {19, 18, 17, 16, 4}, + { {"00000"_b, "brkas_p_p_p_z"}, + {"10000"_b, "brkns_p_p_pp"}, + }, + }, + + { "_mkrgxr", + {23, 4}, + { {"00"_b, "_hptkrj"}, + }, + }, + + { "_mkyyng", + {23, 22}, + { {"01"_b, "fcmla_asimdelem_c_h"}, + {"10"_b, "fcmla_asimdelem_c_s"}, + }, + }, + + { "_mkzysy", + {30, 23, 22}, + { {"000"_b, "str_b_ldst_pos"}, + {"001"_b, "ldr_b_ldst_pos"}, + {"010"_b, "str_q_ldst_pos"}, + {"011"_b, "ldr_q_ldst_pos"}, + {"100"_b, "str_h_ldst_pos"}, + {"101"_b, "ldr_h_ldst_pos"}, + }, + }, + + { "_mlgmqm", + {18, 17}, + { {"00"_b, "st2_asisdlso_s2_2s"}, + }, + }, + + { "_mlxtxs", + {10}, + { {"0"_b, "ssra_z_zi"}, + {"1"_b, "usra_z_zi"}, + }, + }, + + { "_mmgpkx", + {13, 12}, + { {"11"_b, "cmgt_asisdsame_only"}, + }, + }, + + { "_mmxgrt", + {20, 19, 18, 17, 16}, + { {"00000"_b, "rev32_asimdmisc_r"}, + }, + }, + + { "_mnmtql", + {10}, + { {"0"_b, "srsra_z_zi"}, + {"1"_b, "ursra_z_zi"}, + }, + }, + + { "_mntnlr", + {18}, + { {"0"_b, "ld1_asisdlse_r4_4v"}, + }, + }, + + { "_mnxgml", + {22}, + { {"0"_b, "ldrsw_64_ldst_regoff"}, + }, + }, + + { "_mnxgqm", + {30, 23, 22, 13, 12, 11, 10}, + { {"000xxxx"_b, "fnmadd_s_floatdp3"}, + {"001xxxx"_b, "fnmadd_d_floatdp3"}, + {"011xxxx"_b, "fnmadd_h_floatdp3"}, + {"10001x0"_b, "fmls_asisdelem_rh_h"}, + {"10x0101"_b, "shl_asisdshf_r"}, + {"10x1101"_b, "sqshl_asisdshf_r"}, + {"11x01x0"_b, "fmls_asisdelem_r_sd"}, + {"1xx11x0"_b, "sqdmlsl_asisdelem_l"}, + }, + }, + + { "_mnzgkx", + {12}, + { {"0"_b, "st1_asisdlsop_dx1_r1d"}, + }, + }, + + { "_mnzzhk", + {20, 19, 18, 17, 16}, + { {"11111"_b, "stlr_sl32_ldstexcl"}, + }, + }, + + { "_mphkpq", + {12}, + { {"0"_b, "st1_asisdlsop_dx1_r1d"}, + }, + }, + + { "_mpstrr", + {23, 22, 8, 7, 6, 5, 4, 3, 2, 1, 0}, + { {"00000000000"_b, "setffr_f"}, + }, + }, + + { "_mpvsng", + {30}, + { {"0"_b, "_vvtnrv"}, + {"1"_b, "_yykhjv"}, + }, + }, + + { "_mpytmv", + {23, 22, 20, 19, 11}, + { {"00011"_b, "fcvtzu_asisdshf_c"}, + {"001x1"_b, "fcvtzu_asisdshf_c"}, + {"01xx1"_b, "fcvtzu_asisdshf_c"}, + }, + }, + + { "_mqljmr", + {2, 1, 0}, + { {"000"_b, "_rnphqp"}, + }, + }, + + { "_mqmrng", + {9, 8, 7, 6, 5, 2, 1}, + { {"1111100"_b, "eret_64e_branch_reg"}, + }, + }, + + { "_mqrzzk", + {22, 20, 11}, + { {"000"_b, "sqincw_z_zs"}, + {"001"_b, "sqdecw_z_zs"}, + {"010"_b, "incw_z_zs"}, + {"100"_b, "sqincd_z_zs"}, + {"101"_b, "sqdecd_z_zs"}, + {"110"_b, "incd_z_zs"}, + }, + }, + + { "_mqssgy", + {30}, + { {"0"_b, "_slzrtr"}, + {"1"_b, "_nsgxlz"}, + }, + }, + 
+ { "_mqtgvk", + {30, 23, 22, 13, 12, 11, 10}, + { {"000xx00"_b, "stlurb_32_ldapstl_unscaled"}, + {"001xx00"_b, "ldapurb_32_ldapstl_unscaled"}, + {"010xx00"_b, "ldapursb_64_ldapstl_unscaled"}, + {"011xx00"_b, "ldapursb_32_ldapstl_unscaled"}, + {"100xx00"_b, "stlurh_32_ldapstl_unscaled"}, + {"101xx00"_b, "ldapurh_32_ldapstl_unscaled"}, + {"110xx00"_b, "ldapursh_64_ldapstl_unscaled"}, + {"111xx00"_b, "ldapursh_32_ldapstl_unscaled"}, + {"x000001"_b, "cpyfp_cpy_memcms"}, + {"x000101"_b, "cpyfpwt_cpy_memcms"}, + {"x001001"_b, "cpyfprt_cpy_memcms"}, + {"x001101"_b, "cpyfpt_cpy_memcms"}, + {"x010001"_b, "cpyfm_cpy_memcms"}, + {"x010101"_b, "cpyfmwt_cpy_memcms"}, + {"x011001"_b, "cpyfmrt_cpy_memcms"}, + {"x011101"_b, "cpyfmt_cpy_memcms"}, + {"x100001"_b, "cpyfe_cpy_memcms"}, + {"x100101"_b, "cpyfewt_cpy_memcms"}, + {"x101001"_b, "cpyfert_cpy_memcms"}, + {"x101101"_b, "cpyfet_cpy_memcms"}, + {"x110001"_b, "setp_set_memcms"}, + {"x110101"_b, "setpt_set_memcms"}, + {"x111001"_b, "setpn_set_memcms"}, + {"x111101"_b, "setptn_set_memcms"}, + }, + }, + + { "_mrhtxt", + {23, 22, 20, 9}, + { {"0000"_b, "brkpb_p_p_pp"}, + {"0100"_b, "brkpbs_p_p_pp"}, + }, + }, + + { "_mrlpxr", + {30, 23, 22}, + { {"000"_b, "_vqzsgg"}, + {"001"_b, "_tzjyhy"}, + {"011"_b, "_grsnms"}, + {"100"_b, "_sknvhk"}, + {"101"_b, "_ptqtmp"}, + {"111"_b, "_kktzst"}, + }, + }, + + { "_msnshr", + {23, 22, 13, 12, 11, 10}, + { {"0001x0"_b, "fmls_asimdelem_rh_h"}, + {"0x0101"_b, "shl_asimdshf_r"}, + {"0x1101"_b, "sqshl_asimdshf_r"}, + {"1000x0"_b, "fmlsl_asimdelem_lh"}, + {"1x01x0"_b, "fmls_asimdelem_r_sd"}, + {"xx10x0"_b, "smlsl_asimdelem_l"}, + {"xx11x0"_b, "sqdmlsl_asimdelem_l"}, + }, + }, + + { "_msvhjv", + {9, 8, 7, 6, 5}, + { {"00000"_b, "fmov_d_floatimm"}, + }, + }, + + { "_msvjxq", + {20, 19, 18, 17, 16}, + { {"00001"_b, "sqxtun_asisdmisc_n"}, + }, + }, + + { "_msyrjz", + {13, 12, 11, 10}, + { {"1111"_b, "casal_c64_ldstexcl"}, + }, + }, + + { "_mthlnv", + {18}, + { {"0"_b, "ld1_asisdlsep_r4_r4"}, + {"1"_b, "ld1_asisdlsep_i4_i4"}, + }, + }, + + { "_mtkhgz", + {10}, + { {"0"_b, "sha512su0_vv2_cryptosha512_2"}, + {"1"_b, "sm4e_vv4_cryptosha512_2"}, + }, + }, + + { "_mtlxqp", + {30, 23, 22}, + { {"000"_b, "stnp_64_ldstnapair_offs"}, + {"001"_b, "ldnp_64_ldstnapair_offs"}, + {"010"_b, "stp_64_ldstpair_post"}, + {"011"_b, "ldp_64_ldstpair_post"}, + }, + }, + + { "_mtshvn", + {18}, + { {"0"_b, "ld1_asisdlso_b1_1b"}, + }, + }, + + { "_mtzhrn", + {30, 23, 22, 11, 10, 4}, + { {"001000"_b, "ccmn_64_condcmp_reg"}, + {"001100"_b, "ccmn_64_condcmp_imm"}, + {"101000"_b, "ccmp_64_condcmp_reg"}, + {"101100"_b, "ccmp_64_condcmp_imm"}, + }, + }, + + { "_mvqkzv", + {18, 17, 12}, + { {"000"_b, "st2_asisdlso_d2_2d"}, + }, + }, + + { "_mvvngm", + {23, 22, 20, 19, 13, 11}, + { {"0000x0"_b, "mvni_asimdimm_l_sl"}, + {"00x100"_b, "ushr_asimdshf_r"}, + {"00x110"_b, "urshr_asimdshf_r"}, + {"010x00"_b, "ushr_asimdshf_r"}, + {"010x10"_b, "urshr_asimdshf_r"}, + {"011100"_b, "ushr_asimdshf_r"}, + {"011110"_b, "urshr_asimdshf_r"}, + {"0x1000"_b, "ushr_asimdshf_r"}, + {"0x1010"_b, "urshr_asimdshf_r"}, + }, + }, + + { "_mxgykv", + {19, 18, 17, 16}, + { {"0000"_b, "cntp_r_p_p"}, + {"1000"_b, "_lynsgm"}, + {"1001"_b, "_jxyskn"}, + {"1010"_b, "_jmxstz"}, + {"1011"_b, "_yjzknm"}, + {"1100"_b, "_zmtkvx"}, + {"1101"_b, "_yhmlxk"}, + }, + }, + + { "_mxnzst", + {30}, + { {"0"_b, "_vghjnt"}, + {"1"_b, "_pkqvxk"}, + }, + }, + + { "_mxnzyr", + {19, 16}, + { {"00"_b, "_nhxxmh"}, + {"10"_b, "_qgymsy"}, + {"11"_b, "_gjprmg"}, + }, + }, + + { "_mxplnn", + {30, 23, 22}, + { {"000"_b, 
"stnp_s_ldstnapair_offs"}, + {"001"_b, "ldnp_s_ldstnapair_offs"}, + {"010"_b, "stp_s_ldstpair_post"}, + {"011"_b, "ldp_s_ldstpair_post"}, + {"100"_b, "stnp_d_ldstnapair_offs"}, + {"101"_b, "ldnp_d_ldstnapair_offs"}, + {"110"_b, "stp_d_ldstpair_post"}, + {"111"_b, "ldp_d_ldstpair_post"}, + }, + }, + + { "_mxvjxx", + {20, 19, 18, 16}, + { {"0000"_b, "_nshjhk"}, + }, + }, + + { "_mylphg", + {30, 13, 4}, + { {"000"_b, "cmpge_p_p_zw"}, + {"001"_b, "cmpgt_p_p_zw"}, + {"010"_b, "cmplt_p_p_zw"}, + {"011"_b, "cmple_p_p_zw"}, + {"1xx"_b, "fcmla_z_p_zzz"}, + }, + }, + + { "_myrkmk", + {16, 13, 12}, + { {"000"_b, "rev32_64_dp_1src"}, + {"001"_b, "ctz_64_dp_1src"}, + {"100"_b, "pacda_64p_dp_1src"}, + {"101"_b, "autda_64p_dp_1src"}, + {"110"_b, "_tnjhxp"}, + {"111"_b, "_qqjtpm"}, + }, + }, + + { "_myvqtn", + {12}, + { {"0"_b, "_yrgzqr"}, + }, + }, + + { "_myzhml", + {20, 19, 18, 17, 16, 13, 12}, + { {"0000000"_b, "stgm_64bulk_ldsttags"}, + }, + }, + + { "_mzhsrq", + {4}, + { {"0"_b, "cmplt_p_p_zi"}, + {"1"_b, "cmple_p_p_zi"}, + }, + }, + + { "_mzkxzm", + {1}, + { {"0"_b, "blr_64_branch_reg"}, + }, + }, + + { "_nghmrp", + {13, 12, 11, 10}, + { {"1111"_b, "casal_c32_ldstexcl"}, + }, + }, + + { "_ngkgsg", + {23, 22, 20, 19, 11}, + { {"00000"_b, "movi_asimdimm_l_sl"}, + }, + }, + + { "_ngnxrx", + {18}, + { {"0"_b, "ld1_asisdlse_r2_2v"}, + }, + }, + + { "_ngtlpz", + {18, 17, 12}, + { {"0x0"_b, "st3_asisdlsop_dx3_r3d"}, + {"100"_b, "st3_asisdlsop_dx3_r3d"}, + {"110"_b, "st3_asisdlsop_d3_i3d"}, + }, + }, + + { "_ngttyj", + {30, 23, 22, 13}, + { {"0000"_b, "ld1b_z_p_br_u16"}, + {"0001"_b, "ldff1b_z_p_br_u16"}, + {"0010"_b, "ld1b_z_p_br_u64"}, + {"0011"_b, "ldff1b_z_p_br_u64"}, + {"0100"_b, "ld1h_z_p_br_u16"}, + {"0101"_b, "ldff1h_z_p_br_u16"}, + {"0110"_b, "ld1h_z_p_br_u64"}, + {"0111"_b, "ldff1h_z_p_br_u64"}, + {"1001"_b, "st2b_z_p_br_contiguous"}, + {"1011"_b, "st4b_z_p_br_contiguous"}, + {"10x0"_b, "st1b_z_p_br"}, + {"1101"_b, "st2h_z_p_br_contiguous"}, + {"1111"_b, "st4h_z_p_br_contiguous"}, + {"11x0"_b, "st1h_z_p_br"}, + }, + }, + + { "_ngvqhs", + {13, 12, 11, 10}, + { {"0001"_b, "ushl_asisdsame_only"}, + {"0010"_b, "_vrxhss"}, + {"0011"_b, "uqshl_asisdsame_only"}, + {"0101"_b, "urshl_asisdsame_only"}, + {"0111"_b, "uqrshl_asisdsame_only"}, + {"1010"_b, "_xprqgs"}, + {"1110"_b, "_yskyrg"}, + }, + }, + + { "_ngzyqj", + {11, 10}, + { {"00"_b, "asr_z_zi"}, + {"01"_b, "lsr_z_zi"}, + {"11"_b, "lsl_z_zi"}, + }, + }, + + { "_nhnhzp", + {23, 22, 20, 19, 17, 16, 13}, + { {"0000000"_b, "_hrymnk"}, + {"0000001"_b, "_hmgzjl"}, + {"0100000"_b, "_nxmgqz"}, + {"0100001"_b, "_ssjrxs"}, + {"100xxx0"_b, "st1_asisdlsop_hx1_r1h"}, + {"100xxx1"_b, "st3_asisdlsop_hx3_r3h"}, + {"1010xx0"_b, "st1_asisdlsop_hx1_r1h"}, + {"1010xx1"_b, "st3_asisdlsop_hx3_r3h"}, + {"10110x0"_b, "st1_asisdlsop_hx1_r1h"}, + {"10110x1"_b, "st3_asisdlsop_hx3_r3h"}, + {"1011100"_b, "st1_asisdlsop_hx1_r1h"}, + {"1011101"_b, "st3_asisdlsop_hx3_r3h"}, + {"1011110"_b, "_jyzhnh"}, + {"1011111"_b, "_qzlvkm"}, + {"110xxx0"_b, "ld1_asisdlsop_hx1_r1h"}, + {"110xxx1"_b, "ld3_asisdlsop_hx3_r3h"}, + {"1110xx0"_b, "ld1_asisdlsop_hx1_r1h"}, + {"1110xx1"_b, "ld3_asisdlsop_hx3_r3h"}, + {"11110x0"_b, "ld1_asisdlsop_hx1_r1h"}, + {"11110x1"_b, "ld3_asisdlsop_hx3_r3h"}, + {"1111100"_b, "ld1_asisdlsop_hx1_r1h"}, + {"1111101"_b, "ld3_asisdlsop_hx3_r3h"}, + {"1111110"_b, "_zmkntq"}, + {"1111111"_b, "_rxhssh"}, + }, + }, + + { "_nhrkqm", + {22, 20, 19, 18, 17, 16}, + { {"111001"_b, "ucvtf_asisdmiscfp16_r"}, + {"x00001"_b, "ucvtf_asisdmisc_r"}, + {"x10000"_b, 
"faddp_asisdpair_only_sd"}, + }, + }, + + { "_nhxxmh", + {23, 22, 9, 3, 2, 1, 0}, + { {"0100000"_b, "ptest_p_p"}, + }, + }, + + { "_njjlxy", + {30, 23, 22}, + { {"000"_b, "stlxp_sp32_ldstexcl"}, + {"001"_b, "_ymvzyh"}, + {"010"_b, "_nxttqn"}, + {"011"_b, "_nghmrp"}, + {"100"_b, "stlxp_sp64_ldstexcl"}, + {"101"_b, "_hpqkhv"}, + {"110"_b, "_xspjzn"}, + {"111"_b, "_msyrjz"}, + }, + }, + + { "_njngkk", + {23, 22, 9, 8, 7, 6, 5}, + { {"0000000"_b, "rdffr_p_f"}, + }, + }, + + { "_njnsqm", + {23, 22, 20, 19, 18, 17, 16}, + { {"0111001"_b, "frintn_asimdmiscfp16_r"}, + {"0x00001"_b, "frintn_asimdmisc_r"}, + {"1111001"_b, "frintp_asimdmiscfp16_r"}, + {"1x00001"_b, "frintp_asimdmisc_r"}, + {"xx00000"_b, "cmgt_asimdmisc_z"}, + }, + }, + + { "_njvkjq", + {11, 10}, + { {"00"_b, "index_z_ii"}, + {"01"_b, "index_z_ri"}, + {"10"_b, "index_z_ir"}, + {"11"_b, "index_z_rr"}, + }, + }, + + { "_nklqly", + {13, 12, 11, 10}, + { {"0000"_b, "sha256h_qqv_cryptosha3"}, + {"0100"_b, "sha256h2_qqv_cryptosha3"}, + {"1000"_b, "sha256su1_vvv_cryptosha3"}, + }, + }, + + { "_nklvmv", + {30, 23, 22, 13, 12, 11, 10}, + { {"1011001"_b, "fcmge_asisdsamefp16_only"}, + {"1011011"_b, "facge_asisdsamefp16_only"}, + {"1110101"_b, "fabd_asisdsamefp16_only"}, + {"1111001"_b, "fcmgt_asisdsamefp16_only"}, + {"1111011"_b, "facgt_asisdsamefp16_only"}, + }, + }, + + { "_nklyky", + {18, 17, 12}, + { {"000"_b, "st1_asisdlso_d1_1d"}, + }, + }, + + { "_nkmkvz", + {18}, + { {"0"_b, "st3_asisdlsop_bx3_r3b"}, + {"1"_b, "st3_asisdlsop_b3_i3b"}, + }, + }, + + { "_nknntn", + {23, 22, 20, 19, 18, 17, 16}, + { {"0111001"_b, "fcvtns_asimdmiscfp16_r"}, + {"0x00001"_b, "fcvtns_asimdmisc_r"}, + {"1111001"_b, "fcvtps_asimdmiscfp16_r"}, + {"1x00001"_b, "fcvtps_asimdmisc_r"}, + {"xx00000"_b, "cmlt_asimdmisc_z"}, + {"xx10000"_b, "smaxv_asimdall_only"}, + {"xx10001"_b, "sminv_asimdall_only"}, + }, + }, + + { "_nkpyjg", + {23, 20, 19, 18, 17, 16}, + { {"000001"_b, "frint32x_asimdmisc_r"}, + }, + }, + + { "_nktrpj", + {23, 22, 12}, + { {"001"_b, "sudot_asimdelem_d"}, + {"011"_b, "bfdot_asimdelem_e"}, + {"101"_b, "usdot_asimdelem_d"}, + {"111"_b, "bfmlal_asimdelem_f"}, + {"xx0"_b, "sdot_asimdelem_d"}, + }, + }, + + { "_nkxhsy", + {22, 20, 11}, + { {"000"_b, "cntb_r_s"}, + {"010"_b, "incb_r_rs"}, + {"100"_b, "cnth_r_s"}, + {"110"_b, "inch_r_rs"}, + }, + }, + + { "_nkyrpv", + {30, 23, 13, 12, 11, 10}, + { {"101001"_b, "ucvtf_asisdshf_c"}, + {"101111"_b, "fcvtzu_asisdshf_c"}, + {"1x01x0"_b, "sqrdmlah_asisdelem_r"}, + {"1x11x0"_b, "sqrdmlsh_asisdelem_r"}, + }, + }, + + { "_nkyynq", + {23, 22, 20, 19, 17, 16}, + { {"000010"_b, "scvtf_s32_float2fix"}, + {"000011"_b, "ucvtf_s32_float2fix"}, + {"001100"_b, "fcvtzs_32s_float2fix"}, + {"001101"_b, "fcvtzu_32s_float2fix"}, + {"010010"_b, "scvtf_d32_float2fix"}, + {"010011"_b, "ucvtf_d32_float2fix"}, + {"011100"_b, "fcvtzs_32d_float2fix"}, + {"011101"_b, "fcvtzu_32d_float2fix"}, + {"110010"_b, "scvtf_h32_float2fix"}, + {"110011"_b, "ucvtf_h32_float2fix"}, + {"111100"_b, "fcvtzs_32h_float2fix"}, + {"111101"_b, "fcvtzu_32h_float2fix"}, + }, + }, + + { "_nlpmvl", + {30, 13}, + { {"00"_b, "mad_z_p_zzz"}, + {"01"_b, "msb_z_p_zzz"}, + }, + }, + + { "_nlrjsj", + {30, 23, 22, 13, 12, 11, 10}, + { {"000xx10"_b, "stlur_s_ldapstl_simd"}, + {"001xx10"_b, "ldapur_s_ldapstl_simd"}, + {"100xx10"_b, "stlur_d_ldapstl_simd"}, + {"101xx10"_b, "ldapur_d_ldapstl_simd"}, + {"x000001"_b, "cpypn_cpy_memcms"}, + {"x000101"_b, "cpypwtn_cpy_memcms"}, + {"x001001"_b, "cpyprtn_cpy_memcms"}, + {"x001101"_b, "cpyptn_cpy_memcms"}, + {"x010001"_b, 
"cpymn_cpy_memcms"}, + {"x010101"_b, "cpymwtn_cpy_memcms"}, + {"x011001"_b, "cpymrtn_cpy_memcms"}, + {"x011101"_b, "cpymtn_cpy_memcms"}, + {"x100001"_b, "cpyen_cpy_memcms"}, + {"x100101"_b, "cpyewtn_cpy_memcms"}, + {"x101001"_b, "cpyertn_cpy_memcms"}, + {"x101101"_b, "cpyetn_cpy_memcms"}, + }, + }, + + { "_nmqrtr", + {23, 22, 13, 12, 11, 10}, + { {"0001x0"_b, "fmul_asimdelem_rh_h"}, + {"0x0001"_b, "shrn_asimdshf_n"}, + {"0x0011"_b, "rshrn_asimdshf_n"}, + {"0x0101"_b, "sqshrn_asimdshf_n"}, + {"0x0111"_b, "sqrshrn_asimdshf_n"}, + {"0x1001"_b, "sshll_asimdshf_l"}, + {"1x01x0"_b, "fmul_asimdelem_r_sd"}, + {"xx00x0"_b, "mul_asimdelem_r"}, + {"xx10x0"_b, "smull_asimdelem_l"}, + {"xx11x0"_b, "sqdmull_asimdelem_l"}, + }, + }, + + { "_nmqskh", + {23, 22, 20, 19, 16, 13, 12}, + { {"0000000"_b, "_xkznrh"}, + {"0000010"_b, "_svlrvy"}, + {"0000011"_b, "_prmjlz"}, + {"0100000"_b, "_lgmlmt"}, + {"0100010"_b, "_qhpkhm"}, + {"0100011"_b, "_sqlsyr"}, + {"100xx00"_b, "st3_asisdlsep_r3_r"}, + {"100xx10"_b, "st1_asisdlsep_r3_r3"}, + {"100xx11"_b, "st1_asisdlsep_r1_r1"}, + {"1010x00"_b, "st3_asisdlsep_r3_r"}, + {"1010x10"_b, "st1_asisdlsep_r3_r3"}, + {"1010x11"_b, "st1_asisdlsep_r1_r1"}, + {"1011000"_b, "st3_asisdlsep_r3_r"}, + {"1011010"_b, "st1_asisdlsep_r3_r3"}, + {"1011011"_b, "st1_asisdlsep_r1_r1"}, + {"1011100"_b, "_lzzsyj"}, + {"1011110"_b, "_xqvzvl"}, + {"1011111"_b, "_vxrnyh"}, + {"110xx00"_b, "ld3_asisdlsep_r3_r"}, + {"110xx10"_b, "ld1_asisdlsep_r3_r3"}, + {"110xx11"_b, "ld1_asisdlsep_r1_r1"}, + {"1110x00"_b, "ld3_asisdlsep_r3_r"}, + {"1110x10"_b, "ld1_asisdlsep_r3_r3"}, + {"1110x11"_b, "ld1_asisdlsep_r1_r1"}, + {"1111000"_b, "ld3_asisdlsep_r3_r"}, + {"1111010"_b, "ld1_asisdlsep_r3_r3"}, + {"1111011"_b, "ld1_asisdlsep_r1_r1"}, + {"1111100"_b, "_ntxnpq"}, + {"1111110"_b, "_ghmtnl"}, + {"1111111"_b, "_gzrtkk"}, + }, + }, + + { "_nnkxgr", + {11, 10}, + { {"00"_b, "ftssel_z_zz"}, + {"10"_b, "_yhlntp"}, + {"11"_b, "_rsqmgk"}, + }, + }, + + { "_nnrtpm", + {20, 19, 18, 17, 16}, + { {"11111"_b, "stllrb_sl32_ldstexcl"}, + }, + }, + + { "_nntvzj", + {11, 10, 9, 8, 7, 6}, + { {"000000"_b, "nop_hi_hints"}, + {"000001"_b, "wfe_hi_hints"}, + {"000010"_b, "sev_hi_hints"}, + {"000011"_b, "dgh_hi_hints"}, + {"000100"_b, "pacia1716_hi_hints"}, + {"000101"_b, "pacib1716_hi_hints"}, + {"000110"_b, "autia1716_hi_hints"}, + {"000111"_b, "autib1716_hi_hints"}, + {"001000"_b, "esb_hi_hints"}, + {"001001"_b, "tsb_hc_hints"}, + {"001010"_b, "csdb_hi_hints"}, + {"001011"_b, "clrbhb_hi_hints"}, + {"001100"_b, "paciaz_hi_hints"}, + {"001101"_b, "pacibz_hi_hints"}, + {"001110"_b, "autiaz_hi_hints"}, + {"001111"_b, "autibz_hi_hints"}, + {"0100xx"_b, "bti_hb_hints"}, + {"010100"_b, "chkfeat_hi_hints"}, + {"0101x1"_b, "hint_hm_hints"}, + {"01x110"_b, "hint_hm_hints"}, + {"10xxxx"_b, "hint_hm_hints"}, + {"110xxx"_b, "hint_hm_hints"}, + {"111110"_b, "hint_hm_hints"}, + {"x110xx"_b, "hint_hm_hints"}, + {"x1110x"_b, "hint_hm_hints"}, + {"x11111"_b, "hint_hm_hints"}, + }, + }, + + { "_nnzhgm", + {19, 18, 17, 16, 4}, + { {"0000x"_b, "brka_p_p_p"}, + {"10000"_b, "brkn_p_p_pp"}, + }, + }, + + { "_npjnlv", + {20, 19, 18, 17}, + { {"0000"_b, "_kzyzrh"}, + }, + }, + + { "_npxkzq", + {20, 19, 18, 17, 16, 13, 12}, + { {"0000000"_b, "_tykvnx"}, + }, + }, + + { "_nqjtqn", + {23, 22}, + { {"00"_b, "dup_asimdins_dv_v"}, + {"01"_b, "fmaxnm_asimdsamefp16_only"}, + {"11"_b, "fminnm_asimdsamefp16_only"}, + }, + }, + + { "_nqjvmr", + {13, 12}, + { {"00"_b, "adcs_32_addsub_carry"}, + }, + }, + + { "_nqkhrv", + {30, 13}, + { {"10"_b, "fnmla_z_p_zzz"}, + 
{"11"_b, "fnmls_z_p_zzz"}, + }, + }, + + { "_nqlrmv", + {30, 23, 22}, + { {"000"_b, "bfm_32m_bitfield"}, + }, + }, + + { "_nqmnzp", + {30, 23, 22, 20, 19, 18, 17, 16}, + { {"00000000"_b, "udf_only_perm_undef"}, + }, + }, + + { "_nrmlqv", + {23, 22, 20, 19, 13, 11}, + { {"0000x0"_b, "orr_asimdimm_l_sl"}, + {"00x100"_b, "ssra_asimdshf_r"}, + {"00x110"_b, "srsra_asimdshf_r"}, + {"010x00"_b, "ssra_asimdshf_r"}, + {"010x10"_b, "srsra_asimdshf_r"}, + {"011100"_b, "ssra_asimdshf_r"}, + {"011110"_b, "srsra_asimdshf_r"}, + {"0x1000"_b, "ssra_asimdshf_r"}, + {"0x1010"_b, "srsra_asimdshf_r"}, + }, + }, + + { "_nsgvsv", + {9, 8, 7, 6, 5}, + { {"00000"_b, "fmov_h_floatimm"}, + }, + }, + + { "_nsgxlz", + {13, 12, 10}, + { {"000"_b, "sqdmulh_asisdelem_r"}, + {"010"_b, "sqrdmulh_asisdelem_r"}, + {"101"_b, "_rkjjtp"}, + {"111"_b, "_pzpxxv"}, + }, + }, + + { "_nshjhk", + {17, 9, 8, 7, 6, 5}, + { {"000000"_b, "aesimc_z_z"}, + {"1xxxxx"_b, "aesd_z_zz"}, + }, + }, + + { "_nsjhhg", + {30, 13}, + { {"00"_b, "_jhllmn"}, + {"01"_b, "_htplsj"}, + {"10"_b, "_rztvnl"}, + {"11"_b, "_vgtnjh"}, + }, + }, + + { "_ntjpsx", + {22, 20, 11}, + { {"000"_b, "uqincb_r_rs_uw"}, + {"001"_b, "uqdecb_r_rs_uw"}, + {"010"_b, "uqincb_r_rs_x"}, + {"011"_b, "uqdecb_r_rs_x"}, + {"100"_b, "uqinch_r_rs_uw"}, + {"101"_b, "uqdech_r_rs_uw"}, + {"110"_b, "uqinch_r_rs_x"}, + {"111"_b, "uqdech_r_rs_x"}, + }, + }, + + { "_ntjrlg", + {18, 17, 16, 13, 12, 11, 10, 9, 7, 6, 5}, + { {"01111000011"_b, "_vsslrs"}, + }, + }, + + { "_ntxnpq", + {18, 17}, + { {"0x"_b, "ld3_asisdlsep_r3_r"}, + {"10"_b, "ld3_asisdlsep_r3_r"}, + {"11"_b, "ld3_asisdlsep_i3_i"}, + }, + }, + + { "_nvkthr", + {30, 13}, + { {"00"_b, "_kjqynn"}, + {"01"_b, "_jgyhrh"}, + {"10"_b, "_jymnkk"}, + {"11"_b, "_pqjjsh"}, + }, + }, + + { "_nvkxzs", + {12}, + { {"0"_b, "gcsstr_64_ldst_gcs"}, + {"1"_b, "gcssttr_64_ldst_gcs"}, + }, + }, + + { "_nvnjyp", + {23, 22, 20, 19, 11}, + { {"00000"_b, "mvni_asimdimm_m_sm"}, + }, + }, + + { "_nvyxmh", + {20, 19, 18, 17, 16}, + { {"00000"_b, "add_z_p_zz"}, + {"00001"_b, "sub_z_p_zz"}, + {"00011"_b, "subr_z_p_zz"}, + {"01000"_b, "smax_z_p_zz"}, + {"01001"_b, "umax_z_p_zz"}, + {"01010"_b, "smin_z_p_zz"}, + {"01011"_b, "umin_z_p_zz"}, + {"01100"_b, "sabd_z_p_zz"}, + {"01101"_b, "uabd_z_p_zz"}, + {"10000"_b, "mul_z_p_zz"}, + {"10010"_b, "smulh_z_p_zz"}, + {"10011"_b, "umulh_z_p_zz"}, + {"10100"_b, "sdiv_z_p_zz"}, + {"10101"_b, "udiv_z_p_zz"}, + {"10110"_b, "sdivr_z_p_zz"}, + {"10111"_b, "udivr_z_p_zz"}, + {"11000"_b, "orr_z_p_zz"}, + {"11001"_b, "eor_z_p_zz"}, + {"11010"_b, "and_z_p_zz"}, + {"11011"_b, "bic_z_p_zz"}, + }, + }, + + { "_nvzsxn", + {18, 17, 12}, + { {"000"_b, "stl1_asisdlso_d1"}, + }, + }, + + { "_nxjkqs", + {23, 22, 12, 11, 10}, + { {"0x000"_b, "fmla_z_zzzi_h"}, + {"0x001"_b, "fmls_z_zzzi_h"}, + {"10000"_b, "fmla_z_zzzi_s"}, + {"10001"_b, "fmls_z_zzzi_s"}, + {"101xx"_b, "fcmla_z_zzzi_h"}, + {"11000"_b, "fmla_z_zzzi_d"}, + {"11001"_b, "fmls_z_zzzi_d"}, + {"111xx"_b, "fcmla_z_zzzi_s"}, + }, + }, + + { "_nxlmhz", + {30, 23}, + { {"00"_b, "add_32_addsub_imm"}, + {"10"_b, "sub_32_addsub_imm"}, + }, + }, + + { "_nxlsjm", + {20, 19, 18, 17, 16}, + { {"11111"_b, "ldxrb_lr32_ldstexcl"}, + }, + }, + + { "_nxmgqz", + {18}, + { {"0"_b, "ld1_asisdlso_h1_1h"}, + }, + }, + + { "_nxrqmg", + {30, 23, 22, 13, 12, 11, 10}, + { {"000xx00"_b, "stlur_32_ldapstl_unscaled"}, + {"001xx00"_b, "ldapur_32_ldapstl_unscaled"}, + {"010xx00"_b, "ldapursw_64_ldapstl_unscaled"}, + {"100xx00"_b, "stlur_64_ldapstl_unscaled"}, + {"101xx00"_b, "ldapur_64_ldapstl_unscaled"}, + 
{"x000001"_b, "cpyfpn_cpy_memcms"}, + {"x000101"_b, "cpyfpwtn_cpy_memcms"}, + {"x001001"_b, "cpyfprtn_cpy_memcms"}, + {"x001101"_b, "cpyfptn_cpy_memcms"}, + {"x010001"_b, "cpyfmn_cpy_memcms"}, + {"x010101"_b, "cpyfmwtn_cpy_memcms"}, + {"x011001"_b, "cpyfmrtn_cpy_memcms"}, + {"x011101"_b, "cpyfmtn_cpy_memcms"}, + {"x100001"_b, "cpyfen_cpy_memcms"}, + {"x100101"_b, "cpyfewtn_cpy_memcms"}, + {"x101001"_b, "cpyfertn_cpy_memcms"}, + {"x101101"_b, "cpyfetn_cpy_memcms"}, + }, + }, + + { "_nxttqn", + {13, 12, 11, 10}, + { {"1111"_b, "casl_c32_ldstexcl"}, + }, + }, + + { "_nygsjm", + {18}, + { {"0"_b, "st2_asisdlso_b2_2b"}, + }, + }, + + { "_nyjtng", + {23, 22}, + { {"01"_b, "fmls_z_p_zzz"}, + {"1x"_b, "fmls_z_p_zzz"}, + }, + }, + + { "_nynrns", + {23, 22, 12}, + { {"000"_b, "_klxxgx"}, + {"001"_b, "_pglvnj"}, + {"010"_b, "_pzttrn"}, + {"011"_b, "_svyszp"}, + {"110"_b, "_prrkzv"}, + {"111"_b, "_nsgvsv"}, + }, + }, + + { "_nzmqhv", + {23, 22, 20, 19, 18, 17, 16}, + { {"0x00001"_b, "frint64x_asimdmisc_r"}, + {"0x10000"_b, "fmaxv_asimdall_only_sd"}, + {"1111000"_b, "fneg_asimdmiscfp16_r"}, + {"1111001"_b, "fsqrt_asimdmiscfp16_r"}, + {"1x00000"_b, "fneg_asimdmisc_r"}, + {"1x00001"_b, "fsqrt_asimdmisc_r"}, + {"1x10000"_b, "fminv_asimdall_only_sd"}, + }, + }, + + { "_nzqxrj", + {12}, + { {"1"_b, "_qgvtrn"}, + }, + }, + + { "_nzskzl", + {13, 12, 11, 10}, + { {"0000"_b, "uaddl_asimddiff_l"}, + {"0001"_b, "uhadd_asimdsame_only"}, + {"0010"_b, "_mmxgrt"}, + {"0011"_b, "uqadd_asimdsame_only"}, + {"0100"_b, "uaddw_asimddiff_w"}, + {"0101"_b, "urhadd_asimdsame_only"}, + {"0111"_b, "_yyvnrp"}, + {"1000"_b, "usubl_asimddiff_l"}, + {"1001"_b, "uhsub_asimdsame_only"}, + {"1010"_b, "_vlhkgr"}, + {"1011"_b, "uqsub_asimdsame_only"}, + {"1100"_b, "usubw_asimddiff_w"}, + {"1101"_b, "cmhi_asimdsame_only"}, + {"1110"_b, "_srpptk"}, + {"1111"_b, "cmhs_asimdsame_only"}, + }, + }, + + { "_nzvlzt", + {18}, + { {"0"_b, "st1_asisdlse_r4_4v"}, + }, + }, + + { "_pgjjsz", + {30, 13, 12, 11, 10}, + { {"00000"_b, "_lmyxhr"}, + {"00001"_b, "_tmhlvh"}, + {"00010"_b, "_qvtxpr"}, + {"00011"_b, "_ymkthj"}, + {"00100"_b, "_rhmxyp"}, + {"00101"_b, "_zryvjk"}, + {"01000"_b, "zip1_z_zz"}, + {"01001"_b, "zip2_z_zz"}, + {"01010"_b, "uzp1_z_zz"}, + {"01011"_b, "uzp2_z_zz"}, + {"01100"_b, "trn1_z_zz"}, + {"01101"_b, "trn2_z_zz"}, + {"10000"_b, "_llvrrk"}, + {"10001"_b, "_qyjvqr"}, + {"10010"_b, "_tmtnkq"}, + {"10011"_b, "_gpxltv"}, + {"10100"_b, "_pnlnzt"}, + {"10101"_b, "_pygvrr"}, + {"11000"_b, "addhnb_z_zz"}, + {"11001"_b, "addhnt_z_zz"}, + {"11010"_b, "raddhnb_z_zz"}, + {"11011"_b, "raddhnt_z_zz"}, + {"11100"_b, "subhnb_z_zz"}, + {"11101"_b, "subhnt_z_zz"}, + {"11110"_b, "rsubhnb_z_zz"}, + {"11111"_b, "rsubhnt_z_zz"}, + }, + }, + + { "_pglvnj", + {9, 8, 7, 6, 5}, + { {"00000"_b, "fmov_s_floatimm"}, + }, + }, + + { "_pgmlrt", + {30, 23, 22}, + { {"000"_b, "stxrb_sr32_ldstexcl"}, + {"001"_b, "_nxlsjm"}, + {"010"_b, "_nnrtpm"}, + {"011"_b, "_sksvrn"}, + {"100"_b, "stxrh_sr32_ldstexcl"}, + {"101"_b, "_knpjtt"}, + {"110"_b, "_zqhhlq"}, + {"111"_b, "_xtzykp"}, + }, + }, + + { "_pgvjgs", + {30, 23, 22, 13, 12, 11, 10}, + { {"000xx10"_b, "stlur_s_ldapstl_simd"}, + {"001xx10"_b, "ldapur_s_ldapstl_simd"}, + {"100xx10"_b, "stlur_d_ldapstl_simd"}, + {"101xx10"_b, "ldapur_d_ldapstl_simd"}, + {"x000001"_b, "cpypwn_cpy_memcms"}, + {"x000101"_b, "cpypwtwn_cpy_memcms"}, + {"x001001"_b, "cpyprtwn_cpy_memcms"}, + {"x001101"_b, "cpyptwn_cpy_memcms"}, + {"x010001"_b, "cpymwn_cpy_memcms"}, + {"x010101"_b, "cpymwtwn_cpy_memcms"}, + {"x011001"_b, 
"cpymrtwn_cpy_memcms"}, + {"x011101"_b, "cpymtwn_cpy_memcms"}, + {"x100001"_b, "cpyewn_cpy_memcms"}, + {"x100101"_b, "cpyewtwn_cpy_memcms"}, + {"x101001"_b, "cpyertwn_cpy_memcms"}, + {"x101101"_b, "cpyetwn_cpy_memcms"}, + {"x110001"_b, "setgm_set_memcms"}, + {"x110101"_b, "setgmt_set_memcms"}, + {"x111001"_b, "setgmn_set_memcms"}, + {"x111101"_b, "setgmtn_set_memcms"}, + }, + }, + + { "_phjkhr", + {9, 8, 7, 6, 5}, + { {"11111"_b, "autdzb_64z_dp_1src"}, + }, + }, + + { "_phktvp", + {7, 6, 4, 3, 2, 1, 0}, + { {"0111111"_b, "clrex_bn_barriers"}, + {"1011111"_b, "dsb_bo_barriers"}, + {"1111111"_b, "isb_bi_barriers"}, + }, + }, + + { "_phpphm", + {18}, + { {"0"_b, "st4_asisdlso_h4_4h"}, + }, + }, + + { "_phrqqx", + {23, 22, 13}, + { {"100"_b, "fmlal_asimdelem_lh"}, + {"xx1"_b, "smlal_asimdelem_l"}, + }, + }, + + { "_phsrlk", + {23, 22, 13}, + { {"000"_b, "fmla_asimdelem_rh_h"}, + {"1x0"_b, "fmla_asimdelem_r_sd"}, + {"xx1"_b, "sqdmlal_asimdelem_l"}, + }, + }, + + { "_phthqj", + {30, 13}, + { {"00"_b, "_sntyqy"}, + {"01"_b, "_xhlhmh"}, + {"10"_b, "_rtrlts"}, + {"11"_b, "_jzkqhn"}, + }, + }, + + { "_phtxqg", + {13, 10}, + { {"00"_b, "_vrjhtm"}, + {"01"_b, "_spktyg"}, + {"10"_b, "_nktrpj"}, + {"11"_b, "_vzvstm"}, + }, + }, + + { "_pjgkjs", + {18, 17}, + { {"00"_b, "_mxnzyr"}, + }, + }, + + { "_pjhmvy", + {20, 19, 18, 17, 16}, + { {"00000"_b, "saddlp_asimdmisc_p"}, + {"00001"_b, "xtn_asimdmisc_n"}, + }, + }, + + { "_pjlnhh", + {30, 23}, + { {"00"_b, "and_64_log_imm"}, + {"01"_b, "movn_64_movewide"}, + {"10"_b, "eor_64_log_imm"}, + {"11"_b, "movz_64_movewide"}, + }, + }, + + { "_pjskhr", + {18, 17}, + { {"00"_b, "st3_asisdlso_s3_3s"}, + }, + }, + + { "_pjvkjz", + {13, 12}, + { {"00"_b, "sbc_64_addsub_carry"}, + }, + }, + + { "_pkjqsy", + {20, 19, 18, 17, 16, 13, 12}, + { {"0000001"_b, "cnt_32_dp_1src"}, + }, + }, + + { "_pkpvmj", + {13, 12, 11, 10}, + { {"1111"_b, "casa_c64_ldstexcl"}, + }, + }, + + { "_pkqvxk", + {12}, + { {"1"_b, "_ynsytg"}, + }, + }, + + { "_pkskpp", + {30, 23}, + { {"00"_b, "adds_64s_addsub_imm"}, + {"10"_b, "subs_64s_addsub_imm"}, + }, + }, + + { "_plgrmv", + {13, 12}, + { {"00"_b, "adcs_64_addsub_carry"}, + }, + }, + + { "_plrggq", + {23, 22, 13}, + { {"000"_b, "fmls_asimdelem_rh_h"}, + {"1x0"_b, "fmls_asimdelem_r_sd"}, + {"xx1"_b, "sqdmlsl_asimdelem_l"}, + }, + }, + + { "_plyhhz", + {20, 19, 18, 17, 16}, + { {"00000"_b, "cmge_asisdmisc_z"}, + }, + }, + + { "_plymgg", + {18}, + { {"1"_b, "frsqrte_z_z"}, + }, + }, + + { "_plytvr", + {22}, + { {"0"_b, "str_32_ldst_regoff"}, + {"1"_b, "ldr_32_ldst_regoff"}, + }, + }, + + { "_plyxlq", + {30, 18}, + { {"00"_b, "_nkyynq"}, + }, + }, + + { "_plzqrv", + {23, 22, 20, 19, 12, 11, 10}, + { {"00x1001"_b, "sqshrun_asisdshf_n"}, + {"00x1011"_b, "sqrshrun_asisdshf_n"}, + {"00x1101"_b, "uqshrn_asisdshf_n"}, + {"00x1111"_b, "uqrshrn_asisdshf_n"}, + {"00xx1x0"_b, "fmulx_asisdelem_rh_h"}, + {"010x001"_b, "sqshrun_asisdshf_n"}, + {"010x011"_b, "sqrshrun_asisdshf_n"}, + {"010x101"_b, "uqshrn_asisdshf_n"}, + {"010x111"_b, "uqrshrn_asisdshf_n"}, + {"0111001"_b, "sqshrun_asisdshf_n"}, + {"0111011"_b, "sqrshrun_asisdshf_n"}, + {"0111101"_b, "uqshrn_asisdshf_n"}, + {"0111111"_b, "uqrshrn_asisdshf_n"}, + {"0x10001"_b, "sqshrun_asisdshf_n"}, + {"0x10011"_b, "sqrshrun_asisdshf_n"}, + {"0x10101"_b, "uqshrn_asisdshf_n"}, + {"0x10111"_b, "uqrshrn_asisdshf_n"}, + {"1xxx1x0"_b, "fmulx_asisdelem_r_sd"}, + }, + }, + + { "_pmpsvs", + {18, 17, 12}, + { {"000"_b, "ld2_asisdlso_d2_2d"}, + }, + }, + + { "_pnkxsr", + {22, 20}, + { {"00"_b, "_hnsvjh"}, + {"01"_b, 
"mrs_rs_systemmove"}, + {"11"_b, "mrrs_rs_systemmovepr"}, + }, + }, + + { "_pnlnzt", + {23, 18, 17, 16}, + { {"0000"_b, "sqxtunb_z_zz"}, + }, + }, + + { "_ppnssm", + {30, 13, 12}, + { {"000"_b, "_ktyppm"}, + {"001"_b, "_ngzyqj"}, + {"010"_b, "_yxnslx"}, + {"011"_b, "_nnkxgr"}, + {"100"_b, "_kzmvpk"}, + {"101"_b, "_thrxph"}, + {"110"_b, "_kgpgly"}, + {"111"_b, "_yppszx"}, + }, + }, + + { "_pppsmg", + {30}, + { {"0"_b, "_xyhmgh"}, + {"1"_b, "_rlrjxp"}, + }, + }, + + { "_ppvnly", + {18, 17}, + { {"0x"_b, "ld2_asisdlsop_sx2_r2s"}, + {"10"_b, "ld2_asisdlsop_sx2_r2s"}, + {"11"_b, "ld2_asisdlsop_s2_i2s"}, + }, + }, + + { "_ppyynh", + {23, 22}, + { {"00"_b, "fmla_asisdelem_rh_h"}, + {"1x"_b, "fmla_asisdelem_r_sd"}, + }, + }, + + { "_pqjjsh", + {23, 22, 12, 10}, + { {"1000"_b, "fmlslb_z_zzzi_s"}, + {"1001"_b, "fmlslt_z_zzzi_s"}, + }, + }, + + { "_pqmqrg", + {30, 23, 22}, + { {"000"_b, "stp_s_ldstpair_off"}, + {"001"_b, "ldp_s_ldstpair_off"}, + {"010"_b, "stp_s_ldstpair_pre"}, + {"011"_b, "ldp_s_ldstpair_pre"}, + {"100"_b, "stp_d_ldstpair_off"}, + {"101"_b, "ldp_d_ldstpair_off"}, + {"110"_b, "stp_d_ldstpair_pre"}, + {"111"_b, "ldp_d_ldstpair_pre"}, + }, + }, + + { "_pqsvty", + {13}, + { {"0"_b, "_qqslmv"}, + {"1"_b, "_gjxsrn"}, + }, + }, + + { "_prgrzz", + {30}, + { {"0"_b, "cbnz_32_compbranch"}, + }, + }, + + { "_prjzxs", + {12}, + { {"0"_b, "ld2_asisdlsop_dx2_r2d"}, + }, + }, + + { "_prkmty", + {23, 22, 9}, + { {"000"_b, "brkpa_p_p_pp"}, + {"010"_b, "brkpas_p_p_pp"}, + }, + }, + + { "_prmjlz", + {18, 17}, + { {"00"_b, "st1_asisdlse_r1_1v"}, + }, + }, + + { "_prrkzv", + {20, 19, 18, 17, 16, 13}, + { {"000000"_b, "fmov_h_floatdp1"}, + {"000010"_b, "fneg_h_floatdp1"}, + {"000100"_b, "fcvt_sh_floatdp1"}, + {"001000"_b, "frintn_h_floatdp1"}, + {"001010"_b, "frintm_h_floatdp1"}, + {"001100"_b, "frinta_h_floatdp1"}, + {"001110"_b, "frintx_h_floatdp1"}, + }, + }, + + { "_prtvjm", + {23, 22, 12, 11, 10}, + { {"10000"_b, "fadd_z_zz"}, + {"10001"_b, "fsub_z_zz"}, + {"10010"_b, "fmul_z_zz"}, + {"x1000"_b, "fadd_z_zz"}, + {"x1001"_b, "fsub_z_zz"}, + {"x1010"_b, "fmul_z_zz"}, + {"xx011"_b, "ftsmul_z_zz"}, + {"xx110"_b, "frecps_z_zz"}, + {"xx111"_b, "frsqrts_z_zz"}, + }, + }, + + { "_prxyhr", + {9, 8, 7, 6, 5}, + { {"11111"_b, "autiza_64z_dp_1src"}, + }, + }, + + { "_prytjs", + {18, 4}, + { {"00"_b, "fcmge_p_p_z0"}, + {"01"_b, "fcmgt_p_p_z0"}, + }, + }, + + { "_pstgvl", + {23}, + { {"0"_b, "fmaxnm_asimdsame_only"}, + {"1"_b, "fminnm_asimdsame_only"}, + }, + }, + + { "_ptjyqx", + {13}, + { {"0"_b, "fcmuo_p_p_zz"}, + }, + }, + + { "_ptkgrz", + {22}, + { {"0"_b, "ldrsw_64_ldst_regoff"}, + }, + }, + + { "_ptqtmp", + {13, 12, 11, 10}, + { {"0111"_b, "fmulx_asisdsamefp16_only"}, + {"1001"_b, "fcmeq_asisdsamefp16_only"}, + {"1111"_b, "frecps_asisdsamefp16_only"}, + }, + }, + + { "_ptsjnr", + {30, 20, 19, 18, 17, 16, 13}, + { {"0000000"_b, "asr_z_p_zi"}, + {"0000010"_b, "lsr_z_p_zi"}, + {"0000110"_b, "lsl_z_p_zi"}, + {"0001000"_b, "asrd_z_p_zi"}, + {"0001100"_b, "sqshl_z_p_zi"}, + {"0001110"_b, "uqshl_z_p_zi"}, + {"0011000"_b, "srshr_z_p_zi"}, + {"0011010"_b, "urshr_z_p_zi"}, + {"0011110"_b, "sqshlu_z_p_zi"}, + {"0100000"_b, "asr_z_p_zz"}, + {"0100001"_b, "sxtb_z_p_z"}, + {"0100010"_b, "lsr_z_p_zz"}, + {"0100011"_b, "uxtb_z_p_z"}, + {"0100101"_b, "sxth_z_p_z"}, + {"0100110"_b, "lsl_z_p_zz"}, + {"0100111"_b, "uxth_z_p_z"}, + {"0101000"_b, "asrr_z_p_zz"}, + {"0101001"_b, "sxtw_z_p_z"}, + {"0101010"_b, "lsrr_z_p_zz"}, + {"0101011"_b, "uxtw_z_p_z"}, + {"0101101"_b, "abs_z_p_z"}, + {"0101110"_b, "lslr_z_p_zz"}, + 
{"0101111"_b, "neg_z_p_z"}, + {"0110000"_b, "asr_z_p_zw"}, + {"0110001"_b, "cls_z_p_z"}, + {"0110010"_b, "lsr_z_p_zw"}, + {"0110011"_b, "clz_z_p_z"}, + {"0110101"_b, "cnt_z_p_z"}, + {"0110110"_b, "lsl_z_p_zw"}, + {"0110111"_b, "cnot_z_p_z"}, + {"0111001"_b, "fabs_z_p_z"}, + {"0111011"_b, "fneg_z_p_z"}, + {"0111101"_b, "not_z_p_z"}, + {"1000001"_b, "urecpe_z_p_z"}, + {"1000011"_b, "ursqrte_z_p_z"}, + {"1000100"_b, "srshl_z_p_zz"}, + {"1000110"_b, "urshl_z_p_zz"}, + {"1001001"_b, "sadalp_z_p_z"}, + {"1001011"_b, "uadalp_z_p_z"}, + {"1001100"_b, "srshlr_z_p_zz"}, + {"1001110"_b, "urshlr_z_p_zz"}, + {"1010000"_b, "sqshl_z_p_zz"}, + {"1010001"_b, "sqabs_z_p_z"}, + {"1010010"_b, "uqshl_z_p_zz"}, + {"1010011"_b, "sqneg_z_p_z"}, + {"1010100"_b, "sqrshl_z_p_zz"}, + {"1010110"_b, "uqrshl_z_p_zz"}, + {"1011000"_b, "sqshlr_z_p_zz"}, + {"1011010"_b, "uqshlr_z_p_zz"}, + {"1011100"_b, "sqrshlr_z_p_zz"}, + {"1011110"_b, "uqrshlr_z_p_zz"}, + {"1100000"_b, "shadd_z_p_zz"}, + {"1100010"_b, "uhadd_z_p_zz"}, + {"1100011"_b, "addp_z_p_zz"}, + {"1100100"_b, "shsub_z_p_zz"}, + {"1100110"_b, "uhsub_z_p_zz"}, + {"1101000"_b, "srhadd_z_p_zz"}, + {"1101001"_b, "smaxp_z_p_zz"}, + {"1101010"_b, "urhadd_z_p_zz"}, + {"1101011"_b, "umaxp_z_p_zz"}, + {"1101100"_b, "shsubr_z_p_zz"}, + {"1101101"_b, "sminp_z_p_zz"}, + {"1101110"_b, "uhsubr_z_p_zz"}, + {"1101111"_b, "uminp_z_p_zz"}, + {"1110000"_b, "sqadd_z_p_zz"}, + {"1110010"_b, "uqadd_z_p_zz"}, + {"1110100"_b, "sqsub_z_p_zz"}, + {"1110110"_b, "uqsub_z_p_zz"}, + {"1111000"_b, "suqadd_z_p_zz"}, + {"1111010"_b, "usqadd_z_p_zz"}, + {"1111100"_b, "sqsubr_z_p_zz"}, + {"1111110"_b, "uqsubr_z_p_zz"}, + }, + }, + + { "_ptslzg", + {30, 23, 22, 13, 4}, + { {"01000"_b, "ldr_p_bi"}, + {"01100"_b, "prfb_i_p_bi_s"}, + {"01110"_b, "prfh_i_p_bi_s"}, + {"10x0x"_b, "ld1sw_z_p_bz_d_x32_scaled"}, + {"10x1x"_b, "ldff1sw_z_p_bz_d_x32_scaled"}, + }, + }, + + { "_ptstkz", + {4}, + { {"0"_b, "ccmp_32_condcmp_imm"}, + }, + }, + + { "_ptyynt", + {13, 12, 11, 10}, + { {"1111"_b, "_stmtkr"}, + }, + }, + + { "_pvtyjz", + {30}, + { {"0"_b, "ldapur_32_ldapstl_unscaled"}, + {"1"_b, "ldapur_64_ldapstl_unscaled"}, + }, + }, + + { "_pxnyvl", + {23, 13, 12}, + { {"001"_b, "fmulx_asisdsame_only"}, + {"011"_b, "frecps_asisdsame_only"}, + {"111"_b, "frsqrts_asisdsame_only"}, + }, + }, + + { "_pxvjkp", + {30}, + { {"0"_b, "bl_only_branch_imm"}, + {"1"_b, "_rmkpsk"}, + }, + }, + + { "_pxyrpm", + {22, 11}, + { {"00"_b, "sqdmulh_z_zzi_s"}, + {"01"_b, "mul_z_zzi_s"}, + {"10"_b, "sqdmulh_z_zzi_d"}, + {"11"_b, "mul_z_zzi_d"}, + }, + }, + + { "_pxzvjl", + {30, 23, 22, 13, 12, 11, 10}, + { {"000xxxx"_b, "fnmadd_s_floatdp3"}, + {"001xxxx"_b, "fnmadd_d_floatdp3"}, + {"011xxxx"_b, "fnmadd_h_floatdp3"}, + {"10001x0"_b, "fmla_asisdelem_rh_h"}, + {"10x0001"_b, "sshr_asisdshf_r"}, + {"10x0101"_b, "ssra_asisdshf_r"}, + {"10x1001"_b, "srshr_asisdshf_r"}, + {"10x1101"_b, "srsra_asisdshf_r"}, + {"11x01x0"_b, "fmla_asisdelem_r_sd"}, + {"1xx11x0"_b, "sqdmlal_asisdelem_l"}, + }, + }, + + { "_pygvrr", + {23, 18, 17, 16}, + { {"0000"_b, "sqxtunt_z_zz"}, + }, + }, + + { "_pyhrrt", + {30, 23, 22, 13, 12, 11, 10}, + { {"10001x0"_b, "fmulx_asisdelem_rh_h"}, + {"10x0001"_b, "sqshrun_asisdshf_n"}, + {"10x0011"_b, "sqrshrun_asisdshf_n"}, + {"10x0101"_b, "uqshrn_asisdshf_n"}, + {"10x0111"_b, "uqrshrn_asisdshf_n"}, + {"11x01x0"_b, "fmulx_asisdelem_r_sd"}, + }, + }, + + { "_pyjnpz", + {30, 13}, + { {"00"_b, "_xpqglq"}, + {"10"_b, "_ryrkqt"}, + {"11"_b, "_zjzmvh"}, + }, + }, + + { "_pyjtyn", + {22, 20, 19, 18, 17, 16}, + { {"111001"_b, 
"fcvtau_asisdmiscfp16_r"}, + {"x00001"_b, "fcvtau_asisdmisc_r"}, + {"x10000"_b, "fmaxnmp_asisdpair_only_sd"}, + }, + }, + + { "_pyttkp", + {30, 13, 12, 11, 10}, + { {"10001"_b, "sqrdmlah_asisdsame2_only"}, + {"10011"_b, "sqrdmlsh_asisdsame2_only"}, + }, + }, + + { "_pyvvqx", + {10}, + { {"0"_b, "_rkrntt"}, + }, + }, + + { "_pzpxxv", + {23, 22, 20, 19, 11}, + { {"00011"_b, "fcvtzs_asisdshf_c"}, + {"001x1"_b, "fcvtzs_asisdshf_c"}, + {"01xx1"_b, "fcvtzs_asisdshf_c"}, + }, + }, + + { "_pzttrn", + {20, 19, 18, 17, 16, 13}, + { {"000000"_b, "fmov_d_floatdp1"}, + {"000010"_b, "fneg_d_floatdp1"}, + {"000100"_b, "fcvt_sd_floatdp1"}, + {"000110"_b, "bfcvt_bs_floatdp1"}, + {"001000"_b, "frintn_d_floatdp1"}, + {"001010"_b, "frintm_d_floatdp1"}, + {"001100"_b, "frinta_d_floatdp1"}, + {"001110"_b, "frintx_d_floatdp1"}, + {"010000"_b, "frint32z_d_floatdp1"}, + {"010010"_b, "frint64z_d_floatdp1"}, + }, + }, + + { "_pzzgts", + {30, 23, 22, 13, 12, 11, 10}, + { {"000xx10"_b, "stlur_s_ldapstl_simd"}, + {"001xx10"_b, "ldapur_s_ldapstl_simd"}, + {"100xx10"_b, "stlur_d_ldapstl_simd"}, + {"101xx10"_b, "ldapur_d_ldapstl_simd"}, + {"x000001"_b, "cpyp_cpy_memcms"}, + {"x000101"_b, "cpypwt_cpy_memcms"}, + {"x001001"_b, "cpyprt_cpy_memcms"}, + {"x001101"_b, "cpypt_cpy_memcms"}, + {"x010001"_b, "cpym_cpy_memcms"}, + {"x010101"_b, "cpymwt_cpy_memcms"}, + {"x011001"_b, "cpymrt_cpy_memcms"}, + {"x011101"_b, "cpymt_cpy_memcms"}, + {"x100001"_b, "cpye_cpy_memcms"}, + {"x100101"_b, "cpyewt_cpy_memcms"}, + {"x101001"_b, "cpyert_cpy_memcms"}, + {"x101101"_b, "cpyet_cpy_memcms"}, + {"x110001"_b, "setgp_set_memcms"}, + {"x110101"_b, "setgpt_set_memcms"}, + {"x111001"_b, "setgpn_set_memcms"}, + {"x111101"_b, "setgptn_set_memcms"}, + }, + }, + + { "_qgqgkx", + {30, 23, 22}, + { {"000"_b, "adds_32s_addsub_ext"}, + {"100"_b, "subs_32s_addsub_ext"}, + }, + }, + + { "_qgshrr", + {30, 22, 20, 19, 18, 17, 16}, + { {"00xxxxx"_b, "stlxp_sp32_ldstexcl"}, + {"0111111"_b, "ldaxp_lp32_ldstexcl"}, + {"10xxxxx"_b, "stlxp_sp64_ldstexcl"}, + {"1111111"_b, "ldaxp_lp64_ldstexcl"}, + }, + }, + + { "_qgsrqq", + {23, 22}, + { {"00"_b, "fmadd_s_floatdp3"}, + {"01"_b, "fmadd_d_floatdp3"}, + {"11"_b, "fmadd_h_floatdp3"}, + }, + }, + + { "_qgvrqy", + {1}, + { {"1"_b, "blraaz_64_branch_reg"}, + }, + }, + + { "_qgvtrn", + {23, 22, 20, 19, 13, 11, 10}, + { {"00x1001"_b, "sqshrn_asisdshf_n"}, + {"00x1011"_b, "sqrshrn_asisdshf_n"}, + {"00xx0x0"_b, "fmul_asisdelem_rh_h"}, + {"010x001"_b, "sqshrn_asisdshf_n"}, + {"010x011"_b, "sqrshrn_asisdshf_n"}, + {"0111001"_b, "sqshrn_asisdshf_n"}, + {"0111011"_b, "sqrshrn_asisdshf_n"}, + {"0x10001"_b, "sqshrn_asisdshf_n"}, + {"0x10011"_b, "sqrshrn_asisdshf_n"}, + {"1xxx0x0"_b, "fmul_asisdelem_r_sd"}, + {"xxxx1x0"_b, "sqdmull_asisdelem_l"}, + }, + }, + + { "_qgymsy", + {11}, + { {"0"_b, "_hmsgpj"}, + }, + }, + + { "_qgyppr", + {23, 13, 12, 11, 10}, + { {"00010"_b, "_pyjtyn"}, + {"00110"_b, "_nhrkqm"}, + {"01001"_b, "fcmge_asisdsame_only"}, + {"01011"_b, "facge_asisdsame_only"}, + {"01110"_b, "_kxmjsh"}, + {"10010"_b, "_rpjgkh"}, + {"10101"_b, "fabd_asisdsame_only"}, + {"10110"_b, "_hmpzzg"}, + {"11001"_b, "fcmgt_asisdsame_only"}, + {"11011"_b, "facgt_asisdsame_only"}, + {"11110"_b, "_sxsxxt"}, + }, + }, + + { "_qhpkhm", + {18, 17}, + { {"00"_b, "ld1_asisdlse_r3_3v"}, + }, + }, + + { "_qhzvvh", + {30}, + { {"0"_b, "bl_only_branch_imm"}, + }, + }, + + { "_qjqrgz", + {20, 19, 18, 17, 16}, + { {"11111"_b, "ldarh_lr32_ldstexcl"}, + }, + }, + + { "_qjrllr", + {23, 22, 12}, + { {"000"_b, "_pqsvty"}, + {"001"_b, "_rjrqxt"}, + 
{"010"_b, "_rnsmjq"}, + {"011"_b, "_msvhjv"}, + {"110"_b, "_rnlxtv"}, + {"111"_b, "_jjgpxz"}, + }, + }, + + { "_qjstll", + {18, 17}, + { {"0x"_b, "ld3_asisdlsop_sx3_r3s"}, + {"10"_b, "ld3_asisdlsop_sx3_r3s"}, + {"11"_b, "ld3_asisdlsop_s3_i3s"}, + }, + }, + + { "_qkhrkh", + {20, 19, 18, 17, 16}, + { {"00000"_b, "cmle_asisdmisc_z"}, + }, + }, + + { "_qkrnms", + {30}, + { {"0"_b, "orr_32_log_shift"}, + {"1"_b, "ands_32_log_shift"}, + }, + }, + + { "_qkxmvp", + {13, 12}, + { {"10"_b, "smin_64_dp_2src"}, + }, + }, + + { "_qkzjxm", + {30, 23, 22, 20, 13}, + { {"00001"_b, "ldnt1w_z_p_bi_contiguous"}, + {"000x0"_b, "ldnt1w_z_p_br_contiguous"}, + {"00101"_b, "ld3w_z_p_bi_contiguous"}, + {"001x0"_b, "ld3w_z_p_br_contiguous"}, + {"01001"_b, "ldnt1d_z_p_bi_contiguous"}, + {"010x0"_b, "ldnt1d_z_p_br_contiguous"}, + {"01101"_b, "ld3d_z_p_bi_contiguous"}, + {"011x0"_b, "ld3d_z_p_br_contiguous"}, + {"10011"_b, "stnt1w_z_p_bi_contiguous"}, + {"100x0"_b, "st1w_z_p_bz_d_x32_unscaled"}, + {"10101"_b, "st1w_z_p_bi"}, + {"10111"_b, "st3w_z_p_bi_contiguous"}, + {"101x0"_b, "st1w_z_p_bz_s_x32_unscaled"}, + {"11011"_b, "stnt1d_z_p_bi_contiguous"}, + {"110x0"_b, "st1d_z_p_bz_d_x32_unscaled"}, + {"11111"_b, "st3d_z_p_bi_contiguous"}, + }, + }, + + { "_qljhnp", + {22}, + { {"0"_b, "sqdmullt_z_zzi_s"}, + {"1"_b, "sqdmullt_z_zzi_d"}, + }, + }, + + { "_qlmqyx", + {18, 17, 12}, + { {"0x0"_b, "ld3_asisdlsop_dx3_r3d"}, + {"100"_b, "ld3_asisdlsop_dx3_r3d"}, + {"110"_b, "ld3_asisdlsop_d3_i3d"}, + }, + }, + + { "_qlpnnn", + {23, 10, 4}, + { {"000"_b, "_vryrnh"}, + }, + }, + + { "_qlxksl", + {30}, + { {"0"_b, "_hrxyts"}, + {"1"_b, "_tytvjk"}, + }, + }, + + { "_qlxlxk", + {20, 19, 18, 17, 16}, + { {"11111"_b, "ldar_lr32_ldstexcl"}, + }, + }, + + { "_qlzvpg", + {13, 12, 11, 10}, + { {"0000"_b, "raddhn_asimddiff_n"}, + {"0001"_b, "ushl_asimdsame_only"}, + {"0010"_b, "_kpnlmr"}, + {"0011"_b, "uqshl_asimdsame_only"}, + {"0100"_b, "uabal_asimddiff_l"}, + {"0101"_b, "urshl_asimdsame_only"}, + {"0110"_b, "_ssqyrk"}, + {"0111"_b, "uqrshl_asimdsame_only"}, + {"1000"_b, "rsubhn_asimddiff_n"}, + {"1001"_b, "umax_asimdsame_only"}, + {"1010"_b, "_sjlqvg"}, + {"1011"_b, "umin_asimdsame_only"}, + {"1100"_b, "uabdl_asimddiff_l"}, + {"1101"_b, "uabd_asimdsame_only"}, + {"1110"_b, "_gplkxy"}, + {"1111"_b, "uaba_asimdsame_only"}, + }, + }, + + { "_qnprqt", + {4}, + { {"0"_b, "eor_p_p_pp_z"}, + {"1"_b, "sel_p_p_pp"}, + }, + }, + + { "_qntrvk", + {30, 23, 22, 13, 12, 11, 10}, + { {"000xxxx"_b, "fnmsub_s_floatdp3"}, + {"001xxxx"_b, "fnmsub_d_floatdp3"}, + {"011xxxx"_b, "fnmsub_h_floatdp3"}, + {"10001x0"_b, "fmul_asisdelem_rh_h"}, + {"10x0101"_b, "sqshrn_asisdshf_n"}, + {"10x0111"_b, "sqrshrn_asisdshf_n"}, + {"11x01x0"_b, "fmul_asisdelem_r_sd"}, + {"1xx11x0"_b, "sqdmull_asisdelem_l"}, + }, + }, + + { "_qnysqv", + {30}, + { {"0"_b, "cbnz_64_compbranch"}, + }, + }, + + { "_qpgxxr", + {23, 22}, + { {"01"_b, "fadd_asimdsamefp16_only"}, + {"11"_b, "fsub_asimdsamefp16_only"}, + }, + }, + + { "_qpsryx", + {30, 23, 22, 11, 10}, + { {"01000"_b, "csel_64_condsel"}, + {"01001"_b, "csinc_64_condsel"}, + {"11000"_b, "csinv_64_condsel"}, + {"11001"_b, "csneg_64_condsel"}, + }, + }, + + { "_qpvgnh", + {30, 23, 22, 20, 13}, + { {"00001"_b, "ld2b_z_p_bi_contiguous"}, + {"000x0"_b, "ld2b_z_p_br_contiguous"}, + {"00101"_b, "ld4b_z_p_bi_contiguous"}, + {"001x0"_b, "ld4b_z_p_br_contiguous"}, + {"01001"_b, "ld2h_z_p_bi_contiguous"}, + {"010x0"_b, "ld2h_z_p_br_contiguous"}, + {"01101"_b, "ld4h_z_p_bi_contiguous"}, + {"011x0"_b, "ld4h_z_p_br_contiguous"}, + {"10011"_b, 
"st2b_z_p_bi_contiguous"}, + {"10111"_b, "st4b_z_p_bi_contiguous"}, + {"10x01"_b, "st1b_z_p_bi"}, + {"11011"_b, "st2h_z_p_bi_contiguous"}, + {"110x0"_b, "st1h_z_p_bz_d_x32_scaled"}, + {"11111"_b, "st4h_z_p_bi_contiguous"}, + {"111x0"_b, "st1h_z_p_bz_s_x32_scaled"}, + {"11x01"_b, "st1h_z_p_bi"}, + }, + }, + + { "_qpyxsv", + {18}, + { {"0"_b, "ld4_asisdlso_h4_4h"}, + }, + }, + + { "_qqjtpm", + {9, 8, 7, 6, 5}, + { {"11111"_b, "autdza_64z_dp_1src"}, + }, + }, + + { "_qqslmv", + {20, 19, 18, 17, 16}, + { {"00000"_b, "fcvtns_32s_float2int"}, + {"00001"_b, "fcvtnu_32s_float2int"}, + {"00010"_b, "scvtf_s32_float2int"}, + {"00011"_b, "ucvtf_s32_float2int"}, + {"00100"_b, "fcvtas_32s_float2int"}, + {"00101"_b, "fcvtau_32s_float2int"}, + {"00110"_b, "fmov_32s_float2int"}, + {"00111"_b, "fmov_s32_float2int"}, + {"01000"_b, "fcvtps_32s_float2int"}, + {"01001"_b, "fcvtpu_32s_float2int"}, + {"10000"_b, "fcvtms_32s_float2int"}, + {"10001"_b, "fcvtmu_32s_float2int"}, + {"11000"_b, "fcvtzs_32s_float2int"}, + {"11001"_b, "fcvtzu_32s_float2int"}, + }, + }, + + { "_qqvgql", + {4, 3, 2, 1, 0}, + { {"11111"_b, "_gtsglj"}, + }, + }, + + { "_qqyryl", + {30, 23, 22, 13, 4}, + { {"00x0x"_b, "ld1w_z_p_bz_s_x32_unscaled"}, + {"00x1x"_b, "ldff1w_z_p_bz_s_x32_unscaled"}, + {"0100x"_b, "ldr_z_bi"}, + {"01100"_b, "prfw_i_p_bi_s"}, + {"01110"_b, "prfd_i_p_bi_s"}, + {"10x0x"_b, "ld1w_z_p_bz_d_x32_unscaled"}, + {"10x1x"_b, "ldff1w_z_p_bz_d_x32_unscaled"}, + {"11x0x"_b, "ld1d_z_p_bz_d_x32_unscaled"}, + {"11x1x"_b, "ldff1d_z_p_bz_d_x32_unscaled"}, + }, + }, + + { "_qrsxzp", + {23, 22, 20, 19, 16, 13, 10}, + { {"0000000"_b, "_tjnzjl"}, + {"0000001"_b, "_nklyky"}, + {"0000010"_b, "_pjskhr"}, + {"0000011"_b, "_kqstrr"}, + {"0000101"_b, "_nvzsxn"}, + {"0100000"_b, "_jnktqs"}, + {"0100001"_b, "_ttzlqn"}, + {"0100010"_b, "_sxgnmg"}, + {"0100011"_b, "_yqzxvr"}, + {"0100101"_b, "_tvtvkt"}, + {"100xx00"_b, "st1_asisdlsop_sx1_r1s"}, + {"100xx01"_b, "_mnzgkx"}, + {"100xx10"_b, "st3_asisdlsop_sx3_r3s"}, + {"100xx11"_b, "_tjxyky"}, + {"1010x00"_b, "st1_asisdlsop_sx1_r1s"}, + {"1010x01"_b, "_mphkpq"}, + {"1010x10"_b, "st3_asisdlsop_sx3_r3s"}, + {"1010x11"_b, "_hqkhsy"}, + {"1011000"_b, "st1_asisdlsop_sx1_r1s"}, + {"1011001"_b, "_qsszkx"}, + {"1011010"_b, "st3_asisdlsop_sx3_r3s"}, + {"1011011"_b, "_gsjvmx"}, + {"1011100"_b, "_gqmjys"}, + {"1011101"_b, "_qtqrmn"}, + {"1011110"_b, "_mjrqhl"}, + {"1011111"_b, "_ngtlpz"}, + {"110xx00"_b, "ld1_asisdlsop_sx1_r1s"}, + {"110xx01"_b, "_hkjjsr"}, + {"110xx10"_b, "ld3_asisdlsop_sx3_r3s"}, + {"110xx11"_b, "_yryygq"}, + {"1110x00"_b, "ld1_asisdlsop_sx1_r1s"}, + {"1110x01"_b, "_tptqjs"}, + {"1110x10"_b, "ld3_asisdlsop_sx3_r3s"}, + {"1110x11"_b, "_szmyzt"}, + {"1111000"_b, "ld1_asisdlsop_sx1_r1s"}, + {"1111001"_b, "_zxklzp"}, + {"1111010"_b, "ld3_asisdlsop_sx3_r3s"}, + {"1111011"_b, "_qzxgqh"}, + {"1111100"_b, "_yzgthp"}, + {"1111101"_b, "_rgnryt"}, + {"1111110"_b, "_qjstll"}, + {"1111111"_b, "_qlmqyx"}, + }, + }, + + { "_qrtjvn", + {30, 23, 22, 20, 19, 12, 11}, + { {"0000000"_b, "movi_asimdimm_d_ds"}, + {"1000000"_b, "movi_asimdimm_d2_d"}, + {"1000010"_b, "fmov_asimdimm_d2_d"}, + {"x00x100"_b, "ucvtf_asimdshf_c"}, + {"x00x111"_b, "fcvtzu_asimdshf_c"}, + {"x010x00"_b, "ucvtf_asimdshf_c"}, + {"x010x11"_b, "fcvtzu_asimdshf_c"}, + {"x011100"_b, "ucvtf_asimdshf_c"}, + {"x011111"_b, "fcvtzu_asimdshf_c"}, + {"x0x1000"_b, "ucvtf_asimdshf_c"}, + {"x0x1011"_b, "fcvtzu_asimdshf_c"}, + }, + }, + + { "_qrygny", + {30, 23, 22, 20, 13}, + { {"00001"_b, "ld1b_z_p_bi_u8"}, + {"00011"_b, "ldnf1b_z_p_bi_u8"}, + {"00101"_b, 
"ld1b_z_p_bi_u32"}, + {"00111"_b, "ldnf1b_z_p_bi_u32"}, + {"01001"_b, "ld1sw_z_p_bi_s64"}, + {"01011"_b, "ldnf1sw_z_p_bi_s64"}, + {"01101"_b, "ld1h_z_p_bi_u32"}, + {"01111"_b, "ldnf1h_z_p_bi_u32"}, + {"100x0"_b, "st1b_z_p_bz_d_x32_unscaled"}, + {"100x1"_b, "st1b_z_p_bz_d_64_unscaled"}, + {"101x0"_b, "st1b_z_p_bz_s_x32_unscaled"}, + {"101x1"_b, "st1b_z_p_ai_d"}, + {"110x0"_b, "st1h_z_p_bz_d_x32_unscaled"}, + {"110x1"_b, "st1h_z_p_bz_d_64_unscaled"}, + {"111x0"_b, "st1h_z_p_bz_s_x32_unscaled"}, + {"111x1"_b, "st1h_z_p_ai_d"}, + }, + }, + + { "_qsszkx", + {12}, + { {"0"_b, "st1_asisdlsop_dx1_r1d"}, + }, + }, + + { "_qtghgs", + {22}, + { {"0"_b, "ldrsw_64_ldst_regoff"}, + }, + }, + + { "_qtgrzv", + {20, 18, 17}, + { {"000"_b, "_gznrjv"}, + }, + }, + + { "_qtgvlx", + {23, 22, 20, 19, 17, 16}, + { {"000010"_b, "scvtf_s64_float2fix"}, + {"000011"_b, "ucvtf_s64_float2fix"}, + {"001100"_b, "fcvtzs_64s_float2fix"}, + {"001101"_b, "fcvtzu_64s_float2fix"}, + {"010010"_b, "scvtf_d64_float2fix"}, + {"010011"_b, "ucvtf_d64_float2fix"}, + {"011100"_b, "fcvtzs_64d_float2fix"}, + {"011101"_b, "fcvtzu_64d_float2fix"}, + {"110010"_b, "scvtf_h64_float2fix"}, + {"110011"_b, "ucvtf_h64_float2fix"}, + {"111100"_b, "fcvtzs_64h_float2fix"}, + {"111101"_b, "fcvtzu_64h_float2fix"}, + }, + }, + + { "_qtqrmn", + {18, 17, 12}, + { {"0x0"_b, "st1_asisdlsop_dx1_r1d"}, + {"100"_b, "st1_asisdlsop_dx1_r1d"}, + {"110"_b, "st1_asisdlsop_d1_i1d"}, + }, + }, + + { "_qtxlsr", + {13, 12, 11, 10}, + { {"1111"_b, "cas_c64_ldstexcl"}, + }, + }, + + { "_qtxpky", + {4}, + { {"0"_b, "cmphs_p_p_zi"}, + {"1"_b, "cmphi_p_p_zi"}, + }, + }, + + { "_qvgtlh", + {30, 23, 22, 11}, + { {"0001"_b, "strb_32b_ldst_regoff"}, + {"0011"_b, "ldrb_32b_ldst_regoff"}, + {"0100"_b, "_hjplhs"}, + {"0101"_b, "ldrsb_64b_ldst_regoff"}, + {"0111"_b, "ldrsb_32b_ldst_regoff"}, + {"1001"_b, "strh_32_ldst_regoff"}, + {"1011"_b, "ldrh_32_ldst_regoff"}, + {"1100"_b, "_vrzksz"}, + {"1101"_b, "ldrsh_64_ldst_regoff"}, + {"1111"_b, "ldrsh_32_ldst_regoff"}, + }, + }, + + { "_qvjmmq", + {30}, + { {"0"_b, "b_only_branch_imm"}, + }, + }, + + { "_qvlnll", + {22, 20, 11}, + { {"010"_b, "decw_r_rs"}, + {"110"_b, "decd_r_rs"}, + }, + }, + + { "_qvtxpr", + {20, 9, 4}, + { {"000"_b, "uzp1_p_pp"}, + }, + }, + + { "_qvzvmq", + {30, 23, 22}, + { {"000"_b, "stlxrb_sr32_ldstexcl"}, + {"001"_b, "_ynznxv"}, + {"010"_b, "_lqlrxp"}, + {"011"_b, "_grprpj"}, + {"100"_b, "stlxrh_sr32_ldstexcl"}, + {"101"_b, "_jgsryt"}, + {"110"_b, "_qyrqxp"}, + {"111"_b, "_qjqrgz"}, + }, + }, + + { "_qyjvqr", + {23, 18, 17, 16}, + { {"0000"_b, "sqxtnt_z_zz"}, + }, + }, + + { "_qyrqxp", + {20, 19, 18, 17, 16}, + { {"11111"_b, "stlrh_sl32_ldstexcl"}, + }, + }, + + { "_qyyrqq", + {22, 13, 12}, + { {"000"_b, "swp_64_memop"}, + {"001"_b, "_ymghnh"}, + {"010"_b, "st64bv0_64_memop"}, + {"011"_b, "st64bv_64_memop"}, + {"100"_b, "swpl_64_memop"}, + }, + }, + + { "_qzlvkm", + {18}, + { {"0"_b, "st3_asisdlsop_hx3_r3h"}, + {"1"_b, "st3_asisdlsop_h3_i3h"}, + }, + }, + + { "_qzmrnj", + {23, 22}, + { {"00"_b, "dup_asimdins_dr_r"}, + {"01"_b, "fmla_asimdsamefp16_only"}, + {"11"_b, "fmls_asimdsamefp16_only"}, + }, + }, + + { "_qzsyvx", + {30, 23, 22, 11, 10}, + { {"00010"_b, "str_s_ldst_regoff"}, + {"00110"_b, "ldr_s_ldst_regoff"}, + {"10010"_b, "str_d_ldst_regoff"}, + {"10110"_b, "ldr_d_ldst_regoff"}, + }, + }, + + { "_qzxgqh", + {12}, + { {"0"_b, "ld3_asisdlsop_dx3_r3d"}, + }, + }, + + { "_rgnryt", + {18, 17, 12}, + { {"0x0"_b, "ld1_asisdlsop_dx1_r1d"}, + {"100"_b, "ld1_asisdlsop_dx1_r1d"}, + {"110"_b, 
"ld1_asisdlsop_d1_i1d"}, + }, + }, + + { "_rgxthl", + {30, 23, 22}, + { {"000"_b, "stxp_sp32_ldstexcl"}, + {"001"_b, "_mjyhsl"}, + {"010"_b, "_vrsjnp"}, + {"011"_b, "_zyxnpz"}, + {"100"_b, "stxp_sp64_ldstexcl"}, + {"101"_b, "_snrzky"}, + {"110"_b, "_qtxlsr"}, + {"111"_b, "_pkpvmj"}, + }, + }, + + { "_rgztgm", + {20, 18, 17}, + { {"000"_b, "_klrksl"}, + }, + }, + + { "_rhhrhg", + {30, 13, 4}, + { {"000"_b, "cmphs_p_p_zw"}, + {"001"_b, "cmphi_p_p_zw"}, + {"010"_b, "cmplo_p_p_zw"}, + {"011"_b, "cmpls_p_p_zw"}, + }, + }, + + { "_rhmxyp", + {20, 9, 4}, + { {"000"_b, "trn1_p_pp"}, + }, + }, + + { "_rhpmjz", + {12, 11}, + { {"00"_b, "incp_z_p_z"}, + {"01"_b, "incp_r_p_r"}, + {"10"_b, "_mpstrr"}, + }, + }, + + { "_rjmhxr", + {30}, + { {"0"_b, "adds_64_addsub_shift"}, + {"1"_b, "subs_64_addsub_shift"}, + }, + }, + + { "_rjrqxt", + {9, 8, 7, 6, 5}, + { {"00000"_b, "fmov_s_floatimm"}, + }, + }, + + { "_rjspzr", + {13, 12}, + { {"00"_b, "udiv_32_dp_2src"}, + {"10"_b, "asrv_32_dp_2src"}, + }, + }, + + { "_rjthsm", + {30, 23, 22}, + { {"001"_b, "sbfm_64m_bitfield"}, + {"101"_b, "ubfm_64m_bitfield"}, + }, + }, + + { "_rjvgkl", + {30, 23, 22, 19, 18, 17, 16}, + { {"000xxxx"_b, "umov_asimdins_w_w"}, + {"1001000"_b, "umov_asimdins_x_x"}, + {"x01xxxx"_b, "frecps_asimdsamefp16_only"}, + {"x11xxxx"_b, "frsqrts_asimdsamefp16_only"}, + }, + }, + + { "_rjyrnt", + {4}, + { {"0"_b, "cmpge_p_p_zi"}, + {"1"_b, "cmpgt_p_p_zi"}, + }, + }, + + { "_rjysnh", + {18, 17, 16, 9, 8, 7, 6}, + { {"0000000"_b, "fadd_z_p_zs"}, + {"0010000"_b, "fsub_z_p_zs"}, + {"0100000"_b, "fmul_z_p_zs"}, + {"0110000"_b, "fsubr_z_p_zs"}, + {"1000000"_b, "fmaxnm_z_p_zs"}, + {"1010000"_b, "fminnm_z_p_zs"}, + {"1100000"_b, "fmax_z_p_zs"}, + {"1110000"_b, "fmin_z_p_zs"}, + }, + }, + + { "_rkjjtp", + {23, 22, 20, 19, 11}, + { {"00010"_b, "scvtf_asisdshf_c"}, + {"001x0"_b, "scvtf_asisdshf_c"}, + {"01xx0"_b, "scvtf_asisdshf_c"}, + }, + }, + + { "_rknxlg", + {12}, + { {"0"_b, "ld4_asisdlsop_dx4_r4d"}, + }, + }, + + { "_rkpylh", + {20, 19, 18, 17, 16}, + { {"00010"_b, "scvtf_d32_float2fix"}, + {"00011"_b, "ucvtf_d32_float2fix"}, + {"11000"_b, "fcvtzs_32d_float2fix"}, + {"11001"_b, "fcvtzu_32d_float2fix"}, + }, + }, + + { "_rkrlsy", + {20, 19, 18, 17, 16}, + { {"00000"_b, "rev64_asimdmisc_r"}, + }, + }, + + { "_rkrntt", + {23, 22, 20, 19, 17, 16, 13}, + { {"0000000"_b, "_hynprk"}, + {"0000001"_b, "_phpphm"}, + {"0100000"_b, "_tlvmlq"}, + {"0100001"_b, "_qpyxsv"}, + {"100xxx0"_b, "st2_asisdlsop_hx2_r2h"}, + {"100xxx1"_b, "st4_asisdlsop_hx4_r4h"}, + {"1010xx0"_b, "st2_asisdlsop_hx2_r2h"}, + {"1010xx1"_b, "st4_asisdlsop_hx4_r4h"}, + {"10110x0"_b, "st2_asisdlsop_hx2_r2h"}, + {"10110x1"_b, "st4_asisdlsop_hx4_r4h"}, + {"1011100"_b, "st2_asisdlsop_hx2_r2h"}, + {"1011101"_b, "st4_asisdlsop_hx4_r4h"}, + {"1011110"_b, "_skmzll"}, + {"1011111"_b, "_hkxlsm"}, + {"110xxx0"_b, "ld2_asisdlsop_hx2_r2h"}, + {"110xxx1"_b, "ld4_asisdlsop_hx4_r4h"}, + {"1110xx0"_b, "ld2_asisdlsop_hx2_r2h"}, + {"1110xx1"_b, "ld4_asisdlsop_hx4_r4h"}, + {"11110x0"_b, "ld2_asisdlsop_hx2_r2h"}, + {"11110x1"_b, "ld4_asisdlsop_hx4_r4h"}, + {"1111100"_b, "ld2_asisdlsop_hx2_r2h"}, + {"1111101"_b, "ld4_asisdlsop_hx4_r4h"}, + {"1111110"_b, "_ykhhqq"}, + {"1111111"_b, "_khtsmx"}, + }, + }, + + { "_rkskkv", + {18}, + { {"1"_b, "fminv_v_p_z"}, + }, + }, + + { "_rktqym", + {30, 23, 22, 13, 12, 11, 10}, + { {"010xx00"_b, "csel_32_condsel"}, + {"010xx01"_b, "csinc_32_condsel"}, + {"0110000"_b, "crc32b_32c_dp_2src"}, + {"0110001"_b, "crc32h_32c_dp_2src"}, + {"0110010"_b, "crc32w_32c_dp_2src"}, + {"0110100"_b, 
"crc32cb_32c_dp_2src"}, + {"0110101"_b, "crc32ch_32c_dp_2src"}, + {"0110110"_b, "crc32cw_32c_dp_2src"}, + {"0111000"_b, "smax_32_dp_2src"}, + {"0111001"_b, "umax_32_dp_2src"}, + {"0111010"_b, "smin_32_dp_2src"}, + {"0111011"_b, "umin_32_dp_2src"}, + {"110xx00"_b, "csinv_32_condsel"}, + {"110xx01"_b, "csneg_32_condsel"}, + }, + }, + + { "_rkxlyj", + {30, 23, 22, 13, 12, 11, 10}, + { {"000xx10"_b, "stlur_b_ldapstl_simd"}, + {"001xx10"_b, "ldapur_b_ldapstl_simd"}, + {"010xx10"_b, "stlur_q_ldapstl_simd"}, + {"011xx10"_b, "ldapur_q_ldapstl_simd"}, + {"100xx10"_b, "stlur_h_ldapstl_simd"}, + {"101xx10"_b, "ldapur_h_ldapstl_simd"}, + {"x000001"_b, "cpyprn_cpy_memcms"}, + {"x000101"_b, "cpypwtrn_cpy_memcms"}, + {"x001001"_b, "cpyprtrn_cpy_memcms"}, + {"x001101"_b, "cpyptrn_cpy_memcms"}, + {"x010001"_b, "cpymrn_cpy_memcms"}, + {"x010101"_b, "cpymwtrn_cpy_memcms"}, + {"x011001"_b, "cpymrtrn_cpy_memcms"}, + {"x011101"_b, "cpymtrn_cpy_memcms"}, + {"x100001"_b, "cpyern_cpy_memcms"}, + {"x100101"_b, "cpyewtrn_cpy_memcms"}, + {"x101001"_b, "cpyertrn_cpy_memcms"}, + {"x101101"_b, "cpyetrn_cpy_memcms"}, + {"x110001"_b, "setge_set_memcms"}, + {"x110101"_b, "setget_set_memcms"}, + {"x111001"_b, "setgen_set_memcms"}, + {"x111101"_b, "setgetn_set_memcms"}, + }, + }, + + { "_rkzlpp", + {4}, + { {"0"_b, "ccmp_64_condcmp_reg"}, + }, + }, + + { "_rlgtnn", + {23}, + { {"0"_b, "_sxsgmq"}, + }, + }, + + { "_rlpmrx", + {30}, + { {"0"_b, "_txzxzs"}, + {"1"_b, "_htsjxj"}, + }, + }, + + { "_rlrjxp", + {13, 4}, + { {"00"_b, "fcmge_p_p_zz"}, + {"01"_b, "fcmgt_p_p_zz"}, + {"10"_b, "fcmeq_p_p_zz"}, + {"11"_b, "fcmne_p_p_zz"}, + }, + }, + + { "_rlxhxz", + {9, 8, 7, 6, 5}, + { {"11111"_b, "pacdzb_64z_dp_1src"}, + }, + }, + + { "_rlylxh", + {18}, + { {"0"_b, "ld3_asisdlsop_bx3_r3b"}, + {"1"_b, "ld3_asisdlsop_b3_i3b"}, + }, + }, + + { "_rlyvpn", + {23, 12, 11, 10}, + { {"0000"_b, "sqshrunb_z_zi"}, + {"0001"_b, "sqshrunt_z_zi"}, + {"0010"_b, "sqrshrunb_z_zi"}, + {"0011"_b, "sqrshrunt_z_zi"}, + {"0100"_b, "shrnb_z_zi"}, + {"0101"_b, "shrnt_z_zi"}, + {"0110"_b, "rshrnb_z_zi"}, + {"0111"_b, "rshrnt_z_zi"}, + }, + }, + + { "_rmkpsk", + {23}, + { {"0"_b, "_srkslp"}, + }, + }, + + { "_rmmpym", + {2, 1, 0}, + { {"000"_b, "_glgznt"}, + }, + }, + + { "_rmyzpp", + {20, 19, 18, 17}, + { {"0000"_b, "_gnhjkl"}, + }, + }, + + { "_rnlxtv", + {13}, + { {"0"_b, "_vvgpzq"}, + {"1"_b, "_mqljmr"}, + }, + }, + + { "_rnphqp", + {20, 19, 18, 17, 16, 4, 3}, + { {"0000001"_b, "fcmp_hz_floatcmp"}, + {"0000011"_b, "fcmpe_hz_floatcmp"}, + {"xxxxx00"_b, "fcmp_h_floatcmp"}, + {"xxxxx10"_b, "fcmpe_h_floatcmp"}, + }, + }, + + { "_rnqmyp", + {23, 22, 20, 19, 13, 11}, + { {"0000x0"_b, "mvni_asimdimm_l_sl"}, + {"00x100"_b, "sri_asimdshf_r"}, + {"00x110"_b, "sqshlu_asimdshf_r"}, + {"010x00"_b, "sri_asimdshf_r"}, + {"010x10"_b, "sqshlu_asimdshf_r"}, + {"011100"_b, "sri_asimdshf_r"}, + {"011110"_b, "sqshlu_asimdshf_r"}, + {"0x1000"_b, "sri_asimdshf_r"}, + {"0x1010"_b, "sqshlu_asimdshf_r"}, + }, + }, + + { "_rnqtmt", + {30}, + { {"0"_b, "_zyjjgs"}, + {"1"_b, "_lrntmz"}, + }, + }, + + { "_rnsmjq", + {13}, + { {"0"_b, "_xxqzvy"}, + {"1"_b, "_rmmpym"}, + }, + }, + + { "_rpjgkh", + {22, 20, 19, 18, 17, 16}, + { {"111000"_b, "fcmge_asisdmiscfp16_fz"}, + {"x00000"_b, "fcmge_asisdmisc_fz"}, + {"x10000"_b, "fminnmp_asisdpair_only_sd"}, + }, + }, + + { "_rpjrhs", + {23, 22, 4}, + { {"000"_b, "fccmp_s_floatccmp"}, + {"001"_b, "fccmpe_s_floatccmp"}, + {"010"_b, "fccmp_d_floatccmp"}, + {"011"_b, "fccmpe_d_floatccmp"}, + {"110"_b, "fccmp_h_floatccmp"}, + {"111"_b, 
"fccmpe_h_floatccmp"}, + }, + }, + + { "_rpplns", + {23, 22, 20, 19, 11}, + { {"00010"_b, "srshr_asisdshf_r"}, + {"001x0"_b, "srshr_asisdshf_r"}, + {"01xx0"_b, "srshr_asisdshf_r"}, + }, + }, + + { "_rpzykx", + {11}, + { {"0"_b, "_svvyrz"}, + }, + }, + + { "_rqghyv", + {30, 23, 22, 11, 10}, + { {"00000"_b, "stur_32_ldst_unscaled"}, + {"00001"_b, "str_32_ldst_immpost"}, + {"00010"_b, "sttr_32_ldst_unpriv"}, + {"00011"_b, "str_32_ldst_immpre"}, + {"00100"_b, "ldur_32_ldst_unscaled"}, + {"00101"_b, "ldr_32_ldst_immpost"}, + {"00110"_b, "ldtr_32_ldst_unpriv"}, + {"00111"_b, "ldr_32_ldst_immpre"}, + {"01000"_b, "ldursw_64_ldst_unscaled"}, + {"01001"_b, "ldrsw_64_ldst_immpost"}, + {"01010"_b, "ldtrsw_64_ldst_unpriv"}, + {"01011"_b, "ldrsw_64_ldst_immpre"}, + {"10000"_b, "stur_64_ldst_unscaled"}, + {"10001"_b, "str_64_ldst_immpost"}, + {"10010"_b, "sttr_64_ldst_unpriv"}, + {"10011"_b, "str_64_ldst_immpre"}, + {"10100"_b, "ldur_64_ldst_unscaled"}, + {"10101"_b, "ldr_64_ldst_immpost"}, + {"10110"_b, "ldtr_64_ldst_unpriv"}, + {"10111"_b, "ldr_64_ldst_immpre"}, + {"11000"_b, "prfum_p_ldst_unscaled"}, + }, + }, + + { "_rqhryp", + {12, 10}, + { {"00"_b, "_kjpxvh"}, + {"01"_b, "_mxvjxx"}, + {"10"_b, "sm4ekey_z_zz"}, + {"11"_b, "rax1_z_zz"}, + }, + }, + + { "_rqpjjs", + {30, 11, 10}, + { {"000"_b, "_qjrllr"}, + {"001"_b, "_xlgxhn"}, + {"010"_b, "_hxrnns"}, + {"011"_b, "_xnhkpk"}, + {"101"_b, "_mmgpkx"}, + {"110"_b, "_vxhjgg"}, + {"111"_b, "_lptrlg"}, + }, + }, + + { "_rqzpzq", + {23, 22, 11, 10, 4, 3, 0}, + { {"0000000"_b, "_hkxzqg"}, + {"0010111"_b, "_zqlzzp"}, + {"0011111"_b, "_lvszgj"}, + {"0100000"_b, "_tmsjzg"}, + {"0110111"_b, "_kzprzt"}, + {"0111111"_b, "_tzsnmy"}, + {"1000000"_b, "_mqmrng"}, + {"1010111"_b, "_hrmsnk"}, + {"1011111"_b, "_tqlrzh"}, + }, + }, + + { "_rrkmyl", + {23, 22, 4}, + { {"000"_b, "fccmp_s_floatccmp"}, + {"001"_b, "fccmpe_s_floatccmp"}, + {"010"_b, "fccmp_d_floatccmp"}, + {"011"_b, "fccmpe_d_floatccmp"}, + {"110"_b, "fccmp_h_floatccmp"}, + {"111"_b, "fccmpe_h_floatccmp"}, + }, + }, + + { "_rrvltp", + {18, 4}, + { {"00"_b, "fcmlt_p_p_z0"}, + {"01"_b, "fcmle_p_p_z0"}, + }, + }, + + { "_rshyht", + {13}, + { {"0"_b, "facge_p_p_zz"}, + {"1"_b, "facgt_p_p_zz"}, + }, + }, + + { "_rsjgyk", + {30, 23, 22, 20, 13}, + { {"00001"_b, "ld2w_z_p_bi_contiguous"}, + {"000x0"_b, "ld2w_z_p_br_contiguous"}, + {"00101"_b, "ld4w_z_p_bi_contiguous"}, + {"001x0"_b, "ld4w_z_p_br_contiguous"}, + {"01001"_b, "ld2d_z_p_bi_contiguous"}, + {"010x0"_b, "ld2d_z_p_br_contiguous"}, + {"01101"_b, "ld4d_z_p_bi_contiguous"}, + {"011x0"_b, "ld4d_z_p_br_contiguous"}, + {"10011"_b, "st2w_z_p_bi_contiguous"}, + {"100x0"_b, "st1w_z_p_bz_d_x32_scaled"}, + {"10101"_b, "st1w_z_p_bi"}, + {"10111"_b, "st4w_z_p_bi_contiguous"}, + {"101x0"_b, "st1w_z_p_bz_s_x32_scaled"}, + {"11011"_b, "st2d_z_p_bi_contiguous"}, + {"110x0"_b, "st1d_z_p_bz_d_x32_scaled"}, + {"11101"_b, "st1d_z_p_bi"}, + {"11111"_b, "st4d_z_p_bi_contiguous"}, + }, + }, + + { "_rsmyth", + {20, 19, 18, 17, 16}, + { {"11111"_b, "stllr_sl64_ldstexcl"}, + }, + }, + + { "_rsnvnr", + {30, 23, 22}, + { {"100"_b, "ins_asimdins_ir_r"}, + {"x01"_b, "fmulx_asimdsamefp16_only"}, + }, + }, + + { "_rspmth", + {18}, + { {"0"_b, "st1_asisdlse_r2_2v"}, + }, + }, + + { "_rsqmgk", + {23, 22, 20, 19, 18, 17, 16}, + { {"0000000"_b, "movprfx_z_z"}, + }, + }, + + { "_rsqxrs", + {30, 23, 22, 11, 10}, + { {"00000"_b, "_ggvlym"}, + {"01000"_b, "csel_32_condsel"}, + {"01001"_b, "csinc_32_condsel"}, + {"01100"_b, "_svvylr"}, + {"01101"_b, "_zmhqmr"}, + {"01110"_b, "_rjspzr"}, + {"01111"_b, 
"_vpknjg"}, + {"10000"_b, "_rzymmk"}, + {"11000"_b, "csinv_32_condsel"}, + {"11001"_b, "csneg_32_condsel"}, + {"11100"_b, "_kzjxxk"}, + {"11101"_b, "_khvvtr"}, + {"11110"_b, "_gvpvjn"}, + {"11111"_b, "_pkjqsy"}, + }, + }, + + { "_rssrty", + {30, 23, 22, 13, 12, 11, 10}, + { {"1011011"_b, "bfmmla_asimdsame2_e"}, + {"x011111"_b, "bfdot_asimdsame2_d"}, + {"x111111"_b, "bfmlal_asimdsame2_f"}, + {"xxx0xx1"_b, "fcmla_asimdsame2_c"}, + {"xxx1x01"_b, "fcadd_asimdsame2_c"}, + }, + }, + + { "_rszgzl", + {30, 23, 22}, + { {"000"_b, "smsubl_64wa_dp_3src"}, + {"010"_b, "umsubl_64wa_dp_3src"}, + }, + }, + + { "_rtlvxq", + {30, 23, 22}, + { {"000"_b, "madd_32a_dp_3src"}, + }, + }, + + { "_rtpztp", + {22}, + { {"0"_b, "umullb_z_zzi_s"}, + {"1"_b, "umullb_z_zzi_d"}, + }, + }, + + { "_rtrlts", + {23, 22, 12, 11, 10}, + { {"01000"_b, "bfdot_z_zzz"}, + {"10000"_b, "fmlalb_z_zzz"}, + {"10001"_b, "fmlalt_z_zzz"}, + {"11000"_b, "bfmlalb_z_zzz"}, + {"11001"_b, "bfmlalt_z_zzz"}, + }, + }, + + { "_rvjkyp", + {13, 12}, + { {"01"_b, "gmi_64g_dp_2src"}, + {"10"_b, "lsrv_64_dp_2src"}, + }, + }, + + { "_rvsylx", + {18}, + { {"1"_b, "frecpe_z_z"}, + }, + }, + + { "_rvtxys", + {23, 22, 20, 19, 11}, + { {"00010"_b, "sshr_asisdshf_r"}, + {"001x0"_b, "sshr_asisdshf_r"}, + {"01xx0"_b, "sshr_asisdshf_r"}, + }, + }, + + { "_rvvshx", + {23, 22, 13, 12}, + { {"0000"_b, "fmax_s_floatdp2"}, + {"0001"_b, "fmin_s_floatdp2"}, + {"0010"_b, "fmaxnm_s_floatdp2"}, + {"0011"_b, "fminnm_s_floatdp2"}, + {"0100"_b, "fmax_d_floatdp2"}, + {"0101"_b, "fmin_d_floatdp2"}, + {"0110"_b, "fmaxnm_d_floatdp2"}, + {"0111"_b, "fminnm_d_floatdp2"}, + {"1100"_b, "fmax_h_floatdp2"}, + {"1101"_b, "fmin_h_floatdp2"}, + {"1110"_b, "fmaxnm_h_floatdp2"}, + {"1111"_b, "fminnm_h_floatdp2"}, + }, + }, + + { "_rxgkjn", + {30, 23, 22}, + { {"000"_b, "adds_64s_addsub_ext"}, + {"100"_b, "subs_64s_addsub_ext"}, + }, + }, + + { "_rxhssh", + {18}, + { {"0"_b, "ld3_asisdlsop_hx3_r3h"}, + {"1"_b, "ld3_asisdlsop_h3_i3h"}, + }, + }, + + { "_rxnnvv", + {23, 22, 4, 3, 2, 1, 0}, + { {"0000000"_b, "brk_ex_exception"}, + {"0100000"_b, "tcancel_ex_exception"}, + {"1000001"_b, "dcps1_dc_exception"}, + {"1000010"_b, "dcps2_dc_exception"}, + {"1000011"_b, "dcps3_dc_exception"}, + }, + }, + + { "_rxsqhv", + {13, 12}, + { {"00"_b, "adc_64_addsub_carry"}, + }, + }, + + { "_rxtklv", + {30, 18}, + { {"00"_b, "_qtgvlx"}, + }, + }, + + { "_rxytqg", + {30, 23, 22, 20, 19, 18}, + { {"00xxxx"_b, "add_64_addsub_imm"}, + {"011000"_b, "smax_64_minmax_imm"}, + {"011001"_b, "umax_64u_minmax_imm"}, + {"011010"_b, "smin_64_minmax_imm"}, + {"011011"_b, "umin_64u_minmax_imm"}, + {"10xxxx"_b, "sub_64_addsub_imm"}, + }, + }, + + { "_ryrkqt", + {20, 19}, + { {"00"_b, "_tsskys"}, + {"01"_b, "_kqvljp"}, + {"10"_b, "_lxhlkx"}, + {"11"_b, "_rjysnh"}, + }, + }, + + { "_rznrqt", + {22}, + { {"0"_b, "umullt_z_zzi_s"}, + {"1"_b, "umullt_z_zzi_d"}, + }, + }, + + { "_rzpqmm", + {23, 22, 20, 19, 17, 16, 13}, + { {"0000000"_b, "_nygsjm"}, + {"0000001"_b, "_snjmrt"}, + {"0100000"_b, "_hhxpjz"}, + {"0100001"_b, "_tktgvg"}, + {"100xxx0"_b, "st2_asisdlsop_bx2_r2b"}, + {"100xxx1"_b, "st4_asisdlsop_bx4_r4b"}, + {"1010xx0"_b, "st2_asisdlsop_bx2_r2b"}, + {"1010xx1"_b, "st4_asisdlsop_bx4_r4b"}, + {"10110x0"_b, "st2_asisdlsop_bx2_r2b"}, + {"10110x1"_b, "st4_asisdlsop_bx4_r4b"}, + {"1011100"_b, "st2_asisdlsop_bx2_r2b"}, + {"1011101"_b, "st4_asisdlsop_bx4_r4b"}, + {"1011110"_b, "_szjjgk"}, + {"1011111"_b, "_tvgklq"}, + {"110xxx0"_b, "ld2_asisdlsop_bx2_r2b"}, + {"110xxx1"_b, "ld4_asisdlsop_bx4_r4b"}, + {"1110xx0"_b, 
"ld2_asisdlsop_bx2_r2b"}, + {"1110xx1"_b, "ld4_asisdlsop_bx4_r4b"}, + {"11110x0"_b, "ld2_asisdlsop_bx2_r2b"}, + {"11110x1"_b, "ld4_asisdlsop_bx4_r4b"}, + {"1111100"_b, "ld2_asisdlsop_bx2_r2b"}, + {"1111101"_b, "ld4_asisdlsop_bx4_r4b"}, + {"1111110"_b, "_tzsvyv"}, + {"1111111"_b, "_jvnsgt"}, + }, + }, + + { "_rztvnl", + {20, 19, 18, 17, 16}, + { {"0000x"_b, "fcadd_z_p_zz"}, + {"10000"_b, "faddp_z_p_zz"}, + {"10100"_b, "fmaxnmp_z_p_zz"}, + {"10101"_b, "fminnmp_z_p_zz"}, + {"10110"_b, "fmaxp_z_p_zz"}, + {"10111"_b, "fminp_z_p_zz"}, + }, + }, + + { "_rzymmk", + {13, 12}, + { {"00"_b, "sbc_32_addsub_carry"}, + }, + }, + + { "_rzzxsn", + {30, 13}, + { {"00"_b, "_nvyxmh"}, + {"01"_b, "_hykhmt"}, + {"10"_b, "_yszjsm"}, + {"11"_b, "_jrnxzh"}, + }, + }, + + { "_sghgtk", + {4}, + { {"0"_b, "cmplo_p_p_zi"}, + {"1"_b, "cmpls_p_p_zi"}, + }, + }, + + { "_sgmpvp", + {23, 22, 13}, + { {"000"_b, "fmulx_asimdelem_rh_h"}, + {"1x0"_b, "fmulx_asimdelem_r_sd"}, + }, + }, + + { "_shgktt", + {11}, + { {"0"_b, "_tjjqpx"}, + }, + }, + + { "_shgxyq", + {23, 22, 19, 13, 12}, + { {"00100"_b, "sha1h_ss_cryptosha2"}, + {"00101"_b, "sha1su1_vv_cryptosha2"}, + {"00110"_b, "sha256su0_vv_cryptosha2"}, + {"xx011"_b, "suqadd_asisdmisc_r"}, + }, + }, + + { "_shqygv", + {30, 4}, + { {"00"_b, "_thvxym"}, + {"01"_b, "_mrhtxt"}, + {"10"_b, "_ptjyqx"}, + {"11"_b, "_rshyht"}, + }, + }, + + { "_shqyqv", + {23, 13, 12}, + { {"010"_b, "fcmeq_asisdsame_only"}, + }, + }, + + { "_shvqkt", + {20, 19, 18, 17, 16}, + { {"11111"_b, "ldlar_lr32_ldstexcl"}, + }, + }, + + { "_sjlqvg", + {23, 20, 19, 18, 17, 16}, + { {"000001"_b, "fcvtxn_asimdmisc_n"}, + {"x00000"_b, "uadalp_asimdmisc_p"}, + }, + }, + + { "_sjnspg", + {4}, + { {"0"_b, "nors_p_p_pp_z"}, + {"1"_b, "nands_p_p_pp_z"}, + }, + }, + + { "_sjtrhm", + {30, 23, 22, 20, 13}, + { {"00001"_b, "ld1rqb_z_p_bi_u8"}, + {"000x0"_b, "ld1rqb_z_p_br_contiguous"}, + {"01001"_b, "ld1rqh_z_p_bi_u16"}, + {"010x0"_b, "ld1rqh_z_p_br_contiguous"}, + {"100x1"_b, "stnt1b_z_p_ar_d_64_unscaled"}, + {"101x1"_b, "stnt1b_z_p_ar_s_x32_unscaled"}, + {"110x1"_b, "stnt1h_z_p_ar_d_64_unscaled"}, + {"111x1"_b, "stnt1h_z_p_ar_s_x32_unscaled"}, + }, + }, + + { "_sjvhlq", + {22}, + { {"0"_b, "smullb_z_zzi_s"}, + {"1"_b, "smullb_z_zzi_d"}, + }, + }, + + { "_skjqrx", + {23, 22}, + { {"00"_b, "fmov_s_floatimm"}, + {"01"_b, "fmov_d_floatimm"}, + {"11"_b, "fmov_h_floatimm"}, + }, + }, + + { "_skmzll", + {18}, + { {"0"_b, "st2_asisdlsop_hx2_r2h"}, + {"1"_b, "st2_asisdlsop_h2_i2h"}, + }, + }, + + { "_sknvhk", + {13, 12, 11, 10}, + { {"0000"_b, "sha1c_qsv_cryptosha3"}, + {"0001"_b, "dup_asisdone_only"}, + {"0100"_b, "sha1p_qsv_cryptosha3"}, + {"1000"_b, "sha1m_qsv_cryptosha3"}, + {"1100"_b, "sha1su0_vvv_cryptosha3"}, + }, + }, + + { "_skqzyg", + {23}, + { {"0"_b, "fcmeq_asimdsame_only"}, + }, + }, + + { "_sksvrn", + {20, 19, 18, 17, 16}, + { {"11111"_b, "ldlarb_lr32_ldstexcl"}, + }, + }, + + { "_skszgm", + {13, 12, 11, 10}, + { {"1111"_b, "_xzmrlg"}, + }, + }, + + { "_skytvx", + {23, 22}, + { {"00"_b, "tbx_asimdtbl_l2_2"}, + }, + }, + + { "_slzrtr", + {23, 22}, + { {"00"_b, "fmsub_s_floatdp3"}, + {"01"_b, "fmsub_d_floatdp3"}, + {"11"_b, "fmsub_h_floatdp3"}, + }, + }, + + { "_slzvjh", + {30, 23, 22}, + { {"000"_b, "orr_32_log_imm"}, + {"100"_b, "ands_32s_log_imm"}, + {"110"_b, "movk_32_movewide"}, + }, + }, + + { "_smmrpj", + {18}, + { {"0"_b, "fadda_v_p_z"}, + }, + }, + + { "_smptxh", + {23, 22}, + { {"01"_b, "fmax_asimdsamefp16_only"}, + {"11"_b, "fmin_asimdsamefp16_only"}, + }, + }, + + { "_smsytm", + {13}, + { {"0"_b, 
"mul_asimdelem_r"}, + {"1"_b, "smull_asimdelem_l"}, + }, + }, + + { "_snhmgn", + {23}, + { {"0"_b, "fmul_asimdsame_only"}, + }, + }, + + { "_snhzxr", + {30, 23, 22}, + { {"001"_b, "bfm_64m_bitfield"}, + }, + }, + + { "_snjmrt", + {18}, + { {"0"_b, "st4_asisdlso_b4_4b"}, + }, + }, + + { "_snnlgr", + {23, 22, 20, 19, 13, 11}, + { {"0000x0"_b, "movi_asimdimm_l_sl"}, + {"00x100"_b, "sshr_asimdshf_r"}, + {"00x110"_b, "srshr_asimdshf_r"}, + {"010x00"_b, "sshr_asimdshf_r"}, + {"010x10"_b, "srshr_asimdshf_r"}, + {"011100"_b, "sshr_asimdshf_r"}, + {"011110"_b, "srshr_asimdshf_r"}, + {"0x1000"_b, "sshr_asimdshf_r"}, + {"0x1010"_b, "srshr_asimdshf_r"}, + }, + }, + + { "_snrzky", + {20, 19, 18, 17, 16}, + { {"11111"_b, "ldxp_lp64_ldstexcl"}, + }, + }, + + { "_sntnsm", + {9, 8, 7, 6, 5}, + { {"11111"_b, "autizb_64z_dp_1src"}, + }, + }, + + { "_sntyqy", + {4}, + { {"0"_b, "cmphs_p_p_zi"}, + {"1"_b, "cmphi_p_p_zi"}, + }, + }, + + { "_snvnjz", + {30, 13}, + { {"10"_b, "_plzqrv"}, + }, + }, + + { "_snvzjr", + {12}, + { {"0"_b, "st2_asisdlsop_dx2_r2d"}, + }, + }, + + { "_snzvtt", + {23, 22}, + { {"00"_b, "fmlal2_asimdsame_f"}, + {"10"_b, "fmlsl2_asimdsame_f"}, + }, + }, + + { "_spktyg", + {23, 22, 20, 19, 11}, + { {"00000"_b, "movi_asimdimm_m_sm"}, + }, + }, + + { "_spxvlt", + {20, 19, 18, 17, 16, 13, 12, 11}, + { {"00000000"_b, "_mtkhgz"}, + }, + }, + + { "_sqgjmn", + {20, 9}, + { {"00"_b, "_mxgykv"}, + }, + }, + + { "_sqhxzj", + {30, 23, 22, 13, 12, 11, 10}, + { {"1010000"_b, "sha512h_qqv_cryptosha512_3"}, + {"1010001"_b, "sha512h2_qqv_cryptosha512_3"}, + {"1010010"_b, "sha512su1_vvv2_cryptosha512_3"}, + {"1010011"_b, "rax1_vvv2_cryptosha512_3"}, + }, + }, + + { "_sqkkqy", + {13, 12, 10}, + { {"010"_b, "sqrdmlah_asisdelem_r"}, + {"101"_b, "_mhksnq"}, + {"110"_b, "sqrdmlsh_asisdelem_r"}, + {"111"_b, "_mpytmv"}, + }, + }, + + { "_sqlsyr", + {18, 17}, + { {"00"_b, "ld1_asisdlse_r1_1v"}, + }, + }, + + { "_sqttsv", + {20, 19, 18, 17, 16, 4, 3}, + { {"0000001"_b, "fcmp_sz_floatcmp"}, + {"0000011"_b, "fcmpe_sz_floatcmp"}, + {"xxxxx00"_b, "fcmp_s_floatcmp"}, + {"xxxxx10"_b, "fcmpe_s_floatcmp"}, + }, + }, + + { "_srkslp", + {22, 20}, + { {"00"_b, "_zvynrg"}, + {"01"_b, "msr_sr_systemmove"}, + {"10"_b, "_lxlqks"}, + {"11"_b, "msrr_sr_systemmovepr"}, + }, + }, + + { "_srnkng", + {18}, + { {"0"_b, "faddv_v_p_z"}, + {"1"_b, "fmaxnmv_v_p_z"}, + }, + }, + + { "_srpptk", + {20, 19, 18, 17, 16}, + { {"00000"_b, "usqadd_asimdmisc_r"}, + {"00001"_b, "shll_asimdmisc_s"}, + {"10000"_b, "uaddlv_asimdall_only"}, + }, + }, + + { "_srpqmk", + {30, 23, 22}, + { {"000"_b, "stp_q_ldstpair_off"}, + {"001"_b, "ldp_q_ldstpair_off"}, + {"010"_b, "stp_q_ldstpair_pre"}, + {"011"_b, "ldp_q_ldstpair_pre"}, + }, + }, + + { "_srsrtk", + {30, 23, 22, 13, 11, 10}, + { {"000010"_b, "str_b_ldst_regoff"}, + {"000110"_b, "str_bl_ldst_regoff"}, + {"001010"_b, "ldr_b_ldst_regoff"}, + {"001110"_b, "ldr_bl_ldst_regoff"}, + {"010x10"_b, "str_q_ldst_regoff"}, + {"011x10"_b, "ldr_q_ldst_regoff"}, + {"100x10"_b, "str_h_ldst_regoff"}, + {"101x10"_b, "ldr_h_ldst_regoff"}, + }, + }, + + { "_srttng", + {23, 22}, + { {"01"_b, "fcmla_asimdelem_c_h"}, + {"10"_b, "fcmla_asimdelem_c_s"}, + }, + }, + + { "_ssjnph", + {10}, + { {"0"_b, "blraa_64p_branch_reg"}, + {"1"_b, "blrab_64p_branch_reg"}, + }, + }, + + { "_ssjrxs", + {18}, + { {"0"_b, "ld3_asisdlso_h3_3h"}, + }, + }, + + { "_ssqyrk", + {23, 22, 20, 19, 18, 17, 16}, + { {"0000000"_b, "not_asimdmisc_r"}, + {"0100000"_b, "rbit_asimdmisc_r"}, + }, + }, + + { "_ssvpxz", + {30, 23, 22}, + { {"000"_b, 
"stnp_32_ldstnapair_offs"}, + {"001"_b, "ldnp_32_ldstnapair_offs"}, + {"010"_b, "stp_32_ldstpair_post"}, + {"011"_b, "ldp_32_ldstpair_post"}, + {"110"_b, "stgp_64_ldstpair_post"}, + {"111"_b, "ldpsw_64_ldstpair_post"}, + }, + }, + + { "_ssypmm", + {9, 8, 7, 6, 5}, + { {"00000"_b, "fmov_h_floatimm"}, + }, + }, + + { "_stlgrr", + {30, 23, 22, 13, 12, 11, 10}, + { {"0001111"_b, "caspl_cp32_ldstexcl"}, + {"0011111"_b, "caspal_cp32_ldstexcl"}, + {"0101111"_b, "caslb_c32_ldstexcl"}, + {"0111111"_b, "casalb_c32_ldstexcl"}, + {"1001111"_b, "caspl_cp64_ldstexcl"}, + {"1011111"_b, "caspal_cp64_ldstexcl"}, + {"1101111"_b, "caslh_c32_ldstexcl"}, + {"1111111"_b, "casalh_c32_ldstexcl"}, + }, + }, + + { "_stmtkr", + {30, 23, 22}, + { {"000"_b, "stxr_sr32_ldstexcl"}, + {"001"_b, "_zlvjrh"}, + {"010"_b, "_lpzgvs"}, + {"011"_b, "_shvqkt"}, + {"100"_b, "stxr_sr64_ldstexcl"}, + {"101"_b, "_jhltlz"}, + {"110"_b, "_rsmyth"}, + {"111"_b, "_vjtgmx"}, + }, + }, + + { "_svgvjm", + {23}, + { {"0"_b, "faddp_asimdsame_only"}, + {"1"_b, "fabd_asimdsame_only"}, + }, + }, + + { "_svlrvy", + {18, 17}, + { {"00"_b, "st1_asisdlse_r3_3v"}, + }, + }, + + { "_svvylr", + {13, 12}, + { {"10"_b, "lslv_32_dp_2src"}, + }, + }, + + { "_svvyrz", + {23, 22, 20, 19, 18, 17, 16}, + { {"00xxxxx"_b, "addvl_r_ri"}, + {"01xxxxx"_b, "addpl_r_ri"}, + {"1011111"_b, "rdvl_r_i"}, + }, + }, + + { "_svyszp", + {9, 8, 7, 6, 5}, + { {"00000"_b, "fmov_d_floatimm"}, + }, + }, + + { "_sxgnmg", + {18, 17}, + { {"00"_b, "ld3_asisdlso_s3_3s"}, + }, + }, + + { "_sxptnh", + {23, 22, 11, 10}, + { {"0000"_b, "_vmtkqp"}, + {"0001"_b, "_lqjlkj"}, + {"0010"_b, "_gyymmx"}, + {"0011"_b, "_gmqyjv"}, + {"0100"_b, "_pvtyjz"}, + {"0101"_b, "_hxxxyy"}, + {"0110"_b, "_xszmjn"}, + {"1000"_b, "_lzjyhm"}, + {"1001"_b, "_zlkygr"}, + {"1010"_b, "_jvpjsm"}, + {"1101"_b, "_vzyklr"}, + {"1110"_b, "_npxkzq"}, + }, + }, + + { "_sxpvym", + {30, 23, 22, 13}, + { {"0000"_b, "ldnt1sb_z_p_ar_s_x32_unscaled"}, + {"0001"_b, "ldnt1b_z_p_ar_s_x32_unscaled"}, + {"0010"_b, "ld1rb_z_p_bi_u8"}, + {"0011"_b, "ld1rb_z_p_bi_u16"}, + {"0100"_b, "ldnt1sh_z_p_ar_s_x32_unscaled"}, + {"0101"_b, "ldnt1h_z_p_ar_s_x32_unscaled"}, + {"0110"_b, "ld1rsw_z_p_bi_s64"}, + {"0111"_b, "ld1rh_z_p_bi_u16"}, + {"1000"_b, "ldnt1sb_z_p_ar_d_64_unscaled"}, + {"1010"_b, "ld1sb_z_p_bz_d_64_unscaled"}, + {"1011"_b, "ldff1sb_z_p_bz_d_64_unscaled"}, + {"1100"_b, "ldnt1sh_z_p_ar_d_64_unscaled"}, + {"1110"_b, "ld1sh_z_p_bz_d_64_unscaled"}, + {"1111"_b, "ldff1sh_z_p_bz_d_64_unscaled"}, + }, + }, + + { "_sxsgmq", + {30, 22, 20, 19, 18, 17, 16}, + { {"00xxxxx"_b, "stxp_sp32_ldstexcl"}, + {"0111111"_b, "ldxp_lp32_ldstexcl"}, + {"10xxxxx"_b, "stxp_sp64_ldstexcl"}, + {"1111111"_b, "ldxp_lp64_ldstexcl"}, + }, + }, + + { "_sxsxxt", + {20, 19, 18, 17, 16}, + { {"10000"_b, "fminp_asisdpair_only_sd"}, + }, + }, + + { "_sylkvm", + {23, 22, 12}, + { {"100"_b, "fmlsl2_asimdelem_lh"}, + {"xx1"_b, "sqrdmlah_asimdelem_r"}, + }, + }, + + { "_syrmmr", + {18, 4}, + { {"00"_b, "fcmeq_p_p_z0"}, + }, + }, + + { "_szgqrr", + {12, 10}, + { {"00"_b, "_xlyjsz"}, + {"01"_b, "_yppmkl"}, + {"10"_b, "_sgmpvp"}, + {"11"_b, "_gjtmjg"}, + }, + }, + + { "_szjjgk", + {18}, + { {"0"_b, "st2_asisdlsop_bx2_r2b"}, + {"1"_b, "st2_asisdlsop_b2_i2b"}, + }, + }, + + { "_szmnhg", + {12}, + { {"0"_b, "ld2_asisdlsop_dx2_r2d"}, + }, + }, + + { "_szmyzt", + {12}, + { {"0"_b, "ld3_asisdlsop_dx3_r3d"}, + }, + }, + + { "_szqlsn", + {23, 22, 20, 19, 18, 17, 16}, + { {"0x00001"_b, "frint32z_asimdmisc_r"}, + {"1111000"_b, "fcmlt_asimdmiscfp16_fz"}, + {"1x00000"_b, 
"fcmlt_asimdmisc_fz"}, + }, + }, + + { "_sztkhs", + {30, 23, 22}, + { {"000"_b, "msub_64a_dp_3src"}, + }, + }, + + { "_szylpy", + {22, 12}, + { {"10"_b, "_hhlmrg"}, + }, + }, + + { "_szysqh", + {22, 13, 12}, + { {"000"_b, "ldsmax_32_memop"}, + {"001"_b, "ldsmin_32_memop"}, + {"010"_b, "ldumax_32_memop"}, + {"011"_b, "ldumin_32_memop"}, + {"100"_b, "ldsmaxl_32_memop"}, + {"101"_b, "ldsminl_32_memop"}, + {"110"_b, "ldumaxl_32_memop"}, + {"111"_b, "lduminl_32_memop"}, + }, + }, + + { "_tgvkhm", + {20, 19, 18, 17, 16, 13}, + { {"000000"_b, "fabs_s_floatdp1"}, + {"000010"_b, "fsqrt_s_floatdp1"}, + {"000100"_b, "fcvt_ds_floatdp1"}, + {"000110"_b, "fcvt_hs_floatdp1"}, + {"001000"_b, "frintp_s_floatdp1"}, + {"001010"_b, "frintz_s_floatdp1"}, + {"001110"_b, "frinti_s_floatdp1"}, + {"010000"_b, "frint32x_s_floatdp1"}, + {"010010"_b, "frint64x_s_floatdp1"}, + }, + }, + + { "_thkkgx", + {18}, + { {"1"_b, "fminnmv_v_p_z"}, + }, + }, + + { "_thqgrq", + {13, 12, 11, 10}, + { {"1111"_b, "_pgmlrt"}, + }, + }, + + { "_thrxph", + {23, 22, 10}, + { {"100"_b, "umlalb_z_zzzi_s"}, + {"101"_b, "umlalt_z_zzzi_s"}, + {"110"_b, "umlalb_z_zzzi_d"}, + {"111"_b, "umlalt_z_zzzi_d"}, + }, + }, + + { "_thvxym", + {20}, + { {"0"_b, "_prkmty"}, + {"1"_b, "_pjgkjs"}, + }, + }, + + { "_tjjqpx", + {23, 22, 20, 19, 16, 13, 10}, + { {"0000000"_b, "_mlgmqm"}, + {"0000001"_b, "_mvqkzv"}, + {"0000010"_b, "_jztspt"}, + {"0000011"_b, "_hrpkqg"}, + {"0100000"_b, "_llqtkj"}, + {"0100001"_b, "_pmpsvs"}, + {"0100010"_b, "_vhrkvk"}, + {"0100011"_b, "_xsvpzx"}, + {"100xx00"_b, "st2_asisdlsop_sx2_r2s"}, + {"100xx01"_b, "_ynyqky"}, + {"100xx10"_b, "st4_asisdlsop_sx4_r4s"}, + {"100xx11"_b, "_grvxrm"}, + {"1010x00"_b, "st2_asisdlsop_sx2_r2s"}, + {"1010x01"_b, "_snvzjr"}, + {"1010x10"_b, "st4_asisdlsop_sx4_r4s"}, + {"1010x11"_b, "_xmkysx"}, + {"1011000"_b, "st2_asisdlsop_sx2_r2s"}, + {"1011001"_b, "_xqhxql"}, + {"1011010"_b, "st4_asisdlsop_sx4_r4s"}, + {"1011011"_b, "_ykpqth"}, + {"1011100"_b, "_lgyqpk"}, + {"1011101"_b, "_tplghv"}, + {"1011110"_b, "_lqknkn"}, + {"1011111"_b, "_zprgxt"}, + {"110xx00"_b, "ld2_asisdlsop_sx2_r2s"}, + {"110xx01"_b, "_prjzxs"}, + {"110xx10"_b, "ld4_asisdlsop_sx4_r4s"}, + {"110xx11"_b, "_txsvzz"}, + {"1110x00"_b, "ld2_asisdlsop_sx2_r2s"}, + {"1110x01"_b, "_hljttg"}, + {"1110x10"_b, "ld4_asisdlsop_sx4_r4s"}, + {"1110x11"_b, "_rknxlg"}, + {"1111000"_b, "ld2_asisdlsop_sx2_r2s"}, + {"1111001"_b, "_szmnhg"}, + {"1111010"_b, "ld4_asisdlsop_sx4_r4s"}, + {"1111011"_b, "_tjrtxx"}, + {"1111100"_b, "_ppvnly"}, + {"1111101"_b, "_lltzjg"}, + {"1111110"_b, "_ypsgqz"}, + {"1111111"_b, "_vnrlsj"}, + }, + }, + + { "_tjlthk", + {9, 8, 7, 6, 5, 1}, + { {"111110"_b, "drps_64e_branch_reg"}, + }, + }, + + { "_tjnzjl", + {18, 17}, + { {"00"_b, "st1_asisdlso_s1_1s"}, + }, + }, + + { "_tjrtxx", + {12}, + { {"0"_b, "ld4_asisdlsop_dx4_r4d"}, + }, + }, + + { "_tjxhsy", + {10}, + { {"0"_b, "braa_64p_branch_reg"}, + {"1"_b, "brab_64p_branch_reg"}, + }, + }, + + { "_tjxyky", + {12}, + { {"0"_b, "st3_asisdlsop_dx3_r3d"}, + }, + }, + + { "_tjzqnp", + {30, 23, 22, 20, 13}, + { {"00001"_b, "ldnt1b_z_p_bi_contiguous"}, + {"000x0"_b, "ldnt1b_z_p_br_contiguous"}, + {"00101"_b, "ld3b_z_p_bi_contiguous"}, + {"001x0"_b, "ld3b_z_p_br_contiguous"}, + {"01001"_b, "ldnt1h_z_p_bi_contiguous"}, + {"010x0"_b, "ldnt1h_z_p_br_contiguous"}, + {"01101"_b, "ld3h_z_p_bi_contiguous"}, + {"011x0"_b, "ld3h_z_p_br_contiguous"}, + {"10011"_b, "stnt1b_z_p_bi_contiguous"}, + {"100x0"_b, "st1b_z_p_bz_d_x32_unscaled"}, + {"10111"_b, "st3b_z_p_bi_contiguous"}, + {"101x0"_b, 
"st1b_z_p_bz_s_x32_unscaled"}, + {"10x01"_b, "st1b_z_p_bi"}, + {"11011"_b, "stnt1h_z_p_bi_contiguous"}, + {"110x0"_b, "st1h_z_p_bz_d_x32_unscaled"}, + {"11111"_b, "st3h_z_p_bi_contiguous"}, + {"111x0"_b, "st1h_z_p_bz_s_x32_unscaled"}, + {"11x01"_b, "st1h_z_p_bi"}, + }, + }, + + { "_tkjtgp", + {30}, + { {"0"_b, "_sqgjmn"}, + {"1"_b, "_ztpryr"}, + }, + }, + + { "_tklxhy", + {18}, + { {"0"_b, "st3_asisdlso_b3_3b"}, + }, + }, + + { "_tknqxs", + {20, 19, 18, 17, 16}, + { {"11111"_b, "ldaxr_lr64_ldstexcl"}, + }, + }, + + { "_tktgvg", + {18}, + { {"0"_b, "ld4_asisdlso_b4_4b"}, + }, + }, + + { "_tlvmlq", + {18}, + { {"0"_b, "ld2_asisdlso_h2_2h"}, + }, + }, + + { "_tmhlvh", + {20, 9, 4}, + { {"000"_b, "zip2_p_pp"}, + }, + }, + + { "_tmsjzg", + {2, 1}, + { {"00"_b, "ret_64r_branch_reg"}, + }, + }, + + { "_tmtgqm", + {4}, + { {"0"_b, "ccmn_64_condcmp_imm"}, + }, + }, + + { "_tmtnkq", + {23, 18, 17, 16}, + { {"0000"_b, "uqxtnb_z_zz"}, + }, + }, + + { "_tnjhxp", + {9, 8, 7, 6, 5}, + { {"11111"_b, "pacdza_64z_dp_1src"}, + }, + }, + + { "_tnngsg", + {23, 22, 13, 12, 11, 10}, + { {"01x1x0"_b, "fcmla_asimdelem_c_h"}, + {"0x0001"_b, "ushr_asimdshf_r"}, + {"0x0101"_b, "usra_asimdshf_r"}, + {"0x1001"_b, "urshr_asimdshf_r"}, + {"0x1101"_b, "ursra_asimdshf_r"}, + {"10x1x0"_b, "fcmla_asimdelem_c_s"}, + {"xx00x0"_b, "mla_asimdelem_r"}, + {"xx10x0"_b, "umlal_asimdelem_l"}, + }, + }, + + { "_tnpjts", + {30}, + { {"0"_b, "and_64_log_shift"}, + {"1"_b, "eor_64_log_shift"}, + }, + }, + + { "_tpkslq", + {30, 23, 22, 20, 13, 4}, + { {"00001x"_b, "ld1rqw_z_p_bi_u32"}, + {"000x0x"_b, "ld1rqw_z_p_br_contiguous"}, + {"01001x"_b, "ld1rqd_z_p_bi_u64"}, + {"010x0x"_b, "ld1rqd_z_p_br_contiguous"}, + {"100x1x"_b, "stnt1w_z_p_ar_d_64_unscaled"}, + {"101x1x"_b, "stnt1w_z_p_ar_s_x32_unscaled"}, + {"110x00"_b, "str_p_bi"}, + {"110x1x"_b, "stnt1d_z_p_ar_d_64_unscaled"}, + }, + }, + + { "_tplghv", + {18, 17, 12}, + { {"0x0"_b, "st2_asisdlsop_dx2_r2d"}, + {"100"_b, "st2_asisdlsop_dx2_r2d"}, + {"110"_b, "st2_asisdlsop_d2_i2d"}, + }, + }, + + { "_tpmqyl", + {30}, + { {"0"_b, "bl_only_branch_imm"}, + {"1"_b, "_lszlkq"}, + }, + }, + + { "_tptqjs", + {12}, + { {"0"_b, "ld1_asisdlsop_dx1_r1d"}, + }, + }, + + { "_tqlrzh", + {9, 8, 7, 6, 5, 2, 1}, + { {"1111111"_b, "eretab_64e_branch_reg"}, + }, + }, + + { "_tqlsyy", + {30}, + { {"0"_b, "add_32_addsub_shift"}, + {"1"_b, "sub_32_addsub_shift"}, + }, + }, + + { "_trjmmn", + {13, 12, 11, 10}, + { {"0001"_b, "sub_asisdsame_only"}, + {"0010"_b, "_plyhhz"}, + {"0011"_b, "cmeq_asisdsame_only"}, + {"0110"_b, "_qkhrkh"}, + {"1010"_b, "_kxhmlx"}, + {"1101"_b, "sqrdmulh_asisdsame_only"}, + {"1110"_b, "_ytrmvz"}, + }, + }, + + { "_tshjsk", + {18}, + { {"0"_b, "st4_asisdlsep_r4_r"}, + {"1"_b, "st4_asisdlsep_i4_i"}, + }, + }, + + { "_tsskys", + {23, 22, 18, 17, 16}, + { {"01000"_b, "fadd_z_p_zz"}, + {"01001"_b, "fsub_z_p_zz"}, + {"01010"_b, "fmul_z_p_zz"}, + {"01100"_b, "fmaxnm_z_p_zz"}, + {"01101"_b, "fminnm_z_p_zz"}, + {"01110"_b, "fmax_z_p_zz"}, + {"01111"_b, "fmin_z_p_zz"}, + {"1x000"_b, "fadd_z_p_zz"}, + {"1x001"_b, "fsub_z_p_zz"}, + {"1x010"_b, "fmul_z_p_zz"}, + {"1x100"_b, "fmaxnm_z_p_zz"}, + {"1x101"_b, "fminnm_z_p_zz"}, + {"1x110"_b, "fmax_z_p_zz"}, + {"1x111"_b, "fmin_z_p_zz"}, + {"xx011"_b, "fsubr_z_p_zz"}, + }, + }, + + { "_tsypsz", + {23, 22, 13, 12}, + { {"0000"_b, "fnmul_s_floatdp2"}, + {"0100"_b, "fnmul_d_floatdp2"}, + {"1100"_b, "fnmul_h_floatdp2"}, + }, + }, + + { "_ttmvpr", + {30, 23, 22, 20, 19}, + { {"0xxxx"_b, "bl_only_branch_imm"}, + {"10001"_b, "sys_cr_systeminstrs"}, + {"1001x"_b, 
"msr_sr_systemmove"}, + {"10101"_b, "sysp_cr_syspairinstrs"}, + {"1011x"_b, "msrr_sr_systemmovepr"}, + }, + }, + + { "_ttmyrv", + {30, 11, 10}, + { {"000"_b, "_nynrns"}, + {"001"_b, "_rrkmyl"}, + {"010"_b, "_rvvshx"}, + {"011"_b, "_zlmyjt"}, + {"101"_b, "_yrggjm"}, + {"110"_b, "_kskqmz"}, + {"111"_b, "_kzksnv"}, + }, + }, + + { "_ttplgp", + {12, 11, 10}, + { {"000"_b, "sqincp_z_p_z"}, + {"010"_b, "sqincp_r_p_r_sx"}, + {"011"_b, "sqincp_r_p_r_x"}, + {"100"_b, "_zqmrhp"}, + }, + }, + + { "_ttsgkt", + {12, 10}, + { {"00"_b, "_smsytm"}, + {"01"_b, "_mjrlkp"}, + {"10"_b, "_vjkhhm"}, + {"11"_b, "_ymxjjr"}, + }, + }, + + { "_ttzlqn", + {18, 17, 12}, + { {"000"_b, "ld1_asisdlso_d1_1d"}, + }, + }, + + { "_tvgklq", + {18}, + { {"0"_b, "st4_asisdlsop_bx4_r4b"}, + {"1"_b, "st4_asisdlsop_b4_i4b"}, + }, + }, + + { "_tvrlgz", + {18}, + { {"0"_b, "st1_asisdlsop_bx1_r1b"}, + {"1"_b, "st1_asisdlsop_b1_i1b"}, + }, + }, + + { "_tvtvkt", + {18, 17, 12}, + { {"000"_b, "ldap1_asisdlso_d1"}, + }, + }, + + { "_tvyxlr", + {30}, + { {"0"_b, "bl_only_branch_imm"}, + {"1"_b, "_jlnjsy"}, + }, + }, + + { "_txkmvh", + {18}, + { {"0"_b, "ld2_asisdlse_r2"}, + }, + }, + + { "_txsvzz", + {12}, + { {"0"_b, "ld4_asisdlsop_dx4_r4d"}, + }, + }, + + { "_txzxzs", + {23, 22, 20, 19, 18}, + { {"00000"_b, "orr_z_zi"}, + {"01000"_b, "eor_z_zi"}, + {"10000"_b, "and_z_zi"}, + {"11000"_b, "dupm_z_i"}, + {"xx1xx"_b, "cpy_z_p_i"}, + }, + }, + + { "_tykvnx", + {30}, + { {"0"_b, "ldapr_32l_ldapstl_writeback"}, + {"1"_b, "ldapr_64l_ldapstl_writeback"}, + }, + }, + + { "_tymryz", + {23, 22, 20, 19, 13, 11}, + { {"0000x0"_b, "bic_asimdimm_l_sl"}, + {"00x100"_b, "sli_asimdshf_r"}, + {"00x110"_b, "uqshl_asimdshf_r"}, + {"010x00"_b, "sli_asimdshf_r"}, + {"010x10"_b, "uqshl_asimdshf_r"}, + {"011100"_b, "sli_asimdshf_r"}, + {"011110"_b, "uqshl_asimdshf_r"}, + {"0x1000"_b, "sli_asimdshf_r"}, + {"0x1010"_b, "uqshl_asimdshf_r"}, + }, + }, + + { "_tytvjk", + {13, 12, 11}, + { {"000"_b, "_lylpyx"}, + {"001"_b, "_kyxrqg"}, + {"010"_b, "_zmkqxl"}, + {"011"_b, "_gngjxr"}, + {"100"_b, "_mlxtxs"}, + {"101"_b, "_mnmtql"}, + {"110"_b, "_xmxpnx"}, + {"111"_b, "_lkttgy"}, + }, + }, + + { "_tytzpq", + {30}, + { {"0"_b, "bic_32_log_shift"}, + {"1"_b, "eon_32_log_shift"}, + }, + }, + + { "_tyzpxk", + {22, 13, 12}, + { {"000"_b, "swpa_64_memop"}, + {"100"_b, "swpal_64_memop"}, + }, + }, + + { "_tzgtvm", + {13, 12}, + { {"00"_b, "crc32x_64c_dp_2src"}, + {"01"_b, "crc32cx_64c_dp_2src"}, + {"10"_b, "umin_64_dp_2src"}, + }, + }, + + { "_tzjyhy", + {20, 19, 18, 17, 16}, + { {"00010"_b, "scvtf_d32_float2fix"}, + {"00011"_b, "ucvtf_d32_float2fix"}, + {"11000"_b, "fcvtzs_32d_float2fix"}, + {"11001"_b, "fcvtzu_32d_float2fix"}, + }, + }, + + { "_tzrgqq", + {23, 10}, + { {"00"_b, "_gyrkkz"}, + }, + }, + + { "_tzsnmy", + {9, 8, 7, 6, 5, 2, 1}, + { {"1111111"_b, "retab_64e_branch_reg"}, + }, + }, + + { "_tzsvyv", + {18}, + { {"0"_b, "ld2_asisdlsop_bx2_r2b"}, + {"1"_b, "ld2_asisdlsop_b2_i2b"}, + }, + }, + + { "_tzzssm", + {12, 11, 10}, + { {"000"_b, "histseg_z_zz"}, + }, + }, + + { "_vghjnt", + {23, 22}, + { {"00"_b, "fmadd_s_floatdp3"}, + {"01"_b, "fmadd_d_floatdp3"}, + {"11"_b, "fmadd_h_floatdp3"}, + }, + }, + + { "_vgqvys", + {30, 23, 22}, + { {"000"_b, "stp_32_ldstpair_off"}, + {"001"_b, "ldp_32_ldstpair_off"}, + {"010"_b, "stp_32_ldstpair_pre"}, + {"011"_b, "ldp_32_ldstpair_pre"}, + {"100"_b, "stgp_64_ldstpair_off"}, + {"101"_b, "ldpsw_64_ldstpair_off"}, + {"110"_b, "stgp_64_ldstpair_pre"}, + {"111"_b, "ldpsw_64_ldstpair_pre"}, + }, + }, + + { "_vgtnjh", + {23, 22, 20, 19, 
18, 17, 16}, + { {"0001010"_b, "fcvtxnt_z_p_z_d2s"}, + {"1001000"_b, "fcvtnt_z_p_z_s2h"}, + {"1001001"_b, "fcvtlt_z_p_z_h2s"}, + {"1001010"_b, "bfcvtnt_z_p_z_s2bf"}, + {"1101010"_b, "fcvtnt_z_p_z_d2s"}, + {"1101011"_b, "fcvtlt_z_p_z_s2d"}, + }, + }, + + { "_vgxtvy", + {23, 22, 20, 19, 18, 17, 16, 13, 12, 11}, + { {"0011111001"_b, "_tjxhsy"}, + }, + }, + + { "_vhkjgh", + {30, 23, 22, 20, 19, 18}, + { {"00xxxx"_b, "add_64_addsub_imm"}, + {"011000"_b, "smax_64_minmax_imm"}, + {"011001"_b, "umax_64u_minmax_imm"}, + {"011010"_b, "smin_64_minmax_imm"}, + {"011011"_b, "umin_64u_minmax_imm"}, + {"10xxxx"_b, "sub_64_addsub_imm"}, + }, + }, + + { "_vhkpvn", + {20, 18, 17, 16}, + { {"0000"_b, "_grktgm"}, + }, + }, + + { "_vhlqpr", + {30, 22, 11, 10}, + { {"0000"_b, "csel_64_condsel"}, + {"0001"_b, "csinc_64_condsel"}, + {"0100"_b, "_xgqhjv"}, + {"0101"_b, "_hspyhv"}, + {"0110"_b, "_qkxmvp"}, + {"0111"_b, "_tzgtvm"}, + {"1000"_b, "csinv_64_condsel"}, + {"1001"_b, "csneg_64_condsel"}, + {"1100"_b, "_hlqvmm"}, + {"1101"_b, "_ghrnmz"}, + }, + }, + + { "_vhrkvk", + {18, 17}, + { {"00"_b, "ld4_asisdlso_s4_4s"}, + }, + }, + + { "_vjhrzl", + {23, 22, 20, 19, 18, 17, 16}, + { {"0111001"_b, "frintx_asimdmiscfp16_r"}, + {"0x00001"_b, "frintx_asimdmisc_r"}, + {"1111001"_b, "frinti_asimdmiscfp16_r"}, + {"1x00001"_b, "frinti_asimdmisc_r"}, + {"xx00000"_b, "cmle_asimdmisc_z"}, + }, + }, + + { "_vjkhhm", + {23, 22, 13}, + { {"000"_b, "fmul_asimdelem_rh_h"}, + {"1x0"_b, "fmul_asimdelem_r_sd"}, + {"xx1"_b, "sqdmull_asimdelem_l"}, + }, + }, + + { "_vjmklj", + {23, 22}, + { {"10"_b, "sqrdcmlah_z_zzzi_h"}, + {"11"_b, "sqrdcmlah_z_zzzi_s"}, + }, + }, + + { "_vjtgmx", + {20, 19, 18, 17, 16}, + { {"11111"_b, "ldlar_lr64_ldstexcl"}, + }, + }, + + { "_vkrkks", + {30, 23, 22, 13, 4}, + { {"00000"_b, "prfb_i_p_br_s"}, + {"00010"_b, "prfb_i_p_ai_s"}, + {"0010x"_b, "ld1rb_z_p_bi_u32"}, + {"0011x"_b, "ld1rb_z_p_bi_u64"}, + {"01000"_b, "prfh_i_p_br_s"}, + {"01010"_b, "prfh_i_p_ai_s"}, + {"0110x"_b, "ld1rh_z_p_bi_u32"}, + {"0111x"_b, "ld1rh_z_p_bi_u64"}, + {"1000x"_b, "ldnt1b_z_p_ar_d_64_unscaled"}, + {"10010"_b, "prfb_i_p_ai_d"}, + {"1010x"_b, "ld1b_z_p_bz_d_64_unscaled"}, + {"1011x"_b, "ldff1b_z_p_bz_d_64_unscaled"}, + {"1100x"_b, "ldnt1h_z_p_ar_d_64_unscaled"}, + {"11010"_b, "prfh_i_p_ai_d"}, + {"1110x"_b, "ld1h_z_p_bz_d_64_unscaled"}, + {"1111x"_b, "ldff1h_z_p_bz_d_64_unscaled"}, + }, + }, + + { "_vkrskv", + {30, 23, 22, 13, 12, 11, 10}, + { {"000xx00"_b, "stlur_32_ldapstl_unscaled"}, + {"001xx00"_b, "ldapur_32_ldapstl_unscaled"}, + {"010xx00"_b, "ldapursw_64_ldapstl_unscaled"}, + {"100xx00"_b, "stlur_64_ldapstl_unscaled"}, + {"101xx00"_b, "ldapur_64_ldapstl_unscaled"}, + {"x000001"_b, "cpyfprn_cpy_memcms"}, + {"x000101"_b, "cpyfpwtrn_cpy_memcms"}, + {"x001001"_b, "cpyfprtrn_cpy_memcms"}, + {"x001101"_b, "cpyfptrn_cpy_memcms"}, + {"x010001"_b, "cpyfmrn_cpy_memcms"}, + {"x010101"_b, "cpyfmwtrn_cpy_memcms"}, + {"x011001"_b, "cpyfmrtrn_cpy_memcms"}, + {"x011101"_b, "cpyfmtrn_cpy_memcms"}, + {"x100001"_b, "cpyfern_cpy_memcms"}, + {"x100101"_b, "cpyfewtrn_cpy_memcms"}, + {"x101001"_b, "cpyfertrn_cpy_memcms"}, + {"x101101"_b, "cpyfetrn_cpy_memcms"}, + {"x110001"_b, "sete_set_memcms"}, + {"x110101"_b, "setet_set_memcms"}, + {"x111001"_b, "seten_set_memcms"}, + {"x111101"_b, "setetn_set_memcms"}, + }, + }, + + { "_vlhkgr", + {20, 19, 18, 17, 16}, + { {"00000"_b, "uaddlp_asimdmisc_p"}, + {"00001"_b, "sqxtun_asimdmisc_n"}, + }, + }, + + { "_vllmnt", + {20, 19, 18, 17}, + { {"0000"_b, "_gmtjvr"}, + }, + }, + + { "_vlrhpy", + {30, 23, 22, 
13, 4}, + { {"0000x"_b, "ld1sb_z_p_ai_s"}, + {"0001x"_b, "ldff1sb_z_p_ai_s"}, + {"0010x"_b, "ld1rb_z_p_bi_u8"}, + {"0011x"_b, "ld1rb_z_p_bi_u16"}, + {"0100x"_b, "ld1sh_z_p_ai_s"}, + {"0101x"_b, "ldff1sh_z_p_ai_s"}, + {"0110x"_b, "ld1rsw_z_p_bi_s64"}, + {"0111x"_b, "ld1rh_z_p_bi_u16"}, + {"1000x"_b, "ld1sb_z_p_ai_d"}, + {"1001x"_b, "ldff1sb_z_p_ai_d"}, + {"10100"_b, "prfb_i_p_bz_d_64_scaled"}, + {"10110"_b, "prfh_i_p_bz_d_64_scaled"}, + {"1100x"_b, "ld1sh_z_p_ai_d"}, + {"1101x"_b, "ldff1sh_z_p_ai_d"}, + {"1110x"_b, "ld1sh_z_p_bz_d_64_scaled"}, + {"1111x"_b, "ldff1sh_z_p_bz_d_64_scaled"}, + }, + }, + + { "_vlxrps", + {9, 8, 7, 6, 5}, + { {"00000"_b, "fmov_d_floatimm"}, + }, + }, + + { "_vmgnhk", + {30, 23}, + { {"00"_b, "add_64_addsub_imm"}, + {"10"_b, "sub_64_addsub_imm"}, + }, + }, + + { "_vmsxgq", + {30, 23, 22, 13, 12, 11, 10}, + { {"000xx00"_b, "stlur_32_ldapstl_unscaled"}, + {"001xx00"_b, "ldapur_32_ldapstl_unscaled"}, + {"010xx00"_b, "ldapursw_64_ldapstl_unscaled"}, + {"100xx00"_b, "stlur_64_ldapstl_unscaled"}, + {"101xx00"_b, "ldapur_64_ldapstl_unscaled"}, + {"x000001"_b, "cpyfpwn_cpy_memcms"}, + {"x000101"_b, "cpyfpwtwn_cpy_memcms"}, + {"x001001"_b, "cpyfprtwn_cpy_memcms"}, + {"x001101"_b, "cpyfptwn_cpy_memcms"}, + {"x010001"_b, "cpyfmwn_cpy_memcms"}, + {"x010101"_b, "cpyfmwtwn_cpy_memcms"}, + {"x011001"_b, "cpyfmrtwn_cpy_memcms"}, + {"x011101"_b, "cpyfmtwn_cpy_memcms"}, + {"x100001"_b, "cpyfewn_cpy_memcms"}, + {"x100101"_b, "cpyfewtwn_cpy_memcms"}, + {"x101001"_b, "cpyfertwn_cpy_memcms"}, + {"x101101"_b, "cpyfetwn_cpy_memcms"}, + {"x110001"_b, "setm_set_memcms"}, + {"x110101"_b, "setmt_set_memcms"}, + {"x111001"_b, "setmn_set_memcms"}, + {"x111101"_b, "setmtn_set_memcms"}, + }, + }, + + { "_vmtkqp", + {30}, + { {"0"_b, "stlur_32_ldapstl_unscaled"}, + {"1"_b, "stlur_64_ldapstl_unscaled"}, + }, + }, + + { "_vmxzxt", + {23, 22, 13, 12, 11, 10}, + { {"0001x0"_b, "fmulx_asimdelem_rh_h"}, + {"0x0001"_b, "sqshrun_asimdshf_n"}, + {"0x0011"_b, "sqrshrun_asimdshf_n"}, + {"0x0101"_b, "uqshrn_asimdshf_n"}, + {"0x0111"_b, "uqrshrn_asimdshf_n"}, + {"0x1001"_b, "ushll_asimdshf_l"}, + {"1000x0"_b, "fmlal2_asimdelem_lh"}, + {"1x01x0"_b, "fmulx_asimdelem_r_sd"}, + {"xx10x0"_b, "umull_asimdelem_l"}, + }, + }, + + { "_vmyztj", + {30, 23, 22}, + { {"000"_b, "stp_64_ldstpair_off"}, + {"001"_b, "ldp_64_ldstpair_off"}, + {"010"_b, "stp_64_ldstpair_pre"}, + {"011"_b, "ldp_64_ldstpair_pre"}, + }, + }, + + { "_vnggzq", + {30, 23, 22, 13, 12, 11, 10}, + { {"000xx10"_b, "stlur_b_ldapstl_simd"}, + {"001xx10"_b, "ldapur_b_ldapstl_simd"}, + {"010xx10"_b, "stlur_q_ldapstl_simd"}, + {"011xx10"_b, "ldapur_q_ldapstl_simd"}, + {"100xx10"_b, "stlur_h_ldapstl_simd"}, + {"101xx10"_b, "ldapur_h_ldapstl_simd"}, + {"x000001"_b, "cpypn_cpy_memcms"}, + {"x000101"_b, "cpypwtn_cpy_memcms"}, + {"x001001"_b, "cpyprtn_cpy_memcms"}, + {"x001101"_b, "cpyptn_cpy_memcms"}, + {"x010001"_b, "cpymn_cpy_memcms"}, + {"x010101"_b, "cpymwtn_cpy_memcms"}, + {"x011001"_b, "cpymrtn_cpy_memcms"}, + {"x011101"_b, "cpymtn_cpy_memcms"}, + {"x100001"_b, "cpyen_cpy_memcms"}, + {"x100101"_b, "cpyewtn_cpy_memcms"}, + {"x101001"_b, "cpyertn_cpy_memcms"}, + {"x101101"_b, "cpyetn_cpy_memcms"}, + }, + }, + + { "_vnnjxg", + {30, 23, 22, 13, 12, 11, 10}, + { {"000xxxx"_b, "fnmsub_s_floatdp3"}, + {"001xxxx"_b, "fnmsub_d_floatdp3"}, + {"011xxxx"_b, "fnmsub_h_floatdp3"}, + {"10x1001"_b, "scvtf_asisdshf_c"}, + {"10x1111"_b, "fcvtzs_asisdshf_c"}, + {"1xx00x0"_b, "sqdmulh_asisdelem_r"}, + {"1xx01x0"_b, "sqrdmulh_asisdelem_r"}, + }, + }, + + { "_vnrlrk", + {30}, + 
{ {"0"_b, "orn_64_log_shift"}, + {"1"_b, "bics_64_log_shift"}, + }, + }, + + { "_vnrlsj", + {18, 17, 12}, + { {"0x0"_b, "ld4_asisdlsop_dx4_r4d"}, + {"100"_b, "ld4_asisdlsop_dx4_r4d"}, + {"110"_b, "ld4_asisdlsop_d4_i4d"}, + }, + }, + + { "_vnsqhn", + {30, 23, 11, 10}, + { {"0010"_b, "_plytvr"}, + {"0100"_b, "_zghtll"}, + {"0110"_b, "_ptkgrz"}, + {"1000"_b, "_xksqnh"}, + {"1001"_b, "ldraa_64_ldst_pac"}, + {"1010"_b, "_hyskth"}, + {"1011"_b, "ldraa_64w_ldst_pac"}, + {"1100"_b, "_kpgghm"}, + {"1101"_b, "ldrab_64_ldst_pac"}, + {"1110"_b, "_zxjkmj"}, + {"1111"_b, "ldrab_64w_ldst_pac"}, + }, + }, + + { "_vnzkty", + {30}, + { {"0"_b, "orr_64_log_shift"}, + {"1"_b, "ands_64_log_shift"}, + }, + }, + + { "_vpgxgk", + {20, 19, 18, 17, 16, 13, 12}, + { {"1111100"_b, "_rqzpzq"}, + }, + }, + + { "_vpjktn", + {30, 23, 22}, + { {"000"_b, "madd_64a_dp_3src"}, + }, + }, + + { "_vpknjg", + {13, 12}, + { {"00"_b, "sdiv_32_dp_2src"}, + {"10"_b, "rorv_32_dp_2src"}, + }, + }, + + { "_vpmxrj", + {13}, + { {"0"_b, "histcnt_z_p_zz"}, + {"1"_b, "_jxszhy"}, + }, + }, + + { "_vpyvjr", + {9, 8, 7, 6, 5}, + { {"11111"_b, "pacizb_64z_dp_1src"}, + }, + }, + + { "_vqrqjt", + {30, 23, 22, 11, 10}, + { {"01000"_b, "csel_32_condsel"}, + {"01001"_b, "csinc_32_condsel"}, + {"11000"_b, "csinv_32_condsel"}, + {"11001"_b, "csneg_32_condsel"}, + }, + }, + + { "_vqzsgg", + {20, 19, 18, 17, 16}, + { {"00010"_b, "scvtf_s32_float2fix"}, + {"00011"_b, "ucvtf_s32_float2fix"}, + {"11000"_b, "fcvtzs_32s_float2fix"}, + {"11001"_b, "fcvtzu_32s_float2fix"}, + }, + }, + + { "_vrjhtm", + {12}, + { {"0"_b, "sqdmulh_asimdelem_r"}, + {"1"_b, "sqrdmulh_asimdelem_r"}, + }, + }, + + { "_vrsgzg", + {30, 23, 22, 20, 19, 18}, + { {"00xxxx"_b, "add_64_addsub_imm"}, + {"010xxx"_b, "addg_64_addsub_immtags"}, + {"011000"_b, "smax_64_minmax_imm"}, + {"011001"_b, "umax_64u_minmax_imm"}, + {"011010"_b, "smin_64_minmax_imm"}, + {"011011"_b, "umin_64u_minmax_imm"}, + {"10xxxx"_b, "sub_64_addsub_imm"}, + {"110xxx"_b, "subg_64_addsub_immtags"}, + }, + }, + + { "_vrsjnp", + {13, 12, 11, 10}, + { {"1111"_b, "cas_c32_ldstexcl"}, + }, + }, + + { "_vrxhss", + {20, 19, 18, 17, 16}, + { {"00001"_b, "uqxtn_asisdmisc_n"}, + }, + }, + + { "_vryrnh", + {30, 22, 11}, + { {"001"_b, "_zsgpsn"}, + {"010"_b, "ccmn_32_condcmp_reg"}, + {"011"_b, "ccmn_32_condcmp_imm"}, + {"110"_b, "ccmp_32_condcmp_reg"}, + {"111"_b, "ccmp_32_condcmp_imm"}, + }, + }, + + { "_vrzksz", + {20, 19, 18, 17, 16, 13, 12}, + { {"1111100"_b, "ldaprh_32l_memop"}, + }, + }, + + { "_vshynq", + {30, 23, 22, 11, 10}, + { {"00000"_b, "sturb_32_ldst_unscaled"}, + {"00001"_b, "strb_32_ldst_immpost"}, + {"00010"_b, "sttrb_32_ldst_unpriv"}, + {"00011"_b, "strb_32_ldst_immpre"}, + {"00100"_b, "ldurb_32_ldst_unscaled"}, + {"00101"_b, "ldrb_32_ldst_immpost"}, + {"00110"_b, "ldtrb_32_ldst_unpriv"}, + {"00111"_b, "ldrb_32_ldst_immpre"}, + {"01000"_b, "ldursb_64_ldst_unscaled"}, + {"01001"_b, "ldrsb_64_ldst_immpost"}, + {"01010"_b, "ldtrsb_64_ldst_unpriv"}, + {"01011"_b, "ldrsb_64_ldst_immpre"}, + {"01100"_b, "ldursb_32_ldst_unscaled"}, + {"01101"_b, "ldrsb_32_ldst_immpost"}, + {"01110"_b, "ldtrsb_32_ldst_unpriv"}, + {"01111"_b, "ldrsb_32_ldst_immpre"}, + {"10000"_b, "sturh_32_ldst_unscaled"}, + {"10001"_b, "strh_32_ldst_immpost"}, + {"10010"_b, "sttrh_32_ldst_unpriv"}, + {"10011"_b, "strh_32_ldst_immpre"}, + {"10100"_b, "ldurh_32_ldst_unscaled"}, + {"10101"_b, "ldrh_32_ldst_immpost"}, + {"10110"_b, "ldtrh_32_ldst_unpriv"}, + {"10111"_b, "ldrh_32_ldst_immpre"}, + {"11000"_b, "ldursh_64_ldst_unscaled"}, + {"11001"_b, 
"ldrsh_64_ldst_immpost"}, + {"11010"_b, "ldtrsh_64_ldst_unpriv"}, + {"11011"_b, "ldrsh_64_ldst_immpre"}, + {"11100"_b, "ldursh_32_ldst_unscaled"}, + {"11101"_b, "ldrsh_32_ldst_immpost"}, + {"11110"_b, "ldtrsh_32_ldst_unpriv"}, + {"11111"_b, "ldrsh_32_ldst_immpre"}, + }, + }, + + { "_vsnnms", + {30, 13, 12, 11, 10}, + { {"00000"_b, "_xzntxr"}, + }, + }, + + { "_vsslrs", + {8}, + { {"0"_b, "tstart_br_systemresult"}, + {"1"_b, "ttest_br_systemresult"}, + }, + }, + + { "_vsyjql", + {4}, + { {"0"_b, "ccmn_32_condcmp_imm"}, + }, + }, + + { "_vtgnnl", + {30}, + { {"0"_b, "_qgsrqq"}, + {"1"_b, "_mgjhts"}, + }, + }, + + { "_vtllgt", + {10}, + { {"0"_b, "_nhnhzp"}, + }, + }, + + { "_vtyqhh", + {30}, + { {"0"_b, "and_32_log_shift"}, + {"1"_b, "eor_32_log_shift"}, + }, + }, + + { "_vvgnhm", + {23}, + { {"0"_b, "fmulx_asimdsame_only"}, + }, + }, + + { "_vvgpzq", + {20, 19, 18, 17, 16}, + { {"00000"_b, "fcvtns_32h_float2int"}, + {"00001"_b, "fcvtnu_32h_float2int"}, + {"00010"_b, "scvtf_h32_float2int"}, + {"00011"_b, "ucvtf_h32_float2int"}, + {"00100"_b, "fcvtas_32h_float2int"}, + {"00101"_b, "fcvtau_32h_float2int"}, + {"00110"_b, "fmov_32h_float2int"}, + {"00111"_b, "fmov_h32_float2int"}, + {"01000"_b, "fcvtps_32h_float2int"}, + {"01001"_b, "fcvtpu_32h_float2int"}, + {"10000"_b, "fcvtms_32h_float2int"}, + {"10001"_b, "fcvtmu_32h_float2int"}, + {"11000"_b, "fcvtzs_32h_float2int"}, + {"11001"_b, "fcvtzu_32h_float2int"}, + }, + }, + + { "_vvtnrv", + {23, 22, 20, 19, 18}, + { {"00000"_b, "orr_z_zi"}, + {"01000"_b, "eor_z_zi"}, + {"10000"_b, "and_z_zi"}, + {"11000"_b, "dupm_z_i"}, + }, + }, + + { "_vvxsxt", + {4}, + { {"0"_b, "ands_p_p_pp_z"}, + {"1"_b, "bics_p_p_pp_z"}, + }, + }, + + { "_vvyjmh", + {23, 22, 20, 19, 11}, + { {"00010"_b, "ssra_asisdshf_r"}, + {"001x0"_b, "ssra_asisdshf_r"}, + {"01xx0"_b, "ssra_asisdshf_r"}, + }, + }, + + { "_vvzsmg", + {23, 22, 20, 19, 13, 11}, + { {"0000x0"_b, "bic_asimdimm_l_sl"}, + {"00x100"_b, "usra_asimdshf_r"}, + {"00x110"_b, "ursra_asimdshf_r"}, + {"010x00"_b, "usra_asimdshf_r"}, + {"010x10"_b, "ursra_asimdshf_r"}, + {"011100"_b, "usra_asimdshf_r"}, + {"011110"_b, "ursra_asimdshf_r"}, + {"0x1000"_b, "usra_asimdshf_r"}, + {"0x1010"_b, "ursra_asimdshf_r"}, + }, + }, + + { "_vxhgzz", + {23, 22, 12, 11, 10}, + { {"00xxx"_b, "ext_z_zi_des"}, + {"01xxx"_b, "ext_z_zi_con"}, + {"10000"_b, "zip1_z_zz_q"}, + {"10001"_b, "zip2_z_zz_q"}, + {"10010"_b, "uzp1_z_zz_q"}, + {"10011"_b, "uzp2_z_zz_q"}, + {"10110"_b, "trn1_z_zz_q"}, + {"10111"_b, "trn2_z_zz_q"}, + }, + }, + + { "_vxhjgg", + {20, 18, 17, 16}, + { {"0000"_b, "_shgxyq"}, + }, + }, + + { "_vxlmxz", + {4, 3, 2, 1, 0}, + { {"11111"_b, "_hpmvzr"}, + }, + }, + + { "_vxqtkl", + {18, 17}, + { {"00"_b, "_zqmvqs"}, + }, + }, + + { "_vxrnyh", + {18, 17}, + { {"0x"_b, "st1_asisdlsep_r1_r1"}, + {"10"_b, "st1_asisdlsep_r1_r1"}, + {"11"_b, "st1_asisdlsep_i1_i1"}, + }, + }, + + { "_vxvyyg", + {30, 23, 22, 13, 12, 11, 10}, + { {"000xx00"_b, "stlurb_32_ldapstl_unscaled"}, + {"001xx00"_b, "ldapurb_32_ldapstl_unscaled"}, + {"010xx00"_b, "ldapursb_64_ldapstl_unscaled"}, + {"011xx00"_b, "ldapursb_32_ldapstl_unscaled"}, + {"100xx00"_b, "stlurh_32_ldapstl_unscaled"}, + {"101xx00"_b, "ldapurh_32_ldapstl_unscaled"}, + {"110xx00"_b, "ldapursh_64_ldapstl_unscaled"}, + {"111xx00"_b, "ldapursh_32_ldapstl_unscaled"}, + {"x000001"_b, "cpyfpn_cpy_memcms"}, + {"x000101"_b, "cpyfpwtn_cpy_memcms"}, + {"x001001"_b, "cpyfprtn_cpy_memcms"}, + {"x001101"_b, "cpyfptn_cpy_memcms"}, + {"x010001"_b, "cpyfmn_cpy_memcms"}, + {"x010101"_b, "cpyfmwtn_cpy_memcms"}, + 
{"x011001"_b, "cpyfmrtn_cpy_memcms"}, + {"x011101"_b, "cpyfmtn_cpy_memcms"}, + {"x100001"_b, "cpyfen_cpy_memcms"}, + {"x100101"_b, "cpyfewtn_cpy_memcms"}, + {"x101001"_b, "cpyfertn_cpy_memcms"}, + {"x101101"_b, "cpyfetn_cpy_memcms"}, + }, + }, + + { "_vyjsst", + {30, 4}, + { {"0x"_b, "b_only_branch_imm"}, + {"10"_b, "b_only_condbranch"}, + {"11"_b, "bc_only_condbranch"}, + }, + }, + + { "_vypgrt", + {20, 19, 18, 17, 16}, + { {"00000"_b, "rev16_asimdmisc_r"}, + }, + }, + + { "_vypnss", + {30}, + { {"0"_b, "orn_32_log_shift"}, + {"1"_b, "bics_32_log_shift"}, + }, + }, + + { "_vyqxyz", + {23, 22, 20, 19, 18, 17, 16}, + { {"0111001"_b, "fcvtau_asimdmiscfp16_r"}, + {"0x00001"_b, "fcvtau_asimdmisc_r"}, + {"0x10000"_b, "fmaxnmv_asimdall_only_sd"}, + {"1111000"_b, "fcmge_asimdmiscfp16_fz"}, + {"1x00000"_b, "fcmge_asimdmisc_fz"}, + {"1x00001"_b, "ursqrte_asimdmisc_r"}, + {"1x10000"_b, "fminnmv_asimdall_only_sd"}, + }, + }, + + { "_vzjvtv", + {23, 22, 12, 11, 10}, + { {"01001"_b, "bfmmla_z_zzz"}, + {"10001"_b, "fmmla_z_zzz_s"}, + {"11001"_b, "fmmla_z_zzz_d"}, + }, + }, + + { "_vzvstm", + {23, 22, 20, 19, 12, 11}, + { {"000000"_b, "movi_asimdimm_n_b"}, + {"000010"_b, "fmov_asimdimm_s_s"}, + {"000011"_b, "fmov_asimdimm_h_h"}, + {"00x100"_b, "scvtf_asimdshf_c"}, + {"00x111"_b, "fcvtzs_asimdshf_c"}, + {"010x00"_b, "scvtf_asimdshf_c"}, + {"010x11"_b, "fcvtzs_asimdshf_c"}, + {"011100"_b, "scvtf_asimdshf_c"}, + {"011111"_b, "fcvtzs_asimdshf_c"}, + {"0x1000"_b, "scvtf_asimdshf_c"}, + {"0x1011"_b, "fcvtzs_asimdshf_c"}, + }, + }, + + { "_vzyklr", + {13, 12}, + { {"00"_b, "setp_set_memcms"}, + {"01"_b, "setpt_set_memcms"}, + {"10"_b, "setpn_set_memcms"}, + {"11"_b, "setptn_set_memcms"}, + }, + }, + + { "_vzzqhx", + {12, 10}, + { {"00"_b, "_phrqqx"}, + {"01"_b, "_snnlgr"}, + {"10"_b, "_phsrlk"}, + {"11"_b, "_nrmlqv"}, + }, + }, + + { "_xghrjn", + {20, 19, 18, 17, 16}, + { {"00010"_b, "scvtf_h32_float2fix"}, + {"00011"_b, "ucvtf_h32_float2fix"}, + {"11000"_b, "fcvtzs_32h_float2fix"}, + {"11001"_b, "fcvtzu_32h_float2fix"}, + }, + }, + + { "_xgqhjv", + {13, 12}, + { {"10"_b, "smax_64_dp_2src"}, + }, + }, + + { "_xgxtlr", + {23}, + { {"0"_b, "fdiv_asimdsame_only"}, + }, + }, + + { "_xhhqnx", + {30, 23, 22, 13, 12, 11, 10}, + { {"1101001"_b, "ummla_asimdsame2_g"}, + {"xxx0001"_b, "sqrdmlah_asimdsame2_only"}, + {"xxx0011"_b, "sqrdmlsh_asimdsame2_only"}, + {"xxx0101"_b, "udot_asimdsame2_d"}, + }, + }, + + { "_xhktsk", + {22}, + { {"0"_b, "smullt_z_zzi_s"}, + {"1"_b, "smullt_z_zzi_d"}, + }, + }, + + { "_xhlhmh", + {4}, + { {"0"_b, "cmplo_p_p_zi"}, + {"1"_b, "cmpls_p_p_zi"}, + }, + }, + + { "_xhmpmy", + {4}, + { {"0"_b, "and_p_p_pp_z"}, + {"1"_b, "bic_p_p_pp_z"}, + }, + }, + + { "_xjtzgm", + {30, 23, 22, 11, 10}, + { {"00000"_b, "stur_b_ldst_unscaled"}, + {"00001"_b, "str_b_ldst_immpost"}, + {"00011"_b, "str_b_ldst_immpre"}, + {"00100"_b, "ldur_b_ldst_unscaled"}, + {"00101"_b, "ldr_b_ldst_immpost"}, + {"00111"_b, "ldr_b_ldst_immpre"}, + {"01000"_b, "stur_q_ldst_unscaled"}, + {"01001"_b, "str_q_ldst_immpost"}, + {"01011"_b, "str_q_ldst_immpre"}, + {"01100"_b, "ldur_q_ldst_unscaled"}, + {"01101"_b, "ldr_q_ldst_immpost"}, + {"01111"_b, "ldr_q_ldst_immpre"}, + {"10000"_b, "stur_h_ldst_unscaled"}, + {"10001"_b, "str_h_ldst_immpost"}, + {"10011"_b, "str_h_ldst_immpre"}, + {"10100"_b, "ldur_h_ldst_unscaled"}, + {"10101"_b, "ldr_h_ldst_immpost"}, + {"10111"_b, "ldr_h_ldst_immpre"}, + }, + }, + + { "_xksqnh", + {22, 20, 19, 18, 17, 16, 13, 12}, + { {"01111101"_b, "ld64b_64l_memop"}, + }, + }, + + { "_xkylhh", + {22, 13, 12}, + { 
{"000"_b, "swpa_32_memop"}, + {"100"_b, "swpal_32_memop"}, + }, + }, + + { "_xkznrh", + {18, 17}, + { {"00"_b, "st3_asisdlse_r3"}, + }, + }, + + { "_xlgxhn", + {23, 22, 4}, + { {"000"_b, "fccmp_s_floatccmp"}, + {"001"_b, "fccmpe_s_floatccmp"}, + {"010"_b, "fccmp_d_floatccmp"}, + {"011"_b, "fccmpe_d_floatccmp"}, + {"110"_b, "fccmp_h_floatccmp"}, + {"111"_b, "fccmpe_h_floatccmp"}, + }, + }, + + { "_xlqmhl", + {20, 19, 18, 17, 16}, + { {"11111"_b, "ldar_lr64_ldstexcl"}, + }, + }, + + { "_xlyjsz", + {23, 22, 13}, + { {"100"_b, "fmlal2_asimdelem_lh"}, + {"xx1"_b, "umull_asimdelem_l"}, + }, + }, + + { "_xlyppq", + {23, 22, 20, 19, 18, 17, 16}, + { {"0010000"_b, "fmaxv_asimdall_only_h"}, + {"0x00001"_b, "frint64z_asimdmisc_r"}, + {"1010000"_b, "fminv_asimdall_only_h"}, + {"1111000"_b, "fabs_asimdmiscfp16_r"}, + {"1x00000"_b, "fabs_asimdmisc_r"}, + }, + }, + + { "_xmkysx", + {12}, + { {"0"_b, "st4_asisdlsop_dx4_r4d"}, + }, + }, + + { "_xmxhhg", + {13, 12, 4}, + { {"000"_b, "rmif_only_rmif"}, + }, + }, + + { "_xmxpnx", + {10}, + { {"0"_b, "sri_z_zzi"}, + {"1"_b, "sli_z_zzi"}, + }, + }, + + { "_xnhkpk", + {23, 22}, + { {"00"_b, "fcsel_s_floatsel"}, + {"01"_b, "fcsel_d_floatsel"}, + {"11"_b, "fcsel_h_floatsel"}, + }, + }, + + { "_xnpyvy", + {13, 10}, + { {"00"_b, "_sylkvm"}, + {"01"_b, "_nvnjyp"}, + {"10"_b, "_ltrntg"}, + {"11"_b, "_qrtjvn"}, + }, + }, + + { "_xnrrsy", + {18}, + { {"0"_b, "st1_asisdlsep_r4_r4"}, + {"1"_b, "st1_asisdlsep_i4_i4"}, + }, + }, + + { "_xnrxym", + {18}, + { {"0"_b, "ld2_asisdlsep_r2_r"}, + {"1"_b, "ld2_asisdlsep_i2_i"}, + }, + }, + + { "_xpqglq", + {4}, + { {"0"_b, "cmpeq_p_p_zi"}, + {"1"_b, "cmpne_p_p_zi"}, + }, + }, + + { "_xprqgs", + {23, 20, 19, 18, 17, 16}, + { {"000001"_b, "fcvtxn_asisdmisc_n"}, + }, + }, + + { "_xptsns", + {23, 22}, + { {"00"_b, "tbx_asimdtbl_l1_1"}, + }, + }, + + { "_xqhxql", + {12}, + { {"0"_b, "st2_asisdlsop_dx2_r2d"}, + }, + }, + + { "_xqrgjj", + {4}, + { {"0"_b, "ccmp_64_condcmp_imm"}, + }, + }, + + { "_xqvzvl", + {18, 17}, + { {"0x"_b, "st1_asisdlsep_r3_r3"}, + {"10"_b, "st1_asisdlsep_r3_r3"}, + {"11"_b, "st1_asisdlsep_i3_i3"}, + }, + }, + + { "_xrkzpn", + {12}, + { {"0"_b, "_zjqssg"}, + }, + }, + + { "_xrnqyn", + {30}, + { {"0"_b, "stlr_32s_ldapstl_writeback"}, + {"1"_b, "stlr_64s_ldapstl_writeback"}, + }, + }, + + { "_xrskrk", + {22, 12}, + { {"10"_b, "_kyhhqt"}, + }, + }, + + { "_xrzqtn", + {30}, + { {"0"_b, "bl_only_branch_imm"}, + {"1"_b, "_gyllxt"}, + }, + }, + + { "_xsgnlv", + {30, 23, 13, 12, 11, 10}, + { {"100001"_b, "ushr_asisdshf_r"}, + {"100101"_b, "usra_asisdshf_r"}, + {"101001"_b, "urshr_asisdshf_r"}, + {"101101"_b, "ursra_asisdshf_r"}, + }, + }, + + { "_xspjzn", + {13, 12, 11, 10}, + { {"1111"_b, "casl_c64_ldstexcl"}, + }, + }, + + { "_xsvpzx", + {18, 17, 12}, + { {"000"_b, "ld4_asisdlso_d4_4d"}, + }, + }, + + { "_xszmjn", + {30, 13, 12}, + { {"000"_b, "ldiapp_32le_ldiappstilp"}, + {"001"_b, "ldiapp_32l_ldiappstilp"}, + {"100"_b, "ldiapp_64ls_ldiappstilp"}, + {"101"_b, "ldiapp_64l_ldiappstilp"}, + }, + }, + + { "_xszqrg", + {30, 23, 22}, + { {"000"_b, "_glpxty"}, + {"001"_b, "_rkpylh"}, + {"011"_b, "_xghrjn"}, + {"100"_b, "_nklqly"}, + }, + }, + + { "_xtgmvr", + {23, 11, 10, 4, 3, 2, 0}, + { {"0000000"_b, "_mzkxzm"}, + {"0101111"_b, "_qgvrqy"}, + {"0111111"_b, "_lljxgp"}, + {"1000000"_b, "_tjlthk"}, + }, + }, + + { "_xtgtyz", + {19, 18, 17, 16}, + { {"0000"_b, "brkb_p_p_p"}, + }, + }, + + { "_xtxyxj", + {4}, + { {"0"_b, "orr_p_p_pp_z"}, + {"1"_b, "orn_p_p_pp_z"}, + }, + }, + + { "_xtzykp", + {20, 19, 18, 17, 16}, + { {"11111"_b, 
"ldlarh_lr32_ldstexcl"}, + }, + }, + + { "_xvmxrg", + {13}, + { {"0"_b, "mla_asimdelem_r"}, + {"1"_b, "umlal_asimdelem_l"}, + }, + }, + + { "_xvnyxq", + {30, 23, 13, 4}, + { {"0000"_b, "prfb_i_p_bz_s_x32_scaled"}, + {"0010"_b, "prfh_i_p_bz_s_x32_scaled"}, + {"010x"_b, "ld1sh_z_p_bz_s_x32_scaled"}, + {"011x"_b, "ldff1sh_z_p_bz_s_x32_scaled"}, + {"1000"_b, "prfb_i_p_bz_d_x32_scaled"}, + {"1010"_b, "prfh_i_p_bz_d_x32_scaled"}, + {"110x"_b, "ld1sh_z_p_bz_d_x32_scaled"}, + {"111x"_b, "ldff1sh_z_p_bz_d_x32_scaled"}, + }, + }, + + { "_xvppmm", + {30, 23, 22, 13, 12, 11, 10}, + { {"0xx0xxx"_b, "mla_z_p_zzz"}, + {"0xx1xxx"_b, "mls_z_p_zzz"}, + {"1101110"_b, "usdot_z_zzz_s"}, + {"1xx0000"_b, "smlalb_z_zzz"}, + {"1xx0001"_b, "smlalt_z_zzz"}, + {"1xx0010"_b, "umlalb_z_zzz"}, + {"1xx0011"_b, "umlalt_z_zzz"}, + {"1xx0100"_b, "smlslb_z_zzz"}, + {"1xx0101"_b, "smlslt_z_zzz"}, + {"1xx0110"_b, "umlslb_z_zzz"}, + {"1xx0111"_b, "umlslt_z_zzz"}, + {"1xx1000"_b, "sqdmlalb_z_zzz"}, + {"1xx1001"_b, "sqdmlalt_z_zzz"}, + {"1xx1010"_b, "sqdmlslb_z_zzz"}, + {"1xx1011"_b, "sqdmlslt_z_zzz"}, + {"1xx1100"_b, "sqrdmlah_z_zzz"}, + {"1xx1101"_b, "sqrdmlsh_z_zzz"}, + }, + }, + + { "_xvrvhv", + {4}, + { {"0"_b, "ccmp_32_condcmp_reg"}, + }, + }, + + { "_xxjrsy", + {23, 22, 9}, + { {"000"_b, "rdffr_p_p_f"}, + {"010"_b, "rdffrs_p_p_f"}, + }, + }, + + { "_xxphlt", + {23}, + { {"0"_b, "_qgshrr"}, + }, + }, + + { "_xxqzvy", + {20, 19, 18, 17, 16}, + { {"00000"_b, "fcvtns_32d_float2int"}, + {"00001"_b, "fcvtnu_32d_float2int"}, + {"00010"_b, "scvtf_d32_float2int"}, + {"00011"_b, "ucvtf_d32_float2int"}, + {"00100"_b, "fcvtas_32d_float2int"}, + {"00101"_b, "fcvtau_32d_float2int"}, + {"01000"_b, "fcvtps_32d_float2int"}, + {"01001"_b, "fcvtpu_32d_float2int"}, + {"10000"_b, "fcvtms_32d_float2int"}, + {"10001"_b, "fcvtmu_32d_float2int"}, + {"11000"_b, "fcvtzs_32d_float2int"}, + {"11001"_b, "fcvtzu_32d_float2int"}, + {"11110"_b, "fjcvtzs_32d_float2int"}, + }, + }, + + { "_xygvjp", + {23, 22}, + { {"00"_b, "and_asimdsame_only"}, + {"01"_b, "bic_asimdsame_only"}, + {"10"_b, "orr_asimdsame_only"}, + {"11"_b, "orn_asimdsame_only"}, + }, + }, + + { "_xyhmgh", + {23, 22, 20, 9}, + { {"0000"_b, "_xhmpmy"}, + {"0001"_b, "_qnprqt"}, + {"0010"_b, "_nnzhgm"}, + {"0100"_b, "_vvxsxt"}, + {"0101"_b, "_yzmjhn"}, + {"0110"_b, "_mkgsly"}, + {"1000"_b, "_xtxyxj"}, + {"1001"_b, "_hmtmlq"}, + {"1010"_b, "_xtgtyz"}, + {"1100"_b, "_yynmjl"}, + {"1101"_b, "_sjnspg"}, + {"1110"_b, "_jzjvtv"}, + }, + }, + + { "_xymnxy", + {30}, + { {"0"_b, "tbz_only_testbranch"}, + }, + }, + + { "_xynxhx", + {30, 23, 22, 11, 10}, + { {"00010"_b, "str_b_ldst_regoff"}, + {"00110"_b, "ldr_b_ldst_regoff"}, + {"01010"_b, "str_q_ldst_regoff"}, + {"01110"_b, "ldr_q_ldst_regoff"}, + {"10010"_b, "str_h_ldst_regoff"}, + {"10110"_b, "ldr_h_ldst_regoff"}, + }, + }, + + { "_xzjvkv", + {23, 22}, + { {"00"_b, "tbl_asimdtbl_l1_1"}, + }, + }, + + { "_xzlxjh", + {30, 23, 22}, + { {"001"_b, "sbfm_64m_bitfield"}, + {"011"_b, "extr_64_extract"}, + {"101"_b, "ubfm_64m_bitfield"}, + }, + }, + + { "_xzmrlg", + {30, 23, 22}, + { {"000"_b, "stlxr_sr32_ldstexcl"}, + {"001"_b, "_zzkgsk"}, + {"010"_b, "_mnzzhk"}, + {"011"_b, "_qlxlxk"}, + {"100"_b, "stlxr_sr64_ldstexcl"}, + {"101"_b, "_tknqxs"}, + {"110"_b, "_mhpgjx"}, + {"111"_b, "_xlqmhl"}, + }, + }, + + { "_xznsqh", + {22, 20, 11}, + { {"000"_b, "cntw_r_s"}, + {"010"_b, "incw_r_rs"}, + {"100"_b, "cntd_r_s"}, + {"110"_b, "incd_r_rs"}, + }, + }, + + { "_xzntxr", + {23, 22, 20, 19, 18, 17, 16}, + { {"0000000"_b, "fcvtns_64s_float2int"}, + {"0000001"_b, 
"fcvtnu_64s_float2int"}, + {"0000010"_b, "scvtf_s64_float2int"}, + {"0000011"_b, "ucvtf_s64_float2int"}, + {"0000100"_b, "fcvtas_64s_float2int"}, + {"0000101"_b, "fcvtau_64s_float2int"}, + {"0001000"_b, "fcvtps_64s_float2int"}, + {"0001001"_b, "fcvtpu_64s_float2int"}, + {"0010000"_b, "fcvtms_64s_float2int"}, + {"0010001"_b, "fcvtmu_64s_float2int"}, + {"0011000"_b, "fcvtzs_64s_float2int"}, + {"0011001"_b, "fcvtzu_64s_float2int"}, + {"0100000"_b, "fcvtns_64d_float2int"}, + {"0100001"_b, "fcvtnu_64d_float2int"}, + {"0100010"_b, "scvtf_d64_float2int"}, + {"0100011"_b, "ucvtf_d64_float2int"}, + {"0100100"_b, "fcvtas_64d_float2int"}, + {"0100101"_b, "fcvtau_64d_float2int"}, + {"0100110"_b, "fmov_64d_float2int"}, + {"0100111"_b, "fmov_d64_float2int"}, + {"0101000"_b, "fcvtps_64d_float2int"}, + {"0101001"_b, "fcvtpu_64d_float2int"}, + {"0110000"_b, "fcvtms_64d_float2int"}, + {"0110001"_b, "fcvtmu_64d_float2int"}, + {"0111000"_b, "fcvtzs_64d_float2int"}, + {"0111001"_b, "fcvtzu_64d_float2int"}, + {"1001110"_b, "fmov_64vx_float2int"}, + {"1001111"_b, "fmov_v64i_float2int"}, + {"1100000"_b, "fcvtns_64h_float2int"}, + {"1100001"_b, "fcvtnu_64h_float2int"}, + {"1100010"_b, "scvtf_h64_float2int"}, + {"1100011"_b, "ucvtf_h64_float2int"}, + {"1100100"_b, "fcvtas_64h_float2int"}, + {"1100101"_b, "fcvtau_64h_float2int"}, + {"1100110"_b, "fmov_64h_float2int"}, + {"1100111"_b, "fmov_h64_float2int"}, + {"1101000"_b, "fcvtps_64h_float2int"}, + {"1101001"_b, "fcvtpu_64h_float2int"}, + {"1110000"_b, "fcvtms_64h_float2int"}, + {"1110001"_b, "fcvtmu_64h_float2int"}, + {"1111000"_b, "fcvtzs_64h_float2int"}, + {"1111001"_b, "fcvtzu_64h_float2int"}, + }, + }, + + { "_xzqmkv", + {13, 12}, + { {"00"_b, "add_asisdsame_only"}, + {"11"_b, "sqdmulh_asisdsame_only"}, + }, + }, + + { "_ygghnn", + {20, 19, 18, 17, 16}, + { {"00000"_b, "suqadd_asimdmisc_r"}, + {"10000"_b, "saddlv_asimdall_only"}, + }, + }, + + { "_ygtpyl", + {22, 13, 12}, + { {"000"_b, "swp_32_memop"}, + {"100"_b, "swpl_32_memop"}, + }, + }, + + { "_yhhsns", + {20, 19, 18, 17}, + { {"0000"_b, "_myrkmk"}, + }, + }, + + { "_yhlntp", + {20, 19, 18, 17, 16}, + { {"00000"_b, "fexpa_z_z"}, + }, + }, + + { "_yhmlxk", + {13, 12, 11, 10}, + { {"0000"_b, "decp_z_p_z"}, + {"0010"_b, "decp_r_p_r"}, + }, + }, + + { "_yhnqyy", + {13, 12}, + { {"01"_b, "sqdmlal_asisddiff_only"}, + {"11"_b, "sqdmlsl_asisddiff_only"}, + }, + }, + + { "_yjktml", + {30}, + { {"0"_b, "ldr_32_loadlit"}, + {"1"_b, "ldr_64_loadlit"}, + }, + }, + + { "_yjmngt", + {30}, + { {"0"_b, "sel_z_p_zz"}, + {"1"_b, "_vpmxrj"}, + }, + }, + + { "_yjnkrn", + {30}, + { {"0"_b, "bl_only_branch_imm"}, + {"1"_b, "_grqsgp"}, + }, + }, + + { "_yjnmkg", + {30, 23, 11, 10}, + { {"0000"_b, "_szysqh"}, + {"0010"_b, "_ksrkkn"}, + {"0100"_b, "_gljqng"}, + {"0110"_b, "_qtghgs"}, + {"1000"_b, "_gjprgr"}, + {"1001"_b, "ldraa_64_ldst_pac"}, + {"1010"_b, "_gnpgsg"}, + {"1011"_b, "ldraa_64w_ldst_pac"}, + {"1100"_b, "_lnmhqq"}, + {"1101"_b, "ldrab_64_ldst_pac"}, + {"1110"_b, "_gsvlph"}, + {"1111"_b, "ldrab_64w_ldst_pac"}, + }, + }, + + { "_yjzknm", + {13, 12, 11, 10}, + { {"0000"_b, "uqdecp_z_p_z"}, + {"0010"_b, "uqdecp_r_p_r_uw"}, + {"0011"_b, "uqdecp_r_p_r_x"}, + }, + }, + + { "_ykhhqq", + {18}, + { {"0"_b, "ld2_asisdlsop_hx2_r2h"}, + {"1"_b, "ld2_asisdlsop_h2_i2h"}, + }, + }, + + { "_ykjhgg", + {30, 23, 22, 13, 12, 11, 10}, + { {"0000000"_b, "ldaddb_32_memop"}, + {"0000100"_b, "ldclrb_32_memop"}, + {"0001000"_b, "ldeorb_32_memop"}, + {"0001100"_b, "ldsetb_32_memop"}, + {"000xx10"_b, "strb_32b_ldst_regoff"}, + {"0010000"_b, 
"ldaddlb_32_memop"}, + {"0010100"_b, "ldclrlb_32_memop"}, + {"0011000"_b, "ldeorlb_32_memop"}, + {"0011100"_b, "ldsetlb_32_memop"}, + {"001xx10"_b, "ldrb_32b_ldst_regoff"}, + {"0100000"_b, "ldaddab_32_memop"}, + {"0100100"_b, "ldclrab_32_memop"}, + {"0101000"_b, "ldeorab_32_memop"}, + {"0101100"_b, "ldsetab_32_memop"}, + {"010xx10"_b, "ldrsb_64b_ldst_regoff"}, + {"0110000"_b, "ldaddalb_32_memop"}, + {"0110100"_b, "ldclralb_32_memop"}, + {"0111000"_b, "ldeoralb_32_memop"}, + {"0111100"_b, "ldsetalb_32_memop"}, + {"011xx10"_b, "ldrsb_32b_ldst_regoff"}, + {"1000000"_b, "ldaddh_32_memop"}, + {"1000100"_b, "ldclrh_32_memop"}, + {"1001000"_b, "ldeorh_32_memop"}, + {"1001100"_b, "ldseth_32_memop"}, + {"100xx10"_b, "strh_32_ldst_regoff"}, + {"1010000"_b, "ldaddlh_32_memop"}, + {"1010100"_b, "ldclrlh_32_memop"}, + {"1011000"_b, "ldeorlh_32_memop"}, + {"1011100"_b, "ldsetlh_32_memop"}, + {"101xx10"_b, "ldrh_32_ldst_regoff"}, + {"1100000"_b, "ldaddah_32_memop"}, + {"1100100"_b, "ldclrah_32_memop"}, + {"1101000"_b, "ldeorah_32_memop"}, + {"1101100"_b, "ldsetah_32_memop"}, + {"110xx10"_b, "ldrsh_64_ldst_regoff"}, + {"1110000"_b, "ldaddalh_32_memop"}, + {"1110100"_b, "ldclralh_32_memop"}, + {"1111000"_b, "ldeoralh_32_memop"}, + {"1111100"_b, "ldsetalh_32_memop"}, + {"111xx10"_b, "ldrsh_32_ldst_regoff"}, + }, + }, + + { "_ykpgyh", + {13, 12, 5}, + { {"010"_b, "_gknljg"}, + {"011"_b, "_hjqryy"}, + {"100"_b, "_lmmkzh"}, + {"101"_b, "_vxlmxz"}, + {"110"_b, "_phktvp"}, + {"111"_b, "_qqvgql"}, + }, + }, + + { "_ykpqth", + {12}, + { {"0"_b, "st4_asisdlsop_dx4_r4d"}, + }, + }, + + { "_ykptgl", + {30, 23}, + { {"00"_b, "adds_32s_addsub_imm"}, + {"10"_b, "subs_32s_addsub_imm"}, + }, + }, + + { "_ylhgrh", + {13, 12, 11, 10}, + { {"0011"_b, "uqadd_asisdsame_only"}, + {"1010"_b, "_msvjxq"}, + {"1011"_b, "uqsub_asisdsame_only"}, + {"1101"_b, "cmhi_asisdsame_only"}, + {"1110"_b, "_yzlnrs"}, + {"1111"_b, "cmhs_asisdsame_only"}, + }, + }, + + { "_ylnsvy", + {20, 19, 18, 17, 16}, + { {"00000"_b, "dup_z_r"}, + {"00100"_b, "insr_z_r"}, + {"10000"_b, "sunpklo_z_z"}, + {"10001"_b, "sunpkhi_z_z"}, + {"10010"_b, "uunpklo_z_z"}, + {"10011"_b, "uunpkhi_z_z"}, + {"10100"_b, "insr_z_v"}, + {"11000"_b, "rev_z_z"}, + }, + }, + + { "_ymghnh", + {20, 19, 18, 17, 16}, + { {"11111"_b, "st64b_64l_memop"}, + }, + }, + + { "_ymhgxg", + {30, 13}, + { {"00"_b, "_yrmmmg"}, + {"01"_b, "_sghgtk"}, + {"10"_b, "_nxjkqs"}, + {"11"_b, "_yvyhlh"}, + }, + }, + + { "_ymhkrx", + {30, 23, 22, 13, 4}, + { {"0000x"_b, "ld1b_z_p_ai_s"}, + {"0001x"_b, "ldff1b_z_p_ai_s"}, + {"0010x"_b, "ld1rb_z_p_bi_u32"}, + {"0011x"_b, "ld1rb_z_p_bi_u64"}, + {"0100x"_b, "ld1h_z_p_ai_s"}, + {"0101x"_b, "ldff1h_z_p_ai_s"}, + {"0110x"_b, "ld1rh_z_p_bi_u32"}, + {"0111x"_b, "ld1rh_z_p_bi_u64"}, + {"1000x"_b, "ld1b_z_p_ai_d"}, + {"1001x"_b, "ldff1b_z_p_ai_d"}, + {"10100"_b, "prfw_i_p_bz_d_64_scaled"}, + {"10110"_b, "prfd_i_p_bz_d_64_scaled"}, + {"1100x"_b, "ld1h_z_p_ai_d"}, + {"1101x"_b, "ldff1h_z_p_ai_d"}, + {"1110x"_b, "ld1h_z_p_bz_d_64_scaled"}, + {"1111x"_b, "ldff1h_z_p_bz_d_64_scaled"}, + }, + }, + + { "_ymkthj", + {20, 9, 4}, + { {"000"_b, "uzp2_p_pp"}, + }, + }, + + { "_ymmhtq", + {23, 22, 20, 19, 11}, + { {"00010"_b, "srsra_asisdshf_r"}, + {"001x0"_b, "srsra_asisdshf_r"}, + {"01xx0"_b, "srsra_asisdshf_r"}, + }, + }, + + { "_ymszkr", + {30}, + { {"0"_b, "ldr_q_loadlit"}, + }, + }, + + { "_ymtzjg", + {12, 10}, + { {"00"_b, "_gmsmls"}, + {"01"_b, "_rnqmyp"}, + {"10"_b, "_srttng"}, + {"11"_b, "_tymryz"}, + }, + }, + + { "_ymvlzl", + {18}, + { {"0"_b, "st4_asisdlse_r4"}, + }, 
+ }, + + { "_ymvzyh", + {20, 19, 18, 17, 16}, + { {"11111"_b, "ldaxp_lp32_ldstexcl"}, + }, + }, + + { "_ymxjjr", + {23, 22, 20, 19, 13, 11}, + { {"0000x0"_b, "orr_asimdimm_l_hl"}, + {"00x100"_b, "sqshrn_asimdshf_n"}, + {"00x101"_b, "sqrshrn_asimdshf_n"}, + {"010x00"_b, "sqshrn_asimdshf_n"}, + {"010x01"_b, "sqrshrn_asimdshf_n"}, + {"011100"_b, "sqshrn_asimdshf_n"}, + {"011101"_b, "sqrshrn_asimdshf_n"}, + {"0x1000"_b, "sqshrn_asimdshf_n"}, + {"0x1001"_b, "sqrshrn_asimdshf_n"}, + }, + }, + + { "_ynsytg", + {23, 22, 20, 19, 13, 11, 10}, + { {"0001001"_b, "shl_asisdshf_r"}, + {"0001101"_b, "sqshl_asisdshf_r"}, + {"001x001"_b, "shl_asisdshf_r"}, + {"001x101"_b, "sqshl_asisdshf_r"}, + {"00xx0x0"_b, "fmls_asisdelem_rh_h"}, + {"01xx001"_b, "shl_asisdshf_r"}, + {"01xx101"_b, "sqshl_asisdshf_r"}, + {"1xxx0x0"_b, "fmls_asisdelem_r_sd"}, + {"xxxx1x0"_b, "sqdmlsl_asisdelem_l"}, + }, + }, + + { "_ynyqky", + {12}, + { {"0"_b, "st2_asisdlsop_dx2_r2d"}, + }, + }, + + { "_ynznxv", + {20, 19, 18, 17, 16}, + { {"11111"_b, "ldaxrb_lr32_ldstexcl"}, + }, + }, + + { "_yppmkl", + {23, 22, 20, 19, 13, 11}, + { {"0000x0"_b, "mvni_asimdimm_l_hl"}, + {"00x100"_b, "sqshrun_asimdshf_n"}, + {"00x101"_b, "sqrshrun_asimdshf_n"}, + {"00x110"_b, "ushll_asimdshf_l"}, + {"010x00"_b, "sqshrun_asimdshf_n"}, + {"010x01"_b, "sqrshrun_asimdshf_n"}, + {"010x10"_b, "ushll_asimdshf_l"}, + {"011100"_b, "sqshrun_asimdshf_n"}, + {"011101"_b, "sqrshrun_asimdshf_n"}, + {"011110"_b, "ushll_asimdshf_l"}, + {"0x1000"_b, "sqshrun_asimdshf_n"}, + {"0x1001"_b, "sqrshrun_asimdshf_n"}, + {"0x1010"_b, "ushll_asimdshf_l"}, + }, + }, + + { "_yppszx", + {23, 22, 10}, + { {"100"_b, "umlslb_z_zzzi_s"}, + {"101"_b, "umlslt_z_zzzi_s"}, + {"110"_b, "umlslb_z_zzzi_d"}, + {"111"_b, "umlslt_z_zzzi_d"}, + }, + }, + + { "_yppyky", + {30, 13}, + { {"00"_b, "_gyrjrm"}, + {"01"_b, "_hhkqtn"}, + {"10"_b, "_jgmlpk"}, + {"11"_b, "_tzzssm"}, + }, + }, + + { "_ypsgqz", + {18, 17}, + { {"0x"_b, "ld4_asisdlsop_sx4_r4s"}, + {"10"_b, "ld4_asisdlsop_sx4_r4s"}, + {"11"_b, "ld4_asisdlsop_s4_i4s"}, + }, + }, + + { "_yptgjg", + {4}, + { {"0"_b, "ccmn_32_condcmp_reg"}, + }, + }, + + { "_yptvyx", + {30, 23, 22}, + { {"000"_b, "strb_32_ldst_pos"}, + {"001"_b, "ldrb_32_ldst_pos"}, + {"010"_b, "ldrsb_64_ldst_pos"}, + {"011"_b, "ldrsb_32_ldst_pos"}, + {"100"_b, "strh_32_ldst_pos"}, + {"101"_b, "ldrh_32_ldst_pos"}, + {"110"_b, "ldrsh_64_ldst_pos"}, + {"111"_b, "ldrsh_32_ldst_pos"}, + }, + }, + + { "_ypzllm", + {23, 22, 4}, + { {"000"_b, "fccmp_s_floatccmp"}, + {"001"_b, "fccmpe_s_floatccmp"}, + {"010"_b, "fccmp_d_floatccmp"}, + {"011"_b, "fccmpe_d_floatccmp"}, + {"110"_b, "fccmp_h_floatccmp"}, + {"111"_b, "fccmpe_h_floatccmp"}, + }, + }, + + { "_yqvqtx", + {30, 23, 22, 20, 13}, + { {"00001"_b, "ld1rob_z_p_bi_u8"}, + {"000x0"_b, "ld1rob_z_p_br_contiguous"}, + {"01001"_b, "ld1roh_z_p_bi_u16"}, + {"010x0"_b, "ld1roh_z_p_br_contiguous"}, + }, + }, + + { "_yqxnzl", + {11, 10}, + { {"00"_b, "sqdmulh_z_zz"}, + {"01"_b, "sqrdmulh_z_zz"}, + }, + }, + + { "_yqzxvr", + {18, 17, 12}, + { {"000"_b, "ld3_asisdlso_d3_3d"}, + }, + }, + + { "_yrggjm", + {13, 12}, + { {"00"_b, "sshl_asisdsame_only"}, + {"01"_b, "srshl_asisdsame_only"}, + }, + }, + + { "_yrgzqr", + {23, 22, 20, 19, 17, 16, 13}, + { {"0000000"_b, "_ymvlzl"}, + {"0000001"_b, "_nzvlzt"}, + {"0100000"_b, "_zyhgnz"}, + {"0100001"_b, "_mntnlr"}, + {"100xxx0"_b, "st4_asisdlsep_r4_r"}, + {"100xxx1"_b, "st1_asisdlsep_r4_r4"}, + {"1010xx0"_b, "st4_asisdlsep_r4_r"}, + {"1010xx1"_b, "st1_asisdlsep_r4_r4"}, + {"10110x0"_b, "st4_asisdlsep_r4_r"}, + 
{"10110x1"_b, "st1_asisdlsep_r4_r4"}, + {"1011100"_b, "st4_asisdlsep_r4_r"}, + {"1011101"_b, "st1_asisdlsep_r4_r4"}, + {"1011110"_b, "_tshjsk"}, + {"1011111"_b, "_xnrrsy"}, + {"110xxx0"_b, "ld4_asisdlsep_r4_r"}, + {"110xxx1"_b, "ld1_asisdlsep_r4_r4"}, + {"1110xx0"_b, "ld4_asisdlsep_r4_r"}, + {"1110xx1"_b, "ld1_asisdlsep_r4_r4"}, + {"11110x0"_b, "ld4_asisdlsep_r4_r"}, + {"11110x1"_b, "ld1_asisdlsep_r4_r4"}, + {"1111100"_b, "ld4_asisdlsep_r4_r"}, + {"1111101"_b, "ld1_asisdlsep_r4_r4"}, + {"1111110"_b, "_hjvkkq"}, + {"1111111"_b, "_mthlnv"}, + }, + }, + + { "_yrjqql", + {30}, + { {"0"_b, "cbz_32_compbranch"}, + }, + }, + + { "_yrmmmg", + {4}, + { {"0"_b, "cmphs_p_p_zi"}, + {"1"_b, "cmphi_p_p_zi"}, + }, + }, + + { "_yrypnt", + {30, 23, 11, 10}, + { {"1001"_b, "_khrsgv"}, + }, + }, + + { "_yryygq", + {12}, + { {"0"_b, "ld3_asisdlsop_dx3_r3d"}, + }, + }, + + { "_yskyrg", + {20, 19, 18, 17, 16}, + { {"00000"_b, "sqneg_asisdmisc_r"}, + }, + }, + + { "_ysspjx", + {13, 12}, + { {"00"_b, "sdiv_64_dp_2src"}, + {"10"_b, "rorv_64_dp_2src"}, + }, + }, + + { "_yszjsm", + {12, 11, 10}, + { {"000"_b, "sdot_z_zzz"}, + {"001"_b, "udot_z_zzz"}, + {"010"_b, "sqdmlalbt_z_zzz"}, + {"011"_b, "sqdmlslbt_z_zzz"}, + {"1xx"_b, "cdot_z_zzz"}, + }, + }, + + { "_yszlqj", + {23, 22}, + { {"00"_b, "tbl_asimdtbl_l2_2"}, + }, + }, + + { "_ytkjxx", + {30, 23, 22, 13, 4}, + { {"00x0x"_b, "ld1w_z_p_bz_s_x32_scaled"}, + {"00x1x"_b, "ldff1w_z_p_bz_s_x32_scaled"}, + {"0100x"_b, "ldr_z_bi"}, + {"01100"_b, "prfw_i_p_bi_s"}, + {"01110"_b, "prfd_i_p_bi_s"}, + {"10x0x"_b, "ld1w_z_p_bz_d_x32_scaled"}, + {"10x1x"_b, "ldff1w_z_p_bz_d_x32_scaled"}, + {"11x0x"_b, "ld1d_z_p_bz_d_x32_scaled"}, + {"11x1x"_b, "ldff1d_z_p_bz_d_x32_scaled"}, + }, + }, + + { "_ytrmvz", + {23, 22, 20, 19, 18, 17, 16}, + { {"0111001"_b, "fcvtmu_asisdmiscfp16_r"}, + {"0x00001"_b, "fcvtmu_asisdmisc_r"}, + {"1111001"_b, "fcvtzu_asisdmiscfp16_r"}, + {"1x00001"_b, "fcvtzu_asisdmisc_r"}, + {"xx00000"_b, "neg_asisdmisc_r"}, + }, + }, + + { "_ytvtqn", + {30, 23, 22, 20, 13}, + { {"00001"_b, "ld1sh_z_p_bi_s64"}, + {"00011"_b, "ldnf1sh_z_p_bi_s64"}, + {"00101"_b, "ld1w_z_p_bi_u32"}, + {"00111"_b, "ldnf1w_z_p_bi_u32"}, + {"01001"_b, "ld1sb_z_p_bi_s64"}, + {"01011"_b, "ldnf1sb_z_p_bi_s64"}, + {"01101"_b, "ld1sb_z_p_bi_s16"}, + {"01111"_b, "ldnf1sb_z_p_bi_s16"}, + {"100x0"_b, "st1w_z_p_bz_d_x32_unscaled"}, + {"100x1"_b, "st1w_z_p_bz_d_64_unscaled"}, + {"101x0"_b, "st1w_z_p_bz_s_x32_unscaled"}, + {"101x1"_b, "st1w_z_p_ai_d"}, + {"110x0"_b, "st1d_z_p_bz_d_x32_unscaled"}, + {"110x1"_b, "st1d_z_p_bz_d_64_unscaled"}, + {"111x1"_b, "st1d_z_p_ai_d"}, + }, + }, + + { "_yvptvx", + {23, 12, 11, 10}, + { {"0000"_b, "sqshrnb_z_zi"}, + {"0001"_b, "sqshrnt_z_zi"}, + {"0010"_b, "sqrshrnb_z_zi"}, + {"0011"_b, "sqrshrnt_z_zi"}, + {"0100"_b, "uqshrnb_z_zi"}, + {"0101"_b, "uqshrnt_z_zi"}, + {"0110"_b, "uqrshrnb_z_zi"}, + {"0111"_b, "uqrshrnt_z_zi"}, + }, + }, + + { "_yvqnyq", + {23}, + { {"1"_b, "_vhlqpr"}, + }, + }, + + { "_yvxkhv", + {30}, + { {"1"_b, "_ngvqhs"}, + }, + }, + + { "_yvyhlh", + {23, 22, 12, 11, 10}, + { {"0x000"_b, "fmul_z_zzi_h"}, + {"10000"_b, "fmul_z_zzi_s"}, + {"11000"_b, "fmul_z_zzi_d"}, + }, + }, + + { "_yxgmrs", + {23}, + { {"0"_b, "fmaxnmp_asimdsame_only"}, + {"1"_b, "fminnmp_asimdsame_only"}, + }, + }, + + { "_yxnslx", + {23, 22}, + { {"00"_b, "adr_z_az_d_s32_scaled"}, + {"01"_b, "adr_z_az_d_u32_scaled"}, + {"1x"_b, "adr_z_az_sd_same_scaled"}, + }, + }, + + { "_yxvttm", + {30}, + { {"0"_b, "bl_only_branch_imm"}, + }, + }, + + { "_yykhjv", + {23, 22, 13, 12, 11, 10}, + { 
{"000110"_b, "smmla_z_zzz"}, + {"0x1000"_b, "sshllb_z_zi"}, + {"0x1001"_b, "sshllt_z_zi"}, + {"0x1010"_b, "ushllb_z_zi"}, + {"0x1011"_b, "ushllt_z_zi"}, + {"100110"_b, "usmmla_z_zzz"}, + {"110110"_b, "ummla_z_zzz"}, + {"xx0000"_b, "saddlbt_z_zz"}, + {"xx0010"_b, "ssublbt_z_zz"}, + {"xx0011"_b, "ssubltb_z_zz"}, + {"xx0100"_b, "eorbt_z_zz"}, + {"xx0101"_b, "eortb_z_zz"}, + {"xx1100"_b, "bext_z_zz"}, + {"xx1101"_b, "bdep_z_zz"}, + {"xx1110"_b, "bgrp_z_zz"}, + }, + }, + + { "_yynmjl", + {4}, + { {"0"_b, "orrs_p_p_pp_z"}, + {"1"_b, "orns_p_p_pp_z"}, + }, + }, + + { "_yyrkmn", + {17, 16, 9, 8, 7, 6, 5}, + { {"0000000"_b, "aesmc_z_z"}, + {"10xxxxx"_b, "aese_z_zz"}, + {"11xxxxx"_b, "sm4e_z_zz"}, + }, + }, + + { "_yysxts", + {23, 22, 13, 12, 11, 10}, + { {"0001x0"_b, "fmla_asimdelem_rh_h"}, + {"0x0001"_b, "sshr_asimdshf_r"}, + {"0x0101"_b, "ssra_asimdshf_r"}, + {"0x1001"_b, "srshr_asimdshf_r"}, + {"0x1101"_b, "srsra_asimdshf_r"}, + {"1000x0"_b, "fmlal_asimdelem_lh"}, + {"1x01x0"_b, "fmla_asimdelem_r_sd"}, + {"xx10x0"_b, "smlal_asimdelem_l"}, + {"xx11x0"_b, "sqdmlal_asimdelem_l"}, + }, + }, + + { "_yytvxh", + {30, 23, 22, 13, 4}, + { {"00000"_b, "prfw_i_p_br_s"}, + {"00010"_b, "prfw_i_p_ai_s"}, + {"0010x"_b, "ld1rw_z_p_bi_u32"}, + {"0011x"_b, "ld1rw_z_p_bi_u64"}, + {"01000"_b, "prfd_i_p_br_s"}, + {"01010"_b, "prfd_i_p_ai_s"}, + {"0110x"_b, "ld1rsb_z_p_bi_s16"}, + {"0111x"_b, "ld1rd_z_p_bi_u64"}, + {"1000x"_b, "ldnt1w_z_p_ar_d_64_unscaled"}, + {"10010"_b, "prfw_i_p_ai_d"}, + {"1010x"_b, "ld1w_z_p_bz_d_64_unscaled"}, + {"1011x"_b, "ldff1w_z_p_bz_d_64_unscaled"}, + {"1100x"_b, "ldnt1d_z_p_ar_d_64_unscaled"}, + {"11010"_b, "prfd_i_p_ai_d"}, + {"1110x"_b, "ld1d_z_p_bz_d_64_unscaled"}, + {"1111x"_b, "ldff1d_z_p_bz_d_64_unscaled"}, + }, + }, + + { "_yyvjqv", + {23}, + { {"0"_b, "fmax_asimdsame_only"}, + {"1"_b, "fmin_asimdsame_only"}, + }, + }, + + { "_yyvnrp", + {23, 22}, + { {"00"_b, "eor_asimdsame_only"}, + {"01"_b, "bsl_asimdsame_only"}, + {"10"_b, "bit_asimdsame_only"}, + {"11"_b, "bif_asimdsame_only"}, + }, + }, + + { "_yyyshx", + {30, 13, 4}, + { {"000"_b, "cmphs_p_p_zz"}, + {"001"_b, "cmphi_p_p_zz"}, + {"010"_b, "cmpeq_p_p_zw"}, + {"011"_b, "cmpne_p_p_zw"}, + {"1xx"_b, "fcmla_z_p_zzz"}, + }, + }, + + { "_yyyxhk", + {18}, + { {"0"_b, "ld1_asisdlsep_r2_r2"}, + {"1"_b, "ld1_asisdlsep_i2_i2"}, + }, + }, + + { "_yzgthp", + {18, 17}, + { {"0x"_b, "ld1_asisdlsop_sx1_r1s"}, + {"10"_b, "ld1_asisdlsop_sx1_r1s"}, + {"11"_b, "ld1_asisdlsop_s1_i1s"}, + }, + }, + + { "_yzlnrs", + {20, 19, 18, 17, 16}, + { {"00000"_b, "usqadd_asisdmisc_r"}, + }, + }, + + { "_yzmjhn", + {4}, + { {"0"_b, "eors_p_p_pp_z"}, + }, + }, + + { "_yzpszn", + {30}, + { {"0"_b, "ldr_s_loadlit"}, + {"1"_b, "ldr_d_loadlit"}, + }, + }, + + { "_yzqhtj", + {30, 23, 22, 11, 10}, + { {"00000"_b, "_rxsqhv"}, + {"01000"_b, "csel_64_condsel"}, + {"01001"_b, "csinc_64_condsel"}, + {"01100"_b, "_zqxkxg"}, + {"01101"_b, "_rvjkyp"}, + {"01110"_b, "_jxgpgg"}, + {"01111"_b, "_ysspjx"}, + {"10000"_b, "_pjvkjz"}, + {"11000"_b, "csinv_64_condsel"}, + {"11001"_b, "csneg_64_condsel"}, + {"11100"_b, "_rmyzpp"}, + {"11101"_b, "_npjnlv"}, + {"11110"_b, "_yhhsns"}, + {"11111"_b, "_vllmnt"}, + }, + }, + + { "_yzxjnk", + {9, 8, 7, 6, 5}, + { {"11111"_b, "paciza_64z_dp_1src"}, + }, + }, + + { "_zghtll", + {22, 20, 19, 18, 17, 16, 13, 12}, + { {"01111100"_b, "ldapr_32l_memop"}, + }, + }, + + { "_zgljvg", + {30, 23, 22, 13, 12, 11, 10}, + { {"0000000"_b, "ldadd_32_memop"}, + {"0000100"_b, "ldclr_32_memop"}, + {"0001000"_b, "ldeor_32_memop"}, + {"0001100"_b, 
"ldset_32_memop"}, + {"000xx10"_b, "str_32_ldst_regoff"}, + {"0010000"_b, "ldaddl_32_memop"}, + {"0010100"_b, "ldclrl_32_memop"}, + {"0011000"_b, "ldeorl_32_memop"}, + {"0011100"_b, "ldsetl_32_memop"}, + {"001xx10"_b, "ldr_32_ldst_regoff"}, + {"0100000"_b, "ldadda_32_memop"}, + {"0100100"_b, "ldclra_32_memop"}, + {"0101000"_b, "ldeora_32_memop"}, + {"0101100"_b, "ldseta_32_memop"}, + {"010xx10"_b, "ldrsw_64_ldst_regoff"}, + {"0110000"_b, "ldaddal_32_memop"}, + {"0110100"_b, "ldclral_32_memop"}, + {"0111000"_b, "ldeoral_32_memop"}, + {"0111100"_b, "ldsetal_32_memop"}, + {"1000000"_b, "ldadd_64_memop"}, + {"1000100"_b, "ldclr_64_memop"}, + {"1001000"_b, "ldeor_64_memop"}, + {"1001100"_b, "ldset_64_memop"}, + {"100xx10"_b, "str_64_ldst_regoff"}, + {"1010000"_b, "ldaddl_64_memop"}, + {"1010100"_b, "ldclrl_64_memop"}, + {"1011000"_b, "ldeorl_64_memop"}, + {"1011100"_b, "ldsetl_64_memop"}, + {"101xx10"_b, "ldr_64_ldst_regoff"}, + {"10xxx01"_b, "ldraa_64_ldst_pac"}, + {"10xxx11"_b, "ldraa_64w_ldst_pac"}, + {"1100000"_b, "ldadda_64_memop"}, + {"1100100"_b, "ldclra_64_memop"}, + {"1101000"_b, "ldeora_64_memop"}, + {"1101100"_b, "ldseta_64_memop"}, + {"1110000"_b, "ldaddal_64_memop"}, + {"1110100"_b, "ldclral_64_memop"}, + {"1111000"_b, "ldeoral_64_memop"}, + {"1111100"_b, "ldsetal_64_memop"}, + {"11xxx01"_b, "ldrab_64_ldst_pac"}, + {"11xxx11"_b, "ldrab_64w_ldst_pac"}, + }, + }, + + { "_zjjxjl", + {9}, + { {"0"_b, "pnext_p_p_p"}, + }, + }, + + { "_zjqssg", + {23, 22, 20, 19, 17, 16, 13}, + { {"0000000"_b, "_jqsjtj"}, + {"0000001"_b, "_rspmth"}, + {"0100000"_b, "_txkmvh"}, + {"0100001"_b, "_ngnxrx"}, + {"100xxx0"_b, "st2_asisdlsep_r2_r"}, + {"100xxx1"_b, "st1_asisdlsep_r2_r2"}, + {"1010xx0"_b, "st2_asisdlsep_r2_r"}, + {"1010xx1"_b, "st1_asisdlsep_r2_r2"}, + {"10110x0"_b, "st2_asisdlsep_r2_r"}, + {"10110x1"_b, "st1_asisdlsep_r2_r2"}, + {"1011100"_b, "st2_asisdlsep_r2_r"}, + {"1011101"_b, "st1_asisdlsep_r2_r2"}, + {"1011110"_b, "_zyzsql"}, + {"1011111"_b, "_kqsqly"}, + {"110xxx0"_b, "ld2_asisdlsep_r2_r"}, + {"110xxx1"_b, "ld1_asisdlsep_r2_r2"}, + {"1110xx0"_b, "ld2_asisdlsep_r2_r"}, + {"1110xx1"_b, "ld1_asisdlsep_r2_r2"}, + {"11110x0"_b, "ld2_asisdlsep_r2_r"}, + {"11110x1"_b, "ld1_asisdlsep_r2_r2"}, + {"1111100"_b, "ld2_asisdlsep_r2_r"}, + {"1111101"_b, "ld1_asisdlsep_r2_r2"}, + {"1111110"_b, "_xnrxym"}, + {"1111111"_b, "_yyyxhk"}, + }, + }, + + { "_zjrsrx", + {30, 23}, + { {"00"_b, "add_64_addsub_imm"}, + {"10"_b, "sub_64_addsub_imm"}, + }, + }, + + { "_zjzmvh", + {23, 22, 20, 19, 18, 17, 16}, + { {"0001010"_b, "fcvtx_z_p_z_d2s"}, + {"0011xx0"_b, "flogb_z_p_z"}, + {"0110010"_b, "scvtf_z_p_z_h2fp16"}, + {"0110011"_b, "ucvtf_z_p_z_h2fp16"}, + {"0110100"_b, "scvtf_z_p_z_w2fp16"}, + {"0110101"_b, "ucvtf_z_p_z_w2fp16"}, + {"0110110"_b, "scvtf_z_p_z_x2fp16"}, + {"0110111"_b, "ucvtf_z_p_z_x2fp16"}, + {"0111010"_b, "fcvtzs_z_p_z_fp162h"}, + {"0111011"_b, "fcvtzu_z_p_z_fp162h"}, + {"0111100"_b, "fcvtzs_z_p_z_fp162w"}, + {"0111101"_b, "fcvtzu_z_p_z_fp162w"}, + {"0111110"_b, "fcvtzs_z_p_z_fp162x"}, + {"0111111"_b, "fcvtzu_z_p_z_fp162x"}, + {"1001000"_b, "fcvt_z_p_z_s2h"}, + {"1001001"_b, "fcvt_z_p_z_h2s"}, + {"1001010"_b, "bfcvt_z_p_z_s2bf"}, + {"1010100"_b, "scvtf_z_p_z_w2s"}, + {"1010101"_b, "ucvtf_z_p_z_w2s"}, + {"1011100"_b, "fcvtzs_z_p_z_s2w"}, + {"1011101"_b, "fcvtzu_z_p_z_s2w"}, + {"1101000"_b, "fcvt_z_p_z_d2h"}, + {"1101001"_b, "fcvt_z_p_z_h2d"}, + {"1101010"_b, "fcvt_z_p_z_d2s"}, + {"1101011"_b, "fcvt_z_p_z_s2d"}, + {"1110000"_b, "scvtf_z_p_z_w2d"}, + {"1110001"_b, "ucvtf_z_p_z_w2d"}, + {"1110100"_b, 
"scvtf_z_p_z_x2s"}, + {"1110101"_b, "ucvtf_z_p_z_x2s"}, + {"1110110"_b, "scvtf_z_p_z_x2d"}, + {"1110111"_b, "ucvtf_z_p_z_x2d"}, + {"1111000"_b, "fcvtzs_z_p_z_d2w"}, + {"1111001"_b, "fcvtzu_z_p_z_d2w"}, + {"1111100"_b, "fcvtzs_z_p_z_s2x"}, + {"1111101"_b, "fcvtzu_z_p_z_s2x"}, + {"1111110"_b, "fcvtzs_z_p_z_d2x"}, + {"1111111"_b, "fcvtzu_z_p_z_d2x"}, + {"xx00000"_b, "frintn_z_p_z"}, + {"xx00001"_b, "frintp_z_p_z"}, + {"xx00010"_b, "frintm_z_p_z"}, + {"xx00011"_b, "frintz_z_p_z"}, + {"xx00100"_b, "frinta_z_p_z"}, + {"xx00110"_b, "frintx_z_p_z"}, + {"xx00111"_b, "frinti_z_p_z"}, + {"xx01100"_b, "frecpx_z_p_z"}, + {"xx01101"_b, "fsqrt_z_p_z"}, + }, + }, + + { "_zkhjsp", + {11}, + { {"0"_b, "sqdmulh_z_zzi_h"}, + {"1"_b, "mul_z_zzi_h"}, + }, + }, + + { "_zlhlqy", + {23, 22, 20, 19, 18, 17, 16}, + { {"0111001"_b, "frintm_asimdmiscfp16_r"}, + {"0x00001"_b, "frintm_asimdmisc_r"}, + {"1111001"_b, "frintz_asimdmiscfp16_r"}, + {"1x00001"_b, "frintz_asimdmisc_r"}, + {"xx00000"_b, "cmeq_asimdmisc_z"}, + }, + }, + + { "_zlkygr", + {13, 12}, + { {"00"_b, "cpyfe_cpy_memcms"}, + {"01"_b, "cpyfewt_cpy_memcms"}, + {"10"_b, "cpyfert_cpy_memcms"}, + {"11"_b, "cpyfet_cpy_memcms"}, + }, + }, + + { "_zlmyjt", + {23, 22}, + { {"00"_b, "fcsel_s_floatsel"}, + {"01"_b, "fcsel_d_floatsel"}, + {"11"_b, "fcsel_h_floatsel"}, + }, + }, + + { "_zlqnks", + {23, 22, 20, 19, 17, 16, 13}, + { {"0000000"_b, "_kmqlmz"}, + {"0000001"_b, "_tklxhy"}, + {"0100000"_b, "_mtshvn"}, + {"0100001"_b, "_gzzsgh"}, + {"100xxx0"_b, "st1_asisdlsop_bx1_r1b"}, + {"100xxx1"_b, "st3_asisdlsop_bx3_r3b"}, + {"1010xx0"_b, "st1_asisdlsop_bx1_r1b"}, + {"1010xx1"_b, "st3_asisdlsop_bx3_r3b"}, + {"10110x0"_b, "st1_asisdlsop_bx1_r1b"}, + {"10110x1"_b, "st3_asisdlsop_bx3_r3b"}, + {"1011100"_b, "st1_asisdlsop_bx1_r1b"}, + {"1011101"_b, "st3_asisdlsop_bx3_r3b"}, + {"1011110"_b, "_tvrlgz"}, + {"1011111"_b, "_nkmkvz"}, + {"110xxx0"_b, "ld1_asisdlsop_bx1_r1b"}, + {"110xxx1"_b, "ld3_asisdlsop_bx3_r3b"}, + {"1110xx0"_b, "ld1_asisdlsop_bx1_r1b"}, + {"1110xx1"_b, "ld3_asisdlsop_bx3_r3b"}, + {"11110x0"_b, "ld1_asisdlsop_bx1_r1b"}, + {"11110x1"_b, "ld3_asisdlsop_bx3_r3b"}, + {"1111100"_b, "ld1_asisdlsop_bx1_r1b"}, + {"1111101"_b, "ld3_asisdlsop_bx3_r3b"}, + {"1111110"_b, "_kkpxth"}, + {"1111111"_b, "_rlylxh"}, + }, + }, + + { "_zlvjrh", + {20, 19, 18, 17, 16}, + { {"11111"_b, "ldxr_lr32_ldstexcl"}, + }, + }, + + { "_zmhqmr", + {13, 12}, + { {"10"_b, "lsrv_32_dp_2src"}, + }, + }, + + { "_zmkntq", + {18}, + { {"0"_b, "ld1_asisdlsop_hx1_r1h"}, + {"1"_b, "ld1_asisdlsop_h1_i1h"}, + }, + }, + + { "_zmkqxl", + {23, 10}, + { {"00"_b, "adclb_z_zzz"}, + {"01"_b, "adclt_z_zzz"}, + {"10"_b, "sbclb_z_zzz"}, + {"11"_b, "sbclt_z_zzz"}, + }, + }, + + { "_zmrhxx", + {30, 23, 22}, + { {"000"_b, "smov_asimdins_w_w"}, + {"100"_b, "smov_asimdins_x_x"}, + }, + }, + + { "_zmtkvx", + {13, 10}, + { {"00"_b, "_rhpmjz"}, + }, + }, + + { "_zpjzst", + {23, 22, 20, 19, 18, 17, 16}, + { {"0111001"_b, "fcvtnu_asimdmiscfp16_r"}, + {"0x00001"_b, "fcvtnu_asimdmisc_r"}, + {"1111001"_b, "fcvtpu_asimdmiscfp16_r"}, + {"1x00001"_b, "fcvtpu_asimdmisc_r"}, + {"xx10000"_b, "umaxv_asimdall_only"}, + {"xx10001"_b, "uminv_asimdall_only"}, + }, + }, + + { "_zprgxt", + {18, 17, 12}, + { {"0x0"_b, "st4_asisdlsop_dx4_r4d"}, + {"100"_b, "st4_asisdlsop_dx4_r4d"}, + {"110"_b, "st4_asisdlsop_d4_i4d"}, + }, + }, + + { "_zpxrnm", + {30, 23, 22}, + { {"110"_b, "xar_vvv2_crypto3_imm6"}, + }, + }, + + { "_zqhhlq", + {20, 19, 18, 17, 16}, + { {"11111"_b, "stllrh_sl32_ldstexcl"}, + }, + }, + + { "_zqjgzz", + {30, 23, 22}, + { 
{"000"_b, "add_64_addsub_ext"}, + {"100"_b, "sub_64_addsub_ext"}, + }, + }, + + { "_zqlzzp", + {2, 1}, + { {"11"_b, "braaz_64_branch_reg"}, + }, + }, + + { "_zqmrhp", + {23, 22, 4, 3, 2, 1, 0}, + { {"0000000"_b, "wrffr_f_p"}, + }, + }, + + { "_zqmvqs", + {23, 22, 20, 19, 16, 13, 12}, + { {"0111110"_b, "fcvtns_asisdmiscfp16_r"}, + {"0111111"_b, "fcvtms_asisdmiscfp16_r"}, + {"0x00110"_b, "fcvtns_asisdmisc_r"}, + {"0x00111"_b, "fcvtms_asisdmisc_r"}, + {"1111110"_b, "fcvtps_asisdmiscfp16_r"}, + {"1111111"_b, "fcvtzs_asisdmiscfp16_r"}, + {"1x00110"_b, "fcvtps_asisdmisc_r"}, + {"1x00111"_b, "fcvtzs_asisdmisc_r"}, + {"xx00000"_b, "cmgt_asisdmisc_z"}, + {"xx00001"_b, "cmeq_asisdmisc_z"}, + {"xx00010"_b, "cmlt_asisdmisc_z"}, + {"xx00011"_b, "abs_asisdmisc_r"}, + {"xx10111"_b, "addp_asisdpair_only"}, + }, + }, + + { "_zqxkxg", + {13, 12}, + { {"00"_b, "subp_64s_dp_2src"}, + {"01"_b, "irg_64i_dp_2src"}, + {"10"_b, "lslv_64_dp_2src"}, + {"11"_b, "pacga_64p_dp_2src"}, + }, + }, + + { "_zrmgjx", + {30, 23, 22, 13, 4}, + { {"01000"_b, "ldr_p_bi"}, + {"01100"_b, "prfb_i_p_bi_s"}, + {"01110"_b, "prfh_i_p_bi_s"}, + {"10x0x"_b, "ld1sw_z_p_bz_d_x32_unscaled"}, + {"10x1x"_b, "ldff1sw_z_p_bz_d_x32_unscaled"}, + }, + }, + + { "_zrpzss", + {30, 23, 22, 13, 12, 11, 10}, + { {"0000000"_b, "swpp_128_memop_128"}, + {"0000100"_b, "rcwclrp_128_memop_128"}, + {"0001000"_b, "rcwswpp_128_memop_128"}, + {"0001100"_b, "rcwsetp_128_memop_128"}, + {"0010000"_b, "swppl_128_memop_128"}, + {"0010100"_b, "rcwclrpl_128_memop_128"}, + {"0011000"_b, "rcwswppl_128_memop_128"}, + {"0011100"_b, "rcwsetpl_128_memop_128"}, + {"0100000"_b, "swppa_128_memop_128"}, + {"0100100"_b, "rcwclrpa_128_memop_128"}, + {"0101000"_b, "rcwswppa_128_memop_128"}, + {"0101100"_b, "rcwsetpa_128_memop_128"}, + {"0110000"_b, "swppal_128_memop_128"}, + {"0110100"_b, "rcwclrpal_128_memop_128"}, + {"0111000"_b, "rcwswppal_128_memop_128"}, + {"0111100"_b, "rcwsetpal_128_memop_128"}, + {"1000100"_b, "rcwsclrp_128_memop_128"}, + {"1001000"_b, "rcwsswpp_128_memop_128"}, + {"1001100"_b, "rcwssetp_128_memop_128"}, + {"1010100"_b, "rcwsclrpl_128_memop_128"}, + {"1011000"_b, "rcwsswppl_128_memop_128"}, + {"1011100"_b, "rcwssetpl_128_memop_128"}, + {"1100100"_b, "rcwsclrpa_128_memop_128"}, + {"1101000"_b, "rcwsswppa_128_memop_128"}, + {"1101100"_b, "rcwssetpa_128_memop_128"}, + {"1110100"_b, "rcwsclrpal_128_memop_128"}, + {"1111000"_b, "rcwsswppal_128_memop_128"}, + {"1111100"_b, "rcwssetpal_128_memop_128"}, + }, + }, + + { "_zrqtgx", + {30}, + { {"0"_b, "bl_only_branch_imm"}, + {"1"_b, "_rxnnvv"}, + }, + }, + + { "_zrxhzq", + {19}, + { {"0"_b, "_kjsrkm"}, + {"1"_b, "sys_cr_systeminstrs"}, + }, + }, + + { "_zryvjk", + {20, 9, 4}, + { {"000"_b, "trn2_p_pp"}, + }, + }, + + { "_zsgpsn", + {20, 19, 18, 17, 16, 13, 12, 3, 2, 1, 0}, + { {"00000001101"_b, "setf16_only_setf"}, + }, + }, + + { "_zsltyl", + {22, 20, 11}, + { {"000"_b, "uqincw_r_rs_uw"}, + {"001"_b, "uqdecw_r_rs_uw"}, + {"010"_b, "uqincw_r_rs_x"}, + {"011"_b, "uqdecw_r_rs_x"}, + {"100"_b, "uqincd_r_rs_uw"}, + {"101"_b, "uqdecd_r_rs_uw"}, + {"110"_b, "uqincd_r_rs_x"}, + {"111"_b, "uqdecd_r_rs_x"}, + }, + }, + + { "_zspprz", + {20, 19, 17, 16, 12, 11, 10}, + { {"0000xxx"_b, "_srnkng"}, + {"0001xxx"_b, "_thkkgx"}, + {"0010xxx"_b, "_grgrpt"}, + {"0011xxx"_b, "_rkskkv"}, + {"0110100"_b, "_rvsylx"}, + {"0111100"_b, "_plymgg"}, + {"1000xxx"_b, "_prytjs"}, + {"1001xxx"_b, "_rrvltp"}, + {"1010xxx"_b, "_syrmmr"}, + {"1011xxx"_b, "_lnkrzt"}, + {"1100xxx"_b, "_smmrpj"}, + }, + }, + + { "_ztjjnh", + {30, 23, 22}, + { {"100"_b, 
"eor3_vvv16_crypto4"}, + {"101"_b, "sm3ss1_vvv4_crypto4"}, + {"110"_b, "xar_vvv2_crypto3_imm6"}, + }, + }, + + { "_ztlysk", + {23, 22, 20, 19, 18, 17, 16}, + { {"0010000"_b, "fmaxnmv_asimdall_only_h"}, + {"0111001"_b, "fcvtas_asimdmiscfp16_r"}, + {"0x00001"_b, "fcvtas_asimdmisc_r"}, + {"1010000"_b, "fminnmv_asimdall_only_h"}, + {"1111000"_b, "fcmgt_asimdmiscfp16_fz"}, + {"1x00000"_b, "fcmgt_asimdmisc_fz"}, + {"1x00001"_b, "urecpe_asimdmisc_r"}, + }, + }, + + { "_ztpryr", + {13}, + { {"0"_b, "fmad_z_p_zzz"}, + {"1"_b, "fmsb_z_p_zzz"}, + }, + }, + + { "_ztyqrj", + {30, 23, 13, 12, 10}, + { {"00000"_b, "_jmvgsp"}, + {"00001"_b, "_jkkqvy"}, + {"00100"_b, "_nkxhsy"}, + {"00101"_b, "_gshrzq"}, + {"00110"_b, "_zvjrlz"}, + {"00111"_b, "_ntjpsx"}, + {"01000"_b, "_mqrzzk"}, + {"01001"_b, "_jqxqql"}, + {"01100"_b, "_xznsqh"}, + {"01101"_b, "_qvlnll"}, + {"01110"_b, "_kvnqhn"}, + {"01111"_b, "_zsltyl"}, + {"10110"_b, "_zkhjsp"}, + {"10111"_b, "_hvyjnk"}, + {"11000"_b, "_sjvhlq"}, + {"11001"_b, "_xhktsk"}, + {"11010"_b, "_rtpztp"}, + {"11011"_b, "_rznrqt"}, + {"11100"_b, "_kyspnn"}, + {"11101"_b, "_qljhnp"}, + {"11110"_b, "_pxyrpm"}, + {"11111"_b, "_khjvqq"}, + }, + }, + + { "_zvjrlz", + {22, 20, 11}, + { {"000"_b, "sqincb_r_rs_sx"}, + {"001"_b, "sqdecb_r_rs_sx"}, + {"010"_b, "sqincb_r_rs_x"}, + {"011"_b, "sqdecb_r_rs_x"}, + {"100"_b, "sqinch_r_rs_sx"}, + {"101"_b, "sqdech_r_rs_sx"}, + {"110"_b, "sqinch_r_rs_x"}, + {"111"_b, "sqdech_r_rs_x"}, + }, + }, + + { "_zvvvhr", + {13, 12, 11, 10}, + { {"0000"_b, "smlal_asimddiff_l"}, + {"0001"_b, "add_asimdsame_only"}, + {"0010"_b, "_njnsqm"}, + {"0011"_b, "cmtst_asimdsame_only"}, + {"0100"_b, "sqdmlal_asimddiff_l"}, + {"0101"_b, "mla_asimdsame_only"}, + {"0110"_b, "_zlhlqy"}, + {"0111"_b, "mul_asimdsame_only"}, + {"1000"_b, "smlsl_asimddiff_l"}, + {"1001"_b, "smaxp_asimdsame_only"}, + {"1010"_b, "_nknntn"}, + {"1011"_b, "sminp_asimdsame_only"}, + {"1100"_b, "sqdmlsl_asimddiff_l"}, + {"1101"_b, "sqdmulh_asimdsame_only"}, + {"1110"_b, "_lyzhrq"}, + {"1111"_b, "addp_asimdsame_only"}, + }, + }, + + { "_zvxxjk", + {30, 23, 22, 13, 12, 11, 10}, + { {"000xxxx"_b, "madd_64a_dp_3src"}, + {"0011111"_b, "smulh_64_dp_3src"}, + {"0111111"_b, "umulh_64_dp_3src"}, + }, + }, + + { "_zvynrg", + {19}, + { {"0"_b, "_hnkyxy"}, + {"1"_b, "sys_cr_systeminstrs"}, + }, + }, + + { "_zxjkmj", + {22, 4, 3}, + { {"00x"_b, "prfm_p_ldst_regoff"}, + {"010"_b, "prfm_p_ldst_regoff"}, + {"011"_b, "rprfm_r_ldst_regoff"}, + }, + }, + + { "_zxklzp", + {12}, + { {"0"_b, "ld1_asisdlsop_dx1_r1d"}, + }, + }, + + { "_zxtzmv", + {30, 23, 22, 13}, + { {"0010"_b, "ld1rsh_z_p_bi_s64"}, + {"0011"_b, "ld1rsh_z_p_bi_s32"}, + {"0110"_b, "ld1rsb_z_p_bi_s64"}, + {"0111"_b, "ld1rsb_z_p_bi_s32"}, + {"1000"_b, "ld1sw_z_p_ai_d"}, + {"1001"_b, "ldff1sw_z_p_ai_d"}, + {"1010"_b, "ld1sw_z_p_bz_d_64_scaled"}, + {"1011"_b, "ldff1sw_z_p_bz_d_64_scaled"}, + }, + }, + + { "_zyhgnz", + {18}, + { {"0"_b, "ld4_asisdlse_r4"}, + }, + }, + + { "_zyjjgs", + {23, 22, 20, 19, 18}, + { {"00000"_b, "orr_z_zi"}, + {"01000"_b, "eor_z_zi"}, + {"10000"_b, "and_z_zi"}, + {"11000"_b, "dupm_z_i"}, + {"xx1xx"_b, "cpy_z_o_i"}, + }, + }, + + { "_zyxnpz", + {13, 12, 11, 10}, + { {"1111"_b, "casa_c32_ldstexcl"}, + }, + }, + + { "_zyzsql", + {18}, + { {"0"_b, "st2_asisdlsep_r2_r"}, + {"1"_b, "st2_asisdlsep_i2_i"}, + }, + }, + + { "_zzhnxv", + {30, 23, 22, 20, 19}, + { {"0xxxx"_b, "bl_only_branch_imm"}, + {"10001"_b, "sysl_rc_systeminstrs"}, + {"1001x"_b, "mrs_rs_systemmove"}, + {"1011x"_b, "mrrs_rs_systemmovepr"}, + }, + }, + + { "_zzkgsk", + {20, 
19, 18, 17, 16}, + { {"11111"_b, "ldaxr_lr32_ldstexcl"}, + }, + }, + + { "_zztypv", + {6, 5}, + { {"00"_b, "cfinv_m_pstate"}, + {"01"_b, "xaflag_m_pstate"}, + {"10"_b, "axflag_m_pstate"}, + }, + }, + + { "_zzvxvh", + {23, 22, 11, 10}, + { {"0001"_b, "pmul_z_zz"}, + {"xx00"_b, "mul_z_zz"}, + {"xx10"_b, "smulh_z_zz"}, + {"xx11"_b, "umulh_z_zz"}, + }, + }, + + { "Root", + {31, 29, 28, 27, 26, 25, 24, 21, 15, 14}, + { {"00000000xx"_b, "_nqmnzp"}, + {"0000100000"_b, "_rzzxsn"}, + {"0000100001"_b, "_xvppmm"}, + {"0000100010"_b, "_ptsjnr"}, + {"0000100011"_b, "_nlpmvl"}, + {"0000100100"_b, "_ljljkv"}, + {"0000100101"_b, "_kktglv"}, + {"0000100110"_b, "_ppnssm"}, + {"0000100111"_b, "_ztyqrj"}, + {"0000101000"_b, "_rnqtmt"}, + {"0000101001"_b, "_rlpmrx"}, + {"0000101010"_b, "_mpvsng"}, + {"0000101011"_b, "_qlxksl"}, + {"0000101100"_b, "_mhrjvp"}, + {"0000101101"_b, "_pgjjsz"}, + {"0000101110"_b, "_yppyky"}, + {"0000101111"_b, "_yjmngt"}, + {"0001000001"_b, "_thqgrq"}, + {"0001000011"_b, "_hkgzsh"}, + {"0001000101"_b, "_ktpxrr"}, + {"0001000111"_b, "_stlgrr"}, + {"00010100xx"_b, "_vtyqhh"}, + {"00010101xx"_b, "_tytzpq"}, + {"00010110xx"_b, "_tqlsyy"}, + {"00010111xx"_b, "_htkpks"}, + {"0001100000"_b, "_myvqtn"}, + {"0001100001"_b, "_nmqskh"}, + {"0001100010"_b, "_xrkzpn"}, + {"0001101000"_b, "_zlqnks"}, + {"0001101001"_b, "_vtllgt"}, + {"0001101010"_b, "_ghqqzy"}, + {"0001101011"_b, "_xrskrk"}, + {"0001101100"_b, "_rzpqmm"}, + {"0001101101"_b, "_pyvvqx"}, + {"0001101110"_b, "_shgktt"}, + {"0001101111"_b, "_szylpy"}, + {"0001110000"_b, "_jgxqzr"}, + {"0001110001"_b, "_jrqxvn"}, + {"0001110010"_b, "_lplpkk"}, + {"0001110100"_b, "_kgpsjz"}, + {"0001110101"_b, "_hsrkqt"}, + {"0001110110"_b, "_zvvvhr"}, + {"0001110111"_b, "_kssltr"}, + {"0001111000"_b, "_vzzqhx"}, + {"0001111001"_b, "_ktngnm"}, + {"0001111010"_b, "_ttsgkt"}, + {"0001111011"_b, "_phtxqg"}, + {"0001111100"_b, "_yysxts"}, + {"0001111101"_b, "_msnshr"}, + {"0001111110"_b, "_nmqrtr"}, + {"0001111111"_b, "_gnxrlr"}, + {"00100010xx"_b, "_hmjrmm"}, + {"00100011xx"_b, "_nxlmhz"}, + {"0010010xxx"_b, "_hqkljv"}, + {"001001100x"_b, "_hvrjyt"}, + {"001001110x"_b, "_kgygky"}, + {"0010011x1x"_b, "_lkpprr"}, + {"0010100xxx"_b, "_vyjsst"}, + {"0010110xxx"_b, "_qvjmmq"}, + {"00101x1xxx"_b, "_lxggmz"}, + {"0011000xxx"_b, "_yjktml"}, + {"0011001000"_b, "_mqtgvk"}, + {"0011001001"_b, "_hvnhmh"}, + {"0011001010"_b, "_gsnnnt"}, + {"0011001011"_b, "_vxvyyg"}, + {"0011001100"_b, "_jkvsxy"}, + {"0011001110"_b, "_zrpzss"}, + {"0011010000"_b, "_rsqxrs"}, + {"0011010001"_b, "_rktqym"}, + {"001101001x"_b, "_vqrqjt"}, + {"001101100x"_b, "_rtlvxq"}, + {"001101101x"_b, "_gtqnvr"}, + {"0011100xxx"_b, "_yzpszn"}, + {"0011101000"_b, "_hhxpyt"}, + {"0011101001"_b, "_htrtzz"}, + {"0011101010"_b, "_rkxlyj"}, + {"0011101011"_b, "_vnggzq"}, + {"0011110000"_b, "_mrlpxr"}, + {"0011110001"_b, "_xszqrg"}, + {"001111001x"_b, "_plyxlq"}, + {"0011110100"_b, "_rqpjjs"}, + {"0011110101"_b, "_ttmyrv"}, + {"0011110110"_b, "_lvjtlg"}, + {"0011110111"_b, "_lnntps"}, + {"0011111000"_b, "_vtgnnl"}, + {"0011111001"_b, "_mxnzst"}, + {"0011111010"_b, "_lvryvp"}, + {"0011111011"_b, "_mqssgy"}, + {"0011111100"_b, "_pxzvjl"}, + {"0011111101"_b, "_mnxgqm"}, + {"0011111110"_b, "_qntrvk"}, + {"0011111111"_b, "_vnnjxg"}, + {"0100100000"_b, "_yyyshx"}, + {"0100100001"_b, "_mylphg"}, + {"0100100010"_b, "_nsjhhg"}, + {"0100100011"_b, "_rhhrhg"}, + {"0100100100"_b, "_ymhgxg"}, + {"0100100101"_b, "_nvkthr"}, + {"0100100110"_b, "_phthqj"}, + {"0100100111"_b, "_kyjxrr"}, + {"0100101000"_b, "_gzvylr"}, + 
{"0100101001"_b, "_pppsmg"}, + {"0100101010"_b, "_pyjnpz"}, + {"0100101011"_b, "_shqygv"}, + {"0100101100"_b, "_hzsxkp"}, + {"0100101101"_b, "_nqkhrv"}, + {"0100101110"_b, "_tkjtgp"}, + {"0100101111"_b, "_htqpks"}, + {"0101000xxx"_b, "_ssvpxz"}, + {"0101001xxx"_b, "_vgqvys"}, + {"01010100xx"_b, "_qkrnms"}, + {"01010101xx"_b, "_vypnss"}, + {"01010110xx"_b, "_glkvkr"}, + {"01010111xx"_b, "_qgqgkx"}, + {"0101100xxx"_b, "_mxplnn"}, + {"0101101xxx"_b, "_pqmqrg"}, + {"0101110000"_b, "_gshlgj"}, + {"0101110001"_b, "_klsmsv"}, + {"0101110010"_b, "_xhhqnx"}, + {"0101110011"_b, "_rssrty"}, + {"0101110100"_b, "_nzskzl"}, + {"0101110101"_b, "_qlzvpg"}, + {"0101110110"_b, "_hlxmpy"}, + {"0101110111"_b, "_lplzxv"}, + {"0101111000"_b, "_krtvhr"}, + {"0101111001"_b, "_ymtzjg"}, + {"0101111010"_b, "_szgqrr"}, + {"0101111011"_b, "_xnpyvy"}, + {"0101111100"_b, "_tnngsg"}, + {"0101111101"_b, "_kshtnj"}, + {"0101111110"_b, "_vmxzxt"}, + {"0101111111"_b, "_gxqnph"}, + {"0110001xxx"_b, "_ykptgl"}, + {"0110010xxx"_b, "_slzvjh"}, + {"0110011xxx"_b, "_nqlrmv"}, + {"0110100xxx"_b, "_yrjqql"}, + {"0110101xxx"_b, "_prgrzz"}, + {"01110000xx"_b, "_vshynq"}, + {"0111000100"_b, "_ykjhgg"}, + {"0111000101"_b, "_jqtksx"}, + {"0111000110"_b, "_gzpkvm"}, + {"0111000111"_b, "_jhkkgv"}, + {"0111001xxx"_b, "_yptvyx"}, + {"0111010000"_b, "_tzrgqq"}, + {"0111010001"_b, "_qlpnnn"}, + {"011101001x"_b, "_grsslr"}, + {"01111000xx"_b, "_xjtzgm"}, + {"0111100101"_b, "_srsrtk"}, + {"0111100111"_b, "_xynxhx"}, + {"01111001x0"_b, "_gylmmr"}, + {"0111101xxx"_b, "_mkzysy"}, + {"0111110000"_b, "_nklvmv"}, + {"0111110010"_b, "_pyttkp"}, + {"0111110100"_b, "_lrqlrg"}, + {"0111110101"_b, "_yvxkhv"}, + {"0111110110"_b, "_ksgpqz"}, + {"0111110111"_b, "_hkpjqm"}, + {"0111111000"_b, "_lgzlyq"}, + {"0111111001"_b, "_yrypnt"}, + {"0111111010"_b, "_snvnjz"}, + {"0111111011"_b, "_kkkltp"}, + {"0111111100"_b, "_xsgnlv"}, + {"0111111101"_b, "_lrptrn"}, + {"0111111110"_b, "_pyhrrt"}, + {"0111111111"_b, "_nkyrpv"}, + {"0x10000xxx"_b, "adr_only_pcreladdr"}, + {"1000100000"_b, "_lspzrv"}, + {"1000100001"_b, "_kxvvkq"}, + {"1000100010"_b, "_sxpvym"}, + {"1000100011"_b, "_vkrkks"}, + {"1000100100"_b, "_xvnyxq"}, + {"1000100101"_b, "_gtxpgx"}, + {"1000100110"_b, "_vlrhpy"}, + {"1000100111"_b, "_ymhkrx"}, + {"1000101000"_b, "_zrmgjx"}, + {"1000101001"_b, "_qqyryl"}, + {"1000101010"_b, "_hgxtqy"}, + {"1000101011"_b, "_yytvxh"}, + {"1000101100"_b, "_ptslzg"}, + {"1000101101"_b, "_ytkjxx"}, + {"1000101110"_b, "_zxtzmv"}, + {"1000101111"_b, "_kgmqkh"}, + {"1001000001"_b, "_ptyynt"}, + {"1001000011"_b, "_skszgm"}, + {"1001000100"_b, "_rlgtnn"}, + {"1001000101"_b, "_rgxthl"}, + {"1001000110"_b, "_xxphlt"}, + {"1001000111"_b, "_njjlxy"}, + {"10010100xx"_b, "_tnpjts"}, + {"10010101xx"_b, "_hgjgpm"}, + {"10010110xx"_b, "_hqnsvg"}, + {"10010111xx"_b, "_zqjgzz"}, + {"100111000x"_b, "_ztjjnh"}, + {"1001110010"_b, "_lssjyz"}, + {"1001110011"_b, "_zpxrnm"}, + {"100111010x"_b, "_jkvvtp"}, + {"1001110110"_b, "_sqhxzj"}, + {"1001110111"_b, "_hrxtnj"}, + {"1010001000"_b, "_vrsgzg"}, + {"1010001010"_b, "_vhkjgh"}, + {"10100010x1"_b, "_rxytqg"}, + {"1010001100"_b, "_lrmgmq"}, + {"1010001110"_b, "_zjrsrx"}, + {"10100011x1"_b, "_vmgnhk"}, + {"1010010xxx"_b, "_pjlnhh"}, + {"10100110xx"_b, "_xzlxjh"}, + {"10100111xx"_b, "_rjthsm"}, + {"10101000xx"_b, "_yjnkrn"}, + {"10101001xx"_b, "_zrqtgx"}, + {"1010101000"_b, "_pxvjkp"}, + {"1010101001"_b, "_xrzqtn"}, + {"101010101x"_b, "_ttmvpr"}, + {"1010101100"_b, "_grjzyl"}, + {"1010101110"_b, "_kynxnz"}, + {"10101011x1"_b, "_zzhnxv"}, + 
{"1010110000"_b, "_lymhlk"}, + {"1010110100"_b, "_tpmqyl"}, + {"1010111000"_b, "_lkzyzv"}, + {"1010111100"_b, "_tvyxlr"}, + {"101011xx10"_b, "_yxvttm"}, + {"101011xxx1"_b, "_qhzvvh"}, + {"1011000xxx"_b, "_jgklkt"}, + {"1011001000"_b, "_sxptnh"}, + {"1011001001"_b, "_vmsxgq"}, + {"1011001010"_b, "_vkrskv"}, + {"1011001011"_b, "_nxrqmg"}, + {"1011001100"_b, "_lsqgkk"}, + {"1011001110"_b, "_kxpqhv"}, + {"10110011x1"_b, "_jrxtzg"}, + {"1011010000"_b, "_yzqhtj"}, + {"1011010001"_b, "_yvqnyq"}, + {"101101001x"_b, "_qpsryx"}, + {"1011011000"_b, "_vpjktn"}, + {"1011011001"_b, "_zvxxjk"}, + {"101101101x"_b, "_sztkhs"}, + {"101101110x"_b, "_hlypvy"}, + {"101101111x"_b, "_rszgzl"}, + {"1011100xxx"_b, "_ymszkr"}, + {"1011101000"_b, "_pzzgts"}, + {"1011101001"_b, "_pgvjgs"}, + {"1011101010"_b, "_kppzvh"}, + {"1011101011"_b, "_nlrjsj"}, + {"10111100xx"_b, "_rxtklv"}, + {"1011110100"_b, "_vsnnms"}, + {"1100100000"_b, "_sjtrhm"}, + {"1100100001"_b, "_hzkglv"}, + {"1100100010"_b, "_qrygny"}, + {"1100100011"_b, "_tjzqnp"}, + {"1100100100"_b, "_yqvqtx"}, + {"1100100101"_b, "_ngttyj"}, + {"1100100110"_b, "_kqzmtr"}, + {"1100100111"_b, "_qpvgnh"}, + {"1100101000"_b, "_tpkslq"}, + {"1100101001"_b, "_kzpyzy"}, + {"1100101010"_b, "_ytvtqn"}, + {"1100101011"_b, "_qkzjxm"}, + {"1100101100"_b, "_lqmksm"}, + {"1100101101"_b, "_hxlznn"}, + {"1100101110"_b, "_knkjnz"}, + {"1100101111"_b, "_rsjgyk"}, + {"1101000xxx"_b, "_mtlxqp"}, + {"1101001xxx"_b, "_vmyztj"}, + {"11010100xx"_b, "_vnzkty"}, + {"11010101xx"_b, "_vnrlrk"}, + {"11010110xx"_b, "_rjmhxr"}, + {"11010111xx"_b, "_rxgkjn"}, + {"1101100xxx"_b, "_jvkxtj"}, + {"1101101xxx"_b, "_srpqmk"}, + {"1110001xxx"_b, "_pkskpp"}, + {"1110010xxx"_b, "_mgspnm"}, + {"1110011xxx"_b, "_snhzxr"}, + {"1110100xxx"_b, "_jxgqqz"}, + {"1110101xxx"_b, "_qnysqv"}, + {"11110000xx"_b, "_rqghyv"}, + {"1111000100"_b, "_zgljvg"}, + {"1111000101"_b, "_yjnmkg"}, + {"1111000110"_b, "_jqhvhn"}, + {"1111000111"_b, "_vnsqhn"}, + {"1111001xxx"_b, "_hvhrsq"}, + {"1111010000"_b, "_gyjphh"}, + {"1111010010"_b, "_mkrgxr"}, + {"11110100x1"_b, "_mtzhrn"}, + {"11111000xx"_b, "_hlljqz"}, + {"11111001xx"_b, "_qzsyvx"}, + {"1111101xxx"_b, "_jqlgts"}, + {"1x10000xxx"_b, "adrp_only_pcreladdr"}, + {"x110110xxx"_b, "_xymnxy"}, + {"x110111xxx"_b, "_htjmmx"}, + }, + }, + +}; +// clang-format on + +} // namespace aarch64 +} // namespace vixl diff --git a/3rdparty/vixl/include/vixl/aarch64/decoder-visitor-map-aarch64.h b/3rdparty/vixl/include/vixl/aarch64/decoder-visitor-map-aarch64.h new file mode 100644 index 0000000000..8ae438c104 --- /dev/null +++ b/3rdparty/vixl/include/vixl/aarch64/decoder-visitor-map-aarch64.h @@ -0,0 +1,2949 @@ +// Copyright 2020, VIXL authors +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// * Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// * Neither the name of ARM Limited nor the names of its contributors may be +// used to endorse or promote products derived from this software without +// specific prior written permission. 
+// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS CONTRIBUTORS "AS IS" AND +// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE +// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// Initialisation data for a std::map, from instruction form to the visitor +// function that handles it. This allows reuse of existing visitor functions +// that support groups of instructions, though they may do extra decoding +// no longer needed. +// In the long term, it's expected that each component that uses the decoder +// will want to group instruction handling in the way most appropriate to +// the component's function, so this map initialisation will no longer be +// shared. + +#define DEFAULT_FORM_TO_VISITOR_MAP(VISITORCLASS) \ + {"abs_z_p_z"_h, &VISITORCLASS::VisitSVEIntUnaryArithmeticPredicated}, \ + {"addpl_r_ri"_h, &VISITORCLASS::VisitSVEStackFrameAdjustment}, \ + {"addvl_r_ri"_h, &VISITORCLASS::VisitSVEStackFrameAdjustment}, \ + {"add_z_p_zz"_h, \ + &VISITORCLASS::VisitSVEIntAddSubtractVectors_Predicated}, \ + {"add_z_zi"_h, &VISITORCLASS::VisitSVEIntAddSubtractImm_Unpredicated}, \ + {"add_z_zz"_h, &VISITORCLASS::VisitSVEIntArithmeticUnpredicated}, \ + {"adr_z_az_d_s32_scaled"_h, &VISITORCLASS::VisitSVEAddressGeneration}, \ + {"adr_z_az_d_u32_scaled"_h, &VISITORCLASS::VisitSVEAddressGeneration}, \ + {"adr_z_az_sd_same_scaled"_h, &VISITORCLASS::VisitSVEAddressGeneration}, \ + {"ands_p_p_pp_z"_h, &VISITORCLASS::VisitSVEPredicateLogical}, \ + {"andv_r_p_z"_h, &VISITORCLASS::VisitSVEIntReduction}, \ + {"and_p_p_pp_z"_h, &VISITORCLASS::VisitSVEPredicateLogical}, \ + {"and_z_p_zz"_h, &VISITORCLASS::VisitSVEBitwiseLogical_Predicated}, \ + {"and_z_zi"_h, \ + &VISITORCLASS::VisitSVEBitwiseLogicalWithImm_Unpredicated}, \ + {"and_z_zz"_h, &VISITORCLASS::VisitSVEBitwiseLogicalUnpredicated}, \ + {"asrd_z_p_zi"_h, &VISITORCLASS::VisitSVEBitwiseShiftByImm_Predicated}, \ + {"asrr_z_p_zz"_h, \ + &VISITORCLASS::VisitSVEBitwiseShiftByVector_Predicated}, \ + {"asr_z_p_zi"_h, &VISITORCLASS::VisitSVEBitwiseShiftByImm_Predicated}, \ + {"asr_z_p_zw"_h, \ + &VISITORCLASS::VisitSVEBitwiseShiftByWideElements_Predicated}, \ + {"asr_z_p_zz"_h, \ + &VISITORCLASS::VisitSVEBitwiseShiftByVector_Predicated}, \ + {"asr_z_zi"_h, &VISITORCLASS::VisitSVEBitwiseShiftUnpredicated}, \ + {"asr_z_zw"_h, &VISITORCLASS::VisitSVEBitwiseShiftUnpredicated}, \ + {"bics_p_p_pp_z"_h, &VISITORCLASS::VisitSVEPredicateLogical}, \ + {"bic_p_p_pp_z"_h, &VISITORCLASS::VisitSVEPredicateLogical}, \ + {"bic_z_p_zz"_h, &VISITORCLASS::VisitSVEBitwiseLogical_Predicated}, \ + {"bic_z_zz"_h, &VISITORCLASS::VisitSVEBitwiseLogicalUnpredicated}, \ + {"brkas_p_p_p_z"_h, &VISITORCLASS::VisitSVEPartitionBreakCondition}, \ + {"brka_p_p_p"_h, &VISITORCLASS::VisitSVEPartitionBreakCondition}, \ + {"brkbs_p_p_p_z"_h, &VISITORCLASS::VisitSVEPartitionBreakCondition}, \ + {"brkb_p_p_p"_h, &VISITORCLASS::VisitSVEPartitionBreakCondition}, \ + {"brkns_p_p_pp"_h, \ + 
&VISITORCLASS::VisitSVEPropagateBreakToNextPartition}, \ + {"brkn_p_p_pp"_h, &VISITORCLASS::VisitSVEPropagateBreakToNextPartition}, \ + {"brkpas_p_p_pp"_h, &VISITORCLASS::VisitSVEPropagateBreak}, \ + {"brkpa_p_p_pp"_h, &VISITORCLASS::VisitSVEPropagateBreak}, \ + {"brkpbs_p_p_pp"_h, &VISITORCLASS::VisitSVEPropagateBreak}, \ + {"brkpb_p_p_pp"_h, &VISITORCLASS::VisitSVEPropagateBreak}, \ + {"clasta_r_p_z"_h, \ + &VISITORCLASS::VisitSVEConditionallyExtractElementToGeneralRegister}, \ + {"clasta_v_p_z"_h, \ + &VISITORCLASS::VisitSVEConditionallyExtractElementToSIMDFPScalar}, \ + {"clasta_z_p_zz"_h, \ + &VISITORCLASS::VisitSVEConditionallyBroadcastElementToVector}, \ + {"clastb_r_p_z"_h, \ + &VISITORCLASS::VisitSVEConditionallyExtractElementToGeneralRegister}, \ + {"clastb_v_p_z"_h, \ + &VISITORCLASS::VisitSVEConditionallyExtractElementToSIMDFPScalar}, \ + {"clastb_z_p_zz"_h, \ + &VISITORCLASS::VisitSVEConditionallyBroadcastElementToVector}, \ + {"cls_z_p_z"_h, &VISITORCLASS::VisitSVEIntUnaryArithmeticPredicated}, \ + {"clz_z_p_z"_h, &VISITORCLASS::VisitSVEIntUnaryArithmeticPredicated}, \ + {"cmpeq_p_p_zi"_h, &VISITORCLASS::VisitSVEIntCompareSignedImm}, \ + {"cmpeq_p_p_zw"_h, &VISITORCLASS::VisitSVEIntCompareVectors}, \ + {"cmpeq_p_p_zz"_h, &VISITORCLASS::VisitSVEIntCompareVectors}, \ + {"cmpge_p_p_zi"_h, &VISITORCLASS::VisitSVEIntCompareSignedImm}, \ + {"cmpge_p_p_zw"_h, &VISITORCLASS::VisitSVEIntCompareVectors}, \ + {"cmpge_p_p_zz"_h, &VISITORCLASS::VisitSVEIntCompareVectors}, \ + {"cmpgt_p_p_zi"_h, &VISITORCLASS::VisitSVEIntCompareSignedImm}, \ + {"cmpgt_p_p_zw"_h, &VISITORCLASS::VisitSVEIntCompareVectors}, \ + {"cmpgt_p_p_zz"_h, &VISITORCLASS::VisitSVEIntCompareVectors}, \ + {"cmphi_p_p_zi"_h, &VISITORCLASS::VisitSVEIntCompareUnsignedImm}, \ + {"cmphi_p_p_zw"_h, &VISITORCLASS::VisitSVEIntCompareVectors}, \ + {"cmphi_p_p_zz"_h, &VISITORCLASS::VisitSVEIntCompareVectors}, \ + {"cmphs_p_p_zi"_h, &VISITORCLASS::VisitSVEIntCompareUnsignedImm}, \ + {"cmphs_p_p_zw"_h, &VISITORCLASS::VisitSVEIntCompareVectors}, \ + {"cmphs_p_p_zz"_h, &VISITORCLASS::VisitSVEIntCompareVectors}, \ + {"cmple_p_p_zi"_h, &VISITORCLASS::VisitSVEIntCompareSignedImm}, \ + {"cmple_p_p_zw"_h, &VISITORCLASS::VisitSVEIntCompareVectors}, \ + {"cmplo_p_p_zi"_h, &VISITORCLASS::VisitSVEIntCompareUnsignedImm}, \ + {"cmplo_p_p_zw"_h, &VISITORCLASS::VisitSVEIntCompareVectors}, \ + {"cmpls_p_p_zi"_h, &VISITORCLASS::VisitSVEIntCompareUnsignedImm}, \ + {"cmpls_p_p_zw"_h, &VISITORCLASS::VisitSVEIntCompareVectors}, \ + {"cmplt_p_p_zi"_h, &VISITORCLASS::VisitSVEIntCompareSignedImm}, \ + {"cmplt_p_p_zw"_h, &VISITORCLASS::VisitSVEIntCompareVectors}, \ + {"cmpne_p_p_zi"_h, &VISITORCLASS::VisitSVEIntCompareSignedImm}, \ + {"cmpne_p_p_zw"_h, &VISITORCLASS::VisitSVEIntCompareVectors}, \ + {"cmpne_p_p_zz"_h, &VISITORCLASS::VisitSVEIntCompareVectors}, \ + {"cnot_z_p_z"_h, &VISITORCLASS::VisitSVEIntUnaryArithmeticPredicated}, \ + {"cntb_r_s"_h, &VISITORCLASS::VisitSVEElementCount}, \ + {"cntd_r_s"_h, &VISITORCLASS::VisitSVEElementCount}, \ + {"cnth_r_s"_h, &VISITORCLASS::VisitSVEElementCount}, \ + {"cntp_r_p_p"_h, &VISITORCLASS::VisitSVEPredicateCount}, \ + {"cntw_r_s"_h, &VISITORCLASS::VisitSVEElementCount}, \ + {"cnt_z_p_z"_h, &VISITORCLASS::VisitSVEIntUnaryArithmeticPredicated}, \ + {"compact_z_p_z"_h, &VISITORCLASS::VisitSVECompressActiveElements}, \ + {"cpy_z_o_i"_h, &VISITORCLASS::VisitSVECopyIntImm_Predicated}, \ + {"cpy_z_p_i"_h, &VISITORCLASS::VisitSVECopyIntImm_Predicated}, \ + {"cpy_z_p_r"_h, \ + 
&VISITORCLASS::VisitSVECopyGeneralRegisterToVector_Predicated}, \ + {"cpy_z_p_v"_h, \ + &VISITORCLASS::VisitSVECopySIMDFPScalarRegisterToVector_Predicated}, \ + {"ctermeq_rr"_h, &VISITORCLASS::VisitSVEConditionallyTerminateScalars}, \ + {"ctermne_rr"_h, &VISITORCLASS::VisitSVEConditionallyTerminateScalars}, \ + {"decb_r_rs"_h, &VISITORCLASS::VisitSVEIncDecRegisterByElementCount}, \ + {"decd_r_rs"_h, &VISITORCLASS::VisitSVEIncDecRegisterByElementCount}, \ + {"decd_z_zs"_h, &VISITORCLASS::VisitSVEIncDecVectorByElementCount}, \ + {"dech_r_rs"_h, &VISITORCLASS::VisitSVEIncDecRegisterByElementCount}, \ + {"dech_z_zs"_h, &VISITORCLASS::VisitSVEIncDecVectorByElementCount}, \ + {"decp_r_p_r"_h, &VISITORCLASS::VisitSVEIncDecByPredicateCount}, \ + {"decp_z_p_z"_h, &VISITORCLASS::VisitSVEIncDecByPredicateCount}, \ + {"decw_r_rs"_h, &VISITORCLASS::VisitSVEIncDecRegisterByElementCount}, \ + {"decw_z_zs"_h, &VISITORCLASS::VisitSVEIncDecVectorByElementCount}, \ + {"dupm_z_i"_h, &VISITORCLASS::VisitSVEBroadcastBitmaskImm}, \ + {"dup_z_i"_h, &VISITORCLASS::VisitSVEBroadcastIntImm_Unpredicated}, \ + {"dup_z_r"_h, &VISITORCLASS::VisitSVEBroadcastGeneralRegister}, \ + {"dup_z_zi"_h, &VISITORCLASS::VisitSVEBroadcastIndexElement}, \ + {"eors_p_p_pp_z"_h, &VISITORCLASS::VisitSVEPredicateLogical}, \ + {"eorv_r_p_z"_h, &VISITORCLASS::VisitSVEIntReduction}, \ + {"eor_p_p_pp_z"_h, &VISITORCLASS::VisitSVEPredicateLogical}, \ + {"eor_z_p_zz"_h, &VISITORCLASS::VisitSVEBitwiseLogical_Predicated}, \ + {"eor_z_zi"_h, \ + &VISITORCLASS::VisitSVEBitwiseLogicalWithImm_Unpredicated}, \ + {"eor_z_zz"_h, &VISITORCLASS::VisitSVEBitwiseLogicalUnpredicated}, \ + {"ext_z_zi_des"_h, &VISITORCLASS::VisitSVEPermuteVectorExtract}, \ + {"fabd_z_p_zz"_h, &VISITORCLASS::VisitSVEFPArithmetic_Predicated}, \ + {"fabs_z_p_z"_h, &VISITORCLASS::VisitSVEIntUnaryArithmeticPredicated}, \ + {"facge_p_p_zz"_h, &VISITORCLASS::VisitSVEFPCompareVectors}, \ + {"facgt_p_p_zz"_h, &VISITORCLASS::VisitSVEFPCompareVectors}, \ + {"fadda_v_p_z"_h, &VISITORCLASS::VisitSVEFPAccumulatingReduction}, \ + {"faddv_v_p_z"_h, &VISITORCLASS::VisitSVEFPFastReduction}, \ + {"fadd_z_p_zs"_h, \ + &VISITORCLASS::VisitSVEFPArithmeticWithImm_Predicated}, \ + {"fadd_z_p_zz"_h, &VISITORCLASS::VisitSVEFPArithmetic_Predicated}, \ + {"fadd_z_zz"_h, &VISITORCLASS::VisitSVEFPArithmeticUnpredicated}, \ + {"fcadd_z_p_zz"_h, &VISITORCLASS::VisitSVEFPComplexAddition}, \ + {"fcmeq_p_p_z0"_h, &VISITORCLASS::VisitSVEFPCompareWithZero}, \ + {"fcmeq_p_p_zz"_h, &VISITORCLASS::VisitSVEFPCompareVectors}, \ + {"fcmge_p_p_z0"_h, &VISITORCLASS::VisitSVEFPCompareWithZero}, \ + {"fcmge_p_p_zz"_h, &VISITORCLASS::VisitSVEFPCompareVectors}, \ + {"fcmgt_p_p_z0"_h, &VISITORCLASS::VisitSVEFPCompareWithZero}, \ + {"fcmgt_p_p_zz"_h, &VISITORCLASS::VisitSVEFPCompareVectors}, \ + {"fcmla_z_p_zzz"_h, &VISITORCLASS::VisitSVEFPComplexMulAdd}, \ + {"fcmla_z_zzzi_h"_h, &VISITORCLASS::VisitSVEFPComplexMulAddIndex}, \ + {"fcmla_z_zzzi_s"_h, &VISITORCLASS::VisitSVEFPComplexMulAddIndex}, \ + {"fcmle_p_p_z0"_h, &VISITORCLASS::VisitSVEFPCompareWithZero}, \ + {"fcmlt_p_p_z0"_h, &VISITORCLASS::VisitSVEFPCompareWithZero}, \ + {"fcmne_p_p_z0"_h, &VISITORCLASS::VisitSVEFPCompareWithZero}, \ + {"fcmne_p_p_zz"_h, &VISITORCLASS::VisitSVEFPCompareVectors}, \ + {"fcmuo_p_p_zz"_h, &VISITORCLASS::VisitSVEFPCompareVectors}, \ + {"fcpy_z_p_i"_h, &VISITORCLASS::VisitSVECopyFPImm_Predicated}, \ + {"fcvtzs_z_p_z_d2w"_h, &VISITORCLASS::VisitSVEFPConvertToInt}, \ + {"fcvtzs_z_p_z_d2x"_h, &VISITORCLASS::VisitSVEFPConvertToInt}, \ + 
{"fcvtzs_z_p_z_fp162h"_h, &VISITORCLASS::VisitSVEFPConvertToInt}, \ + {"fcvtzs_z_p_z_fp162w"_h, &VISITORCLASS::VisitSVEFPConvertToInt}, \ + {"fcvtzs_z_p_z_fp162x"_h, &VISITORCLASS::VisitSVEFPConvertToInt}, \ + {"fcvtzs_z_p_z_s2w"_h, &VISITORCLASS::VisitSVEFPConvertToInt}, \ + {"fcvtzs_z_p_z_s2x"_h, &VISITORCLASS::VisitSVEFPConvertToInt}, \ + {"fcvtzu_z_p_z_d2w"_h, &VISITORCLASS::VisitSVEFPConvertToInt}, \ + {"fcvtzu_z_p_z_d2x"_h, &VISITORCLASS::VisitSVEFPConvertToInt}, \ + {"fcvtzu_z_p_z_fp162h"_h, &VISITORCLASS::VisitSVEFPConvertToInt}, \ + {"fcvtzu_z_p_z_fp162w"_h, &VISITORCLASS::VisitSVEFPConvertToInt}, \ + {"fcvtzu_z_p_z_fp162x"_h, &VISITORCLASS::VisitSVEFPConvertToInt}, \ + {"fcvtzu_z_p_z_s2w"_h, &VISITORCLASS::VisitSVEFPConvertToInt}, \ + {"fcvtzu_z_p_z_s2x"_h, &VISITORCLASS::VisitSVEFPConvertToInt}, \ + {"fcvt_z_p_z_d2h"_h, &VISITORCLASS::VisitSVEFPConvertPrecision}, \ + {"fcvt_z_p_z_d2s"_h, &VISITORCLASS::VisitSVEFPConvertPrecision}, \ + {"fcvt_z_p_z_h2d"_h, &VISITORCLASS::VisitSVEFPConvertPrecision}, \ + {"fcvt_z_p_z_h2s"_h, &VISITORCLASS::VisitSVEFPConvertPrecision}, \ + {"fcvt_z_p_z_s2d"_h, &VISITORCLASS::VisitSVEFPConvertPrecision}, \ + {"fcvt_z_p_z_s2h"_h, &VISITORCLASS::VisitSVEFPConvertPrecision}, \ + {"fdivr_z_p_zz"_h, &VISITORCLASS::VisitSVEFPArithmetic_Predicated}, \ + {"fdiv_z_p_zz"_h, &VISITORCLASS::VisitSVEFPArithmetic_Predicated}, \ + {"fdup_z_i"_h, &VISITORCLASS::VisitSVEBroadcastFPImm_Unpredicated}, \ + {"fexpa_z_z"_h, &VISITORCLASS::VisitSVEFPExponentialAccelerator}, \ + {"fmad_z_p_zzz"_h, &VISITORCLASS::VisitSVEFPMulAdd}, \ + {"fmaxnmv_v_p_z"_h, &VISITORCLASS::VisitSVEFPFastReduction}, \ + {"fmaxnm_z_p_zs"_h, \ + &VISITORCLASS::VisitSVEFPArithmeticWithImm_Predicated}, \ + {"fmaxnm_z_p_zz"_h, &VISITORCLASS::VisitSVEFPArithmetic_Predicated}, \ + {"fmaxv_v_p_z"_h, &VISITORCLASS::VisitSVEFPFastReduction}, \ + {"fmax_z_p_zs"_h, \ + &VISITORCLASS::VisitSVEFPArithmeticWithImm_Predicated}, \ + {"fmax_z_p_zz"_h, &VISITORCLASS::VisitSVEFPArithmetic_Predicated}, \ + {"fminnmv_v_p_z"_h, &VISITORCLASS::VisitSVEFPFastReduction}, \ + {"fminnm_z_p_zs"_h, \ + &VISITORCLASS::VisitSVEFPArithmeticWithImm_Predicated}, \ + {"fminnm_z_p_zz"_h, &VISITORCLASS::VisitSVEFPArithmetic_Predicated}, \ + {"fminv_v_p_z"_h, &VISITORCLASS::VisitSVEFPFastReduction}, \ + {"fmin_z_p_zs"_h, \ + &VISITORCLASS::VisitSVEFPArithmeticWithImm_Predicated}, \ + {"fmin_z_p_zz"_h, &VISITORCLASS::VisitSVEFPArithmetic_Predicated}, \ + {"fmla_z_p_zzz"_h, &VISITORCLASS::VisitSVEFPMulAdd}, \ + {"fmla_z_zzzi_d"_h, &VISITORCLASS::VisitSVEFPMulAddIndex}, \ + {"fmla_z_zzzi_h"_h, &VISITORCLASS::VisitSVEFPMulAddIndex}, \ + {"fmla_z_zzzi_s"_h, &VISITORCLASS::VisitSVEFPMulAddIndex}, \ + {"fmls_z_p_zzz"_h, &VISITORCLASS::VisitSVEFPMulAdd}, \ + {"fmls_z_zzzi_d"_h, &VISITORCLASS::VisitSVEFPMulAddIndex}, \ + {"fmls_z_zzzi_h"_h, &VISITORCLASS::VisitSVEFPMulAddIndex}, \ + {"fmls_z_zzzi_s"_h, &VISITORCLASS::VisitSVEFPMulAddIndex}, \ + {"fmsb_z_p_zzz"_h, &VISITORCLASS::VisitSVEFPMulAdd}, \ + {"fmulx_z_p_zz"_h, &VISITORCLASS::VisitSVEFPArithmetic_Predicated}, \ + {"fmul_z_p_zs"_h, \ + &VISITORCLASS::VisitSVEFPArithmeticWithImm_Predicated}, \ + {"fmul_z_p_zz"_h, &VISITORCLASS::VisitSVEFPArithmetic_Predicated}, \ + {"fmul_z_zz"_h, &VISITORCLASS::VisitSVEFPArithmeticUnpredicated}, \ + {"fmul_z_zzi_d"_h, &VISITORCLASS::VisitSVEFPMulIndex}, \ + {"fmul_z_zzi_h"_h, &VISITORCLASS::VisitSVEFPMulIndex}, \ + {"fmul_z_zzi_s"_h, &VISITORCLASS::VisitSVEFPMulIndex}, \ + {"fneg_z_p_z"_h, &VISITORCLASS::VisitSVEIntUnaryArithmeticPredicated}, \ + 
{"fnmad_z_p_zzz"_h, &VISITORCLASS::VisitSVEFPMulAdd}, \ + {"fnmla_z_p_zzz"_h, &VISITORCLASS::VisitSVEFPMulAdd}, \ + {"fnmls_z_p_zzz"_h, &VISITORCLASS::VisitSVEFPMulAdd}, \ + {"fnmsb_z_p_zzz"_h, &VISITORCLASS::VisitSVEFPMulAdd}, \ + {"frecpe_z_z"_h, &VISITORCLASS::VisitSVEFPUnaryOpUnpredicated}, \ + {"frecps_z_zz"_h, &VISITORCLASS::VisitSVEFPArithmeticUnpredicated}, \ + {"frecpx_z_p_z"_h, &VISITORCLASS::VisitSVEFPUnaryOp}, \ + {"frinta_z_p_z"_h, &VISITORCLASS::VisitSVEFPRoundToIntegralValue}, \ + {"frinti_z_p_z"_h, &VISITORCLASS::VisitSVEFPRoundToIntegralValue}, \ + {"frintm_z_p_z"_h, &VISITORCLASS::VisitSVEFPRoundToIntegralValue}, \ + {"frintn_z_p_z"_h, &VISITORCLASS::VisitSVEFPRoundToIntegralValue}, \ + {"frintp_z_p_z"_h, &VISITORCLASS::VisitSVEFPRoundToIntegralValue}, \ + {"frintx_z_p_z"_h, &VISITORCLASS::VisitSVEFPRoundToIntegralValue}, \ + {"frintz_z_p_z"_h, &VISITORCLASS::VisitSVEFPRoundToIntegralValue}, \ + {"frsqrte_z_z"_h, &VISITORCLASS::VisitSVEFPUnaryOpUnpredicated}, \ + {"frsqrts_z_zz"_h, &VISITORCLASS::VisitSVEFPArithmeticUnpredicated}, \ + {"fscale_z_p_zz"_h, &VISITORCLASS::VisitSVEFPArithmetic_Predicated}, \ + {"fsqrt_z_p_z"_h, &VISITORCLASS::VisitSVEFPUnaryOp}, \ + {"fsubr_z_p_zs"_h, \ + &VISITORCLASS::VisitSVEFPArithmeticWithImm_Predicated}, \ + {"fsubr_z_p_zz"_h, &VISITORCLASS::VisitSVEFPArithmetic_Predicated}, \ + {"fsub_z_p_zs"_h, \ + &VISITORCLASS::VisitSVEFPArithmeticWithImm_Predicated}, \ + {"fsub_z_p_zz"_h, &VISITORCLASS::VisitSVEFPArithmetic_Predicated}, \ + {"fsub_z_zz"_h, &VISITORCLASS::VisitSVEFPArithmeticUnpredicated}, \ + {"ftmad_z_zzi"_h, &VISITORCLASS::VisitSVEFPTrigMulAddCoefficient}, \ + {"ftsmul_z_zz"_h, &VISITORCLASS::VisitSVEFPArithmeticUnpredicated}, \ + {"ftssel_z_zz"_h, &VISITORCLASS::VisitSVEFPTrigSelectCoefficient}, \ + {"incb_r_rs"_h, &VISITORCLASS::VisitSVEIncDecRegisterByElementCount}, \ + {"incd_r_rs"_h, &VISITORCLASS::VisitSVEIncDecRegisterByElementCount}, \ + {"incd_z_zs"_h, &VISITORCLASS::VisitSVEIncDecVectorByElementCount}, \ + {"inch_r_rs"_h, &VISITORCLASS::VisitSVEIncDecRegisterByElementCount}, \ + {"inch_z_zs"_h, &VISITORCLASS::VisitSVEIncDecVectorByElementCount}, \ + {"incp_r_p_r"_h, &VISITORCLASS::VisitSVEIncDecByPredicateCount}, \ + {"incp_z_p_z"_h, &VISITORCLASS::VisitSVEIncDecByPredicateCount}, \ + {"incw_r_rs"_h, &VISITORCLASS::VisitSVEIncDecRegisterByElementCount}, \ + {"incw_z_zs"_h, &VISITORCLASS::VisitSVEIncDecVectorByElementCount}, \ + {"index_z_ii"_h, &VISITORCLASS::VisitSVEIndexGeneration}, \ + {"index_z_ir"_h, &VISITORCLASS::VisitSVEIndexGeneration}, \ + {"index_z_ri"_h, &VISITORCLASS::VisitSVEIndexGeneration}, \ + {"index_z_rr"_h, &VISITORCLASS::VisitSVEIndexGeneration}, \ + {"insr_z_r"_h, &VISITORCLASS::VisitSVEInsertGeneralRegister}, \ + {"insr_z_v"_h, &VISITORCLASS::VisitSVEInsertSIMDFPScalarRegister}, \ + {"lasta_r_p_z"_h, \ + &VISITORCLASS::VisitSVEExtractElementToGeneralRegister}, \ + {"lasta_v_p_z"_h, \ + &VISITORCLASS::VisitSVEExtractElementToSIMDFPScalarRegister}, \ + {"lastb_r_p_z"_h, \ + &VISITORCLASS::VisitSVEExtractElementToGeneralRegister}, \ + {"lastb_v_p_z"_h, \ + &VISITORCLASS::VisitSVEExtractElementToSIMDFPScalarRegister}, \ + {"ld1b_z_p_ai_d"_h, \ + &VISITORCLASS::VisitSVE64BitGatherLoad_VectorPlusImm}, \ + {"ld1b_z_p_ai_s"_h, \ + &VISITORCLASS::VisitSVE32BitGatherLoad_VectorPlusImm}, \ + {"ld1b_z_p_bi_u16"_h, \ + &VISITORCLASS::VisitSVEContiguousLoad_ScalarPlusImm}, \ + {"ld1b_z_p_bi_u32"_h, \ + &VISITORCLASS::VisitSVEContiguousLoad_ScalarPlusImm}, \ + {"ld1b_z_p_bi_u64"_h, \ + 
&VISITORCLASS::VisitSVEContiguousLoad_ScalarPlusImm}, \ + {"ld1b_z_p_bi_u8"_h, \ + &VISITORCLASS::VisitSVEContiguousLoad_ScalarPlusImm}, \ + {"ld1b_z_p_br_u16"_h, \ + &VISITORCLASS::VisitSVEContiguousLoad_ScalarPlusScalar}, \ + {"ld1b_z_p_br_u32"_h, \ + &VISITORCLASS::VisitSVEContiguousLoad_ScalarPlusScalar}, \ + {"ld1b_z_p_br_u64"_h, \ + &VISITORCLASS::VisitSVEContiguousLoad_ScalarPlusScalar}, \ + {"ld1b_z_p_br_u8"_h, \ + &VISITORCLASS::VisitSVEContiguousLoad_ScalarPlusScalar}, \ + {"ld1b_z_p_bz_d_64_unscaled"_h, \ + &VISITORCLASS::VisitSVE64BitGatherLoad_ScalarPlus64BitUnscaledOffsets}, \ + {"ld1b_z_p_bz_d_x32_unscaled"_h, \ + &VISITORCLASS:: \ + VisitSVE64BitGatherLoad_ScalarPlusUnpacked32BitUnscaledOffsets}, \ + {"ld1b_z_p_bz_s_x32_unscaled"_h, \ + &VISITORCLASS::VisitSVE32BitGatherLoad_ScalarPlus32BitUnscaledOffsets}, \ + {"ld1d_z_p_ai_d"_h, \ + &VISITORCLASS::VisitSVE64BitGatherLoad_VectorPlusImm}, \ + {"ld1d_z_p_bi_u64"_h, \ + &VISITORCLASS::VisitSVEContiguousLoad_ScalarPlusImm}, \ + {"ld1d_z_p_br_u64"_h, \ + &VISITORCLASS::VisitSVEContiguousLoad_ScalarPlusScalar}, \ + {"ld1d_z_p_bz_d_64_scaled"_h, \ + &VISITORCLASS::VisitSVE64BitGatherLoad_ScalarPlus64BitScaledOffsets}, \ + {"ld1d_z_p_bz_d_64_unscaled"_h, \ + &VISITORCLASS::VisitSVE64BitGatherLoad_ScalarPlus64BitUnscaledOffsets}, \ + {"ld1d_z_p_bz_d_x32_scaled"_h, \ + &VISITORCLASS:: \ + VisitSVE64BitGatherLoad_ScalarPlus32BitUnpackedScaledOffsets}, \ + {"ld1d_z_p_bz_d_x32_unscaled"_h, \ + &VISITORCLASS:: \ + VisitSVE64BitGatherLoad_ScalarPlusUnpacked32BitUnscaledOffsets}, \ + {"ld1h_z_p_ai_d"_h, \ + &VISITORCLASS::VisitSVE64BitGatherLoad_VectorPlusImm}, \ + {"ld1h_z_p_ai_s"_h, \ + &VISITORCLASS::VisitSVE32BitGatherLoad_VectorPlusImm}, \ + {"ld1h_z_p_bi_u16"_h, \ + &VISITORCLASS::VisitSVEContiguousLoad_ScalarPlusImm}, \ + {"ld1h_z_p_bi_u32"_h, \ + &VISITORCLASS::VisitSVEContiguousLoad_ScalarPlusImm}, \ + {"ld1h_z_p_bi_u64"_h, \ + &VISITORCLASS::VisitSVEContiguousLoad_ScalarPlusImm}, \ + {"ld1h_z_p_br_u16"_h, \ + &VISITORCLASS::VisitSVEContiguousLoad_ScalarPlusScalar}, \ + {"ld1h_z_p_br_u32"_h, \ + &VISITORCLASS::VisitSVEContiguousLoad_ScalarPlusScalar}, \ + {"ld1h_z_p_br_u64"_h, \ + &VISITORCLASS::VisitSVEContiguousLoad_ScalarPlusScalar}, \ + {"ld1h_z_p_bz_d_64_scaled"_h, \ + &VISITORCLASS::VisitSVE64BitGatherLoad_ScalarPlus64BitScaledOffsets}, \ + {"ld1h_z_p_bz_d_64_unscaled"_h, \ + &VISITORCLASS::VisitSVE64BitGatherLoad_ScalarPlus64BitUnscaledOffsets}, \ + {"ld1h_z_p_bz_d_x32_scaled"_h, \ + &VISITORCLASS:: \ + VisitSVE64BitGatherLoad_ScalarPlus32BitUnpackedScaledOffsets}, \ + {"ld1h_z_p_bz_d_x32_unscaled"_h, \ + &VISITORCLASS:: \ + VisitSVE64BitGatherLoad_ScalarPlusUnpacked32BitUnscaledOffsets}, \ + {"ld1h_z_p_bz_s_x32_scaled"_h, \ + &VISITORCLASS:: \ + VisitSVE32BitGatherLoadHalfwords_ScalarPlus32BitScaledOffsets}, \ + {"ld1h_z_p_bz_s_x32_unscaled"_h, \ + &VISITORCLASS::VisitSVE32BitGatherLoad_ScalarPlus32BitUnscaledOffsets}, \ + {"ld1rb_z_p_bi_u16"_h, &VISITORCLASS::VisitSVELoadAndBroadcastElement}, \ + {"ld1rb_z_p_bi_u32"_h, &VISITORCLASS::VisitSVELoadAndBroadcastElement}, \ + {"ld1rb_z_p_bi_u64"_h, &VISITORCLASS::VisitSVELoadAndBroadcastElement}, \ + {"ld1rb_z_p_bi_u8"_h, &VISITORCLASS::VisitSVELoadAndBroadcastElement}, \ + {"ld1rd_z_p_bi_u64"_h, &VISITORCLASS::VisitSVELoadAndBroadcastElement}, \ + {"ld1rh_z_p_bi_u16"_h, &VISITORCLASS::VisitSVELoadAndBroadcastElement}, \ + {"ld1rh_z_p_bi_u32"_h, &VISITORCLASS::VisitSVELoadAndBroadcastElement}, \ + {"ld1rh_z_p_bi_u64"_h, &VISITORCLASS::VisitSVELoadAndBroadcastElement}, \ + 
{"ld1rqb_z_p_bi_u8"_h, \ + &VISITORCLASS::VisitSVELoadAndBroadcastQOWord_ScalarPlusImm}, \ + {"ld1rqb_z_p_br_contiguous"_h, \ + &VISITORCLASS::VisitSVELoadAndBroadcastQOWord_ScalarPlusScalar}, \ + {"ld1rqd_z_p_bi_u64"_h, \ + &VISITORCLASS::VisitSVELoadAndBroadcastQOWord_ScalarPlusImm}, \ + {"ld1rqd_z_p_br_contiguous"_h, \ + &VISITORCLASS::VisitSVELoadAndBroadcastQOWord_ScalarPlusScalar}, \ + {"ld1rqh_z_p_bi_u16"_h, \ + &VISITORCLASS::VisitSVELoadAndBroadcastQOWord_ScalarPlusImm}, \ + {"ld1rqh_z_p_br_contiguous"_h, \ + &VISITORCLASS::VisitSVELoadAndBroadcastQOWord_ScalarPlusScalar}, \ + {"ld1rqw_z_p_bi_u32"_h, \ + &VISITORCLASS::VisitSVELoadAndBroadcastQOWord_ScalarPlusImm}, \ + {"ld1rqw_z_p_br_contiguous"_h, \ + &VISITORCLASS::VisitSVELoadAndBroadcastQOWord_ScalarPlusScalar}, \ + {"ld1rsb_z_p_bi_s16"_h, &VISITORCLASS::VisitSVELoadAndBroadcastElement}, \ + {"ld1rsb_z_p_bi_s32"_h, &VISITORCLASS::VisitSVELoadAndBroadcastElement}, \ + {"ld1rsb_z_p_bi_s64"_h, &VISITORCLASS::VisitSVELoadAndBroadcastElement}, \ + {"ld1rsh_z_p_bi_s32"_h, &VISITORCLASS::VisitSVELoadAndBroadcastElement}, \ + {"ld1rsh_z_p_bi_s64"_h, &VISITORCLASS::VisitSVELoadAndBroadcastElement}, \ + {"ld1rsw_z_p_bi_s64"_h, &VISITORCLASS::VisitSVELoadAndBroadcastElement}, \ + {"ld1rw_z_p_bi_u32"_h, &VISITORCLASS::VisitSVELoadAndBroadcastElement}, \ + {"ld1rw_z_p_bi_u64"_h, &VISITORCLASS::VisitSVELoadAndBroadcastElement}, \ + {"ld1sb_z_p_ai_d"_h, \ + &VISITORCLASS::VisitSVE64BitGatherLoad_VectorPlusImm}, \ + {"ld1sb_z_p_ai_s"_h, \ + &VISITORCLASS::VisitSVE32BitGatherLoad_VectorPlusImm}, \ + {"ld1sb_z_p_bi_s16"_h, \ + &VISITORCLASS::VisitSVEContiguousLoad_ScalarPlusImm}, \ + {"ld1sb_z_p_bi_s32"_h, \ + &VISITORCLASS::VisitSVEContiguousLoad_ScalarPlusImm}, \ + {"ld1sb_z_p_bi_s64"_h, \ + &VISITORCLASS::VisitSVEContiguousLoad_ScalarPlusImm}, \ + {"ld1sb_z_p_br_s16"_h, \ + &VISITORCLASS::VisitSVEContiguousLoad_ScalarPlusScalar}, \ + {"ld1sb_z_p_br_s32"_h, \ + &VISITORCLASS::VisitSVEContiguousLoad_ScalarPlusScalar}, \ + {"ld1sb_z_p_br_s64"_h, \ + &VISITORCLASS::VisitSVEContiguousLoad_ScalarPlusScalar}, \ + {"ld1sb_z_p_bz_d_64_unscaled"_h, \ + &VISITORCLASS::VisitSVE64BitGatherLoad_ScalarPlus64BitUnscaledOffsets}, \ + {"ld1sb_z_p_bz_d_x32_unscaled"_h, \ + &VISITORCLASS:: \ + VisitSVE64BitGatherLoad_ScalarPlusUnpacked32BitUnscaledOffsets}, \ + {"ld1sb_z_p_bz_s_x32_unscaled"_h, \ + &VISITORCLASS::VisitSVE32BitGatherLoad_ScalarPlus32BitUnscaledOffsets}, \ + {"ld1sh_z_p_ai_d"_h, \ + &VISITORCLASS::VisitSVE64BitGatherLoad_VectorPlusImm}, \ + {"ld1sh_z_p_ai_s"_h, \ + &VISITORCLASS::VisitSVE32BitGatherLoad_VectorPlusImm}, \ + {"ld1sh_z_p_bi_s32"_h, \ + &VISITORCLASS::VisitSVEContiguousLoad_ScalarPlusImm}, \ + {"ld1sh_z_p_bi_s64"_h, \ + &VISITORCLASS::VisitSVEContiguousLoad_ScalarPlusImm}, \ + {"ld1sh_z_p_br_s32"_h, \ + &VISITORCLASS::VisitSVEContiguousLoad_ScalarPlusScalar}, \ + {"ld1sh_z_p_br_s64"_h, \ + &VISITORCLASS::VisitSVEContiguousLoad_ScalarPlusScalar}, \ + {"ld1sh_z_p_bz_d_64_scaled"_h, \ + &VISITORCLASS::VisitSVE64BitGatherLoad_ScalarPlus64BitScaledOffsets}, \ + {"ld1sh_z_p_bz_d_64_unscaled"_h, \ + &VISITORCLASS::VisitSVE64BitGatherLoad_ScalarPlus64BitUnscaledOffsets}, \ + {"ld1sh_z_p_bz_d_x32_scaled"_h, \ + &VISITORCLASS:: \ + VisitSVE64BitGatherLoad_ScalarPlus32BitUnpackedScaledOffsets}, \ + {"ld1sh_z_p_bz_d_x32_unscaled"_h, \ + &VISITORCLASS:: \ + VisitSVE64BitGatherLoad_ScalarPlusUnpacked32BitUnscaledOffsets}, \ + {"ld1sh_z_p_bz_s_x32_scaled"_h, \ + &VISITORCLASS:: \ + VisitSVE32BitGatherLoadHalfwords_ScalarPlus32BitScaledOffsets}, \ 
+ {"ld1sh_z_p_bz_s_x32_unscaled"_h, \ + &VISITORCLASS::VisitSVE32BitGatherLoad_ScalarPlus32BitUnscaledOffsets}, \ + {"ld1sw_z_p_ai_d"_h, \ + &VISITORCLASS::VisitSVE64BitGatherLoad_VectorPlusImm}, \ + {"ld1sw_z_p_bi_s64"_h, \ + &VISITORCLASS::VisitSVEContiguousLoad_ScalarPlusImm}, \ + {"ld1sw_z_p_br_s64"_h, \ + &VISITORCLASS::VisitSVEContiguousLoad_ScalarPlusScalar}, \ + {"ld1sw_z_p_bz_d_64_scaled"_h, \ + &VISITORCLASS::VisitSVE64BitGatherLoad_ScalarPlus64BitScaledOffsets}, \ + {"ld1sw_z_p_bz_d_64_unscaled"_h, \ + &VISITORCLASS::VisitSVE64BitGatherLoad_ScalarPlus64BitUnscaledOffsets}, \ + {"ld1sw_z_p_bz_d_x32_scaled"_h, \ + &VISITORCLASS:: \ + VisitSVE64BitGatherLoad_ScalarPlus32BitUnpackedScaledOffsets}, \ + {"ld1sw_z_p_bz_d_x32_unscaled"_h, \ + &VISITORCLASS:: \ + VisitSVE64BitGatherLoad_ScalarPlusUnpacked32BitUnscaledOffsets}, \ + {"ld1w_z_p_ai_d"_h, \ + &VISITORCLASS::VisitSVE64BitGatherLoad_VectorPlusImm}, \ + {"ld1w_z_p_ai_s"_h, \ + &VISITORCLASS::VisitSVE32BitGatherLoad_VectorPlusImm}, \ + {"ld1w_z_p_bi_u32"_h, \ + &VISITORCLASS::VisitSVEContiguousLoad_ScalarPlusImm}, \ + {"ld1w_z_p_bi_u64"_h, \ + &VISITORCLASS::VisitSVEContiguousLoad_ScalarPlusImm}, \ + {"ld1w_z_p_br_u32"_h, \ + &VISITORCLASS::VisitSVEContiguousLoad_ScalarPlusScalar}, \ + {"ld1w_z_p_br_u64"_h, \ + &VISITORCLASS::VisitSVEContiguousLoad_ScalarPlusScalar}, \ + {"ld1w_z_p_bz_d_64_scaled"_h, \ + &VISITORCLASS::VisitSVE64BitGatherLoad_ScalarPlus64BitScaledOffsets}, \ + {"ld1w_z_p_bz_d_64_unscaled"_h, \ + &VISITORCLASS::VisitSVE64BitGatherLoad_ScalarPlus64BitUnscaledOffsets}, \ + {"ld1w_z_p_bz_d_x32_scaled"_h, \ + &VISITORCLASS:: \ + VisitSVE64BitGatherLoad_ScalarPlus32BitUnpackedScaledOffsets}, \ + {"ld1w_z_p_bz_d_x32_unscaled"_h, \ + &VISITORCLASS:: \ + VisitSVE64BitGatherLoad_ScalarPlusUnpacked32BitUnscaledOffsets}, \ + {"ld1w_z_p_bz_s_x32_scaled"_h, \ + &VISITORCLASS:: \ + VisitSVE32BitGatherLoadWords_ScalarPlus32BitScaledOffsets}, \ + {"ld1w_z_p_bz_s_x32_unscaled"_h, \ + &VISITORCLASS::VisitSVE32BitGatherLoad_ScalarPlus32BitUnscaledOffsets}, \ + {"ld2b_z_p_bi_contiguous"_h, \ + &VISITORCLASS::VisitSVELoadMultipleStructures_ScalarPlusImm}, \ + {"ld2b_z_p_br_contiguous"_h, \ + &VISITORCLASS::VisitSVELoadMultipleStructures_ScalarPlusScalar}, \ + {"ld2d_z_p_bi_contiguous"_h, \ + &VISITORCLASS::VisitSVELoadMultipleStructures_ScalarPlusImm}, \ + {"ld2d_z_p_br_contiguous"_h, \ + &VISITORCLASS::VisitSVELoadMultipleStructures_ScalarPlusScalar}, \ + {"ld2h_z_p_bi_contiguous"_h, \ + &VISITORCLASS::VisitSVELoadMultipleStructures_ScalarPlusImm}, \ + {"ld2h_z_p_br_contiguous"_h, \ + &VISITORCLASS::VisitSVELoadMultipleStructures_ScalarPlusScalar}, \ + {"ld2w_z_p_bi_contiguous"_h, \ + &VISITORCLASS::VisitSVELoadMultipleStructures_ScalarPlusImm}, \ + {"ld2w_z_p_br_contiguous"_h, \ + &VISITORCLASS::VisitSVELoadMultipleStructures_ScalarPlusScalar}, \ + {"ld3b_z_p_bi_contiguous"_h, \ + &VISITORCLASS::VisitSVELoadMultipleStructures_ScalarPlusImm}, \ + {"ld3b_z_p_br_contiguous"_h, \ + &VISITORCLASS::VisitSVELoadMultipleStructures_ScalarPlusScalar}, \ + {"ld3d_z_p_bi_contiguous"_h, \ + &VISITORCLASS::VisitSVELoadMultipleStructures_ScalarPlusImm}, \ + {"ld3d_z_p_br_contiguous"_h, \ + &VISITORCLASS::VisitSVELoadMultipleStructures_ScalarPlusScalar}, \ + {"ld3h_z_p_bi_contiguous"_h, \ + &VISITORCLASS::VisitSVELoadMultipleStructures_ScalarPlusImm}, \ + {"ld3h_z_p_br_contiguous"_h, \ + &VISITORCLASS::VisitSVELoadMultipleStructures_ScalarPlusScalar}, \ + {"ld3w_z_p_bi_contiguous"_h, \ + &VISITORCLASS::VisitSVELoadMultipleStructures_ScalarPlusImm}, 
\ + {"ld3w_z_p_br_contiguous"_h, \ + &VISITORCLASS::VisitSVELoadMultipleStructures_ScalarPlusScalar}, \ + {"ld4b_z_p_bi_contiguous"_h, \ + &VISITORCLASS::VisitSVELoadMultipleStructures_ScalarPlusImm}, \ + {"ld4b_z_p_br_contiguous"_h, \ + &VISITORCLASS::VisitSVELoadMultipleStructures_ScalarPlusScalar}, \ + {"ld4d_z_p_bi_contiguous"_h, \ + &VISITORCLASS::VisitSVELoadMultipleStructures_ScalarPlusImm}, \ + {"ld4d_z_p_br_contiguous"_h, \ + &VISITORCLASS::VisitSVELoadMultipleStructures_ScalarPlusScalar}, \ + {"ld4h_z_p_bi_contiguous"_h, \ + &VISITORCLASS::VisitSVELoadMultipleStructures_ScalarPlusImm}, \ + {"ld4h_z_p_br_contiguous"_h, \ + &VISITORCLASS::VisitSVELoadMultipleStructures_ScalarPlusScalar}, \ + {"ld4w_z_p_bi_contiguous"_h, \ + &VISITORCLASS::VisitSVELoadMultipleStructures_ScalarPlusImm}, \ + {"ld4w_z_p_br_contiguous"_h, \ + &VISITORCLASS::VisitSVELoadMultipleStructures_ScalarPlusScalar}, \ + {"ldff1b_z_p_ai_d"_h, \ + &VISITORCLASS::VisitSVE64BitGatherLoad_VectorPlusImm}, \ + {"ldff1b_z_p_ai_s"_h, \ + &VISITORCLASS::VisitSVE32BitGatherLoad_VectorPlusImm}, \ + {"ldff1b_z_p_br_u16"_h, \ + &VISITORCLASS::VisitSVEContiguousFirstFaultLoad_ScalarPlusScalar}, \ + {"ldff1b_z_p_br_u32"_h, \ + &VISITORCLASS::VisitSVEContiguousFirstFaultLoad_ScalarPlusScalar}, \ + {"ldff1b_z_p_br_u64"_h, \ + &VISITORCLASS::VisitSVEContiguousFirstFaultLoad_ScalarPlusScalar}, \ + {"ldff1b_z_p_br_u8"_h, \ + &VISITORCLASS::VisitSVEContiguousFirstFaultLoad_ScalarPlusScalar}, \ + {"ldff1b_z_p_bz_d_64_unscaled"_h, \ + &VISITORCLASS::VisitSVE64BitGatherLoad_ScalarPlus64BitUnscaledOffsets}, \ + {"ldff1b_z_p_bz_d_x32_unscaled"_h, \ + &VISITORCLASS:: \ + VisitSVE64BitGatherLoad_ScalarPlusUnpacked32BitUnscaledOffsets}, \ + {"ldff1b_z_p_bz_s_x32_unscaled"_h, \ + &VISITORCLASS::VisitSVE32BitGatherLoad_ScalarPlus32BitUnscaledOffsets}, \ + {"ldff1d_z_p_ai_d"_h, \ + &VISITORCLASS::VisitSVE64BitGatherLoad_VectorPlusImm}, \ + {"ldff1d_z_p_br_u64"_h, \ + &VISITORCLASS::VisitSVEContiguousFirstFaultLoad_ScalarPlusScalar}, \ + {"ldff1d_z_p_bz_d_64_scaled"_h, \ + &VISITORCLASS::VisitSVE64BitGatherLoad_ScalarPlus64BitScaledOffsets}, \ + {"ldff1d_z_p_bz_d_64_unscaled"_h, \ + &VISITORCLASS::VisitSVE64BitGatherLoad_ScalarPlus64BitUnscaledOffsets}, \ + {"ldff1d_z_p_bz_d_x32_scaled"_h, \ + &VISITORCLASS:: \ + VisitSVE64BitGatherLoad_ScalarPlus32BitUnpackedScaledOffsets}, \ + {"ldff1d_z_p_bz_d_x32_unscaled"_h, \ + &VISITORCLASS:: \ + VisitSVE64BitGatherLoad_ScalarPlusUnpacked32BitUnscaledOffsets}, \ + {"ldff1h_z_p_ai_d"_h, \ + &VISITORCLASS::VisitSVE64BitGatherLoad_VectorPlusImm}, \ + {"ldff1h_z_p_ai_s"_h, \ + &VISITORCLASS::VisitSVE32BitGatherLoad_VectorPlusImm}, \ + {"ldff1h_z_p_br_u16"_h, \ + &VISITORCLASS::VisitSVEContiguousFirstFaultLoad_ScalarPlusScalar}, \ + {"ldff1h_z_p_br_u32"_h, \ + &VISITORCLASS::VisitSVEContiguousFirstFaultLoad_ScalarPlusScalar}, \ + {"ldff1h_z_p_br_u64"_h, \ + &VISITORCLASS::VisitSVEContiguousFirstFaultLoad_ScalarPlusScalar}, \ + {"ldff1h_z_p_bz_d_64_scaled"_h, \ + &VISITORCLASS::VisitSVE64BitGatherLoad_ScalarPlus64BitScaledOffsets}, \ + {"ldff1h_z_p_bz_d_64_unscaled"_h, \ + &VISITORCLASS::VisitSVE64BitGatherLoad_ScalarPlus64BitUnscaledOffsets}, \ + {"ldff1h_z_p_bz_d_x32_scaled"_h, \ + &VISITORCLASS:: \ + VisitSVE64BitGatherLoad_ScalarPlus32BitUnpackedScaledOffsets}, \ + {"ldff1h_z_p_bz_d_x32_unscaled"_h, \ + &VISITORCLASS:: \ + VisitSVE64BitGatherLoad_ScalarPlusUnpacked32BitUnscaledOffsets}, \ + {"ldff1h_z_p_bz_s_x32_scaled"_h, \ + &VISITORCLASS:: \ + 
VisitSVE32BitGatherLoadHalfwords_ScalarPlus32BitScaledOffsets}, \ + {"ldff1h_z_p_bz_s_x32_unscaled"_h, \ + &VISITORCLASS::VisitSVE32BitGatherLoad_ScalarPlus32BitUnscaledOffsets}, \ + {"ldff1sb_z_p_ai_d"_h, \ + &VISITORCLASS::VisitSVE64BitGatherLoad_VectorPlusImm}, \ + {"ldff1sb_z_p_ai_s"_h, \ + &VISITORCLASS::VisitSVE32BitGatherLoad_VectorPlusImm}, \ + {"ldff1sb_z_p_br_s16"_h, \ + &VISITORCLASS::VisitSVEContiguousFirstFaultLoad_ScalarPlusScalar}, \ + {"ldff1sb_z_p_br_s32"_h, \ + &VISITORCLASS::VisitSVEContiguousFirstFaultLoad_ScalarPlusScalar}, \ + {"ldff1sb_z_p_br_s64"_h, \ + &VISITORCLASS::VisitSVEContiguousFirstFaultLoad_ScalarPlusScalar}, \ + {"ldff1sb_z_p_bz_d_64_unscaled"_h, \ + &VISITORCLASS::VisitSVE64BitGatherLoad_ScalarPlus64BitUnscaledOffsets}, \ + {"ldff1sb_z_p_bz_d_x32_unscaled"_h, \ + &VISITORCLASS:: \ + VisitSVE64BitGatherLoad_ScalarPlusUnpacked32BitUnscaledOffsets}, \ + {"ldff1sb_z_p_bz_s_x32_unscaled"_h, \ + &VISITORCLASS::VisitSVE32BitGatherLoad_ScalarPlus32BitUnscaledOffsets}, \ + {"ldff1sh_z_p_ai_d"_h, \ + &VISITORCLASS::VisitSVE64BitGatherLoad_VectorPlusImm}, \ + {"ldff1sh_z_p_ai_s"_h, \ + &VISITORCLASS::VisitSVE32BitGatherLoad_VectorPlusImm}, \ + {"ldff1sh_z_p_br_s32"_h, \ + &VISITORCLASS::VisitSVEContiguousFirstFaultLoad_ScalarPlusScalar}, \ + {"ldff1sh_z_p_br_s64"_h, \ + &VISITORCLASS::VisitSVEContiguousFirstFaultLoad_ScalarPlusScalar}, \ + {"ldff1sh_z_p_bz_d_64_scaled"_h, \ + &VISITORCLASS::VisitSVE64BitGatherLoad_ScalarPlus64BitScaledOffsets}, \ + {"ldff1sh_z_p_bz_d_64_unscaled"_h, \ + &VISITORCLASS::VisitSVE64BitGatherLoad_ScalarPlus64BitUnscaledOffsets}, \ + {"ldff1sh_z_p_bz_d_x32_scaled"_h, \ + &VISITORCLASS:: \ + VisitSVE64BitGatherLoad_ScalarPlus32BitUnpackedScaledOffsets}, \ + {"ldff1sh_z_p_bz_d_x32_unscaled"_h, \ + &VISITORCLASS:: \ + VisitSVE64BitGatherLoad_ScalarPlusUnpacked32BitUnscaledOffsets}, \ + {"ldff1sh_z_p_bz_s_x32_scaled"_h, \ + &VISITORCLASS:: \ + VisitSVE32BitGatherLoadHalfwords_ScalarPlus32BitScaledOffsets}, \ + {"ldff1sh_z_p_bz_s_x32_unscaled"_h, \ + &VISITORCLASS::VisitSVE32BitGatherLoad_ScalarPlus32BitUnscaledOffsets}, \ + {"ldff1sw_z_p_ai_d"_h, \ + &VISITORCLASS::VisitSVE64BitGatherLoad_VectorPlusImm}, \ + {"ldff1sw_z_p_br_s64"_h, \ + &VISITORCLASS::VisitSVEContiguousFirstFaultLoad_ScalarPlusScalar}, \ + {"ldff1sw_z_p_bz_d_64_scaled"_h, \ + &VISITORCLASS::VisitSVE64BitGatherLoad_ScalarPlus64BitScaledOffsets}, \ + {"ldff1sw_z_p_bz_d_64_unscaled"_h, \ + &VISITORCLASS::VisitSVE64BitGatherLoad_ScalarPlus64BitUnscaledOffsets}, \ + {"ldff1sw_z_p_bz_d_x32_scaled"_h, \ + &VISITORCLASS:: \ + VisitSVE64BitGatherLoad_ScalarPlus32BitUnpackedScaledOffsets}, \ + {"ldff1sw_z_p_bz_d_x32_unscaled"_h, \ + &VISITORCLASS:: \ + VisitSVE64BitGatherLoad_ScalarPlusUnpacked32BitUnscaledOffsets}, \ + {"ldff1w_z_p_ai_d"_h, \ + &VISITORCLASS::VisitSVE64BitGatherLoad_VectorPlusImm}, \ + {"ldff1w_z_p_ai_s"_h, \ + &VISITORCLASS::VisitSVE32BitGatherLoad_VectorPlusImm}, \ + {"ldff1w_z_p_br_u32"_h, \ + &VISITORCLASS::VisitSVEContiguousFirstFaultLoad_ScalarPlusScalar}, \ + {"ldff1w_z_p_br_u64"_h, \ + &VISITORCLASS::VisitSVEContiguousFirstFaultLoad_ScalarPlusScalar}, \ + {"ldff1w_z_p_bz_d_64_scaled"_h, \ + &VISITORCLASS::VisitSVE64BitGatherLoad_ScalarPlus64BitScaledOffsets}, \ + {"ldff1w_z_p_bz_d_64_unscaled"_h, \ + &VISITORCLASS::VisitSVE64BitGatherLoad_ScalarPlus64BitUnscaledOffsets}, \ + {"ldff1w_z_p_bz_d_x32_scaled"_h, \ + &VISITORCLASS:: \ + VisitSVE64BitGatherLoad_ScalarPlus32BitUnpackedScaledOffsets}, \ + {"ldff1w_z_p_bz_d_x32_unscaled"_h, \ + &VISITORCLASS:: \ + 
VisitSVE64BitGatherLoad_ScalarPlusUnpacked32BitUnscaledOffsets}, \ + {"ldff1w_z_p_bz_s_x32_scaled"_h, \ + &VISITORCLASS:: \ + VisitSVE32BitGatherLoadWords_ScalarPlus32BitScaledOffsets}, \ + {"ldff1w_z_p_bz_s_x32_unscaled"_h, \ + &VISITORCLASS::VisitSVE32BitGatherLoad_ScalarPlus32BitUnscaledOffsets}, \ + {"ldnf1b_z_p_bi_u16"_h, \ + &VISITORCLASS::VisitSVEContiguousNonFaultLoad_ScalarPlusImm}, \ + {"ldnf1b_z_p_bi_u32"_h, \ + &VISITORCLASS::VisitSVEContiguousNonFaultLoad_ScalarPlusImm}, \ + {"ldnf1b_z_p_bi_u64"_h, \ + &VISITORCLASS::VisitSVEContiguousNonFaultLoad_ScalarPlusImm}, \ + {"ldnf1b_z_p_bi_u8"_h, \ + &VISITORCLASS::VisitSVEContiguousNonFaultLoad_ScalarPlusImm}, \ + {"ldnf1d_z_p_bi_u64"_h, \ + &VISITORCLASS::VisitSVEContiguousNonFaultLoad_ScalarPlusImm}, \ + {"ldnf1h_z_p_bi_u16"_h, \ + &VISITORCLASS::VisitSVEContiguousNonFaultLoad_ScalarPlusImm}, \ + {"ldnf1h_z_p_bi_u32"_h, \ + &VISITORCLASS::VisitSVEContiguousNonFaultLoad_ScalarPlusImm}, \ + {"ldnf1h_z_p_bi_u64"_h, \ + &VISITORCLASS::VisitSVEContiguousNonFaultLoad_ScalarPlusImm}, \ + {"ldnf1sb_z_p_bi_s16"_h, \ + &VISITORCLASS::VisitSVEContiguousNonFaultLoad_ScalarPlusImm}, \ + {"ldnf1sb_z_p_bi_s32"_h, \ + &VISITORCLASS::VisitSVEContiguousNonFaultLoad_ScalarPlusImm}, \ + {"ldnf1sb_z_p_bi_s64"_h, \ + &VISITORCLASS::VisitSVEContiguousNonFaultLoad_ScalarPlusImm}, \ + {"ldnf1sh_z_p_bi_s32"_h, \ + &VISITORCLASS::VisitSVEContiguousNonFaultLoad_ScalarPlusImm}, \ + {"ldnf1sh_z_p_bi_s64"_h, \ + &VISITORCLASS::VisitSVEContiguousNonFaultLoad_ScalarPlusImm}, \ + {"ldnf1sw_z_p_bi_s64"_h, \ + &VISITORCLASS::VisitSVEContiguousNonFaultLoad_ScalarPlusImm}, \ + {"ldnf1w_z_p_bi_u32"_h, \ + &VISITORCLASS::VisitSVEContiguousNonFaultLoad_ScalarPlusImm}, \ + {"ldnf1w_z_p_bi_u64"_h, \ + &VISITORCLASS::VisitSVEContiguousNonFaultLoad_ScalarPlusImm}, \ + {"ldnt1b_z_p_bi_contiguous"_h, \ + &VISITORCLASS::VisitSVEContiguousNonTemporalLoad_ScalarPlusImm}, \ + {"ldnt1b_z_p_br_contiguous"_h, \ + &VISITORCLASS::VisitSVEContiguousNonTemporalLoad_ScalarPlusScalar}, \ + {"ldnt1d_z_p_bi_contiguous"_h, \ + &VISITORCLASS::VisitSVEContiguousNonTemporalLoad_ScalarPlusImm}, \ + {"ldnt1d_z_p_br_contiguous"_h, \ + &VISITORCLASS::VisitSVEContiguousNonTemporalLoad_ScalarPlusScalar}, \ + {"ldnt1h_z_p_bi_contiguous"_h, \ + &VISITORCLASS::VisitSVEContiguousNonTemporalLoad_ScalarPlusImm}, \ + {"ldnt1h_z_p_br_contiguous"_h, \ + &VISITORCLASS::VisitSVEContiguousNonTemporalLoad_ScalarPlusScalar}, \ + {"ldnt1w_z_p_bi_contiguous"_h, \ + &VISITORCLASS::VisitSVEContiguousNonTemporalLoad_ScalarPlusImm}, \ + {"ldnt1w_z_p_br_contiguous"_h, \ + &VISITORCLASS::VisitSVEContiguousNonTemporalLoad_ScalarPlusScalar}, \ + {"ldr_p_bi"_h, &VISITORCLASS::VisitSVELoadPredicateRegister}, \ + {"ldr_z_bi"_h, &VISITORCLASS::VisitSVELoadVectorRegister}, \ + {"lslr_z_p_zz"_h, \ + &VISITORCLASS::VisitSVEBitwiseShiftByVector_Predicated}, \ + {"lsl_z_p_zi"_h, &VISITORCLASS::VisitSVEBitwiseShiftByImm_Predicated}, \ + {"lsl_z_p_zw"_h, \ + &VISITORCLASS::VisitSVEBitwiseShiftByWideElements_Predicated}, \ + {"lsl_z_p_zz"_h, \ + &VISITORCLASS::VisitSVEBitwiseShiftByVector_Predicated}, \ + {"lsl_z_zi"_h, &VISITORCLASS::VisitSVEBitwiseShiftUnpredicated}, \ + {"lsl_z_zw"_h, &VISITORCLASS::VisitSVEBitwiseShiftUnpredicated}, \ + {"lsrr_z_p_zz"_h, \ + &VISITORCLASS::VisitSVEBitwiseShiftByVector_Predicated}, \ + {"lsr_z_p_zi"_h, &VISITORCLASS::VisitSVEBitwiseShiftByImm_Predicated}, \ + {"lsr_z_p_zw"_h, \ + &VISITORCLASS::VisitSVEBitwiseShiftByWideElements_Predicated}, \ + {"lsr_z_p_zz"_h, \ + 
&VISITORCLASS::VisitSVEBitwiseShiftByVector_Predicated}, \ + {"lsr_z_zi"_h, &VISITORCLASS::VisitSVEBitwiseShiftUnpredicated}, \ + {"lsr_z_zw"_h, &VISITORCLASS::VisitSVEBitwiseShiftUnpredicated}, \ + {"mad_z_p_zzz"_h, &VISITORCLASS::VisitSVEIntMulAddPredicated}, \ + {"mla_z_p_zzz"_h, &VISITORCLASS::VisitSVEIntMulAddPredicated}, \ + {"mls_z_p_zzz"_h, &VISITORCLASS::VisitSVEIntMulAddPredicated}, \ + {"movprfx_z_p_z"_h, &VISITORCLASS::VisitSVEMovprfx}, \ + {"movprfx_z_z"_h, \ + &VISITORCLASS::VisitSVEConstructivePrefix_Unpredicated}, \ + {"msb_z_p_zzz"_h, &VISITORCLASS::VisitSVEIntMulAddPredicated}, \ + {"mul_z_p_zz"_h, &VISITORCLASS::VisitSVEIntMulVectors_Predicated}, \ + {"mul_z_zi"_h, &VISITORCLASS::VisitSVEIntMulImm_Unpredicated}, \ + {"nands_p_p_pp_z"_h, &VISITORCLASS::VisitSVEPredicateLogical}, \ + {"nand_p_p_pp_z"_h, &VISITORCLASS::VisitSVEPredicateLogical}, \ + {"neg_z_p_z"_h, &VISITORCLASS::VisitSVEIntUnaryArithmeticPredicated}, \ + {"nors_p_p_pp_z"_h, &VISITORCLASS::VisitSVEPredicateLogical}, \ + {"nor_p_p_pp_z"_h, &VISITORCLASS::VisitSVEPredicateLogical}, \ + {"not_z_p_z"_h, &VISITORCLASS::VisitSVEIntUnaryArithmeticPredicated}, \ + {"orns_p_p_pp_z"_h, &VISITORCLASS::VisitSVEPredicateLogical}, \ + {"orn_p_p_pp_z"_h, &VISITORCLASS::VisitSVEPredicateLogical}, \ + {"orrs_p_p_pp_z"_h, &VISITORCLASS::VisitSVEPredicateLogical}, \ + {"orr_p_p_pp_z"_h, &VISITORCLASS::VisitSVEPredicateLogical}, \ + {"orr_z_p_zz"_h, &VISITORCLASS::VisitSVEBitwiseLogical_Predicated}, \ + {"orr_z_zi"_h, \ + &VISITORCLASS::VisitSVEBitwiseLogicalWithImm_Unpredicated}, \ + {"orr_z_zz"_h, &VISITORCLASS::VisitSVEBitwiseLogicalUnpredicated}, \ + {"orv_r_p_z"_h, &VISITORCLASS::VisitSVEIntReduction}, \ + {"pfalse_p"_h, &VISITORCLASS::VisitSVEPredicateZero}, \ + {"pfirst_p_p_p"_h, &VISITORCLASS::VisitSVEPredicateFirstActive}, \ + {"pnext_p_p_p"_h, &VISITORCLASS::VisitSVEPredicateNextActive}, \ + {"prfb_i_p_ai_d"_h, \ + &VISITORCLASS::VisitSVE64BitGatherPrefetch_VectorPlusImm}, \ + {"prfb_i_p_ai_s"_h, \ + &VISITORCLASS::VisitSVE32BitGatherPrefetch_VectorPlusImm}, \ + {"prfb_i_p_bi_s"_h, \ + &VISITORCLASS::VisitSVEContiguousPrefetch_ScalarPlusImm}, \ + {"prfb_i_p_br_s"_h, \ + &VISITORCLASS::VisitSVEContiguousPrefetch_ScalarPlusScalar}, \ + {"prfb_i_p_bz_d_64_scaled"_h, \ + &VISITORCLASS:: \ + VisitSVE64BitGatherPrefetch_ScalarPlus64BitScaledOffsets}, \ + {"prfb_i_p_bz_d_x32_scaled"_h, \ + &VISITORCLASS:: \ + VisitSVE64BitGatherPrefetch_ScalarPlusUnpacked32BitScaledOffsets}, \ + {"prfb_i_p_bz_s_x32_scaled"_h, \ + &VISITORCLASS:: \ + VisitSVE32BitGatherPrefetch_ScalarPlus32BitScaledOffsets}, \ + {"prfd_i_p_ai_d"_h, \ + &VISITORCLASS::VisitSVE64BitGatherPrefetch_VectorPlusImm}, \ + {"prfd_i_p_ai_s"_h, \ + &VISITORCLASS::VisitSVE32BitGatherPrefetch_VectorPlusImm}, \ + {"prfd_i_p_bi_s"_h, \ + &VISITORCLASS::VisitSVEContiguousPrefetch_ScalarPlusImm}, \ + {"prfd_i_p_br_s"_h, \ + &VISITORCLASS::VisitSVEContiguousPrefetch_ScalarPlusScalar}, \ + {"prfd_i_p_bz_d_64_scaled"_h, \ + &VISITORCLASS:: \ + VisitSVE64BitGatherPrefetch_ScalarPlus64BitScaledOffsets}, \ + {"prfd_i_p_bz_d_x32_scaled"_h, \ + &VISITORCLASS:: \ + VisitSVE64BitGatherPrefetch_ScalarPlusUnpacked32BitScaledOffsets}, \ + {"prfd_i_p_bz_s_x32_scaled"_h, \ + &VISITORCLASS:: \ + VisitSVE32BitGatherPrefetch_ScalarPlus32BitScaledOffsets}, \ + {"prfh_i_p_ai_d"_h, \ + &VISITORCLASS::VisitSVE64BitGatherPrefetch_VectorPlusImm}, \ + {"prfh_i_p_ai_s"_h, \ + &VISITORCLASS::VisitSVE32BitGatherPrefetch_VectorPlusImm}, \ + {"prfh_i_p_bi_s"_h, \ + 
&VISITORCLASS::VisitSVEContiguousPrefetch_ScalarPlusImm}, \ + {"prfh_i_p_br_s"_h, \ + &VISITORCLASS::VisitSVEContiguousPrefetch_ScalarPlusScalar}, \ + {"prfh_i_p_bz_d_64_scaled"_h, \ + &VISITORCLASS:: \ + VisitSVE64BitGatherPrefetch_ScalarPlus64BitScaledOffsets}, \ + {"prfh_i_p_bz_d_x32_scaled"_h, \ + &VISITORCLASS:: \ + VisitSVE64BitGatherPrefetch_ScalarPlusUnpacked32BitScaledOffsets}, \ + {"prfh_i_p_bz_s_x32_scaled"_h, \ + &VISITORCLASS:: \ + VisitSVE32BitGatherPrefetch_ScalarPlus32BitScaledOffsets}, \ + {"prfw_i_p_ai_d"_h, \ + &VISITORCLASS::VisitSVE64BitGatherPrefetch_VectorPlusImm}, \ + {"prfw_i_p_ai_s"_h, \ + &VISITORCLASS::VisitSVE32BitGatherPrefetch_VectorPlusImm}, \ + {"prfw_i_p_bi_s"_h, \ + &VISITORCLASS::VisitSVEContiguousPrefetch_ScalarPlusImm}, \ + {"prfw_i_p_br_s"_h, \ + &VISITORCLASS::VisitSVEContiguousPrefetch_ScalarPlusScalar}, \ + {"prfw_i_p_bz_d_64_scaled"_h, \ + &VISITORCLASS:: \ + VisitSVE64BitGatherPrefetch_ScalarPlus64BitScaledOffsets}, \ + {"prfw_i_p_bz_d_x32_scaled"_h, \ + &VISITORCLASS:: \ + VisitSVE64BitGatherPrefetch_ScalarPlusUnpacked32BitScaledOffsets}, \ + {"prfw_i_p_bz_s_x32_scaled"_h, \ + &VISITORCLASS:: \ + VisitSVE32BitGatherPrefetch_ScalarPlus32BitScaledOffsets}, \ + {"ptest_p_p"_h, &VISITORCLASS::VisitSVEPredicateTest}, \ + {"ptrues_p_s"_h, &VISITORCLASS::VisitSVEPredicateInitialize}, \ + {"ptrue_p_s"_h, &VISITORCLASS::VisitSVEPredicateInitialize}, \ + {"punpkhi_p_p"_h, &VISITORCLASS::VisitSVEUnpackPredicateElements}, \ + {"punpklo_p_p"_h, &VISITORCLASS::VisitSVEUnpackPredicateElements}, \ + {"rbit_z_p_z"_h, &VISITORCLASS::VisitSVEReverseWithinElements}, \ + {"rdffrs_p_p_f"_h, \ + &VISITORCLASS::VisitSVEPredicateReadFromFFR_Predicated}, \ + {"rdffr_p_f"_h, \ + &VISITORCLASS::VisitSVEPredicateReadFromFFR_Unpredicated}, \ + {"rdffr_p_p_f"_h, \ + &VISITORCLASS::VisitSVEPredicateReadFromFFR_Predicated}, \ + {"rdvl_r_i"_h, &VISITORCLASS::VisitSVEStackFrameSize}, \ + {"revb_z_z"_h, &VISITORCLASS::VisitSVEReverseWithinElements}, \ + {"revh_z_z"_h, &VISITORCLASS::VisitSVEReverseWithinElements}, \ + {"revw_z_z"_h, &VISITORCLASS::VisitSVEReverseWithinElements}, \ + {"rev_p_p"_h, &VISITORCLASS::VisitSVEReversePredicateElements}, \ + {"rev_z_z"_h, &VISITORCLASS::VisitSVEReverseVectorElements}, \ + {"sabd_z_p_zz"_h, \ + &VISITORCLASS::VisitSVEIntMinMaxDifference_Predicated}, \ + {"saddv_r_p_z"_h, &VISITORCLASS::VisitSVEIntReduction}, \ + {"scvtf_z_p_z_h2fp16"_h, &VISITORCLASS::VisitSVEIntConvertToFP}, \ + {"scvtf_z_p_z_w2d"_h, &VISITORCLASS::VisitSVEIntConvertToFP}, \ + {"scvtf_z_p_z_w2fp16"_h, &VISITORCLASS::VisitSVEIntConvertToFP}, \ + {"scvtf_z_p_z_w2s"_h, &VISITORCLASS::VisitSVEIntConvertToFP}, \ + {"scvtf_z_p_z_x2d"_h, &VISITORCLASS::VisitSVEIntConvertToFP}, \ + {"scvtf_z_p_z_x2fp16"_h, &VISITORCLASS::VisitSVEIntConvertToFP}, \ + {"scvtf_z_p_z_x2s"_h, &VISITORCLASS::VisitSVEIntConvertToFP}, \ + {"sdivr_z_p_zz"_h, &VISITORCLASS::VisitSVEIntDivideVectors_Predicated}, \ + {"sdiv_z_p_zz"_h, &VISITORCLASS::VisitSVEIntDivideVectors_Predicated}, \ + {"sdot_z_zzz"_h, &VISITORCLASS::VisitSVEIntMulAddUnpredicated}, \ + {"sdot_z_zzzi_d"_h, &VISITORCLASS::VisitSVEMulIndex}, \ + {"sdot_z_zzzi_s"_h, &VISITORCLASS::VisitSVEMulIndex}, \ + {"sel_p_p_pp"_h, &VISITORCLASS::VisitSVEPredicateLogical}, \ + {"sel_z_p_zz"_h, &VISITORCLASS::VisitSVEVectorSelect}, \ + {"setffr_f"_h, &VISITORCLASS::VisitSVEFFRInitialise}, \ + {"smaxv_r_p_z"_h, &VISITORCLASS::VisitSVEIntReduction}, \ + {"smax_z_p_zz"_h, \ + &VISITORCLASS::VisitSVEIntMinMaxDifference_Predicated}, \ + {"smax_z_zi"_h, 
&VISITORCLASS::VisitSVEIntMinMaxImm_Unpredicated}, \ + {"sminv_r_p_z"_h, &VISITORCLASS::VisitSVEIntReduction}, \ + {"smin_z_p_zz"_h, \ + &VISITORCLASS::VisitSVEIntMinMaxDifference_Predicated}, \ + {"smin_z_zi"_h, &VISITORCLASS::VisitSVEIntMinMaxImm_Unpredicated}, \ + {"smulh_z_p_zz"_h, &VISITORCLASS::VisitSVEIntMulVectors_Predicated}, \ + {"splice_z_p_zz_des"_h, &VISITORCLASS::VisitSVEVectorSplice}, \ + {"sqadd_z_zi"_h, &VISITORCLASS::VisitSVEIntAddSubtractImm_Unpredicated}, \ + {"sqadd_z_zz"_h, &VISITORCLASS::VisitSVEIntArithmeticUnpredicated}, \ + {"sqdecb_r_rs_sx"_h, \ + &VISITORCLASS::VisitSVESaturatingIncDecRegisterByElementCount}, \ + {"sqdecb_r_rs_x"_h, \ + &VISITORCLASS::VisitSVESaturatingIncDecRegisterByElementCount}, \ + {"sqdecd_r_rs_sx"_h, \ + &VISITORCLASS::VisitSVESaturatingIncDecRegisterByElementCount}, \ + {"sqdecd_r_rs_x"_h, \ + &VISITORCLASS::VisitSVESaturatingIncDecRegisterByElementCount}, \ + {"sqdecd_z_zs"_h, \ + &VISITORCLASS::VisitSVESaturatingIncDecVectorByElementCount}, \ + {"sqdech_r_rs_sx"_h, \ + &VISITORCLASS::VisitSVESaturatingIncDecRegisterByElementCount}, \ + {"sqdech_r_rs_x"_h, \ + &VISITORCLASS::VisitSVESaturatingIncDecRegisterByElementCount}, \ + {"sqdech_z_zs"_h, \ + &VISITORCLASS::VisitSVESaturatingIncDecVectorByElementCount}, \ + {"sqdecp_r_p_r_sx"_h, &VISITORCLASS::VisitSVEIncDecByPredicateCount}, \ + {"sqdecp_r_p_r_x"_h, &VISITORCLASS::VisitSVEIncDecByPredicateCount}, \ + {"sqdecp_z_p_z"_h, &VISITORCLASS::VisitSVEIncDecByPredicateCount}, \ + {"sqdecw_r_rs_sx"_h, \ + &VISITORCLASS::VisitSVESaturatingIncDecRegisterByElementCount}, \ + {"sqdecw_r_rs_x"_h, \ + &VISITORCLASS::VisitSVESaturatingIncDecRegisterByElementCount}, \ + {"sqdecw_z_zs"_h, \ + &VISITORCLASS::VisitSVESaturatingIncDecVectorByElementCount}, \ + {"sqincb_r_rs_sx"_h, \ + &VISITORCLASS::VisitSVESaturatingIncDecRegisterByElementCount}, \ + {"sqincb_r_rs_x"_h, \ + &VISITORCLASS::VisitSVESaturatingIncDecRegisterByElementCount}, \ + {"sqincd_r_rs_sx"_h, \ + &VISITORCLASS::VisitSVESaturatingIncDecRegisterByElementCount}, \ + {"sqincd_r_rs_x"_h, \ + &VISITORCLASS::VisitSVESaturatingIncDecRegisterByElementCount}, \ + {"sqincd_z_zs"_h, \ + &VISITORCLASS::VisitSVESaturatingIncDecVectorByElementCount}, \ + {"sqinch_r_rs_sx"_h, \ + &VISITORCLASS::VisitSVESaturatingIncDecRegisterByElementCount}, \ + {"sqinch_r_rs_x"_h, \ + &VISITORCLASS::VisitSVESaturatingIncDecRegisterByElementCount}, \ + {"sqinch_z_zs"_h, \ + &VISITORCLASS::VisitSVESaturatingIncDecVectorByElementCount}, \ + {"sqincp_r_p_r_sx"_h, &VISITORCLASS::VisitSVEIncDecByPredicateCount}, \ + {"sqincp_r_p_r_x"_h, &VISITORCLASS::VisitSVEIncDecByPredicateCount}, \ + {"sqincp_z_p_z"_h, &VISITORCLASS::VisitSVEIncDecByPredicateCount}, \ + {"sqincw_r_rs_sx"_h, \ + &VISITORCLASS::VisitSVESaturatingIncDecRegisterByElementCount}, \ + {"sqincw_r_rs_x"_h, \ + &VISITORCLASS::VisitSVESaturatingIncDecRegisterByElementCount}, \ + {"sqincw_z_zs"_h, \ + &VISITORCLASS::VisitSVESaturatingIncDecVectorByElementCount}, \ + {"sqsub_z_zi"_h, &VISITORCLASS::VisitSVEIntAddSubtractImm_Unpredicated}, \ + {"sqsub_z_zz"_h, &VISITORCLASS::VisitSVEIntArithmeticUnpredicated}, \ + {"st1b_z_p_ai_d"_h, \ + &VISITORCLASS::VisitSVE64BitScatterStore_VectorPlusImm}, \ + {"st1b_z_p_ai_s"_h, \ + &VISITORCLASS::VisitSVE32BitScatterStore_VectorPlusImm}, \ + {"st1b_z_p_bi"_h, &VISITORCLASS::VisitSVEContiguousStore_ScalarPlusImm}, \ + {"st1b_z_p_br"_h, \ + &VISITORCLASS::VisitSVEContiguousStore_ScalarPlusScalar}, \ + {"st1b_z_p_bz_d_64_unscaled"_h, \ + &VISITORCLASS:: \ + 
VisitSVE64BitScatterStore_ScalarPlus64BitUnscaledOffsets}, \ + {"st1b_z_p_bz_d_x32_unscaled"_h, \ + &VISITORCLASS:: \ + VisitSVE64BitScatterStore_ScalarPlusUnpacked32BitUnscaledOffsets}, \ + {"st1b_z_p_bz_s_x32_unscaled"_h, \ + &VISITORCLASS:: \ + VisitSVE32BitScatterStore_ScalarPlus32BitUnscaledOffsets}, \ + {"st1d_z_p_ai_d"_h, \ + &VISITORCLASS::VisitSVE64BitScatterStore_VectorPlusImm}, \ + {"st1d_z_p_bi"_h, &VISITORCLASS::VisitSVEContiguousStore_ScalarPlusImm}, \ + {"st1d_z_p_br"_h, \ + &VISITORCLASS::VisitSVEContiguousStore_ScalarPlusScalar}, \ + {"st1d_z_p_bz_d_64_scaled"_h, \ + &VISITORCLASS::VisitSVE64BitScatterStore_ScalarPlus64BitScaledOffsets}, \ + {"st1d_z_p_bz_d_64_unscaled"_h, \ + &VISITORCLASS:: \ + VisitSVE64BitScatterStore_ScalarPlus64BitUnscaledOffsets}, \ + {"st1d_z_p_bz_d_x32_scaled"_h, \ + &VISITORCLASS:: \ + VisitSVE64BitScatterStore_ScalarPlusUnpacked32BitScaledOffsets}, \ + {"st1d_z_p_bz_d_x32_unscaled"_h, \ + &VISITORCLASS:: \ + VisitSVE64BitScatterStore_ScalarPlusUnpacked32BitUnscaledOffsets}, \ + {"st1h_z_p_ai_d"_h, \ + &VISITORCLASS::VisitSVE64BitScatterStore_VectorPlusImm}, \ + {"st1h_z_p_ai_s"_h, \ + &VISITORCLASS::VisitSVE32BitScatterStore_VectorPlusImm}, \ + {"st1h_z_p_bi"_h, &VISITORCLASS::VisitSVEContiguousStore_ScalarPlusImm}, \ + {"st1h_z_p_br"_h, \ + &VISITORCLASS::VisitSVEContiguousStore_ScalarPlusScalar}, \ + {"st1h_z_p_bz_d_64_scaled"_h, \ + &VISITORCLASS::VisitSVE64BitScatterStore_ScalarPlus64BitScaledOffsets}, \ + {"st1h_z_p_bz_d_64_unscaled"_h, \ + &VISITORCLASS:: \ + VisitSVE64BitScatterStore_ScalarPlus64BitUnscaledOffsets}, \ + {"st1h_z_p_bz_d_x32_scaled"_h, \ + &VISITORCLASS:: \ + VisitSVE64BitScatterStore_ScalarPlusUnpacked32BitScaledOffsets}, \ + {"st1h_z_p_bz_d_x32_unscaled"_h, \ + &VISITORCLASS:: \ + VisitSVE64BitScatterStore_ScalarPlusUnpacked32BitUnscaledOffsets}, \ + {"st1h_z_p_bz_s_x32_scaled"_h, \ + &VISITORCLASS::VisitSVE32BitScatterStore_ScalarPlus32BitScaledOffsets}, \ + {"st1h_z_p_bz_s_x32_unscaled"_h, \ + &VISITORCLASS:: \ + VisitSVE32BitScatterStore_ScalarPlus32BitUnscaledOffsets}, \ + {"st1w_z_p_ai_d"_h, \ + &VISITORCLASS::VisitSVE64BitScatterStore_VectorPlusImm}, \ + {"st1w_z_p_ai_s"_h, \ + &VISITORCLASS::VisitSVE32BitScatterStore_VectorPlusImm}, \ + {"st1w_z_p_bi"_h, &VISITORCLASS::VisitSVEContiguousStore_ScalarPlusImm}, \ + {"st1w_z_p_br"_h, \ + &VISITORCLASS::VisitSVEContiguousStore_ScalarPlusScalar}, \ + {"st1w_z_p_bz_d_64_scaled"_h, \ + &VISITORCLASS::VisitSVE64BitScatterStore_ScalarPlus64BitScaledOffsets}, \ + {"st1w_z_p_bz_d_64_unscaled"_h, \ + &VISITORCLASS:: \ + VisitSVE64BitScatterStore_ScalarPlus64BitUnscaledOffsets}, \ + {"st1w_z_p_bz_d_x32_scaled"_h, \ + &VISITORCLASS:: \ + VisitSVE64BitScatterStore_ScalarPlusUnpacked32BitScaledOffsets}, \ + {"st1w_z_p_bz_d_x32_unscaled"_h, \ + &VISITORCLASS:: \ + VisitSVE64BitScatterStore_ScalarPlusUnpacked32BitUnscaledOffsets}, \ + {"st1w_z_p_bz_s_x32_scaled"_h, \ + &VISITORCLASS::VisitSVE32BitScatterStore_ScalarPlus32BitScaledOffsets}, \ + {"st1w_z_p_bz_s_x32_unscaled"_h, \ + &VISITORCLASS:: \ + VisitSVE32BitScatterStore_ScalarPlus32BitUnscaledOffsets}, \ + {"st2b_z_p_bi_contiguous"_h, \ + &VISITORCLASS::VisitSVEStoreMultipleStructures_ScalarPlusImm}, \ + {"st2b_z_p_br_contiguous"_h, \ + &VISITORCLASS::VisitSVEStoreMultipleStructures_ScalarPlusScalar}, \ + {"st2d_z_p_bi_contiguous"_h, \ + &VISITORCLASS::VisitSVEStoreMultipleStructures_ScalarPlusImm}, \ + {"st2d_z_p_br_contiguous"_h, \ + &VISITORCLASS::VisitSVEStoreMultipleStructures_ScalarPlusScalar}, \ + 
{"st2h_z_p_bi_contiguous"_h, \ + &VISITORCLASS::VisitSVEStoreMultipleStructures_ScalarPlusImm}, \ + {"st2h_z_p_br_contiguous"_h, \ + &VISITORCLASS::VisitSVEStoreMultipleStructures_ScalarPlusScalar}, \ + {"st2w_z_p_bi_contiguous"_h, \ + &VISITORCLASS::VisitSVEStoreMultipleStructures_ScalarPlusImm}, \ + {"st2w_z_p_br_contiguous"_h, \ + &VISITORCLASS::VisitSVEStoreMultipleStructures_ScalarPlusScalar}, \ + {"st3b_z_p_bi_contiguous"_h, \ + &VISITORCLASS::VisitSVEStoreMultipleStructures_ScalarPlusImm}, \ + {"st3b_z_p_br_contiguous"_h, \ + &VISITORCLASS::VisitSVEStoreMultipleStructures_ScalarPlusScalar}, \ + {"st3d_z_p_bi_contiguous"_h, \ + &VISITORCLASS::VisitSVEStoreMultipleStructures_ScalarPlusImm}, \ + {"st3d_z_p_br_contiguous"_h, \ + &VISITORCLASS::VisitSVEStoreMultipleStructures_ScalarPlusScalar}, \ + {"st3h_z_p_bi_contiguous"_h, \ + &VISITORCLASS::VisitSVEStoreMultipleStructures_ScalarPlusImm}, \ + {"st3h_z_p_br_contiguous"_h, \ + &VISITORCLASS::VisitSVEStoreMultipleStructures_ScalarPlusScalar}, \ + {"st3w_z_p_bi_contiguous"_h, \ + &VISITORCLASS::VisitSVEStoreMultipleStructures_ScalarPlusImm}, \ + {"st3w_z_p_br_contiguous"_h, \ + &VISITORCLASS::VisitSVEStoreMultipleStructures_ScalarPlusScalar}, \ + {"st4b_z_p_bi_contiguous"_h, \ + &VISITORCLASS::VisitSVEStoreMultipleStructures_ScalarPlusImm}, \ + {"st4b_z_p_br_contiguous"_h, \ + &VISITORCLASS::VisitSVEStoreMultipleStructures_ScalarPlusScalar}, \ + {"st4d_z_p_bi_contiguous"_h, \ + &VISITORCLASS::VisitSVEStoreMultipleStructures_ScalarPlusImm}, \ + {"st4d_z_p_br_contiguous"_h, \ + &VISITORCLASS::VisitSVEStoreMultipleStructures_ScalarPlusScalar}, \ + {"st4h_z_p_bi_contiguous"_h, \ + &VISITORCLASS::VisitSVEStoreMultipleStructures_ScalarPlusImm}, \ + {"st4h_z_p_br_contiguous"_h, \ + &VISITORCLASS::VisitSVEStoreMultipleStructures_ScalarPlusScalar}, \ + {"st4w_z_p_bi_contiguous"_h, \ + &VISITORCLASS::VisitSVEStoreMultipleStructures_ScalarPlusImm}, \ + {"st4w_z_p_br_contiguous"_h, \ + &VISITORCLASS::VisitSVEStoreMultipleStructures_ScalarPlusScalar}, \ + {"stnt1b_z_p_bi_contiguous"_h, \ + &VISITORCLASS::VisitSVEContiguousNonTemporalStore_ScalarPlusImm}, \ + {"stnt1b_z_p_br_contiguous"_h, \ + &VISITORCLASS::VisitSVEContiguousNonTemporalStore_ScalarPlusScalar}, \ + {"stnt1d_z_p_bi_contiguous"_h, \ + &VISITORCLASS::VisitSVEContiguousNonTemporalStore_ScalarPlusImm}, \ + {"stnt1d_z_p_br_contiguous"_h, \ + &VISITORCLASS::VisitSVEContiguousNonTemporalStore_ScalarPlusScalar}, \ + {"stnt1h_z_p_bi_contiguous"_h, \ + &VISITORCLASS::VisitSVEContiguousNonTemporalStore_ScalarPlusImm}, \ + {"stnt1h_z_p_br_contiguous"_h, \ + &VISITORCLASS::VisitSVEContiguousNonTemporalStore_ScalarPlusScalar}, \ + {"stnt1w_z_p_bi_contiguous"_h, \ + &VISITORCLASS::VisitSVEContiguousNonTemporalStore_ScalarPlusImm}, \ + {"stnt1w_z_p_br_contiguous"_h, \ + &VISITORCLASS::VisitSVEContiguousNonTemporalStore_ScalarPlusScalar}, \ + {"str_p_bi"_h, &VISITORCLASS::VisitSVEStorePredicateRegister}, \ + {"str_z_bi"_h, &VISITORCLASS::VisitSVEStoreVectorRegister}, \ + {"subr_z_p_zz"_h, \ + &VISITORCLASS::VisitSVEIntAddSubtractVectors_Predicated}, \ + {"subr_z_zi"_h, &VISITORCLASS::VisitSVEIntAddSubtractImm_Unpredicated}, \ + {"sub_z_p_zz"_h, \ + &VISITORCLASS::VisitSVEIntAddSubtractVectors_Predicated}, \ + {"sub_z_zi"_h, &VISITORCLASS::VisitSVEIntAddSubtractImm_Unpredicated}, \ + {"sub_z_zz"_h, &VISITORCLASS::VisitSVEIntArithmeticUnpredicated}, \ + {"sunpkhi_z_z"_h, &VISITORCLASS::VisitSVEUnpackVectorElements}, \ + {"sunpklo_z_z"_h, &VISITORCLASS::VisitSVEUnpackVectorElements}, \ + {"sxtb_z_p_z"_h, 
&VISITORCLASS::VisitSVEIntUnaryArithmeticPredicated}, \ + {"sxth_z_p_z"_h, &VISITORCLASS::VisitSVEIntUnaryArithmeticPredicated}, \ + {"sxtw_z_p_z"_h, &VISITORCLASS::VisitSVEIntUnaryArithmeticPredicated}, \ + {"tbl_z_zz_1"_h, &VISITORCLASS::VisitSVETableLookup}, \ + {"trn1_p_pp"_h, &VISITORCLASS::VisitSVEPermutePredicateElements}, \ + {"trn1_z_zz"_h, &VISITORCLASS::VisitSVEPermuteVectorInterleaving}, \ + {"trn2_p_pp"_h, &VISITORCLASS::VisitSVEPermutePredicateElements}, \ + {"trn2_z_zz"_h, &VISITORCLASS::VisitSVEPermuteVectorInterleaving}, \ + {"uabd_z_p_zz"_h, \ + &VISITORCLASS::VisitSVEIntMinMaxDifference_Predicated}, \ + {"uaddv_r_p_z"_h, &VISITORCLASS::VisitSVEIntReduction}, \ + {"ucvtf_z_p_z_h2fp16"_h, &VISITORCLASS::VisitSVEIntConvertToFP}, \ + {"ucvtf_z_p_z_w2d"_h, &VISITORCLASS::VisitSVEIntConvertToFP}, \ + {"ucvtf_z_p_z_w2fp16"_h, &VISITORCLASS::VisitSVEIntConvertToFP}, \ + {"ucvtf_z_p_z_w2s"_h, &VISITORCLASS::VisitSVEIntConvertToFP}, \ + {"ucvtf_z_p_z_x2d"_h, &VISITORCLASS::VisitSVEIntConvertToFP}, \ + {"ucvtf_z_p_z_x2fp16"_h, &VISITORCLASS::VisitSVEIntConvertToFP}, \ + {"ucvtf_z_p_z_x2s"_h, &VISITORCLASS::VisitSVEIntConvertToFP}, \ + {"udf_only_perm_undef"_h, &VISITORCLASS::VisitReserved}, \ + {"udivr_z_p_zz"_h, &VISITORCLASS::VisitSVEIntDivideVectors_Predicated}, \ + {"udiv_z_p_zz"_h, &VISITORCLASS::VisitSVEIntDivideVectors_Predicated}, \ + {"udot_z_zzz"_h, &VISITORCLASS::VisitSVEIntMulAddUnpredicated}, \ + {"udot_z_zzzi_d"_h, &VISITORCLASS::VisitSVEMulIndex}, \ + {"udot_z_zzzi_s"_h, &VISITORCLASS::VisitSVEMulIndex}, \ + {"umaxv_r_p_z"_h, &VISITORCLASS::VisitSVEIntReduction}, \ + {"umax_z_p_zz"_h, \ + &VISITORCLASS::VisitSVEIntMinMaxDifference_Predicated}, \ + {"umax_z_zi"_h, &VISITORCLASS::VisitSVEIntMinMaxImm_Unpredicated}, \ + {"uminv_r_p_z"_h, &VISITORCLASS::VisitSVEIntReduction}, \ + {"umin_z_p_zz"_h, \ + &VISITORCLASS::VisitSVEIntMinMaxDifference_Predicated}, \ + {"umin_z_zi"_h, &VISITORCLASS::VisitSVEIntMinMaxImm_Unpredicated}, \ + {"umulh_z_p_zz"_h, &VISITORCLASS::VisitSVEIntMulVectors_Predicated}, \ + {"uqadd_z_zi"_h, &VISITORCLASS::VisitSVEIntAddSubtractImm_Unpredicated}, \ + {"uqadd_z_zz"_h, &VISITORCLASS::VisitSVEIntArithmeticUnpredicated}, \ + {"uqdecb_r_rs_uw"_h, \ + &VISITORCLASS::VisitSVESaturatingIncDecRegisterByElementCount}, \ + {"uqdecb_r_rs_x"_h, \ + &VISITORCLASS::VisitSVESaturatingIncDecRegisterByElementCount}, \ + {"uqdecd_r_rs_uw"_h, \ + &VISITORCLASS::VisitSVESaturatingIncDecRegisterByElementCount}, \ + {"uqdecd_r_rs_x"_h, \ + &VISITORCLASS::VisitSVESaturatingIncDecRegisterByElementCount}, \ + {"uqdecd_z_zs"_h, \ + &VISITORCLASS::VisitSVESaturatingIncDecVectorByElementCount}, \ + {"uqdech_r_rs_uw"_h, \ + &VISITORCLASS::VisitSVESaturatingIncDecRegisterByElementCount}, \ + {"uqdech_r_rs_x"_h, \ + &VISITORCLASS::VisitSVESaturatingIncDecRegisterByElementCount}, \ + {"uqdech_z_zs"_h, \ + &VISITORCLASS::VisitSVESaturatingIncDecVectorByElementCount}, \ + {"uqdecp_r_p_r_uw"_h, &VISITORCLASS::VisitSVEIncDecByPredicateCount}, \ + {"uqdecp_r_p_r_x"_h, &VISITORCLASS::VisitSVEIncDecByPredicateCount}, \ + {"uqdecp_z_p_z"_h, &VISITORCLASS::VisitSVEIncDecByPredicateCount}, \ + {"uqdecw_r_rs_uw"_h, \ + &VISITORCLASS::VisitSVESaturatingIncDecRegisterByElementCount}, \ + {"uqdecw_r_rs_x"_h, \ + &VISITORCLASS::VisitSVESaturatingIncDecRegisterByElementCount}, \ + {"uqdecw_z_zs"_h, \ + &VISITORCLASS::VisitSVESaturatingIncDecVectorByElementCount}, \ + {"uqincb_r_rs_uw"_h, \ + &VISITORCLASS::VisitSVESaturatingIncDecRegisterByElementCount}, \ + {"uqincb_r_rs_x"_h, \ + 
&VISITORCLASS::VisitSVESaturatingIncDecRegisterByElementCount}, \ + {"uqincd_r_rs_uw"_h, \ + &VISITORCLASS::VisitSVESaturatingIncDecRegisterByElementCount}, \ + {"uqincd_r_rs_x"_h, \ + &VISITORCLASS::VisitSVESaturatingIncDecRegisterByElementCount}, \ + {"uqincd_z_zs"_h, \ + &VISITORCLASS::VisitSVESaturatingIncDecVectorByElementCount}, \ + {"uqinch_r_rs_uw"_h, \ + &VISITORCLASS::VisitSVESaturatingIncDecRegisterByElementCount}, \ + {"uqinch_r_rs_x"_h, \ + &VISITORCLASS::VisitSVESaturatingIncDecRegisterByElementCount}, \ + {"uqinch_z_zs"_h, \ + &VISITORCLASS::VisitSVESaturatingIncDecVectorByElementCount}, \ + {"uqincp_r_p_r_uw"_h, &VISITORCLASS::VisitSVEIncDecByPredicateCount}, \ + {"uqincp_r_p_r_x"_h, &VISITORCLASS::VisitSVEIncDecByPredicateCount}, \ + {"uqincp_z_p_z"_h, &VISITORCLASS::VisitSVEIncDecByPredicateCount}, \ + {"uqincw_r_rs_uw"_h, \ + &VISITORCLASS::VisitSVESaturatingIncDecRegisterByElementCount}, \ + {"uqincw_r_rs_x"_h, \ + &VISITORCLASS::VisitSVESaturatingIncDecRegisterByElementCount}, \ + {"uqincw_z_zs"_h, \ + &VISITORCLASS::VisitSVESaturatingIncDecVectorByElementCount}, \ + {"uqsub_z_zi"_h, &VISITORCLASS::VisitSVEIntAddSubtractImm_Unpredicated}, \ + {"uqsub_z_zz"_h, &VISITORCLASS::VisitSVEIntArithmeticUnpredicated}, \ + {"uunpkhi_z_z"_h, &VISITORCLASS::VisitSVEUnpackVectorElements}, \ + {"uunpklo_z_z"_h, &VISITORCLASS::VisitSVEUnpackVectorElements}, \ + {"uxtb_z_p_z"_h, &VISITORCLASS::VisitSVEIntUnaryArithmeticPredicated}, \ + {"uxth_z_p_z"_h, &VISITORCLASS::VisitSVEIntUnaryArithmeticPredicated}, \ + {"uxtw_z_p_z"_h, &VISITORCLASS::VisitSVEIntUnaryArithmeticPredicated}, \ + {"uzp1_p_pp"_h, &VISITORCLASS::VisitSVEPermutePredicateElements}, \ + {"uzp1_z_zz"_h, &VISITORCLASS::VisitSVEPermuteVectorInterleaving}, \ + {"uzp2_p_pp"_h, &VISITORCLASS::VisitSVEPermutePredicateElements}, \ + {"uzp2_z_zz"_h, &VISITORCLASS::VisitSVEPermuteVectorInterleaving}, \ + {"whilele_p_p_rr"_h, \ + &VISITORCLASS::VisitSVEIntCompareScalarCountAndLimit}, \ + {"whilelo_p_p_rr"_h, \ + &VISITORCLASS::VisitSVEIntCompareScalarCountAndLimit}, \ + {"whilels_p_p_rr"_h, \ + &VISITORCLASS::VisitSVEIntCompareScalarCountAndLimit}, \ + {"whilelt_p_p_rr"_h, \ + &VISITORCLASS::VisitSVEIntCompareScalarCountAndLimit}, \ + {"wrffr_f_p"_h, &VISITORCLASS::VisitSVEFFRWriteFromPredicate}, \ + {"zip1_p_pp"_h, &VISITORCLASS::VisitSVEPermutePredicateElements}, \ + {"zip1_z_zz"_h, &VISITORCLASS::VisitSVEPermuteVectorInterleaving}, \ + {"zip2_p_pp"_h, &VISITORCLASS::VisitSVEPermutePredicateElements}, \ + {"zip2_z_zz"_h, &VISITORCLASS::VisitSVEPermuteVectorInterleaving}, \ + {"adds_32s_addsub_ext"_h, &VISITORCLASS::VisitAddSubExtended}, \ + {"adds_64s_addsub_ext"_h, &VISITORCLASS::VisitAddSubExtended}, \ + {"add_32_addsub_ext"_h, &VISITORCLASS::VisitAddSubExtended}, \ + {"add_64_addsub_ext"_h, &VISITORCLASS::VisitAddSubExtended}, \ + {"subs_32s_addsub_ext"_h, &VISITORCLASS::VisitAddSubExtended}, \ + {"subs_64s_addsub_ext"_h, &VISITORCLASS::VisitAddSubExtended}, \ + {"sub_32_addsub_ext"_h, &VISITORCLASS::VisitAddSubExtended}, \ + {"sub_64_addsub_ext"_h, &VISITORCLASS::VisitAddSubExtended}, \ + {"adds_32s_addsub_imm"_h, &VISITORCLASS::VisitAddSubImmediate}, \ + {"adds_64s_addsub_imm"_h, &VISITORCLASS::VisitAddSubImmediate}, \ + {"add_32_addsub_imm"_h, &VISITORCLASS::VisitAddSubImmediate}, \ + {"add_64_addsub_imm"_h, &VISITORCLASS::VisitAddSubImmediate}, \ + {"subs_32s_addsub_imm"_h, &VISITORCLASS::VisitAddSubImmediate}, \ + {"subs_64s_addsub_imm"_h, &VISITORCLASS::VisitAddSubImmediate}, \ + {"sub_32_addsub_imm"_h, 
&VISITORCLASS::VisitAddSubImmediate}, \ + {"sub_64_addsub_imm"_h, &VISITORCLASS::VisitAddSubImmediate}, \ + {"adds_32_addsub_shift"_h, &VISITORCLASS::VisitAddSubShifted}, \ + {"adds_64_addsub_shift"_h, &VISITORCLASS::VisitAddSubShifted}, \ + {"add_32_addsub_shift"_h, &VISITORCLASS::VisitAddSubShifted}, \ + {"add_64_addsub_shift"_h, &VISITORCLASS::VisitAddSubShifted}, \ + {"subs_32_addsub_shift"_h, &VISITORCLASS::VisitAddSubShifted}, \ + {"subs_64_addsub_shift"_h, &VISITORCLASS::VisitAddSubShifted}, \ + {"sub_32_addsub_shift"_h, &VISITORCLASS::VisitAddSubShifted}, \ + {"sub_64_addsub_shift"_h, &VISITORCLASS::VisitAddSubShifted}, \ + {"adcs_32_addsub_carry"_h, &VISITORCLASS::VisitAddSubWithCarry}, \ + {"adcs_64_addsub_carry"_h, &VISITORCLASS::VisitAddSubWithCarry}, \ + {"adc_32_addsub_carry"_h, &VISITORCLASS::VisitAddSubWithCarry}, \ + {"adc_64_addsub_carry"_h, &VISITORCLASS::VisitAddSubWithCarry}, \ + {"sbcs_32_addsub_carry"_h, &VISITORCLASS::VisitAddSubWithCarry}, \ + {"sbcs_64_addsub_carry"_h, &VISITORCLASS::VisitAddSubWithCarry}, \ + {"sbc_32_addsub_carry"_h, &VISITORCLASS::VisitAddSubWithCarry}, \ + {"sbc_64_addsub_carry"_h, &VISITORCLASS::VisitAddSubWithCarry}, \ + {"ldaddab_32_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \ + {"ldaddah_32_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \ + {"ldaddalb_32_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \ + {"ldaddalh_32_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \ + {"ldaddal_32_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \ + {"ldaddal_64_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \ + {"ldadda_32_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \ + {"ldadda_64_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \ + {"ldaddb_32_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \ + {"ldaddh_32_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \ + {"ldaddlb_32_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \ + {"ldaddlh_32_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \ + {"ldaddl_32_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \ + {"ldaddl_64_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \ + {"ldadd_32_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \ + {"ldadd_64_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \ + {"ldaprb_32l_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \ + {"ldaprh_32l_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \ + {"ldapr_32l_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \ + {"ldapr_64l_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \ + {"ldclrab_32_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \ + {"ldclrah_32_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \ + {"ldclralb_32_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \ + {"ldclralh_32_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \ + {"ldclral_32_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \ + {"ldclral_64_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \ + {"ldclra_32_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \ + {"ldclra_64_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \ + {"ldclrb_32_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \ + {"ldclrh_32_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \ + {"ldclrlb_32_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \ + {"ldclrlh_32_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \ + {"ldclrl_32_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \ + {"ldclrl_64_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \ + {"ldclr_32_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \ + {"ldclr_64_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \ + {"ldeorab_32_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \ + {"ldeorah_32_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \ + {"ldeoralb_32_memop"_h, 
&VISITORCLASS::VisitAtomicMemory}, \ + {"ldeoralh_32_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \ + {"ldeoral_32_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \ + {"ldeoral_64_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \ + {"ldeora_32_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \ + {"ldeora_64_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \ + {"ldeorb_32_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \ + {"ldeorh_32_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \ + {"ldeorlb_32_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \ + {"ldeorlh_32_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \ + {"ldeorl_32_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \ + {"ldeorl_64_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \ + {"ldeor_32_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \ + {"ldeor_64_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \ + {"ldsetab_32_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \ + {"ldsetah_32_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \ + {"ldsetalb_32_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \ + {"ldsetalh_32_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \ + {"ldsetal_32_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \ + {"ldsetal_64_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \ + {"ldseta_32_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \ + {"ldseta_64_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \ + {"ldsetb_32_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \ + {"ldseth_32_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \ + {"ldsetlb_32_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \ + {"ldsetlh_32_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \ + {"ldsetl_32_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \ + {"ldsetl_64_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \ + {"ldset_32_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \ + {"ldset_64_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \ + {"ldsmaxab_32_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \ + {"ldsmaxah_32_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \ + {"ldsmaxalb_32_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \ + {"ldsmaxalh_32_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \ + {"ldsmaxal_32_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \ + {"ldsmaxal_64_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \ + {"ldsmaxa_32_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \ + {"ldsmaxa_64_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \ + {"ldsmaxb_32_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \ + {"ldsmaxh_32_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \ + {"ldsmaxlb_32_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \ + {"ldsmaxlh_32_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \ + {"ldsmaxl_32_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \ + {"ldsmaxl_64_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \ + {"ldsmax_32_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \ + {"ldsmax_64_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \ + {"ldsminab_32_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \ + {"ldsminah_32_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \ + {"ldsminalb_32_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \ + {"ldsminalh_32_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \ + {"ldsminal_32_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \ + {"ldsminal_64_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \ + {"ldsmina_32_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \ + {"ldsmina_64_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \ + {"ldsminb_32_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \ + {"ldsminh_32_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \ + {"ldsminlb_32_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \ + {"ldsminlh_32_memop"_h, 
&VISITORCLASS::VisitAtomicMemory}, \ + {"ldsminl_32_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \ + {"ldsminl_64_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \ + {"ldsmin_32_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \ + {"ldsmin_64_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \ + {"ldumaxab_32_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \ + {"ldumaxah_32_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \ + {"ldumaxalb_32_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \ + {"ldumaxalh_32_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \ + {"ldumaxal_32_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \ + {"ldumaxal_64_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \ + {"ldumaxa_32_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \ + {"ldumaxa_64_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \ + {"ldumaxb_32_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \ + {"ldumaxh_32_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \ + {"ldumaxlb_32_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \ + {"ldumaxlh_32_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \ + {"ldumaxl_32_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \ + {"ldumaxl_64_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \ + {"ldumax_32_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \ + {"ldumax_64_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \ + {"lduminab_32_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \ + {"lduminah_32_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \ + {"lduminalb_32_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \ + {"lduminalh_32_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \ + {"lduminal_32_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \ + {"lduminal_64_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \ + {"ldumina_32_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \ + {"ldumina_64_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \ + {"lduminb_32_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \ + {"lduminh_32_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \ + {"lduminlb_32_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \ + {"lduminlh_32_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \ + {"lduminl_32_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \ + {"lduminl_64_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \ + {"ldumin_32_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \ + {"ldumin_64_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \ + {"swpab_32_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \ + {"swpah_32_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \ + {"swpalb_32_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \ + {"swpalh_32_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \ + {"swpal_32_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \ + {"swpal_64_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \ + {"swpa_32_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \ + {"swpa_64_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \ + {"swpb_32_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \ + {"swph_32_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \ + {"swplb_32_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \ + {"swplh_32_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \ + {"swpl_32_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \ + {"swpl_64_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \ + {"swp_32_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \ + {"swp_64_memop"_h, &VISITORCLASS::VisitAtomicMemory}, \ + {"bfm_32m_bitfield"_h, &VISITORCLASS::VisitBitfield}, \ + {"bfm_64m_bitfield"_h, &VISITORCLASS::VisitBitfield}, \ + {"sbfm_32m_bitfield"_h, &VISITORCLASS::VisitBitfield}, \ + {"sbfm_64m_bitfield"_h, &VISITORCLASS::VisitBitfield}, \ + {"ubfm_32m_bitfield"_h, &VISITORCLASS::VisitBitfield}, \ + {"ubfm_64m_bitfield"_h, 
&VISITORCLASS::VisitBitfield}, \ + {"cbnz_32_compbranch"_h, &VISITORCLASS::VisitCompareBranch}, \ + {"cbnz_64_compbranch"_h, &VISITORCLASS::VisitCompareBranch}, \ + {"cbz_32_compbranch"_h, &VISITORCLASS::VisitCompareBranch}, \ + {"cbz_64_compbranch"_h, &VISITORCLASS::VisitCompareBranch}, \ + {"b_only_condbranch"_h, &VISITORCLASS::VisitConditionalBranch}, \ + {"ccmn_32_condcmp_imm"_h, \ + &VISITORCLASS::VisitConditionalCompareImmediate}, \ + {"ccmn_64_condcmp_imm"_h, \ + &VISITORCLASS::VisitConditionalCompareImmediate}, \ + {"ccmp_32_condcmp_imm"_h, \ + &VISITORCLASS::VisitConditionalCompareImmediate}, \ + {"ccmp_64_condcmp_imm"_h, \ + &VISITORCLASS::VisitConditionalCompareImmediate}, \ + {"ccmn_32_condcmp_reg"_h, \ + &VISITORCLASS::VisitConditionalCompareRegister}, \ + {"ccmn_64_condcmp_reg"_h, \ + &VISITORCLASS::VisitConditionalCompareRegister}, \ + {"ccmp_32_condcmp_reg"_h, \ + &VISITORCLASS::VisitConditionalCompareRegister}, \ + {"ccmp_64_condcmp_reg"_h, \ + &VISITORCLASS::VisitConditionalCompareRegister}, \ + {"csel_32_condsel"_h, &VISITORCLASS::VisitConditionalSelect}, \ + {"csel_64_condsel"_h, &VISITORCLASS::VisitConditionalSelect}, \ + {"csinc_32_condsel"_h, &VISITORCLASS::VisitConditionalSelect}, \ + {"csinc_64_condsel"_h, &VISITORCLASS::VisitConditionalSelect}, \ + {"csinv_32_condsel"_h, &VISITORCLASS::VisitConditionalSelect}, \ + {"csinv_64_condsel"_h, &VISITORCLASS::VisitConditionalSelect}, \ + {"csneg_32_condsel"_h, &VISITORCLASS::VisitConditionalSelect}, \ + {"csneg_64_condsel"_h, &VISITORCLASS::VisitConditionalSelect}, \ + {"sha1h_ss_cryptosha2"_h, &VISITORCLASS::VisitCrypto2RegSHA}, \ + {"sha1su1_vv_cryptosha2"_h, &VISITORCLASS::VisitCrypto2RegSHA}, \ + {"sha256su0_vv_cryptosha2"_h, &VISITORCLASS::VisitCrypto2RegSHA}, \ + {"sha1c_qsv_cryptosha3"_h, &VISITORCLASS::VisitCrypto3RegSHA}, \ + {"sha1m_qsv_cryptosha3"_h, &VISITORCLASS::VisitCrypto3RegSHA}, \ + {"sha1p_qsv_cryptosha3"_h, &VISITORCLASS::VisitCrypto3RegSHA}, \ + {"sha1su0_vvv_cryptosha3"_h, &VISITORCLASS::VisitCrypto3RegSHA}, \ + {"sha256h2_qqv_cryptosha3"_h, &VISITORCLASS::VisitCrypto3RegSHA}, \ + {"sha256h_qqv_cryptosha3"_h, &VISITORCLASS::VisitCrypto3RegSHA}, \ + {"sha256su1_vvv_cryptosha3"_h, &VISITORCLASS::VisitCrypto3RegSHA}, \ + {"aesd_b_cryptoaes"_h, &VISITORCLASS::VisitCryptoAES}, \ + {"aese_b_cryptoaes"_h, &VISITORCLASS::VisitCryptoAES}, \ + {"aesimc_b_cryptoaes"_h, &VISITORCLASS::VisitCryptoAES}, \ + {"aesmc_b_cryptoaes"_h, &VISITORCLASS::VisitCryptoAES}, \ + {"autda_64p_dp_1src"_h, &VISITORCLASS::VisitDataProcessing1Source}, \ + {"autdb_64p_dp_1src"_h, &VISITORCLASS::VisitDataProcessing1Source}, \ + {"autdza_64z_dp_1src"_h, &VISITORCLASS::VisitDataProcessing1Source}, \ + {"autdzb_64z_dp_1src"_h, &VISITORCLASS::VisitDataProcessing1Source}, \ + {"autia_64p_dp_1src"_h, &VISITORCLASS::VisitDataProcessing1Source}, \ + {"autib_64p_dp_1src"_h, &VISITORCLASS::VisitDataProcessing1Source}, \ + {"autiza_64z_dp_1src"_h, &VISITORCLASS::VisitDataProcessing1Source}, \ + {"autizb_64z_dp_1src"_h, &VISITORCLASS::VisitDataProcessing1Source}, \ + {"cls_32_dp_1src"_h, &VISITORCLASS::VisitDataProcessing1Source}, \ + {"cls_64_dp_1src"_h, &VISITORCLASS::VisitDataProcessing1Source}, \ + {"clz_32_dp_1src"_h, &VISITORCLASS::VisitDataProcessing1Source}, \ + {"clz_64_dp_1src"_h, &VISITORCLASS::VisitDataProcessing1Source}, \ + {"pacda_64p_dp_1src"_h, &VISITORCLASS::VisitDataProcessing1Source}, \ + {"pacdb_64p_dp_1src"_h, &VISITORCLASS::VisitDataProcessing1Source}, \ + {"pacdza_64z_dp_1src"_h, &VISITORCLASS::VisitDataProcessing1Source}, 
\ + {"pacdzb_64z_dp_1src"_h, &VISITORCLASS::VisitDataProcessing1Source}, \ + {"pacia_64p_dp_1src"_h, &VISITORCLASS::VisitDataProcessing1Source}, \ + {"pacib_64p_dp_1src"_h, &VISITORCLASS::VisitDataProcessing1Source}, \ + {"paciza_64z_dp_1src"_h, &VISITORCLASS::VisitDataProcessing1Source}, \ + {"pacizb_64z_dp_1src"_h, &VISITORCLASS::VisitDataProcessing1Source}, \ + {"rbit_32_dp_1src"_h, &VISITORCLASS::VisitDataProcessing1Source}, \ + {"rbit_64_dp_1src"_h, &VISITORCLASS::VisitDataProcessing1Source}, \ + {"rev16_32_dp_1src"_h, &VISITORCLASS::VisitDataProcessing1Source}, \ + {"rev16_64_dp_1src"_h, &VISITORCLASS::VisitDataProcessing1Source}, \ + {"rev32_64_dp_1src"_h, &VISITORCLASS::VisitDataProcessing1Source}, \ + {"rev_32_dp_1src"_h, &VISITORCLASS::VisitDataProcessing1Source}, \ + {"rev_64_dp_1src"_h, &VISITORCLASS::VisitDataProcessing1Source}, \ + {"xpacd_64z_dp_1src"_h, &VISITORCLASS::VisitDataProcessing1Source}, \ + {"xpaci_64z_dp_1src"_h, &VISITORCLASS::VisitDataProcessing1Source}, \ + {"asrv_32_dp_2src"_h, &VISITORCLASS::VisitDataProcessing2Source}, \ + {"asrv_64_dp_2src"_h, &VISITORCLASS::VisitDataProcessing2Source}, \ + {"crc32b_32c_dp_2src"_h, &VISITORCLASS::VisitDataProcessing2Source}, \ + {"crc32cb_32c_dp_2src"_h, &VISITORCLASS::VisitDataProcessing2Source}, \ + {"crc32ch_32c_dp_2src"_h, &VISITORCLASS::VisitDataProcessing2Source}, \ + {"crc32cw_32c_dp_2src"_h, &VISITORCLASS::VisitDataProcessing2Source}, \ + {"crc32cx_64c_dp_2src"_h, &VISITORCLASS::VisitDataProcessing2Source}, \ + {"crc32h_32c_dp_2src"_h, &VISITORCLASS::VisitDataProcessing2Source}, \ + {"crc32w_32c_dp_2src"_h, &VISITORCLASS::VisitDataProcessing2Source}, \ + {"crc32x_64c_dp_2src"_h, &VISITORCLASS::VisitDataProcessing2Source}, \ + {"lslv_32_dp_2src"_h, &VISITORCLASS::VisitDataProcessing2Source}, \ + {"lslv_64_dp_2src"_h, &VISITORCLASS::VisitDataProcessing2Source}, \ + {"lsrv_32_dp_2src"_h, &VISITORCLASS::VisitDataProcessing2Source}, \ + {"lsrv_64_dp_2src"_h, &VISITORCLASS::VisitDataProcessing2Source}, \ + {"pacga_64p_dp_2src"_h, &VISITORCLASS::VisitDataProcessing2Source}, \ + {"rorv_32_dp_2src"_h, &VISITORCLASS::VisitDataProcessing2Source}, \ + {"rorv_64_dp_2src"_h, &VISITORCLASS::VisitDataProcessing2Source}, \ + {"sdiv_32_dp_2src"_h, &VISITORCLASS::VisitDataProcessing2Source}, \ + {"sdiv_64_dp_2src"_h, &VISITORCLASS::VisitDataProcessing2Source}, \ + {"udiv_32_dp_2src"_h, &VISITORCLASS::VisitDataProcessing2Source}, \ + {"udiv_64_dp_2src"_h, &VISITORCLASS::VisitDataProcessing2Source}, \ + {"madd_32a_dp_3src"_h, &VISITORCLASS::VisitDataProcessing3Source}, \ + {"madd_64a_dp_3src"_h, &VISITORCLASS::VisitDataProcessing3Source}, \ + {"msub_32a_dp_3src"_h, &VISITORCLASS::VisitDataProcessing3Source}, \ + {"msub_64a_dp_3src"_h, &VISITORCLASS::VisitDataProcessing3Source}, \ + {"smaddl_64wa_dp_3src"_h, &VISITORCLASS::VisitDataProcessing3Source}, \ + {"smsubl_64wa_dp_3src"_h, &VISITORCLASS::VisitDataProcessing3Source}, \ + {"smulh_64_dp_3src"_h, &VISITORCLASS::VisitDataProcessing3Source}, \ + {"umaddl_64wa_dp_3src"_h, &VISITORCLASS::VisitDataProcessing3Source}, \ + {"umsubl_64wa_dp_3src"_h, &VISITORCLASS::VisitDataProcessing3Source}, \ + {"umulh_64_dp_3src"_h, &VISITORCLASS::VisitDataProcessing3Source}, \ + {"setf16_only_setf"_h, &VISITORCLASS::VisitEvaluateIntoFlags}, \ + {"setf8_only_setf"_h, &VISITORCLASS::VisitEvaluateIntoFlags}, \ + {"brk_ex_exception"_h, &VISITORCLASS::VisitException}, \ + {"dcps1_dc_exception"_h, &VISITORCLASS::VisitException}, \ + {"dcps2_dc_exception"_h, &VISITORCLASS::VisitException}, \ + 
{"dcps3_dc_exception"_h, &VISITORCLASS::VisitException}, \ + {"hlt_ex_exception"_h, &VISITORCLASS::VisitException}, \ + {"hvc_ex_exception"_h, &VISITORCLASS::VisitException}, \ + {"smc_ex_exception"_h, &VISITORCLASS::VisitException}, \ + {"svc_ex_exception"_h, &VISITORCLASS::VisitException}, \ + {"extr_32_extract"_h, &VISITORCLASS::VisitExtract}, \ + {"extr_64_extract"_h, &VISITORCLASS::VisitExtract}, \ + {"fcmpe_dz_floatcmp"_h, &VISITORCLASS::VisitFPCompare}, \ + {"fcmpe_d_floatcmp"_h, &VISITORCLASS::VisitFPCompare}, \ + {"fcmpe_hz_floatcmp"_h, &VISITORCLASS::VisitFPCompare}, \ + {"fcmpe_h_floatcmp"_h, &VISITORCLASS::VisitFPCompare}, \ + {"fcmpe_sz_floatcmp"_h, &VISITORCLASS::VisitFPCompare}, \ + {"fcmpe_s_floatcmp"_h, &VISITORCLASS::VisitFPCompare}, \ + {"fcmp_dz_floatcmp"_h, &VISITORCLASS::VisitFPCompare}, \ + {"fcmp_d_floatcmp"_h, &VISITORCLASS::VisitFPCompare}, \ + {"fcmp_hz_floatcmp"_h, &VISITORCLASS::VisitFPCompare}, \ + {"fcmp_h_floatcmp"_h, &VISITORCLASS::VisitFPCompare}, \ + {"fcmp_sz_floatcmp"_h, &VISITORCLASS::VisitFPCompare}, \ + {"fcmp_s_floatcmp"_h, &VISITORCLASS::VisitFPCompare}, \ + {"fccmpe_d_floatccmp"_h, &VISITORCLASS::VisitFPConditionalCompare}, \ + {"fccmpe_h_floatccmp"_h, &VISITORCLASS::VisitFPConditionalCompare}, \ + {"fccmpe_s_floatccmp"_h, &VISITORCLASS::VisitFPConditionalCompare}, \ + {"fccmp_d_floatccmp"_h, &VISITORCLASS::VisitFPConditionalCompare}, \ + {"fccmp_h_floatccmp"_h, &VISITORCLASS::VisitFPConditionalCompare}, \ + {"fccmp_s_floatccmp"_h, &VISITORCLASS::VisitFPConditionalCompare}, \ + {"fcsel_d_floatsel"_h, &VISITORCLASS::VisitFPConditionalSelect}, \ + {"fcsel_h_floatsel"_h, &VISITORCLASS::VisitFPConditionalSelect}, \ + {"fcsel_s_floatsel"_h, &VISITORCLASS::VisitFPConditionalSelect}, \ + {"bfcvt_bs_floatdp1"_h, &VISITORCLASS::VisitFPDataProcessing1Source}, \ + {"fabs_d_floatdp1"_h, &VISITORCLASS::VisitFPDataProcessing1Source}, \ + {"fabs_h_floatdp1"_h, &VISITORCLASS::VisitFPDataProcessing1Source}, \ + {"fabs_s_floatdp1"_h, &VISITORCLASS::VisitFPDataProcessing1Source}, \ + {"fcvt_dh_floatdp1"_h, &VISITORCLASS::VisitFPDataProcessing1Source}, \ + {"fcvt_ds_floatdp1"_h, &VISITORCLASS::VisitFPDataProcessing1Source}, \ + {"fcvt_hd_floatdp1"_h, &VISITORCLASS::VisitFPDataProcessing1Source}, \ + {"fcvt_hs_floatdp1"_h, &VISITORCLASS::VisitFPDataProcessing1Source}, \ + {"fcvt_sd_floatdp1"_h, &VISITORCLASS::VisitFPDataProcessing1Source}, \ + {"fcvt_sh_floatdp1"_h, &VISITORCLASS::VisitFPDataProcessing1Source}, \ + {"fmov_d_floatdp1"_h, &VISITORCLASS::VisitFPDataProcessing1Source}, \ + {"fmov_h_floatdp1"_h, &VISITORCLASS::VisitFPDataProcessing1Source}, \ + {"fmov_s_floatdp1"_h, &VISITORCLASS::VisitFPDataProcessing1Source}, \ + {"fneg_d_floatdp1"_h, &VISITORCLASS::VisitFPDataProcessing1Source}, \ + {"fneg_h_floatdp1"_h, &VISITORCLASS::VisitFPDataProcessing1Source}, \ + {"fneg_s_floatdp1"_h, &VISITORCLASS::VisitFPDataProcessing1Source}, \ + {"frint32x_d_floatdp1"_h, &VISITORCLASS::VisitFPDataProcessing1Source}, \ + {"frint32x_s_floatdp1"_h, &VISITORCLASS::VisitFPDataProcessing1Source}, \ + {"frint32z_d_floatdp1"_h, &VISITORCLASS::VisitFPDataProcessing1Source}, \ + {"frint32z_s_floatdp1"_h, &VISITORCLASS::VisitFPDataProcessing1Source}, \ + {"frint64x_d_floatdp1"_h, &VISITORCLASS::VisitFPDataProcessing1Source}, \ + {"frint64x_s_floatdp1"_h, &VISITORCLASS::VisitFPDataProcessing1Source}, \ + {"frint64z_d_floatdp1"_h, &VISITORCLASS::VisitFPDataProcessing1Source}, \ + {"frint64z_s_floatdp1"_h, &VISITORCLASS::VisitFPDataProcessing1Source}, \ + {"frinta_d_floatdp1"_h, 
&VISITORCLASS::VisitFPDataProcessing1Source}, \ + {"frinta_h_floatdp1"_h, &VISITORCLASS::VisitFPDataProcessing1Source}, \ + {"frinta_s_floatdp1"_h, &VISITORCLASS::VisitFPDataProcessing1Source}, \ + {"frinti_d_floatdp1"_h, &VISITORCLASS::VisitFPDataProcessing1Source}, \ + {"frinti_h_floatdp1"_h, &VISITORCLASS::VisitFPDataProcessing1Source}, \ + {"frinti_s_floatdp1"_h, &VISITORCLASS::VisitFPDataProcessing1Source}, \ + {"frintm_d_floatdp1"_h, &VISITORCLASS::VisitFPDataProcessing1Source}, \ + {"frintm_h_floatdp1"_h, &VISITORCLASS::VisitFPDataProcessing1Source}, \ + {"frintm_s_floatdp1"_h, &VISITORCLASS::VisitFPDataProcessing1Source}, \ + {"frintn_d_floatdp1"_h, &VISITORCLASS::VisitFPDataProcessing1Source}, \ + {"frintn_h_floatdp1"_h, &VISITORCLASS::VisitFPDataProcessing1Source}, \ + {"frintn_s_floatdp1"_h, &VISITORCLASS::VisitFPDataProcessing1Source}, \ + {"frintp_d_floatdp1"_h, &VISITORCLASS::VisitFPDataProcessing1Source}, \ + {"frintp_h_floatdp1"_h, &VISITORCLASS::VisitFPDataProcessing1Source}, \ + {"frintp_s_floatdp1"_h, &VISITORCLASS::VisitFPDataProcessing1Source}, \ + {"frintx_d_floatdp1"_h, &VISITORCLASS::VisitFPDataProcessing1Source}, \ + {"frintx_h_floatdp1"_h, &VISITORCLASS::VisitFPDataProcessing1Source}, \ + {"frintx_s_floatdp1"_h, &VISITORCLASS::VisitFPDataProcessing1Source}, \ + {"frintz_d_floatdp1"_h, &VISITORCLASS::VisitFPDataProcessing1Source}, \ + {"frintz_h_floatdp1"_h, &VISITORCLASS::VisitFPDataProcessing1Source}, \ + {"frintz_s_floatdp1"_h, &VISITORCLASS::VisitFPDataProcessing1Source}, \ + {"fsqrt_d_floatdp1"_h, &VISITORCLASS::VisitFPDataProcessing1Source}, \ + {"fsqrt_h_floatdp1"_h, &VISITORCLASS::VisitFPDataProcessing1Source}, \ + {"fsqrt_s_floatdp1"_h, &VISITORCLASS::VisitFPDataProcessing1Source}, \ + {"fadd_d_floatdp2"_h, &VISITORCLASS::VisitFPDataProcessing2Source}, \ + {"fadd_h_floatdp2"_h, &VISITORCLASS::VisitFPDataProcessing2Source}, \ + {"fadd_s_floatdp2"_h, &VISITORCLASS::VisitFPDataProcessing2Source}, \ + {"fdiv_d_floatdp2"_h, &VISITORCLASS::VisitFPDataProcessing2Source}, \ + {"fdiv_h_floatdp2"_h, &VISITORCLASS::VisitFPDataProcessing2Source}, \ + {"fdiv_s_floatdp2"_h, &VISITORCLASS::VisitFPDataProcessing2Source}, \ + {"fmaxnm_d_floatdp2"_h, &VISITORCLASS::VisitFPDataProcessing2Source}, \ + {"fmaxnm_h_floatdp2"_h, &VISITORCLASS::VisitFPDataProcessing2Source}, \ + {"fmaxnm_s_floatdp2"_h, &VISITORCLASS::VisitFPDataProcessing2Source}, \ + {"fmax_d_floatdp2"_h, &VISITORCLASS::VisitFPDataProcessing2Source}, \ + {"fmax_h_floatdp2"_h, &VISITORCLASS::VisitFPDataProcessing2Source}, \ + {"fmax_s_floatdp2"_h, &VISITORCLASS::VisitFPDataProcessing2Source}, \ + {"fminnm_d_floatdp2"_h, &VISITORCLASS::VisitFPDataProcessing2Source}, \ + {"fminnm_h_floatdp2"_h, &VISITORCLASS::VisitFPDataProcessing2Source}, \ + {"fminnm_s_floatdp2"_h, &VISITORCLASS::VisitFPDataProcessing2Source}, \ + {"fmin_d_floatdp2"_h, &VISITORCLASS::VisitFPDataProcessing2Source}, \ + {"fmin_h_floatdp2"_h, &VISITORCLASS::VisitFPDataProcessing2Source}, \ + {"fmin_s_floatdp2"_h, &VISITORCLASS::VisitFPDataProcessing2Source}, \ + {"fmul_d_floatdp2"_h, &VISITORCLASS::VisitFPDataProcessing2Source}, \ + {"fmul_h_floatdp2"_h, &VISITORCLASS::VisitFPDataProcessing2Source}, \ + {"fmul_s_floatdp2"_h, &VISITORCLASS::VisitFPDataProcessing2Source}, \ + {"fnmul_d_floatdp2"_h, &VISITORCLASS::VisitFPDataProcessing2Source}, \ + {"fnmul_h_floatdp2"_h, &VISITORCLASS::VisitFPDataProcessing2Source}, \ + {"fnmul_s_floatdp2"_h, &VISITORCLASS::VisitFPDataProcessing2Source}, \ + {"fsub_d_floatdp2"_h, 
&VISITORCLASS::VisitFPDataProcessing2Source}, \ + {"fsub_h_floatdp2"_h, &VISITORCLASS::VisitFPDataProcessing2Source}, \ + {"fsub_s_floatdp2"_h, &VISITORCLASS::VisitFPDataProcessing2Source}, \ + {"fmadd_d_floatdp3"_h, &VISITORCLASS::VisitFPDataProcessing3Source}, \ + {"fmadd_h_floatdp3"_h, &VISITORCLASS::VisitFPDataProcessing3Source}, \ + {"fmadd_s_floatdp3"_h, &VISITORCLASS::VisitFPDataProcessing3Source}, \ + {"fmsub_d_floatdp3"_h, &VISITORCLASS::VisitFPDataProcessing3Source}, \ + {"fmsub_h_floatdp3"_h, &VISITORCLASS::VisitFPDataProcessing3Source}, \ + {"fmsub_s_floatdp3"_h, &VISITORCLASS::VisitFPDataProcessing3Source}, \ + {"fnmadd_d_floatdp3"_h, &VISITORCLASS::VisitFPDataProcessing3Source}, \ + {"fnmadd_h_floatdp3"_h, &VISITORCLASS::VisitFPDataProcessing3Source}, \ + {"fnmadd_s_floatdp3"_h, &VISITORCLASS::VisitFPDataProcessing3Source}, \ + {"fnmsub_d_floatdp3"_h, &VISITORCLASS::VisitFPDataProcessing3Source}, \ + {"fnmsub_h_floatdp3"_h, &VISITORCLASS::VisitFPDataProcessing3Source}, \ + {"fnmsub_s_floatdp3"_h, &VISITORCLASS::VisitFPDataProcessing3Source}, \ + {"fcvtzs_32d_float2fix"_h, &VISITORCLASS::VisitFPFixedPointConvert}, \ + {"fcvtzs_32h_float2fix"_h, &VISITORCLASS::VisitFPFixedPointConvert}, \ + {"fcvtzs_32s_float2fix"_h, &VISITORCLASS::VisitFPFixedPointConvert}, \ + {"fcvtzs_64d_float2fix"_h, &VISITORCLASS::VisitFPFixedPointConvert}, \ + {"fcvtzs_64h_float2fix"_h, &VISITORCLASS::VisitFPFixedPointConvert}, \ + {"fcvtzs_64s_float2fix"_h, &VISITORCLASS::VisitFPFixedPointConvert}, \ + {"fcvtzu_32d_float2fix"_h, &VISITORCLASS::VisitFPFixedPointConvert}, \ + {"fcvtzu_32h_float2fix"_h, &VISITORCLASS::VisitFPFixedPointConvert}, \ + {"fcvtzu_32s_float2fix"_h, &VISITORCLASS::VisitFPFixedPointConvert}, \ + {"fcvtzu_64d_float2fix"_h, &VISITORCLASS::VisitFPFixedPointConvert}, \ + {"fcvtzu_64h_float2fix"_h, &VISITORCLASS::VisitFPFixedPointConvert}, \ + {"fcvtzu_64s_float2fix"_h, &VISITORCLASS::VisitFPFixedPointConvert}, \ + {"scvtf_d32_float2fix"_h, &VISITORCLASS::VisitFPFixedPointConvert}, \ + {"scvtf_d64_float2fix"_h, &VISITORCLASS::VisitFPFixedPointConvert}, \ + {"scvtf_h32_float2fix"_h, &VISITORCLASS::VisitFPFixedPointConvert}, \ + {"scvtf_h64_float2fix"_h, &VISITORCLASS::VisitFPFixedPointConvert}, \ + {"scvtf_s32_float2fix"_h, &VISITORCLASS::VisitFPFixedPointConvert}, \ + {"scvtf_s64_float2fix"_h, &VISITORCLASS::VisitFPFixedPointConvert}, \ + {"ucvtf_d32_float2fix"_h, &VISITORCLASS::VisitFPFixedPointConvert}, \ + {"ucvtf_d64_float2fix"_h, &VISITORCLASS::VisitFPFixedPointConvert}, \ + {"ucvtf_h32_float2fix"_h, &VISITORCLASS::VisitFPFixedPointConvert}, \ + {"ucvtf_h64_float2fix"_h, &VISITORCLASS::VisitFPFixedPointConvert}, \ + {"ucvtf_s32_float2fix"_h, &VISITORCLASS::VisitFPFixedPointConvert}, \ + {"ucvtf_s64_float2fix"_h, &VISITORCLASS::VisitFPFixedPointConvert}, \ + {"fmov_d_floatimm"_h, &VISITORCLASS::VisitFPImmediate}, \ + {"fmov_h_floatimm"_h, &VISITORCLASS::VisitFPImmediate}, \ + {"fmov_s_floatimm"_h, &VISITORCLASS::VisitFPImmediate}, \ + {"fcvtas_32d_float2int"_h, &VISITORCLASS::VisitFPIntegerConvert}, \ + {"fcvtas_32h_float2int"_h, &VISITORCLASS::VisitFPIntegerConvert}, \ + {"fcvtas_32s_float2int"_h, &VISITORCLASS::VisitFPIntegerConvert}, \ + {"fcvtas_64d_float2int"_h, &VISITORCLASS::VisitFPIntegerConvert}, \ + {"fcvtas_64h_float2int"_h, &VISITORCLASS::VisitFPIntegerConvert}, \ + {"fcvtas_64s_float2int"_h, &VISITORCLASS::VisitFPIntegerConvert}, \ + {"fcvtau_32d_float2int"_h, &VISITORCLASS::VisitFPIntegerConvert}, \ + {"fcvtau_32h_float2int"_h, &VISITORCLASS::VisitFPIntegerConvert}, \ 
+ {"fcvtau_32s_float2int"_h, &VISITORCLASS::VisitFPIntegerConvert}, \ + {"fcvtau_64d_float2int"_h, &VISITORCLASS::VisitFPIntegerConvert}, \ + {"fcvtau_64h_float2int"_h, &VISITORCLASS::VisitFPIntegerConvert}, \ + {"fcvtau_64s_float2int"_h, &VISITORCLASS::VisitFPIntegerConvert}, \ + {"fcvtms_32d_float2int"_h, &VISITORCLASS::VisitFPIntegerConvert}, \ + {"fcvtms_32h_float2int"_h, &VISITORCLASS::VisitFPIntegerConvert}, \ + {"fcvtms_32s_float2int"_h, &VISITORCLASS::VisitFPIntegerConvert}, \ + {"fcvtms_64d_float2int"_h, &VISITORCLASS::VisitFPIntegerConvert}, \ + {"fcvtms_64h_float2int"_h, &VISITORCLASS::VisitFPIntegerConvert}, \ + {"fcvtms_64s_float2int"_h, &VISITORCLASS::VisitFPIntegerConvert}, \ + {"fcvtmu_32d_float2int"_h, &VISITORCLASS::VisitFPIntegerConvert}, \ + {"fcvtmu_32h_float2int"_h, &VISITORCLASS::VisitFPIntegerConvert}, \ + {"fcvtmu_32s_float2int"_h, &VISITORCLASS::VisitFPIntegerConvert}, \ + {"fcvtmu_64d_float2int"_h, &VISITORCLASS::VisitFPIntegerConvert}, \ + {"fcvtmu_64h_float2int"_h, &VISITORCLASS::VisitFPIntegerConvert}, \ + {"fcvtmu_64s_float2int"_h, &VISITORCLASS::VisitFPIntegerConvert}, \ + {"fcvtns_32d_float2int"_h, &VISITORCLASS::VisitFPIntegerConvert}, \ + {"fcvtns_32h_float2int"_h, &VISITORCLASS::VisitFPIntegerConvert}, \ + {"fcvtns_32s_float2int"_h, &VISITORCLASS::VisitFPIntegerConvert}, \ + {"fcvtns_64d_float2int"_h, &VISITORCLASS::VisitFPIntegerConvert}, \ + {"fcvtns_64h_float2int"_h, &VISITORCLASS::VisitFPIntegerConvert}, \ + {"fcvtns_64s_float2int"_h, &VISITORCLASS::VisitFPIntegerConvert}, \ + {"fcvtnu_32d_float2int"_h, &VISITORCLASS::VisitFPIntegerConvert}, \ + {"fcvtnu_32h_float2int"_h, &VISITORCLASS::VisitFPIntegerConvert}, \ + {"fcvtnu_32s_float2int"_h, &VISITORCLASS::VisitFPIntegerConvert}, \ + {"fcvtnu_64d_float2int"_h, &VISITORCLASS::VisitFPIntegerConvert}, \ + {"fcvtnu_64h_float2int"_h, &VISITORCLASS::VisitFPIntegerConvert}, \ + {"fcvtnu_64s_float2int"_h, &VISITORCLASS::VisitFPIntegerConvert}, \ + {"fcvtps_32d_float2int"_h, &VISITORCLASS::VisitFPIntegerConvert}, \ + {"fcvtps_32h_float2int"_h, &VISITORCLASS::VisitFPIntegerConvert}, \ + {"fcvtps_32s_float2int"_h, &VISITORCLASS::VisitFPIntegerConvert}, \ + {"fcvtps_64d_float2int"_h, &VISITORCLASS::VisitFPIntegerConvert}, \ + {"fcvtps_64h_float2int"_h, &VISITORCLASS::VisitFPIntegerConvert}, \ + {"fcvtps_64s_float2int"_h, &VISITORCLASS::VisitFPIntegerConvert}, \ + {"fcvtpu_32d_float2int"_h, &VISITORCLASS::VisitFPIntegerConvert}, \ + {"fcvtpu_32h_float2int"_h, &VISITORCLASS::VisitFPIntegerConvert}, \ + {"fcvtpu_32s_float2int"_h, &VISITORCLASS::VisitFPIntegerConvert}, \ + {"fcvtpu_64d_float2int"_h, &VISITORCLASS::VisitFPIntegerConvert}, \ + {"fcvtpu_64h_float2int"_h, &VISITORCLASS::VisitFPIntegerConvert}, \ + {"fcvtpu_64s_float2int"_h, &VISITORCLASS::VisitFPIntegerConvert}, \ + {"fcvtzs_32d_float2int"_h, &VISITORCLASS::VisitFPIntegerConvert}, \ + {"fcvtzs_32h_float2int"_h, &VISITORCLASS::VisitFPIntegerConvert}, \ + {"fcvtzs_32s_float2int"_h, &VISITORCLASS::VisitFPIntegerConvert}, \ + {"fcvtzs_64d_float2int"_h, &VISITORCLASS::VisitFPIntegerConvert}, \ + {"fcvtzs_64h_float2int"_h, &VISITORCLASS::VisitFPIntegerConvert}, \ + {"fcvtzs_64s_float2int"_h, &VISITORCLASS::VisitFPIntegerConvert}, \ + {"fcvtzu_32d_float2int"_h, &VISITORCLASS::VisitFPIntegerConvert}, \ + {"fcvtzu_32h_float2int"_h, &VISITORCLASS::VisitFPIntegerConvert}, \ + {"fcvtzu_32s_float2int"_h, &VISITORCLASS::VisitFPIntegerConvert}, \ + {"fcvtzu_64d_float2int"_h, &VISITORCLASS::VisitFPIntegerConvert}, \ + {"fcvtzu_64h_float2int"_h, 
&VISITORCLASS::VisitFPIntegerConvert}, \ + {"fcvtzu_64s_float2int"_h, &VISITORCLASS::VisitFPIntegerConvert}, \ + {"fjcvtzs_32d_float2int"_h, &VISITORCLASS::VisitFPIntegerConvert}, \ + {"fmov_32h_float2int"_h, &VISITORCLASS::VisitFPIntegerConvert}, \ + {"fmov_32s_float2int"_h, &VISITORCLASS::VisitFPIntegerConvert}, \ + {"fmov_64d_float2int"_h, &VISITORCLASS::VisitFPIntegerConvert}, \ + {"fmov_64h_float2int"_h, &VISITORCLASS::VisitFPIntegerConvert}, \ + {"fmov_64vx_float2int"_h, &VISITORCLASS::VisitFPIntegerConvert}, \ + {"fmov_d64_float2int"_h, &VISITORCLASS::VisitFPIntegerConvert}, \ + {"fmov_h32_float2int"_h, &VISITORCLASS::VisitFPIntegerConvert}, \ + {"fmov_h64_float2int"_h, &VISITORCLASS::VisitFPIntegerConvert}, \ + {"fmov_s32_float2int"_h, &VISITORCLASS::VisitFPIntegerConvert}, \ + {"fmov_v64i_float2int"_h, &VISITORCLASS::VisitFPIntegerConvert}, \ + {"scvtf_d32_float2int"_h, &VISITORCLASS::VisitFPIntegerConvert}, \ + {"scvtf_d64_float2int"_h, &VISITORCLASS::VisitFPIntegerConvert}, \ + {"scvtf_h32_float2int"_h, &VISITORCLASS::VisitFPIntegerConvert}, \ + {"scvtf_h64_float2int"_h, &VISITORCLASS::VisitFPIntegerConvert}, \ + {"scvtf_s32_float2int"_h, &VISITORCLASS::VisitFPIntegerConvert}, \ + {"scvtf_s64_float2int"_h, &VISITORCLASS::VisitFPIntegerConvert}, \ + {"ucvtf_d32_float2int"_h, &VISITORCLASS::VisitFPIntegerConvert}, \ + {"ucvtf_d64_float2int"_h, &VISITORCLASS::VisitFPIntegerConvert}, \ + {"ucvtf_h32_float2int"_h, &VISITORCLASS::VisitFPIntegerConvert}, \ + {"ucvtf_h64_float2int"_h, &VISITORCLASS::VisitFPIntegerConvert}, \ + {"ucvtf_s32_float2int"_h, &VISITORCLASS::VisitFPIntegerConvert}, \ + {"ucvtf_s64_float2int"_h, &VISITORCLASS::VisitFPIntegerConvert}, \ + {"ldrsw_64_loadlit"_h, &VISITORCLASS::VisitLoadLiteral}, \ + {"ldr_32_loadlit"_h, &VISITORCLASS::VisitLoadLiteral}, \ + {"ldr_64_loadlit"_h, &VISITORCLASS::VisitLoadLiteral}, \ + {"ldr_d_loadlit"_h, &VISITORCLASS::VisitLoadLiteral}, \ + {"ldr_q_loadlit"_h, &VISITORCLASS::VisitLoadLiteral}, \ + {"ldr_s_loadlit"_h, &VISITORCLASS::VisitLoadLiteral}, \ + {"prfm_p_loadlit"_h, &VISITORCLASS::VisitLoadLiteral}, \ + {"casab_c32_ldstexcl"_h, &VISITORCLASS::VisitLoadStoreExclusive}, \ + {"casah_c32_ldstexcl"_h, &VISITORCLASS::VisitLoadStoreExclusive}, \ + {"casalb_c32_ldstexcl"_h, &VISITORCLASS::VisitLoadStoreExclusive}, \ + {"casalh_c32_ldstexcl"_h, &VISITORCLASS::VisitLoadStoreExclusive}, \ + {"casal_c32_ldstexcl"_h, &VISITORCLASS::VisitLoadStoreExclusive}, \ + {"casal_c64_ldstexcl"_h, &VISITORCLASS::VisitLoadStoreExclusive}, \ + {"casa_c32_ldstexcl"_h, &VISITORCLASS::VisitLoadStoreExclusive}, \ + {"casa_c64_ldstexcl"_h, &VISITORCLASS::VisitLoadStoreExclusive}, \ + {"casb_c32_ldstexcl"_h, &VISITORCLASS::VisitLoadStoreExclusive}, \ + {"cash_c32_ldstexcl"_h, &VISITORCLASS::VisitLoadStoreExclusive}, \ + {"caslb_c32_ldstexcl"_h, &VISITORCLASS::VisitLoadStoreExclusive}, \ + {"caslh_c32_ldstexcl"_h, &VISITORCLASS::VisitLoadStoreExclusive}, \ + {"casl_c32_ldstexcl"_h, &VISITORCLASS::VisitLoadStoreExclusive}, \ + {"casl_c64_ldstexcl"_h, &VISITORCLASS::VisitLoadStoreExclusive}, \ + {"caspal_cp32_ldstexcl"_h, &VISITORCLASS::VisitLoadStoreExclusive}, \ + {"caspal_cp64_ldstexcl"_h, &VISITORCLASS::VisitLoadStoreExclusive}, \ + {"caspa_cp32_ldstexcl"_h, &VISITORCLASS::VisitLoadStoreExclusive}, \ + {"caspa_cp64_ldstexcl"_h, &VISITORCLASS::VisitLoadStoreExclusive}, \ + {"caspl_cp32_ldstexcl"_h, &VISITORCLASS::VisitLoadStoreExclusive}, \ + {"caspl_cp64_ldstexcl"_h, &VISITORCLASS::VisitLoadStoreExclusive}, \ + {"casp_cp32_ldstexcl"_h, 
&VISITORCLASS::VisitLoadStoreExclusive}, \ + {"casp_cp64_ldstexcl"_h, &VISITORCLASS::VisitLoadStoreExclusive}, \ + {"cas_c32_ldstexcl"_h, &VISITORCLASS::VisitLoadStoreExclusive}, \ + {"cas_c64_ldstexcl"_h, &VISITORCLASS::VisitLoadStoreExclusive}, \ + {"ldarb_lr32_ldstexcl"_h, &VISITORCLASS::VisitLoadStoreExclusive}, \ + {"ldarh_lr32_ldstexcl"_h, &VISITORCLASS::VisitLoadStoreExclusive}, \ + {"ldar_lr32_ldstexcl"_h, &VISITORCLASS::VisitLoadStoreExclusive}, \ + {"ldar_lr64_ldstexcl"_h, &VISITORCLASS::VisitLoadStoreExclusive}, \ + {"ldaxp_lp32_ldstexcl"_h, &VISITORCLASS::VisitLoadStoreExclusive}, \ + {"ldaxp_lp64_ldstexcl"_h, &VISITORCLASS::VisitLoadStoreExclusive}, \ + {"ldaxrb_lr32_ldstexcl"_h, &VISITORCLASS::VisitLoadStoreExclusive}, \ + {"ldaxrh_lr32_ldstexcl"_h, &VISITORCLASS::VisitLoadStoreExclusive}, \ + {"ldaxr_lr32_ldstexcl"_h, &VISITORCLASS::VisitLoadStoreExclusive}, \ + {"ldaxr_lr64_ldstexcl"_h, &VISITORCLASS::VisitLoadStoreExclusive}, \ + {"ldlarb_lr32_ldstexcl"_h, &VISITORCLASS::VisitLoadStoreExclusive}, \ + {"ldlarh_lr32_ldstexcl"_h, &VISITORCLASS::VisitLoadStoreExclusive}, \ + {"ldlar_lr32_ldstexcl"_h, &VISITORCLASS::VisitLoadStoreExclusive}, \ + {"ldlar_lr64_ldstexcl"_h, &VISITORCLASS::VisitLoadStoreExclusive}, \ + {"ldxp_lp32_ldstexcl"_h, &VISITORCLASS::VisitLoadStoreExclusive}, \ + {"ldxp_lp64_ldstexcl"_h, &VISITORCLASS::VisitLoadStoreExclusive}, \ + {"ldxrb_lr32_ldstexcl"_h, &VISITORCLASS::VisitLoadStoreExclusive}, \ + {"ldxrh_lr32_ldstexcl"_h, &VISITORCLASS::VisitLoadStoreExclusive}, \ + {"ldxr_lr32_ldstexcl"_h, &VISITORCLASS::VisitLoadStoreExclusive}, \ + {"ldxr_lr64_ldstexcl"_h, &VISITORCLASS::VisitLoadStoreExclusive}, \ + {"stllrb_sl32_ldstexcl"_h, &VISITORCLASS::VisitLoadStoreExclusive}, \ + {"stllrh_sl32_ldstexcl"_h, &VISITORCLASS::VisitLoadStoreExclusive}, \ + {"stllr_sl32_ldstexcl"_h, &VISITORCLASS::VisitLoadStoreExclusive}, \ + {"stllr_sl64_ldstexcl"_h, &VISITORCLASS::VisitLoadStoreExclusive}, \ + {"stlrb_sl32_ldstexcl"_h, &VISITORCLASS::VisitLoadStoreExclusive}, \ + {"stlrh_sl32_ldstexcl"_h, &VISITORCLASS::VisitLoadStoreExclusive}, \ + {"stlr_sl32_ldstexcl"_h, &VISITORCLASS::VisitLoadStoreExclusive}, \ + {"stlr_sl64_ldstexcl"_h, &VISITORCLASS::VisitLoadStoreExclusive}, \ + {"stlxp_sp32_ldstexcl"_h, &VISITORCLASS::VisitLoadStoreExclusive}, \ + {"stlxp_sp64_ldstexcl"_h, &VISITORCLASS::VisitLoadStoreExclusive}, \ + {"stlxrb_sr32_ldstexcl"_h, &VISITORCLASS::VisitLoadStoreExclusive}, \ + {"stlxrh_sr32_ldstexcl"_h, &VISITORCLASS::VisitLoadStoreExclusive}, \ + {"stlxr_sr32_ldstexcl"_h, &VISITORCLASS::VisitLoadStoreExclusive}, \ + {"stlxr_sr64_ldstexcl"_h, &VISITORCLASS::VisitLoadStoreExclusive}, \ + {"stxp_sp32_ldstexcl"_h, &VISITORCLASS::VisitLoadStoreExclusive}, \ + {"stxp_sp64_ldstexcl"_h, &VISITORCLASS::VisitLoadStoreExclusive}, \ + {"stxrb_sr32_ldstexcl"_h, &VISITORCLASS::VisitLoadStoreExclusive}, \ + {"stxrh_sr32_ldstexcl"_h, &VISITORCLASS::VisitLoadStoreExclusive}, \ + {"stxr_sr32_ldstexcl"_h, &VISITORCLASS::VisitLoadStoreExclusive}, \ + {"stxr_sr64_ldstexcl"_h, &VISITORCLASS::VisitLoadStoreExclusive}, \ + {"ldraa_64w_ldst_pac"_h, &VISITORCLASS::VisitLoadStorePAC}, \ + {"ldraa_64_ldst_pac"_h, &VISITORCLASS::VisitLoadStorePAC}, \ + {"ldrab_64w_ldst_pac"_h, &VISITORCLASS::VisitLoadStorePAC}, \ + {"ldrab_64_ldst_pac"_h, &VISITORCLASS::VisitLoadStorePAC}, \ + {"ldnp_32_ldstnapair_offs"_h, \ + &VISITORCLASS::VisitLoadStorePairNonTemporal}, \ + {"ldnp_64_ldstnapair_offs"_h, \ + &VISITORCLASS::VisitLoadStorePairNonTemporal}, \ + {"ldnp_d_ldstnapair_offs"_h, \ + 
&VISITORCLASS::VisitLoadStorePairNonTemporal}, \ + {"ldnp_q_ldstnapair_offs"_h, \ + &VISITORCLASS::VisitLoadStorePairNonTemporal}, \ + {"ldnp_s_ldstnapair_offs"_h, \ + &VISITORCLASS::VisitLoadStorePairNonTemporal}, \ + {"stnp_32_ldstnapair_offs"_h, \ + &VISITORCLASS::VisitLoadStorePairNonTemporal}, \ + {"stnp_64_ldstnapair_offs"_h, \ + &VISITORCLASS::VisitLoadStorePairNonTemporal}, \ + {"stnp_d_ldstnapair_offs"_h, \ + &VISITORCLASS::VisitLoadStorePairNonTemporal}, \ + {"stnp_q_ldstnapair_offs"_h, \ + &VISITORCLASS::VisitLoadStorePairNonTemporal}, \ + {"stnp_s_ldstnapair_offs"_h, \ + &VISITORCLASS::VisitLoadStorePairNonTemporal}, \ + {"ldpsw_64_ldstpair_off"_h, &VISITORCLASS::VisitLoadStorePairOffset}, \ + {"ldp_32_ldstpair_off"_h, &VISITORCLASS::VisitLoadStorePairOffset}, \ + {"ldp_64_ldstpair_off"_h, &VISITORCLASS::VisitLoadStorePairOffset}, \ + {"ldp_d_ldstpair_off"_h, &VISITORCLASS::VisitLoadStorePairOffset}, \ + {"ldp_q_ldstpair_off"_h, &VISITORCLASS::VisitLoadStorePairOffset}, \ + {"ldp_s_ldstpair_off"_h, &VISITORCLASS::VisitLoadStorePairOffset}, \ + {"stp_32_ldstpair_off"_h, &VISITORCLASS::VisitLoadStorePairOffset}, \ + {"stp_64_ldstpair_off"_h, &VISITORCLASS::VisitLoadStorePairOffset}, \ + {"stp_d_ldstpair_off"_h, &VISITORCLASS::VisitLoadStorePairOffset}, \ + {"stp_q_ldstpair_off"_h, &VISITORCLASS::VisitLoadStorePairOffset}, \ + {"stp_s_ldstpair_off"_h, &VISITORCLASS::VisitLoadStorePairOffset}, \ + {"ldpsw_64_ldstpair_post"_h, \ + &VISITORCLASS::VisitLoadStorePairPostIndex}, \ + {"ldp_32_ldstpair_post"_h, &VISITORCLASS::VisitLoadStorePairPostIndex}, \ + {"ldp_64_ldstpair_post"_h, &VISITORCLASS::VisitLoadStorePairPostIndex}, \ + {"ldp_d_ldstpair_post"_h, &VISITORCLASS::VisitLoadStorePairPostIndex}, \ + {"ldp_q_ldstpair_post"_h, &VISITORCLASS::VisitLoadStorePairPostIndex}, \ + {"ldp_s_ldstpair_post"_h, &VISITORCLASS::VisitLoadStorePairPostIndex}, \ + {"stp_32_ldstpair_post"_h, &VISITORCLASS::VisitLoadStorePairPostIndex}, \ + {"stp_64_ldstpair_post"_h, &VISITORCLASS::VisitLoadStorePairPostIndex}, \ + {"stp_d_ldstpair_post"_h, &VISITORCLASS::VisitLoadStorePairPostIndex}, \ + {"stp_q_ldstpair_post"_h, &VISITORCLASS::VisitLoadStorePairPostIndex}, \ + {"stp_s_ldstpair_post"_h, &VISITORCLASS::VisitLoadStorePairPostIndex}, \ + {"ldpsw_64_ldstpair_pre"_h, &VISITORCLASS::VisitLoadStorePairPreIndex}, \ + {"ldp_32_ldstpair_pre"_h, &VISITORCLASS::VisitLoadStorePairPreIndex}, \ + {"ldp_64_ldstpair_pre"_h, &VISITORCLASS::VisitLoadStorePairPreIndex}, \ + {"ldp_d_ldstpair_pre"_h, &VISITORCLASS::VisitLoadStorePairPreIndex}, \ + {"ldp_q_ldstpair_pre"_h, &VISITORCLASS::VisitLoadStorePairPreIndex}, \ + {"ldp_s_ldstpair_pre"_h, &VISITORCLASS::VisitLoadStorePairPreIndex}, \ + {"stp_32_ldstpair_pre"_h, &VISITORCLASS::VisitLoadStorePairPreIndex}, \ + {"stp_64_ldstpair_pre"_h, &VISITORCLASS::VisitLoadStorePairPreIndex}, \ + {"stp_d_ldstpair_pre"_h, &VISITORCLASS::VisitLoadStorePairPreIndex}, \ + {"stp_q_ldstpair_pre"_h, &VISITORCLASS::VisitLoadStorePairPreIndex}, \ + {"stp_s_ldstpair_pre"_h, &VISITORCLASS::VisitLoadStorePairPreIndex}, \ + {"ldrb_32_ldst_immpost"_h, &VISITORCLASS::VisitLoadStorePostIndex}, \ + {"ldrh_32_ldst_immpost"_h, &VISITORCLASS::VisitLoadStorePostIndex}, \ + {"ldrsb_32_ldst_immpost"_h, &VISITORCLASS::VisitLoadStorePostIndex}, \ + {"ldrsb_64_ldst_immpost"_h, &VISITORCLASS::VisitLoadStorePostIndex}, \ + {"ldrsh_32_ldst_immpost"_h, &VISITORCLASS::VisitLoadStorePostIndex}, \ + {"ldrsh_64_ldst_immpost"_h, &VISITORCLASS::VisitLoadStorePostIndex}, \ + {"ldrsw_64_ldst_immpost"_h, 
&VISITORCLASS::VisitLoadStorePostIndex}, \ + {"ldr_32_ldst_immpost"_h, &VISITORCLASS::VisitLoadStorePostIndex}, \ + {"ldr_64_ldst_immpost"_h, &VISITORCLASS::VisitLoadStorePostIndex}, \ + {"ldr_b_ldst_immpost"_h, &VISITORCLASS::VisitLoadStorePostIndex}, \ + {"ldr_d_ldst_immpost"_h, &VISITORCLASS::VisitLoadStorePostIndex}, \ + {"ldr_h_ldst_immpost"_h, &VISITORCLASS::VisitLoadStorePostIndex}, \ + {"ldr_q_ldst_immpost"_h, &VISITORCLASS::VisitLoadStorePostIndex}, \ + {"ldr_s_ldst_immpost"_h, &VISITORCLASS::VisitLoadStorePostIndex}, \ + {"strb_32_ldst_immpost"_h, &VISITORCLASS::VisitLoadStorePostIndex}, \ + {"strh_32_ldst_immpost"_h, &VISITORCLASS::VisitLoadStorePostIndex}, \ + {"str_32_ldst_immpost"_h, &VISITORCLASS::VisitLoadStorePostIndex}, \ + {"str_64_ldst_immpost"_h, &VISITORCLASS::VisitLoadStorePostIndex}, \ + {"str_b_ldst_immpost"_h, &VISITORCLASS::VisitLoadStorePostIndex}, \ + {"str_d_ldst_immpost"_h, &VISITORCLASS::VisitLoadStorePostIndex}, \ + {"str_h_ldst_immpost"_h, &VISITORCLASS::VisitLoadStorePostIndex}, \ + {"str_q_ldst_immpost"_h, &VISITORCLASS::VisitLoadStorePostIndex}, \ + {"str_s_ldst_immpost"_h, &VISITORCLASS::VisitLoadStorePostIndex}, \ + {"ldrb_32_ldst_immpre"_h, &VISITORCLASS::VisitLoadStorePreIndex}, \ + {"ldrh_32_ldst_immpre"_h, &VISITORCLASS::VisitLoadStorePreIndex}, \ + {"ldrsb_32_ldst_immpre"_h, &VISITORCLASS::VisitLoadStorePreIndex}, \ + {"ldrsb_64_ldst_immpre"_h, &VISITORCLASS::VisitLoadStorePreIndex}, \ + {"ldrsh_32_ldst_immpre"_h, &VISITORCLASS::VisitLoadStorePreIndex}, \ + {"ldrsh_64_ldst_immpre"_h, &VISITORCLASS::VisitLoadStorePreIndex}, \ + {"ldrsw_64_ldst_immpre"_h, &VISITORCLASS::VisitLoadStorePreIndex}, \ + {"ldr_32_ldst_immpre"_h, &VISITORCLASS::VisitLoadStorePreIndex}, \ + {"ldr_64_ldst_immpre"_h, &VISITORCLASS::VisitLoadStorePreIndex}, \ + {"ldr_b_ldst_immpre"_h, &VISITORCLASS::VisitLoadStorePreIndex}, \ + {"ldr_d_ldst_immpre"_h, &VISITORCLASS::VisitLoadStorePreIndex}, \ + {"ldr_h_ldst_immpre"_h, &VISITORCLASS::VisitLoadStorePreIndex}, \ + {"ldr_q_ldst_immpre"_h, &VISITORCLASS::VisitLoadStorePreIndex}, \ + {"ldr_s_ldst_immpre"_h, &VISITORCLASS::VisitLoadStorePreIndex}, \ + {"strb_32_ldst_immpre"_h, &VISITORCLASS::VisitLoadStorePreIndex}, \ + {"strh_32_ldst_immpre"_h, &VISITORCLASS::VisitLoadStorePreIndex}, \ + {"str_32_ldst_immpre"_h, &VISITORCLASS::VisitLoadStorePreIndex}, \ + {"str_64_ldst_immpre"_h, &VISITORCLASS::VisitLoadStorePreIndex}, \ + {"str_b_ldst_immpre"_h, &VISITORCLASS::VisitLoadStorePreIndex}, \ + {"str_d_ldst_immpre"_h, &VISITORCLASS::VisitLoadStorePreIndex}, \ + {"str_h_ldst_immpre"_h, &VISITORCLASS::VisitLoadStorePreIndex}, \ + {"str_q_ldst_immpre"_h, &VISITORCLASS::VisitLoadStorePreIndex}, \ + {"str_s_ldst_immpre"_h, &VISITORCLASS::VisitLoadStorePreIndex}, \ + {"ldapurb_32_ldapstl_unscaled"_h, \ + &VISITORCLASS::VisitLoadStoreRCpcUnscaledOffset}, \ + {"ldapurh_32_ldapstl_unscaled"_h, \ + &VISITORCLASS::VisitLoadStoreRCpcUnscaledOffset}, \ + {"ldapursb_32_ldapstl_unscaled"_h, \ + &VISITORCLASS::VisitLoadStoreRCpcUnscaledOffset}, \ + {"ldapursb_64_ldapstl_unscaled"_h, \ + &VISITORCLASS::VisitLoadStoreRCpcUnscaledOffset}, \ + {"ldapursh_32_ldapstl_unscaled"_h, \ + &VISITORCLASS::VisitLoadStoreRCpcUnscaledOffset}, \ + {"ldapursh_64_ldapstl_unscaled"_h, \ + &VISITORCLASS::VisitLoadStoreRCpcUnscaledOffset}, \ + {"ldapursw_64_ldapstl_unscaled"_h, \ + &VISITORCLASS::VisitLoadStoreRCpcUnscaledOffset}, \ + {"ldapur_32_ldapstl_unscaled"_h, \ + &VISITORCLASS::VisitLoadStoreRCpcUnscaledOffset}, \ + {"ldapur_64_ldapstl_unscaled"_h, \ + 
&VISITORCLASS::VisitLoadStoreRCpcUnscaledOffset}, \ + {"stlurb_32_ldapstl_unscaled"_h, \ + &VISITORCLASS::VisitLoadStoreRCpcUnscaledOffset}, \ + {"stlurh_32_ldapstl_unscaled"_h, \ + &VISITORCLASS::VisitLoadStoreRCpcUnscaledOffset}, \ + {"stlur_32_ldapstl_unscaled"_h, \ + &VISITORCLASS::VisitLoadStoreRCpcUnscaledOffset}, \ + {"stlur_64_ldapstl_unscaled"_h, \ + &VISITORCLASS::VisitLoadStoreRCpcUnscaledOffset}, \ + {"ldrb_32bl_ldst_regoff"_h, \ + &VISITORCLASS::VisitLoadStoreRegisterOffset}, \ + {"ldrb_32b_ldst_regoff"_h, &VISITORCLASS::VisitLoadStoreRegisterOffset}, \ + {"ldrh_32_ldst_regoff"_h, &VISITORCLASS::VisitLoadStoreRegisterOffset}, \ + {"ldrsb_32bl_ldst_regoff"_h, \ + &VISITORCLASS::VisitLoadStoreRegisterOffset}, \ + {"ldrsb_32b_ldst_regoff"_h, \ + &VISITORCLASS::VisitLoadStoreRegisterOffset}, \ + {"ldrsb_64bl_ldst_regoff"_h, \ + &VISITORCLASS::VisitLoadStoreRegisterOffset}, \ + {"ldrsb_64b_ldst_regoff"_h, \ + &VISITORCLASS::VisitLoadStoreRegisterOffset}, \ + {"ldrsh_32_ldst_regoff"_h, &VISITORCLASS::VisitLoadStoreRegisterOffset}, \ + {"ldrsh_64_ldst_regoff"_h, &VISITORCLASS::VisitLoadStoreRegisterOffset}, \ + {"ldrsw_64_ldst_regoff"_h, &VISITORCLASS::VisitLoadStoreRegisterOffset}, \ + {"ldr_32_ldst_regoff"_h, &VISITORCLASS::VisitLoadStoreRegisterOffset}, \ + {"ldr_64_ldst_regoff"_h, &VISITORCLASS::VisitLoadStoreRegisterOffset}, \ + {"ldr_bl_ldst_regoff"_h, &VISITORCLASS::VisitLoadStoreRegisterOffset}, \ + {"ldr_b_ldst_regoff"_h, &VISITORCLASS::VisitLoadStoreRegisterOffset}, \ + {"ldr_d_ldst_regoff"_h, &VISITORCLASS::VisitLoadStoreRegisterOffset}, \ + {"ldr_h_ldst_regoff"_h, &VISITORCLASS::VisitLoadStoreRegisterOffset}, \ + {"ldr_q_ldst_regoff"_h, &VISITORCLASS::VisitLoadStoreRegisterOffset}, \ + {"ldr_s_ldst_regoff"_h, &VISITORCLASS::VisitLoadStoreRegisterOffset}, \ + {"prfm_p_ldst_regoff"_h, &VISITORCLASS::VisitLoadStoreRegisterOffset}, \ + {"strb_32bl_ldst_regoff"_h, \ + &VISITORCLASS::VisitLoadStoreRegisterOffset}, \ + {"strb_32b_ldst_regoff"_h, &VISITORCLASS::VisitLoadStoreRegisterOffset}, \ + {"strh_32_ldst_regoff"_h, &VISITORCLASS::VisitLoadStoreRegisterOffset}, \ + {"str_32_ldst_regoff"_h, &VISITORCLASS::VisitLoadStoreRegisterOffset}, \ + {"str_64_ldst_regoff"_h, &VISITORCLASS::VisitLoadStoreRegisterOffset}, \ + {"str_bl_ldst_regoff"_h, &VISITORCLASS::VisitLoadStoreRegisterOffset}, \ + {"str_b_ldst_regoff"_h, &VISITORCLASS::VisitLoadStoreRegisterOffset}, \ + {"str_d_ldst_regoff"_h, &VISITORCLASS::VisitLoadStoreRegisterOffset}, \ + {"str_h_ldst_regoff"_h, &VISITORCLASS::VisitLoadStoreRegisterOffset}, \ + {"str_q_ldst_regoff"_h, &VISITORCLASS::VisitLoadStoreRegisterOffset}, \ + {"str_s_ldst_regoff"_h, &VISITORCLASS::VisitLoadStoreRegisterOffset}, \ + {"ldurb_32_ldst_unscaled"_h, \ + &VISITORCLASS::VisitLoadStoreUnscaledOffset}, \ + {"ldurh_32_ldst_unscaled"_h, \ + &VISITORCLASS::VisitLoadStoreUnscaledOffset}, \ + {"ldursb_32_ldst_unscaled"_h, \ + &VISITORCLASS::VisitLoadStoreUnscaledOffset}, \ + {"ldursb_64_ldst_unscaled"_h, \ + &VISITORCLASS::VisitLoadStoreUnscaledOffset}, \ + {"ldursh_32_ldst_unscaled"_h, \ + &VISITORCLASS::VisitLoadStoreUnscaledOffset}, \ + {"ldursh_64_ldst_unscaled"_h, \ + &VISITORCLASS::VisitLoadStoreUnscaledOffset}, \ + {"ldursw_64_ldst_unscaled"_h, \ + &VISITORCLASS::VisitLoadStoreUnscaledOffset}, \ + {"ldur_32_ldst_unscaled"_h, \ + &VISITORCLASS::VisitLoadStoreUnscaledOffset}, \ + {"ldur_64_ldst_unscaled"_h, \ + &VISITORCLASS::VisitLoadStoreUnscaledOffset}, \ + {"ldur_b_ldst_unscaled"_h, &VISITORCLASS::VisitLoadStoreUnscaledOffset}, \ + 
{"ldur_d_ldst_unscaled"_h, &VISITORCLASS::VisitLoadStoreUnscaledOffset}, \ + {"ldur_h_ldst_unscaled"_h, &VISITORCLASS::VisitLoadStoreUnscaledOffset}, \ + {"ldur_q_ldst_unscaled"_h, &VISITORCLASS::VisitLoadStoreUnscaledOffset}, \ + {"ldur_s_ldst_unscaled"_h, &VISITORCLASS::VisitLoadStoreUnscaledOffset}, \ + {"prfum_p_ldst_unscaled"_h, \ + &VISITORCLASS::VisitLoadStoreUnscaledOffset}, \ + {"sturb_32_ldst_unscaled"_h, \ + &VISITORCLASS::VisitLoadStoreUnscaledOffset}, \ + {"sturh_32_ldst_unscaled"_h, \ + &VISITORCLASS::VisitLoadStoreUnscaledOffset}, \ + {"stur_32_ldst_unscaled"_h, \ + &VISITORCLASS::VisitLoadStoreUnscaledOffset}, \ + {"stur_64_ldst_unscaled"_h, \ + &VISITORCLASS::VisitLoadStoreUnscaledOffset}, \ + {"stur_b_ldst_unscaled"_h, &VISITORCLASS::VisitLoadStoreUnscaledOffset}, \ + {"stur_d_ldst_unscaled"_h, &VISITORCLASS::VisitLoadStoreUnscaledOffset}, \ + {"stur_h_ldst_unscaled"_h, &VISITORCLASS::VisitLoadStoreUnscaledOffset}, \ + {"stur_q_ldst_unscaled"_h, &VISITORCLASS::VisitLoadStoreUnscaledOffset}, \ + {"stur_s_ldst_unscaled"_h, &VISITORCLASS::VisitLoadStoreUnscaledOffset}, \ + {"ldrb_32_ldst_pos"_h, &VISITORCLASS::VisitLoadStoreUnsignedOffset}, \ + {"ldrh_32_ldst_pos"_h, &VISITORCLASS::VisitLoadStoreUnsignedOffset}, \ + {"ldrsb_32_ldst_pos"_h, &VISITORCLASS::VisitLoadStoreUnsignedOffset}, \ + {"ldrsb_64_ldst_pos"_h, &VISITORCLASS::VisitLoadStoreUnsignedOffset}, \ + {"ldrsh_32_ldst_pos"_h, &VISITORCLASS::VisitLoadStoreUnsignedOffset}, \ + {"ldrsh_64_ldst_pos"_h, &VISITORCLASS::VisitLoadStoreUnsignedOffset}, \ + {"ldrsw_64_ldst_pos"_h, &VISITORCLASS::VisitLoadStoreUnsignedOffset}, \ + {"ldr_32_ldst_pos"_h, &VISITORCLASS::VisitLoadStoreUnsignedOffset}, \ + {"ldr_64_ldst_pos"_h, &VISITORCLASS::VisitLoadStoreUnsignedOffset}, \ + {"ldr_b_ldst_pos"_h, &VISITORCLASS::VisitLoadStoreUnsignedOffset}, \ + {"ldr_d_ldst_pos"_h, &VISITORCLASS::VisitLoadStoreUnsignedOffset}, \ + {"ldr_h_ldst_pos"_h, &VISITORCLASS::VisitLoadStoreUnsignedOffset}, \ + {"ldr_q_ldst_pos"_h, &VISITORCLASS::VisitLoadStoreUnsignedOffset}, \ + {"ldr_s_ldst_pos"_h, &VISITORCLASS::VisitLoadStoreUnsignedOffset}, \ + {"prfm_p_ldst_pos"_h, &VISITORCLASS::VisitLoadStoreUnsignedOffset}, \ + {"strb_32_ldst_pos"_h, &VISITORCLASS::VisitLoadStoreUnsignedOffset}, \ + {"strh_32_ldst_pos"_h, &VISITORCLASS::VisitLoadStoreUnsignedOffset}, \ + {"str_32_ldst_pos"_h, &VISITORCLASS::VisitLoadStoreUnsignedOffset}, \ + {"str_64_ldst_pos"_h, &VISITORCLASS::VisitLoadStoreUnsignedOffset}, \ + {"str_b_ldst_pos"_h, &VISITORCLASS::VisitLoadStoreUnsignedOffset}, \ + {"str_d_ldst_pos"_h, &VISITORCLASS::VisitLoadStoreUnsignedOffset}, \ + {"str_h_ldst_pos"_h, &VISITORCLASS::VisitLoadStoreUnsignedOffset}, \ + {"str_q_ldst_pos"_h, &VISITORCLASS::VisitLoadStoreUnsignedOffset}, \ + {"str_s_ldst_pos"_h, &VISITORCLASS::VisitLoadStoreUnsignedOffset}, \ + {"ands_32s_log_imm"_h, &VISITORCLASS::VisitLogicalImmediate}, \ + {"ands_64s_log_imm"_h, &VISITORCLASS::VisitLogicalImmediate}, \ + {"and_32_log_imm"_h, &VISITORCLASS::VisitLogicalImmediate}, \ + {"and_64_log_imm"_h, &VISITORCLASS::VisitLogicalImmediate}, \ + {"eor_32_log_imm"_h, &VISITORCLASS::VisitLogicalImmediate}, \ + {"eor_64_log_imm"_h, &VISITORCLASS::VisitLogicalImmediate}, \ + {"orr_32_log_imm"_h, &VISITORCLASS::VisitLogicalImmediate}, \ + {"orr_64_log_imm"_h, &VISITORCLASS::VisitLogicalImmediate}, \ + {"ands_32_log_shift"_h, &VISITORCLASS::VisitLogicalShifted}, \ + {"ands_64_log_shift"_h, &VISITORCLASS::VisitLogicalShifted}, \ + {"and_32_log_shift"_h, &VISITORCLASS::VisitLogicalShifted}, \ + 
{"and_64_log_shift"_h, &VISITORCLASS::VisitLogicalShifted}, \ + {"bics_32_log_shift"_h, &VISITORCLASS::VisitLogicalShifted}, \ + {"bics_64_log_shift"_h, &VISITORCLASS::VisitLogicalShifted}, \ + {"bic_32_log_shift"_h, &VISITORCLASS::VisitLogicalShifted}, \ + {"bic_64_log_shift"_h, &VISITORCLASS::VisitLogicalShifted}, \ + {"eon_32_log_shift"_h, &VISITORCLASS::VisitLogicalShifted}, \ + {"eon_64_log_shift"_h, &VISITORCLASS::VisitLogicalShifted}, \ + {"eor_32_log_shift"_h, &VISITORCLASS::VisitLogicalShifted}, \ + {"eor_64_log_shift"_h, &VISITORCLASS::VisitLogicalShifted}, \ + {"orn_32_log_shift"_h, &VISITORCLASS::VisitLogicalShifted}, \ + {"orn_64_log_shift"_h, &VISITORCLASS::VisitLogicalShifted}, \ + {"orr_32_log_shift"_h, &VISITORCLASS::VisitLogicalShifted}, \ + {"orr_64_log_shift"_h, &VISITORCLASS::VisitLogicalShifted}, \ + {"movk_32_movewide"_h, &VISITORCLASS::VisitMoveWideImmediate}, \ + {"movk_64_movewide"_h, &VISITORCLASS::VisitMoveWideImmediate}, \ + {"movn_32_movewide"_h, &VISITORCLASS::VisitMoveWideImmediate}, \ + {"movn_64_movewide"_h, &VISITORCLASS::VisitMoveWideImmediate}, \ + {"movz_32_movewide"_h, &VISITORCLASS::VisitMoveWideImmediate}, \ + {"movz_64_movewide"_h, &VISITORCLASS::VisitMoveWideImmediate}, \ + {"fabs_asimdmiscfp16_r"_h, &VISITORCLASS::VisitNEON2RegMiscFP16}, \ + {"fcmeq_asimdmiscfp16_fz"_h, &VISITORCLASS::VisitNEON2RegMiscFP16}, \ + {"fcmge_asimdmiscfp16_fz"_h, &VISITORCLASS::VisitNEON2RegMiscFP16}, \ + {"fcmgt_asimdmiscfp16_fz"_h, &VISITORCLASS::VisitNEON2RegMiscFP16}, \ + {"fcmle_asimdmiscfp16_fz"_h, &VISITORCLASS::VisitNEON2RegMiscFP16}, \ + {"fcmlt_asimdmiscfp16_fz"_h, &VISITORCLASS::VisitNEON2RegMiscFP16}, \ + {"fcvtas_asimdmiscfp16_r"_h, &VISITORCLASS::VisitNEON2RegMiscFP16}, \ + {"fcvtau_asimdmiscfp16_r"_h, &VISITORCLASS::VisitNEON2RegMiscFP16}, \ + {"fcvtms_asimdmiscfp16_r"_h, &VISITORCLASS::VisitNEON2RegMiscFP16}, \ + {"fcvtmu_asimdmiscfp16_r"_h, &VISITORCLASS::VisitNEON2RegMiscFP16}, \ + {"fcvtns_asimdmiscfp16_r"_h, &VISITORCLASS::VisitNEON2RegMiscFP16}, \ + {"fcvtnu_asimdmiscfp16_r"_h, &VISITORCLASS::VisitNEON2RegMiscFP16}, \ + {"fcvtps_asimdmiscfp16_r"_h, &VISITORCLASS::VisitNEON2RegMiscFP16}, \ + {"fcvtpu_asimdmiscfp16_r"_h, &VISITORCLASS::VisitNEON2RegMiscFP16}, \ + {"fcvtzs_asimdmiscfp16_r"_h, &VISITORCLASS::VisitNEON2RegMiscFP16}, \ + {"fcvtzu_asimdmiscfp16_r"_h, &VISITORCLASS::VisitNEON2RegMiscFP16}, \ + {"fneg_asimdmiscfp16_r"_h, &VISITORCLASS::VisitNEON2RegMiscFP16}, \ + {"frecpe_asimdmiscfp16_r"_h, &VISITORCLASS::VisitNEON2RegMiscFP16}, \ + {"frinta_asimdmiscfp16_r"_h, &VISITORCLASS::VisitNEON2RegMiscFP16}, \ + {"frinti_asimdmiscfp16_r"_h, &VISITORCLASS::VisitNEON2RegMiscFP16}, \ + {"frintm_asimdmiscfp16_r"_h, &VISITORCLASS::VisitNEON2RegMiscFP16}, \ + {"frintn_asimdmiscfp16_r"_h, &VISITORCLASS::VisitNEON2RegMiscFP16}, \ + {"frintp_asimdmiscfp16_r"_h, &VISITORCLASS::VisitNEON2RegMiscFP16}, \ + {"frintx_asimdmiscfp16_r"_h, &VISITORCLASS::VisitNEON2RegMiscFP16}, \ + {"frintz_asimdmiscfp16_r"_h, &VISITORCLASS::VisitNEON2RegMiscFP16}, \ + {"frsqrte_asimdmiscfp16_r"_h, &VISITORCLASS::VisitNEON2RegMiscFP16}, \ + {"fsqrt_asimdmiscfp16_r"_h, &VISITORCLASS::VisitNEON2RegMiscFP16}, \ + {"scvtf_asimdmiscfp16_r"_h, &VISITORCLASS::VisitNEON2RegMiscFP16}, \ + {"ucvtf_asimdmiscfp16_r"_h, &VISITORCLASS::VisitNEON2RegMiscFP16}, \ + {"addhn_asimddiff_n"_h, &VISITORCLASS::VisitNEON3Different}, \ + {"pmull_asimddiff_l"_h, &VISITORCLASS::VisitNEON3Different}, \ + {"raddhn_asimddiff_n"_h, &VISITORCLASS::VisitNEON3Different}, \ + {"rsubhn_asimddiff_n"_h, 
&VISITORCLASS::VisitNEON3Different}, \ + {"sabal_asimddiff_l"_h, &VISITORCLASS::VisitNEON3Different}, \ + {"sabdl_asimddiff_l"_h, &VISITORCLASS::VisitNEON3Different}, \ + {"saddl_asimddiff_l"_h, &VISITORCLASS::VisitNEON3Different}, \ + {"saddw_asimddiff_w"_h, &VISITORCLASS::VisitNEON3Different}, \ + {"smlal_asimddiff_l"_h, &VISITORCLASS::VisitNEON3Different}, \ + {"smlsl_asimddiff_l"_h, &VISITORCLASS::VisitNEON3Different}, \ + {"smull_asimddiff_l"_h, &VISITORCLASS::VisitNEON3Different}, \ + {"sqdmlal_asimddiff_l"_h, &VISITORCLASS::VisitNEON3Different}, \ + {"sqdmlsl_asimddiff_l"_h, &VISITORCLASS::VisitNEON3Different}, \ + {"sqdmull_asimddiff_l"_h, &VISITORCLASS::VisitNEON3Different}, \ + {"ssubl_asimddiff_l"_h, &VISITORCLASS::VisitNEON3Different}, \ + {"ssubw_asimddiff_w"_h, &VISITORCLASS::VisitNEON3Different}, \ + {"subhn_asimddiff_n"_h, &VISITORCLASS::VisitNEON3Different}, \ + {"uabal_asimddiff_l"_h, &VISITORCLASS::VisitNEON3Different}, \ + {"uabdl_asimddiff_l"_h, &VISITORCLASS::VisitNEON3Different}, \ + {"uaddl_asimddiff_l"_h, &VISITORCLASS::VisitNEON3Different}, \ + {"uaddw_asimddiff_w"_h, &VISITORCLASS::VisitNEON3Different}, \ + {"umlal_asimddiff_l"_h, &VISITORCLASS::VisitNEON3Different}, \ + {"umlsl_asimddiff_l"_h, &VISITORCLASS::VisitNEON3Different}, \ + {"umull_asimddiff_l"_h, &VISITORCLASS::VisitNEON3Different}, \ + {"usubl_asimddiff_l"_h, &VISITORCLASS::VisitNEON3Different}, \ + {"usubw_asimddiff_w"_h, &VISITORCLASS::VisitNEON3Different}, \ + {"addp_asimdsame_only"_h, &VISITORCLASS::VisitNEON3Same}, \ + {"add_asimdsame_only"_h, &VISITORCLASS::VisitNEON3Same}, \ + {"cmeq_asimdsame_only"_h, &VISITORCLASS::VisitNEON3Same}, \ + {"cmge_asimdsame_only"_h, &VISITORCLASS::VisitNEON3Same}, \ + {"cmgt_asimdsame_only"_h, &VISITORCLASS::VisitNEON3Same}, \ + {"cmhi_asimdsame_only"_h, &VISITORCLASS::VisitNEON3Same}, \ + {"cmhs_asimdsame_only"_h, &VISITORCLASS::VisitNEON3Same}, \ + {"cmtst_asimdsame_only"_h, &VISITORCLASS::VisitNEON3Same}, \ + {"fabd_asimdsame_only"_h, &VISITORCLASS::VisitNEON3Same}, \ + {"facge_asimdsame_only"_h, &VISITORCLASS::VisitNEON3Same}, \ + {"facgt_asimdsame_only"_h, &VISITORCLASS::VisitNEON3Same}, \ + {"faddp_asimdsame_only"_h, &VISITORCLASS::VisitNEON3Same}, \ + {"fadd_asimdsame_only"_h, &VISITORCLASS::VisitNEON3Same}, \ + {"fcmeq_asimdsame_only"_h, &VISITORCLASS::VisitNEON3Same}, \ + {"fcmge_asimdsame_only"_h, &VISITORCLASS::VisitNEON3Same}, \ + {"fcmgt_asimdsame_only"_h, &VISITORCLASS::VisitNEON3Same}, \ + {"fdiv_asimdsame_only"_h, &VISITORCLASS::VisitNEON3Same}, \ + {"fmaxnmp_asimdsame_only"_h, &VISITORCLASS::VisitNEON3Same}, \ + {"fmaxnm_asimdsame_only"_h, &VISITORCLASS::VisitNEON3Same}, \ + {"fmaxp_asimdsame_only"_h, &VISITORCLASS::VisitNEON3Same}, \ + {"fmax_asimdsame_only"_h, &VISITORCLASS::VisitNEON3Same}, \ + {"fminnmp_asimdsame_only"_h, &VISITORCLASS::VisitNEON3Same}, \ + {"fminnm_asimdsame_only"_h, &VISITORCLASS::VisitNEON3Same}, \ + {"fminp_asimdsame_only"_h, &VISITORCLASS::VisitNEON3Same}, \ + {"fmin_asimdsame_only"_h, &VISITORCLASS::VisitNEON3Same}, \ + {"fmla_asimdsame_only"_h, &VISITORCLASS::VisitNEON3Same}, \ + {"fmls_asimdsame_only"_h, &VISITORCLASS::VisitNEON3Same}, \ + {"fmulx_asimdsame_only"_h, &VISITORCLASS::VisitNEON3Same}, \ + {"fmul_asimdsame_only"_h, &VISITORCLASS::VisitNEON3Same}, \ + {"frecps_asimdsame_only"_h, &VISITORCLASS::VisitNEON3Same}, \ + {"frsqrts_asimdsame_only"_h, &VISITORCLASS::VisitNEON3Same}, \ + {"fsub_asimdsame_only"_h, &VISITORCLASS::VisitNEON3Same}, \ + {"sqadd_asimdsame_only"_h, &VISITORCLASS::VisitNEON3Same}, \ + 
{"sqdmulh_asimdsame_only"_h, &VISITORCLASS::VisitNEON3Same}, \ + {"sqrdmulh_asimdsame_only"_h, &VISITORCLASS::VisitNEON3Same}, \ + {"sqrshl_asimdsame_only"_h, &VISITORCLASS::VisitNEON3Same}, \ + {"sqshl_asimdsame_only"_h, &VISITORCLASS::VisitNEON3Same}, \ + {"sqsub_asimdsame_only"_h, &VISITORCLASS::VisitNEON3Same}, \ + {"srshl_asimdsame_only"_h, &VISITORCLASS::VisitNEON3Same}, \ + {"sshl_asimdsame_only"_h, &VISITORCLASS::VisitNEON3Same}, \ + {"sub_asimdsame_only"_h, &VISITORCLASS::VisitNEON3Same}, \ + {"uqadd_asimdsame_only"_h, &VISITORCLASS::VisitNEON3Same}, \ + {"uqrshl_asimdsame_only"_h, &VISITORCLASS::VisitNEON3Same}, \ + {"uqshl_asimdsame_only"_h, &VISITORCLASS::VisitNEON3Same}, \ + {"uqsub_asimdsame_only"_h, &VISITORCLASS::VisitNEON3Same}, \ + {"urshl_asimdsame_only"_h, &VISITORCLASS::VisitNEON3Same}, \ + {"ushl_asimdsame_only"_h, &VISITORCLASS::VisitNEON3Same}, \ + {"fcadd_asimdsame2_c"_h, &VISITORCLASS::VisitNEON3SameExtra}, \ + {"fcmla_asimdsame2_c"_h, &VISITORCLASS::VisitNEON3SameExtra}, \ + {"sdot_asimdsame2_d"_h, &VISITORCLASS::VisitNEON3SameExtra}, \ + {"sqrdmlah_asimdsame2_only"_h, &VISITORCLASS::VisitNEON3SameExtra}, \ + {"sqrdmlsh_asimdsame2_only"_h, &VISITORCLASS::VisitNEON3SameExtra}, \ + {"udot_asimdsame2_d"_h, &VISITORCLASS::VisitNEON3SameExtra}, \ + {"fabd_asimdsamefp16_only"_h, &VISITORCLASS::VisitNEON3SameFP16}, \ + {"facge_asimdsamefp16_only"_h, &VISITORCLASS::VisitNEON3SameFP16}, \ + {"facgt_asimdsamefp16_only"_h, &VISITORCLASS::VisitNEON3SameFP16}, \ + {"faddp_asimdsamefp16_only"_h, &VISITORCLASS::VisitNEON3SameFP16}, \ + {"fadd_asimdsamefp16_only"_h, &VISITORCLASS::VisitNEON3SameFP16}, \ + {"fcmeq_asimdsamefp16_only"_h, &VISITORCLASS::VisitNEON3SameFP16}, \ + {"fcmge_asimdsamefp16_only"_h, &VISITORCLASS::VisitNEON3SameFP16}, \ + {"fcmgt_asimdsamefp16_only"_h, &VISITORCLASS::VisitNEON3SameFP16}, \ + {"fdiv_asimdsamefp16_only"_h, &VISITORCLASS::VisitNEON3SameFP16}, \ + {"fmaxnmp_asimdsamefp16_only"_h, &VISITORCLASS::VisitNEON3SameFP16}, \ + {"fmaxnm_asimdsamefp16_only"_h, &VISITORCLASS::VisitNEON3SameFP16}, \ + {"fmaxp_asimdsamefp16_only"_h, &VISITORCLASS::VisitNEON3SameFP16}, \ + {"fmax_asimdsamefp16_only"_h, &VISITORCLASS::VisitNEON3SameFP16}, \ + {"fminnmp_asimdsamefp16_only"_h, &VISITORCLASS::VisitNEON3SameFP16}, \ + {"fminnm_asimdsamefp16_only"_h, &VISITORCLASS::VisitNEON3SameFP16}, \ + {"fminp_asimdsamefp16_only"_h, &VISITORCLASS::VisitNEON3SameFP16}, \ + {"fmin_asimdsamefp16_only"_h, &VISITORCLASS::VisitNEON3SameFP16}, \ + {"fmla_asimdsamefp16_only"_h, &VISITORCLASS::VisitNEON3SameFP16}, \ + {"fmls_asimdsamefp16_only"_h, &VISITORCLASS::VisitNEON3SameFP16}, \ + {"fmulx_asimdsamefp16_only"_h, &VISITORCLASS::VisitNEON3SameFP16}, \ + {"fmul_asimdsamefp16_only"_h, &VISITORCLASS::VisitNEON3SameFP16}, \ + {"frecps_asimdsamefp16_only"_h, &VISITORCLASS::VisitNEON3SameFP16}, \ + {"frsqrts_asimdsamefp16_only"_h, &VISITORCLASS::VisitNEON3SameFP16}, \ + {"fsub_asimdsamefp16_only"_h, &VISITORCLASS::VisitNEON3SameFP16}, \ + {"addv_asimdall_only"_h, &VISITORCLASS::VisitNEONAcrossLanes}, \ + {"saddlv_asimdall_only"_h, &VISITORCLASS::VisitNEONAcrossLanes}, \ + {"smaxv_asimdall_only"_h, &VISITORCLASS::VisitNEONAcrossLanes}, \ + {"sminv_asimdall_only"_h, &VISITORCLASS::VisitNEONAcrossLanes}, \ + {"uaddlv_asimdall_only"_h, &VISITORCLASS::VisitNEONAcrossLanes}, \ + {"umaxv_asimdall_only"_h, &VISITORCLASS::VisitNEONAcrossLanes}, \ + {"uminv_asimdall_only"_h, &VISITORCLASS::VisitNEONAcrossLanes}, \ + {"mla_asimdelem_r"_h, &VISITORCLASS::VisitNEONByIndexedElement}, \ + 
{"mls_asimdelem_r"_h, &VISITORCLASS::VisitNEONByIndexedElement}, \ + {"mul_asimdelem_r"_h, &VISITORCLASS::VisitNEONByIndexedElement}, \ + {"sqdmulh_asimdelem_r"_h, &VISITORCLASS::VisitNEONByIndexedElement}, \ + {"sqrdmlah_asimdelem_r"_h, &VISITORCLASS::VisitNEONByIndexedElement}, \ + {"sqrdmlsh_asimdelem_r"_h, &VISITORCLASS::VisitNEONByIndexedElement}, \ + {"sqrdmulh_asimdelem_r"_h, &VISITORCLASS::VisitNEONByIndexedElement}, \ + {"dup_asimdins_dr_r"_h, &VISITORCLASS::VisitNEONCopy}, \ + {"dup_asimdins_dv_v"_h, &VISITORCLASS::VisitNEONCopy}, \ + {"ins_asimdins_ir_r"_h, &VISITORCLASS::VisitNEONCopy}, \ + {"ins_asimdins_iv_v"_h, &VISITORCLASS::VisitNEONCopy}, \ + {"smov_asimdins_w_w"_h, &VISITORCLASS::VisitNEONCopy}, \ + {"smov_asimdins_x_x"_h, &VISITORCLASS::VisitNEONCopy}, \ + {"umov_asimdins_w_w"_h, &VISITORCLASS::VisitNEONCopy}, \ + {"umov_asimdins_x_x"_h, &VISITORCLASS::VisitNEONCopy}, \ + {"ext_asimdext_only"_h, &VISITORCLASS::VisitNEONExtract}, \ + {"ld1_asisdlse_r1_1v"_h, &VISITORCLASS::VisitNEONLoadStoreMultiStruct}, \ + {"ld1_asisdlse_r2_2v"_h, &VISITORCLASS::VisitNEONLoadStoreMultiStruct}, \ + {"ld1_asisdlse_r3_3v"_h, &VISITORCLASS::VisitNEONLoadStoreMultiStruct}, \ + {"ld1_asisdlse_r4_4v"_h, &VISITORCLASS::VisitNEONLoadStoreMultiStruct}, \ + {"ld2_asisdlse_r2"_h, &VISITORCLASS::VisitNEONLoadStoreMultiStruct}, \ + {"ld3_asisdlse_r3"_h, &VISITORCLASS::VisitNEONLoadStoreMultiStruct}, \ + {"ld4_asisdlse_r4"_h, &VISITORCLASS::VisitNEONLoadStoreMultiStruct}, \ + {"st1_asisdlse_r1_1v"_h, &VISITORCLASS::VisitNEONLoadStoreMultiStruct}, \ + {"st1_asisdlse_r2_2v"_h, &VISITORCLASS::VisitNEONLoadStoreMultiStruct}, \ + {"st1_asisdlse_r3_3v"_h, &VISITORCLASS::VisitNEONLoadStoreMultiStruct}, \ + {"st1_asisdlse_r4_4v"_h, &VISITORCLASS::VisitNEONLoadStoreMultiStruct}, \ + {"st2_asisdlse_r2"_h, &VISITORCLASS::VisitNEONLoadStoreMultiStruct}, \ + {"st3_asisdlse_r3"_h, &VISITORCLASS::VisitNEONLoadStoreMultiStruct}, \ + {"st4_asisdlse_r4"_h, &VISITORCLASS::VisitNEONLoadStoreMultiStruct}, \ + {"ld1_asisdlsep_i1_i1"_h, \ + &VISITORCLASS::VisitNEONLoadStoreMultiStructPostIndex}, \ + {"ld1_asisdlsep_i2_i2"_h, \ + &VISITORCLASS::VisitNEONLoadStoreMultiStructPostIndex}, \ + {"ld1_asisdlsep_i3_i3"_h, \ + &VISITORCLASS::VisitNEONLoadStoreMultiStructPostIndex}, \ + {"ld1_asisdlsep_i4_i4"_h, \ + &VISITORCLASS::VisitNEONLoadStoreMultiStructPostIndex}, \ + {"ld1_asisdlsep_r1_r1"_h, \ + &VISITORCLASS::VisitNEONLoadStoreMultiStructPostIndex}, \ + {"ld1_asisdlsep_r2_r2"_h, \ + &VISITORCLASS::VisitNEONLoadStoreMultiStructPostIndex}, \ + {"ld1_asisdlsep_r3_r3"_h, \ + &VISITORCLASS::VisitNEONLoadStoreMultiStructPostIndex}, \ + {"ld1_asisdlsep_r4_r4"_h, \ + &VISITORCLASS::VisitNEONLoadStoreMultiStructPostIndex}, \ + {"ld2_asisdlsep_i2_i"_h, \ + &VISITORCLASS::VisitNEONLoadStoreMultiStructPostIndex}, \ + {"ld2_asisdlsep_r2_r"_h, \ + &VISITORCLASS::VisitNEONLoadStoreMultiStructPostIndex}, \ + {"ld3_asisdlsep_i3_i"_h, \ + &VISITORCLASS::VisitNEONLoadStoreMultiStructPostIndex}, \ + {"ld3_asisdlsep_r3_r"_h, \ + &VISITORCLASS::VisitNEONLoadStoreMultiStructPostIndex}, \ + {"ld4_asisdlsep_i4_i"_h, \ + &VISITORCLASS::VisitNEONLoadStoreMultiStructPostIndex}, \ + {"ld4_asisdlsep_r4_r"_h, \ + &VISITORCLASS::VisitNEONLoadStoreMultiStructPostIndex}, \ + {"st1_asisdlsep_i1_i1"_h, \ + &VISITORCLASS::VisitNEONLoadStoreMultiStructPostIndex}, \ + {"st1_asisdlsep_i2_i2"_h, \ + &VISITORCLASS::VisitNEONLoadStoreMultiStructPostIndex}, \ + {"st1_asisdlsep_i3_i3"_h, \ + &VISITORCLASS::VisitNEONLoadStoreMultiStructPostIndex}, \ + 
{"st1_asisdlsep_i4_i4"_h, \ + &VISITORCLASS::VisitNEONLoadStoreMultiStructPostIndex}, \ + {"st1_asisdlsep_r1_r1"_h, \ + &VISITORCLASS::VisitNEONLoadStoreMultiStructPostIndex}, \ + {"st1_asisdlsep_r2_r2"_h, \ + &VISITORCLASS::VisitNEONLoadStoreMultiStructPostIndex}, \ + {"st1_asisdlsep_r3_r3"_h, \ + &VISITORCLASS::VisitNEONLoadStoreMultiStructPostIndex}, \ + {"st1_asisdlsep_r4_r4"_h, \ + &VISITORCLASS::VisitNEONLoadStoreMultiStructPostIndex}, \ + {"st2_asisdlsep_i2_i"_h, \ + &VISITORCLASS::VisitNEONLoadStoreMultiStructPostIndex}, \ + {"st2_asisdlsep_r2_r"_h, \ + &VISITORCLASS::VisitNEONLoadStoreMultiStructPostIndex}, \ + {"st3_asisdlsep_i3_i"_h, \ + &VISITORCLASS::VisitNEONLoadStoreMultiStructPostIndex}, \ + {"st3_asisdlsep_r3_r"_h, \ + &VISITORCLASS::VisitNEONLoadStoreMultiStructPostIndex}, \ + {"st4_asisdlsep_i4_i"_h, \ + &VISITORCLASS::VisitNEONLoadStoreMultiStructPostIndex}, \ + {"st4_asisdlsep_r4_r"_h, \ + &VISITORCLASS::VisitNEONLoadStoreMultiStructPostIndex}, \ + {"ld1r_asisdlso_r1"_h, &VISITORCLASS::VisitNEONLoadStoreSingleStruct}, \ + {"ld1_asisdlso_b1_1b"_h, &VISITORCLASS::VisitNEONLoadStoreSingleStruct}, \ + {"ld1_asisdlso_d1_1d"_h, &VISITORCLASS::VisitNEONLoadStoreSingleStruct}, \ + {"ld1_asisdlso_h1_1h"_h, &VISITORCLASS::VisitNEONLoadStoreSingleStruct}, \ + {"ld1_asisdlso_s1_1s"_h, &VISITORCLASS::VisitNEONLoadStoreSingleStruct}, \ + {"ld2r_asisdlso_r2"_h, &VISITORCLASS::VisitNEONLoadStoreSingleStruct}, \ + {"ld2_asisdlso_b2_2b"_h, &VISITORCLASS::VisitNEONLoadStoreSingleStruct}, \ + {"ld2_asisdlso_d2_2d"_h, &VISITORCLASS::VisitNEONLoadStoreSingleStruct}, \ + {"ld2_asisdlso_h2_2h"_h, &VISITORCLASS::VisitNEONLoadStoreSingleStruct}, \ + {"ld2_asisdlso_s2_2s"_h, &VISITORCLASS::VisitNEONLoadStoreSingleStruct}, \ + {"ld3r_asisdlso_r3"_h, &VISITORCLASS::VisitNEONLoadStoreSingleStruct}, \ + {"ld3_asisdlso_b3_3b"_h, &VISITORCLASS::VisitNEONLoadStoreSingleStruct}, \ + {"ld3_asisdlso_d3_3d"_h, &VISITORCLASS::VisitNEONLoadStoreSingleStruct}, \ + {"ld3_asisdlso_h3_3h"_h, &VISITORCLASS::VisitNEONLoadStoreSingleStruct}, \ + {"ld3_asisdlso_s3_3s"_h, &VISITORCLASS::VisitNEONLoadStoreSingleStruct}, \ + {"ld4r_asisdlso_r4"_h, &VISITORCLASS::VisitNEONLoadStoreSingleStruct}, \ + {"ld4_asisdlso_b4_4b"_h, &VISITORCLASS::VisitNEONLoadStoreSingleStruct}, \ + {"ld4_asisdlso_d4_4d"_h, &VISITORCLASS::VisitNEONLoadStoreSingleStruct}, \ + {"ld4_asisdlso_h4_4h"_h, &VISITORCLASS::VisitNEONLoadStoreSingleStruct}, \ + {"ld4_asisdlso_s4_4s"_h, &VISITORCLASS::VisitNEONLoadStoreSingleStruct}, \ + {"st1_asisdlso_b1_1b"_h, &VISITORCLASS::VisitNEONLoadStoreSingleStruct}, \ + {"st1_asisdlso_d1_1d"_h, &VISITORCLASS::VisitNEONLoadStoreSingleStruct}, \ + {"st1_asisdlso_h1_1h"_h, &VISITORCLASS::VisitNEONLoadStoreSingleStruct}, \ + {"st1_asisdlso_s1_1s"_h, &VISITORCLASS::VisitNEONLoadStoreSingleStruct}, \ + {"st2_asisdlso_b2_2b"_h, &VISITORCLASS::VisitNEONLoadStoreSingleStruct}, \ + {"st2_asisdlso_d2_2d"_h, &VISITORCLASS::VisitNEONLoadStoreSingleStruct}, \ + {"st2_asisdlso_h2_2h"_h, &VISITORCLASS::VisitNEONLoadStoreSingleStruct}, \ + {"st2_asisdlso_s2_2s"_h, &VISITORCLASS::VisitNEONLoadStoreSingleStruct}, \ + {"st3_asisdlso_b3_3b"_h, &VISITORCLASS::VisitNEONLoadStoreSingleStruct}, \ + {"st3_asisdlso_d3_3d"_h, &VISITORCLASS::VisitNEONLoadStoreSingleStruct}, \ + {"st3_asisdlso_h3_3h"_h, &VISITORCLASS::VisitNEONLoadStoreSingleStruct}, \ + {"st3_asisdlso_s3_3s"_h, &VISITORCLASS::VisitNEONLoadStoreSingleStruct}, \ + {"st4_asisdlso_b4_4b"_h, &VISITORCLASS::VisitNEONLoadStoreSingleStruct}, \ + {"st4_asisdlso_d4_4d"_h, 
&VISITORCLASS::VisitNEONLoadStoreSingleStruct}, \ + {"st4_asisdlso_h4_4h"_h, &VISITORCLASS::VisitNEONLoadStoreSingleStruct}, \ + {"st4_asisdlso_s4_4s"_h, &VISITORCLASS::VisitNEONLoadStoreSingleStruct}, \ + {"ld1r_asisdlsop_r1_i"_h, \ + &VISITORCLASS::VisitNEONLoadStoreSingleStructPostIndex}, \ + {"ld1r_asisdlsop_rx1_r"_h, \ + &VISITORCLASS::VisitNEONLoadStoreSingleStructPostIndex}, \ + {"ld1_asisdlsop_b1_i1b"_h, \ + &VISITORCLASS::VisitNEONLoadStoreSingleStructPostIndex}, \ + {"ld1_asisdlsop_bx1_r1b"_h, \ + &VISITORCLASS::VisitNEONLoadStoreSingleStructPostIndex}, \ + {"ld1_asisdlsop_d1_i1d"_h, \ + &VISITORCLASS::VisitNEONLoadStoreSingleStructPostIndex}, \ + {"ld1_asisdlsop_dx1_r1d"_h, \ + &VISITORCLASS::VisitNEONLoadStoreSingleStructPostIndex}, \ + {"ld1_asisdlsop_h1_i1h"_h, \ + &VISITORCLASS::VisitNEONLoadStoreSingleStructPostIndex}, \ + {"ld1_asisdlsop_hx1_r1h"_h, \ + &VISITORCLASS::VisitNEONLoadStoreSingleStructPostIndex}, \ + {"ld1_asisdlsop_s1_i1s"_h, \ + &VISITORCLASS::VisitNEONLoadStoreSingleStructPostIndex}, \ + {"ld1_asisdlsop_sx1_r1s"_h, \ + &VISITORCLASS::VisitNEONLoadStoreSingleStructPostIndex}, \ + {"ld2r_asisdlsop_r2_i"_h, \ + &VISITORCLASS::VisitNEONLoadStoreSingleStructPostIndex}, \ + {"ld2r_asisdlsop_rx2_r"_h, \ + &VISITORCLASS::VisitNEONLoadStoreSingleStructPostIndex}, \ + {"ld2_asisdlsop_b2_i2b"_h, \ + &VISITORCLASS::VisitNEONLoadStoreSingleStructPostIndex}, \ + {"ld2_asisdlsop_bx2_r2b"_h, \ + &VISITORCLASS::VisitNEONLoadStoreSingleStructPostIndex}, \ + {"ld2_asisdlsop_d2_i2d"_h, \ + &VISITORCLASS::VisitNEONLoadStoreSingleStructPostIndex}, \ + {"ld2_asisdlsop_dx2_r2d"_h, \ + &VISITORCLASS::VisitNEONLoadStoreSingleStructPostIndex}, \ + {"ld2_asisdlsop_h2_i2h"_h, \ + &VISITORCLASS::VisitNEONLoadStoreSingleStructPostIndex}, \ + {"ld2_asisdlsop_hx2_r2h"_h, \ + &VISITORCLASS::VisitNEONLoadStoreSingleStructPostIndex}, \ + {"ld2_asisdlsop_s2_i2s"_h, \ + &VISITORCLASS::VisitNEONLoadStoreSingleStructPostIndex}, \ + {"ld2_asisdlsop_sx2_r2s"_h, \ + &VISITORCLASS::VisitNEONLoadStoreSingleStructPostIndex}, \ + {"ld3r_asisdlsop_r3_i"_h, \ + &VISITORCLASS::VisitNEONLoadStoreSingleStructPostIndex}, \ + {"ld3r_asisdlsop_rx3_r"_h, \ + &VISITORCLASS::VisitNEONLoadStoreSingleStructPostIndex}, \ + {"ld3_asisdlsop_b3_i3b"_h, \ + &VISITORCLASS::VisitNEONLoadStoreSingleStructPostIndex}, \ + {"ld3_asisdlsop_bx3_r3b"_h, \ + &VISITORCLASS::VisitNEONLoadStoreSingleStructPostIndex}, \ + {"ld3_asisdlsop_d3_i3d"_h, \ + &VISITORCLASS::VisitNEONLoadStoreSingleStructPostIndex}, \ + {"ld3_asisdlsop_dx3_r3d"_h, \ + &VISITORCLASS::VisitNEONLoadStoreSingleStructPostIndex}, \ + {"ld3_asisdlsop_h3_i3h"_h, \ + &VISITORCLASS::VisitNEONLoadStoreSingleStructPostIndex}, \ + {"ld3_asisdlsop_hx3_r3h"_h, \ + &VISITORCLASS::VisitNEONLoadStoreSingleStructPostIndex}, \ + {"ld3_asisdlsop_s3_i3s"_h, \ + &VISITORCLASS::VisitNEONLoadStoreSingleStructPostIndex}, \ + {"ld3_asisdlsop_sx3_r3s"_h, \ + &VISITORCLASS::VisitNEONLoadStoreSingleStructPostIndex}, \ + {"ld4r_asisdlsop_r4_i"_h, \ + &VISITORCLASS::VisitNEONLoadStoreSingleStructPostIndex}, \ + {"ld4r_asisdlsop_rx4_r"_h, \ + &VISITORCLASS::VisitNEONLoadStoreSingleStructPostIndex}, \ + {"ld4_asisdlsop_b4_i4b"_h, \ + &VISITORCLASS::VisitNEONLoadStoreSingleStructPostIndex}, \ + {"ld4_asisdlsop_bx4_r4b"_h, \ + &VISITORCLASS::VisitNEONLoadStoreSingleStructPostIndex}, \ + {"ld4_asisdlsop_d4_i4d"_h, \ + &VISITORCLASS::VisitNEONLoadStoreSingleStructPostIndex}, \ + {"ld4_asisdlsop_dx4_r4d"_h, \ + &VISITORCLASS::VisitNEONLoadStoreSingleStructPostIndex}, \ + {"ld4_asisdlsop_h4_i4h"_h, 
\ + &VISITORCLASS::VisitNEONLoadStoreSingleStructPostIndex}, \ + {"ld4_asisdlsop_hx4_r4h"_h, \ + &VISITORCLASS::VisitNEONLoadStoreSingleStructPostIndex}, \ + {"ld4_asisdlsop_s4_i4s"_h, \ + &VISITORCLASS::VisitNEONLoadStoreSingleStructPostIndex}, \ + {"ld4_asisdlsop_sx4_r4s"_h, \ + &VISITORCLASS::VisitNEONLoadStoreSingleStructPostIndex}, \ + {"st1_asisdlsop_b1_i1b"_h, \ + &VISITORCLASS::VisitNEONLoadStoreSingleStructPostIndex}, \ + {"st1_asisdlsop_bx1_r1b"_h, \ + &VISITORCLASS::VisitNEONLoadStoreSingleStructPostIndex}, \ + {"st1_asisdlsop_d1_i1d"_h, \ + &VISITORCLASS::VisitNEONLoadStoreSingleStructPostIndex}, \ + {"st1_asisdlsop_dx1_r1d"_h, \ + &VISITORCLASS::VisitNEONLoadStoreSingleStructPostIndex}, \ + {"st1_asisdlsop_h1_i1h"_h, \ + &VISITORCLASS::VisitNEONLoadStoreSingleStructPostIndex}, \ + {"st1_asisdlsop_hx1_r1h"_h, \ + &VISITORCLASS::VisitNEONLoadStoreSingleStructPostIndex}, \ + {"st1_asisdlsop_s1_i1s"_h, \ + &VISITORCLASS::VisitNEONLoadStoreSingleStructPostIndex}, \ + {"st1_asisdlsop_sx1_r1s"_h, \ + &VISITORCLASS::VisitNEONLoadStoreSingleStructPostIndex}, \ + {"st2_asisdlsop_b2_i2b"_h, \ + &VISITORCLASS::VisitNEONLoadStoreSingleStructPostIndex}, \ + {"st2_asisdlsop_bx2_r2b"_h, \ + &VISITORCLASS::VisitNEONLoadStoreSingleStructPostIndex}, \ + {"st2_asisdlsop_d2_i2d"_h, \ + &VISITORCLASS::VisitNEONLoadStoreSingleStructPostIndex}, \ + {"st2_asisdlsop_dx2_r2d"_h, \ + &VISITORCLASS::VisitNEONLoadStoreSingleStructPostIndex}, \ + {"st2_asisdlsop_h2_i2h"_h, \ + &VISITORCLASS::VisitNEONLoadStoreSingleStructPostIndex}, \ + {"st2_asisdlsop_hx2_r2h"_h, \ + &VISITORCLASS::VisitNEONLoadStoreSingleStructPostIndex}, \ + {"st2_asisdlsop_s2_i2s"_h, \ + &VISITORCLASS::VisitNEONLoadStoreSingleStructPostIndex}, \ + {"st2_asisdlsop_sx2_r2s"_h, \ + &VISITORCLASS::VisitNEONLoadStoreSingleStructPostIndex}, \ + {"st3_asisdlsop_b3_i3b"_h, \ + &VISITORCLASS::VisitNEONLoadStoreSingleStructPostIndex}, \ + {"st3_asisdlsop_bx3_r3b"_h, \ + &VISITORCLASS::VisitNEONLoadStoreSingleStructPostIndex}, \ + {"st3_asisdlsop_d3_i3d"_h, \ + &VISITORCLASS::VisitNEONLoadStoreSingleStructPostIndex}, \ + {"st3_asisdlsop_dx3_r3d"_h, \ + &VISITORCLASS::VisitNEONLoadStoreSingleStructPostIndex}, \ + {"st3_asisdlsop_h3_i3h"_h, \ + &VISITORCLASS::VisitNEONLoadStoreSingleStructPostIndex}, \ + {"st3_asisdlsop_hx3_r3h"_h, \ + &VISITORCLASS::VisitNEONLoadStoreSingleStructPostIndex}, \ + {"st3_asisdlsop_s3_i3s"_h, \ + &VISITORCLASS::VisitNEONLoadStoreSingleStructPostIndex}, \ + {"st3_asisdlsop_sx3_r3s"_h, \ + &VISITORCLASS::VisitNEONLoadStoreSingleStructPostIndex}, \ + {"st4_asisdlsop_b4_i4b"_h, \ + &VISITORCLASS::VisitNEONLoadStoreSingleStructPostIndex}, \ + {"st4_asisdlsop_bx4_r4b"_h, \ + &VISITORCLASS::VisitNEONLoadStoreSingleStructPostIndex}, \ + {"st4_asisdlsop_d4_i4d"_h, \ + &VISITORCLASS::VisitNEONLoadStoreSingleStructPostIndex}, \ + {"st4_asisdlsop_dx4_r4d"_h, \ + &VISITORCLASS::VisitNEONLoadStoreSingleStructPostIndex}, \ + {"st4_asisdlsop_h4_i4h"_h, \ + &VISITORCLASS::VisitNEONLoadStoreSingleStructPostIndex}, \ + {"st4_asisdlsop_hx4_r4h"_h, \ + &VISITORCLASS::VisitNEONLoadStoreSingleStructPostIndex}, \ + {"st4_asisdlsop_s4_i4s"_h, \ + &VISITORCLASS::VisitNEONLoadStoreSingleStructPostIndex}, \ + {"st4_asisdlsop_sx4_r4s"_h, \ + &VISITORCLASS::VisitNEONLoadStoreSingleStructPostIndex}, \ + {"bic_asimdimm_l_hl"_h, &VISITORCLASS::VisitNEONModifiedImmediate}, \ + {"bic_asimdimm_l_sl"_h, &VISITORCLASS::VisitNEONModifiedImmediate}, \ + {"fmov_asimdimm_d2_d"_h, &VISITORCLASS::VisitNEONModifiedImmediate}, \ + {"fmov_asimdimm_h_h"_h, 
&VISITORCLASS::VisitNEONModifiedImmediate}, \ + {"fmov_asimdimm_s_s"_h, &VISITORCLASS::VisitNEONModifiedImmediate}, \ + {"movi_asimdimm_d2_d"_h, &VISITORCLASS::VisitNEONModifiedImmediate}, \ + {"movi_asimdimm_d_ds"_h, &VISITORCLASS::VisitNEONModifiedImmediate}, \ + {"movi_asimdimm_l_hl"_h, &VISITORCLASS::VisitNEONModifiedImmediate}, \ + {"movi_asimdimm_l_sl"_h, &VISITORCLASS::VisitNEONModifiedImmediate}, \ + {"movi_asimdimm_m_sm"_h, &VISITORCLASS::VisitNEONModifiedImmediate}, \ + {"movi_asimdimm_n_b"_h, &VISITORCLASS::VisitNEONModifiedImmediate}, \ + {"mvni_asimdimm_l_hl"_h, &VISITORCLASS::VisitNEONModifiedImmediate}, \ + {"mvni_asimdimm_l_sl"_h, &VISITORCLASS::VisitNEONModifiedImmediate}, \ + {"mvni_asimdimm_m_sm"_h, &VISITORCLASS::VisitNEONModifiedImmediate}, \ + {"orr_asimdimm_l_hl"_h, &VISITORCLASS::VisitNEONModifiedImmediate}, \ + {"orr_asimdimm_l_sl"_h, &VISITORCLASS::VisitNEONModifiedImmediate}, \ + {"trn1_asimdperm_only"_h, &VISITORCLASS::VisitNEONPerm}, \ + {"trn2_asimdperm_only"_h, &VISITORCLASS::VisitNEONPerm}, \ + {"uzp1_asimdperm_only"_h, &VISITORCLASS::VisitNEONPerm}, \ + {"uzp2_asimdperm_only"_h, &VISITORCLASS::VisitNEONPerm}, \ + {"zip1_asimdperm_only"_h, &VISITORCLASS::VisitNEONPerm}, \ + {"zip2_asimdperm_only"_h, &VISITORCLASS::VisitNEONPerm}, \ + {"sqabs_asisdmisc_r"_h, &VISITORCLASS::VisitNEONScalar2RegMisc}, \ + {"sqneg_asisdmisc_r"_h, &VISITORCLASS::VisitNEONScalar2RegMisc}, \ + {"sqxtn_asisdmisc_n"_h, &VISITORCLASS::VisitNEONScalar2RegMisc}, \ + {"sqxtun_asisdmisc_n"_h, &VISITORCLASS::VisitNEONScalar2RegMisc}, \ + {"suqadd_asisdmisc_r"_h, &VISITORCLASS::VisitNEONScalar2RegMisc}, \ + {"uqxtn_asisdmisc_n"_h, &VISITORCLASS::VisitNEONScalar2RegMisc}, \ + {"usqadd_asisdmisc_r"_h, &VISITORCLASS::VisitNEONScalar2RegMisc}, \ + {"fcmeq_asisdmiscfp16_fz"_h, \ + &VISITORCLASS::VisitNEONScalar2RegMiscFP16}, \ + {"fcmge_asisdmiscfp16_fz"_h, \ + &VISITORCLASS::VisitNEONScalar2RegMiscFP16}, \ + {"fcmgt_asisdmiscfp16_fz"_h, \ + &VISITORCLASS::VisitNEONScalar2RegMiscFP16}, \ + {"fcmle_asisdmiscfp16_fz"_h, \ + &VISITORCLASS::VisitNEONScalar2RegMiscFP16}, \ + {"fcmlt_asisdmiscfp16_fz"_h, \ + &VISITORCLASS::VisitNEONScalar2RegMiscFP16}, \ + {"fcvtas_asisdmiscfp16_r"_h, \ + &VISITORCLASS::VisitNEONScalar2RegMiscFP16}, \ + {"fcvtau_asisdmiscfp16_r"_h, \ + &VISITORCLASS::VisitNEONScalar2RegMiscFP16}, \ + {"fcvtms_asisdmiscfp16_r"_h, \ + &VISITORCLASS::VisitNEONScalar2RegMiscFP16}, \ + {"fcvtmu_asisdmiscfp16_r"_h, \ + &VISITORCLASS::VisitNEONScalar2RegMiscFP16}, \ + {"fcvtns_asisdmiscfp16_r"_h, \ + &VISITORCLASS::VisitNEONScalar2RegMiscFP16}, \ + {"fcvtnu_asisdmiscfp16_r"_h, \ + &VISITORCLASS::VisitNEONScalar2RegMiscFP16}, \ + {"fcvtps_asisdmiscfp16_r"_h, \ + &VISITORCLASS::VisitNEONScalar2RegMiscFP16}, \ + {"fcvtpu_asisdmiscfp16_r"_h, \ + &VISITORCLASS::VisitNEONScalar2RegMiscFP16}, \ + {"fcvtzs_asisdmiscfp16_r"_h, \ + &VISITORCLASS::VisitNEONScalar2RegMiscFP16}, \ + {"fcvtzu_asisdmiscfp16_r"_h, \ + &VISITORCLASS::VisitNEONScalar2RegMiscFP16}, \ + {"frecpe_asisdmiscfp16_r"_h, \ + &VISITORCLASS::VisitNEONScalar2RegMiscFP16}, \ + {"frecpx_asisdmiscfp16_r"_h, \ + &VISITORCLASS::VisitNEONScalar2RegMiscFP16}, \ + {"frsqrte_asisdmiscfp16_r"_h, \ + &VISITORCLASS::VisitNEONScalar2RegMiscFP16}, \ + {"scvtf_asisdmiscfp16_r"_h, &VISITORCLASS::VisitNEONScalar2RegMiscFP16}, \ + {"ucvtf_asisdmiscfp16_r"_h, &VISITORCLASS::VisitNEONScalar2RegMiscFP16}, \ + {"sqdmlal_asisddiff_only"_h, &VISITORCLASS::VisitNEONScalar3Diff}, \ + {"sqdmlsl_asisddiff_only"_h, &VISITORCLASS::VisitNEONScalar3Diff}, \ + 
{"sqdmull_asisddiff_only"_h, &VISITORCLASS::VisitNEONScalar3Diff}, \ + {"sqadd_asisdsame_only"_h, &VISITORCLASS::VisitNEONScalar3Same}, \ + {"sqdmulh_asisdsame_only"_h, &VISITORCLASS::VisitNEONScalar3Same}, \ + {"sqrdmulh_asisdsame_only"_h, &VISITORCLASS::VisitNEONScalar3Same}, \ + {"sqrshl_asisdsame_only"_h, &VISITORCLASS::VisitNEONScalar3Same}, \ + {"sqshl_asisdsame_only"_h, &VISITORCLASS::VisitNEONScalar3Same}, \ + {"sqsub_asisdsame_only"_h, &VISITORCLASS::VisitNEONScalar3Same}, \ + {"srshl_asisdsame_only"_h, &VISITORCLASS::VisitNEONScalar3Same}, \ + {"sshl_asisdsame_only"_h, &VISITORCLASS::VisitNEONScalar3Same}, \ + {"uqadd_asisdsame_only"_h, &VISITORCLASS::VisitNEONScalar3Same}, \ + {"uqrshl_asisdsame_only"_h, &VISITORCLASS::VisitNEONScalar3Same}, \ + {"uqshl_asisdsame_only"_h, &VISITORCLASS::VisitNEONScalar3Same}, \ + {"uqsub_asisdsame_only"_h, &VISITORCLASS::VisitNEONScalar3Same}, \ + {"urshl_asisdsame_only"_h, &VISITORCLASS::VisitNEONScalar3Same}, \ + {"ushl_asisdsame_only"_h, &VISITORCLASS::VisitNEONScalar3Same}, \ + {"fabd_asisdsamefp16_only"_h, &VISITORCLASS::VisitNEONScalar3SameFP16}, \ + {"facge_asisdsamefp16_only"_h, &VISITORCLASS::VisitNEONScalar3SameFP16}, \ + {"facgt_asisdsamefp16_only"_h, &VISITORCLASS::VisitNEONScalar3SameFP16}, \ + {"fcmeq_asisdsamefp16_only"_h, &VISITORCLASS::VisitNEONScalar3SameFP16}, \ + {"fcmge_asisdsamefp16_only"_h, &VISITORCLASS::VisitNEONScalar3SameFP16}, \ + {"fcmgt_asisdsamefp16_only"_h, &VISITORCLASS::VisitNEONScalar3SameFP16}, \ + {"fmulx_asisdsamefp16_only"_h, &VISITORCLASS::VisitNEONScalar3SameFP16}, \ + {"frecps_asisdsamefp16_only"_h, \ + &VISITORCLASS::VisitNEONScalar3SameFP16}, \ + {"frsqrts_asisdsamefp16_only"_h, \ + &VISITORCLASS::VisitNEONScalar3SameFP16}, \ + {"sqdmulh_asisdelem_r"_h, \ + &VISITORCLASS::VisitNEONScalarByIndexedElement}, \ + {"sqrdmlah_asisdelem_r"_h, \ + &VISITORCLASS::VisitNEONScalarByIndexedElement}, \ + {"sqrdmlsh_asisdelem_r"_h, \ + &VISITORCLASS::VisitNEONScalarByIndexedElement}, \ + {"sqrdmulh_asisdelem_r"_h, \ + &VISITORCLASS::VisitNEONScalarByIndexedElement}, \ + {"dup_asisdone_only"_h, &VISITORCLASS::VisitNEONScalarCopy}, \ + {"addp_asisdpair_only"_h, &VISITORCLASS::VisitNEONScalarPairwise}, \ + {"faddp_asisdpair_only_h"_h, &VISITORCLASS::VisitNEONScalarPairwise}, \ + {"faddp_asisdpair_only_sd"_h, &VISITORCLASS::VisitNEONScalarPairwise}, \ + {"fmaxnmp_asisdpair_only_h"_h, &VISITORCLASS::VisitNEONScalarPairwise}, \ + {"fmaxnmp_asisdpair_only_sd"_h, &VISITORCLASS::VisitNEONScalarPairwise}, \ + {"fmaxp_asisdpair_only_h"_h, &VISITORCLASS::VisitNEONScalarPairwise}, \ + {"fmaxp_asisdpair_only_sd"_h, &VISITORCLASS::VisitNEONScalarPairwise}, \ + {"fminnmp_asisdpair_only_h"_h, &VISITORCLASS::VisitNEONScalarPairwise}, \ + {"fminnmp_asisdpair_only_sd"_h, &VISITORCLASS::VisitNEONScalarPairwise}, \ + {"fminp_asisdpair_only_h"_h, &VISITORCLASS::VisitNEONScalarPairwise}, \ + {"fminp_asisdpair_only_sd"_h, &VISITORCLASS::VisitNEONScalarPairwise}, \ + {"fcvtzs_asisdshf_c"_h, &VISITORCLASS::VisitNEONScalarShiftImmediate}, \ + {"fcvtzu_asisdshf_c"_h, &VISITORCLASS::VisitNEONScalarShiftImmediate}, \ + {"scvtf_asisdshf_c"_h, &VISITORCLASS::VisitNEONScalarShiftImmediate}, \ + {"sqshlu_asisdshf_r"_h, &VISITORCLASS::VisitNEONScalarShiftImmediate}, \ + {"sqshl_asisdshf_r"_h, &VISITORCLASS::VisitNEONScalarShiftImmediate}, \ + {"ucvtf_asisdshf_c"_h, &VISITORCLASS::VisitNEONScalarShiftImmediate}, \ + {"uqshl_asisdshf_r"_h, &VISITORCLASS::VisitNEONScalarShiftImmediate}, \ + {"sqshlu_asimdshf_r"_h, &VISITORCLASS::VisitNEONShiftImmediate}, 
\ + {"sqshl_asimdshf_r"_h, &VISITORCLASS::VisitNEONShiftImmediate}, \ + {"uqshl_asimdshf_r"_h, &VISITORCLASS::VisitNEONShiftImmediate}, \ + {"shl_asimdshf_r"_h, &VISITORCLASS::VisitNEONShiftImmediate}, \ + {"sli_asimdshf_r"_h, &VISITORCLASS::VisitNEONShiftImmediate}, \ + {"tbl_asimdtbl_l1_1"_h, &VISITORCLASS::VisitNEONTable}, \ + {"tbl_asimdtbl_l2_2"_h, &VISITORCLASS::VisitNEONTable}, \ + {"tbl_asimdtbl_l3_3"_h, &VISITORCLASS::VisitNEONTable}, \ + {"tbl_asimdtbl_l4_4"_h, &VISITORCLASS::VisitNEONTable}, \ + {"tbx_asimdtbl_l1_1"_h, &VISITORCLASS::VisitNEONTable}, \ + {"tbx_asimdtbl_l2_2"_h, &VISITORCLASS::VisitNEONTable}, \ + {"tbx_asimdtbl_l3_3"_h, &VISITORCLASS::VisitNEONTable}, \ + {"tbx_asimdtbl_l4_4"_h, &VISITORCLASS::VisitNEONTable}, \ + {"adrp_only_pcreladdr"_h, &VISITORCLASS::VisitPCRelAddressing}, \ + {"adr_only_pcreladdr"_h, &VISITORCLASS::VisitPCRelAddressing}, \ + {"rmif_only_rmif"_h, &VISITORCLASS::VisitRotateRightIntoFlags}, \ + {"bti_hb_hints"_h, &VISITORCLASS::VisitSystem}, \ + {"clrex_bn_barriers"_h, &VISITORCLASS::VisitSystem}, \ + {"dmb_bo_barriers"_h, &VISITORCLASS::VisitSystem}, \ + {"dsb_bo_barriers"_h, &VISITORCLASS::VisitSystem}, \ + {"hint_hm_hints"_h, &VISITORCLASS::VisitSystem}, \ + {"mrs_rs_systemmove"_h, &VISITORCLASS::VisitSystem}, \ + {"msr_sr_systemmove"_h, &VISITORCLASS::VisitSystem}, \ + {"psb_hc_hints"_h, &VISITORCLASS::VisitSystem}, \ + {"sb_only_barriers"_h, &VISITORCLASS::VisitSystem}, \ + {"sysl_rc_systeminstrs"_h, &VISITORCLASS::VisitSystem}, \ + {"sys_cr_systeminstrs"_h, &VISITORCLASS::VisitSystem}, \ + {"tcommit_only_barriers"_h, &VISITORCLASS::VisitSystem}, \ + {"tsb_hc_hints"_h, &VISITORCLASS::VisitSystem}, \ + {"tbnz_only_testbranch"_h, &VISITORCLASS::VisitTestBranch}, \ + {"tbz_only_testbranch"_h, &VISITORCLASS::VisitTestBranch}, \ + {"bl_only_branch_imm"_h, &VISITORCLASS::VisitUnconditionalBranch}, \ + {"b_only_branch_imm"_h, &VISITORCLASS::VisitUnconditionalBranch}, \ + {"blraaz_64_branch_reg"_h, \ + &VISITORCLASS::VisitUnconditionalBranchToRegister}, \ + {"blraa_64p_branch_reg"_h, \ + &VISITORCLASS::VisitUnconditionalBranchToRegister}, \ + {"blrabz_64_branch_reg"_h, \ + &VISITORCLASS::VisitUnconditionalBranchToRegister}, \ + {"blrab_64p_branch_reg"_h, \ + &VISITORCLASS::VisitUnconditionalBranchToRegister}, \ + {"blr_64_branch_reg"_h, \ + &VISITORCLASS::VisitUnconditionalBranchToRegister}, \ + {"braaz_64_branch_reg"_h, \ + &VISITORCLASS::VisitUnconditionalBranchToRegister}, \ + {"braa_64p_branch_reg"_h, \ + &VISITORCLASS::VisitUnconditionalBranchToRegister}, \ + {"brabz_64_branch_reg"_h, \ + &VISITORCLASS::VisitUnconditionalBranchToRegister}, \ + {"brab_64p_branch_reg"_h, \ + &VISITORCLASS::VisitUnconditionalBranchToRegister}, \ + {"br_64_branch_reg"_h, \ + &VISITORCLASS::VisitUnconditionalBranchToRegister}, \ + {"drps_64e_branch_reg"_h, \ + &VISITORCLASS::VisitUnconditionalBranchToRegister}, \ + {"eretaa_64e_branch_reg"_h, \ + &VISITORCLASS::VisitUnconditionalBranchToRegister}, \ + {"eretab_64e_branch_reg"_h, \ + &VISITORCLASS::VisitUnconditionalBranchToRegister}, \ + {"eret_64e_branch_reg"_h, \ + &VISITORCLASS::VisitUnconditionalBranchToRegister}, \ + {"retaa_64e_branch_reg"_h, \ + &VISITORCLASS::VisitUnconditionalBranchToRegister}, \ + {"retab_64e_branch_reg"_h, \ + &VISITORCLASS::VisitUnconditionalBranchToRegister}, \ + {"ret_64r_branch_reg"_h, \ + &VISITORCLASS::VisitUnconditionalBranchToRegister}, \ + {"bcax_vvv16_crypto4"_h, &VISITORCLASS::VisitUnimplemented}, \ + {"bfcvtn_asimdmisc_4s"_h, &VISITORCLASS::VisitUnimplemented}, \ + 
{"bfdot_asimdelem_e"_h, &VISITORCLASS::VisitUnimplemented}, \ + {"bfdot_asimdsame2_d"_h, &VISITORCLASS::VisitUnimplemented}, \ + {"bfmlal_asimdelem_f"_h, &VISITORCLASS::VisitUnimplemented}, \ + {"bfmlal_asimdsame2_f"_h, &VISITORCLASS::VisitUnimplemented}, \ + {"bfmmla_asimdsame2_e"_h, &VISITORCLASS::VisitUnimplemented}, \ + {"dsb_bon_barriers"_h, &VISITORCLASS::VisitUnimplemented}, \ + {"eor3_vvv16_crypto4"_h, &VISITORCLASS::VisitUnimplemented}, \ + {"ld64b_64l_memop"_h, &VISITORCLASS::VisitUnimplemented}, \ + {"ldgm_64bulk_ldsttags"_h, &VISITORCLASS::VisitUnimplemented}, \ + {"ldtrb_32_ldst_unpriv"_h, &VISITORCLASS::VisitUnimplemented}, \ + {"ldtrh_32_ldst_unpriv"_h, &VISITORCLASS::VisitUnimplemented}, \ + {"ldtrsb_32_ldst_unpriv"_h, &VISITORCLASS::VisitUnimplemented}, \ + {"ldtrsb_64_ldst_unpriv"_h, &VISITORCLASS::VisitUnimplemented}, \ + {"ldtrsh_32_ldst_unpriv"_h, &VISITORCLASS::VisitUnimplemented}, \ + {"ldtrsh_64_ldst_unpriv"_h, &VISITORCLASS::VisitUnimplemented}, \ + {"ldtrsw_64_ldst_unpriv"_h, &VISITORCLASS::VisitUnimplemented}, \ + {"ldtr_32_ldst_unpriv"_h, &VISITORCLASS::VisitUnimplemented}, \ + {"ldtr_64_ldst_unpriv"_h, &VISITORCLASS::VisitUnimplemented}, \ + {"rax1_vvv2_cryptosha512_3"_h, &VISITORCLASS::VisitUnimplemented}, \ + {"sha512h2_qqv_cryptosha512_3"_h, &VISITORCLASS::VisitUnimplemented}, \ + {"sha512h_qqv_cryptosha512_3"_h, &VISITORCLASS::VisitUnimplemented}, \ + {"sha512su0_vv2_cryptosha512_2"_h, &VISITORCLASS::VisitUnimplemented}, \ + {"sha512su1_vvv2_cryptosha512_3"_h, &VISITORCLASS::VisitUnimplemented}, \ + {"sm3partw1_vvv4_cryptosha512_3"_h, &VISITORCLASS::VisitUnimplemented}, \ + {"sm3partw2_vvv4_cryptosha512_3"_h, &VISITORCLASS::VisitUnimplemented}, \ + {"sm3ss1_vvv4_crypto4"_h, &VISITORCLASS::VisitUnimplemented}, \ + {"sm3tt1a_vvv4_crypto3_imm2"_h, &VISITORCLASS::VisitUnimplemented}, \ + {"sm3tt1b_vvv4_crypto3_imm2"_h, &VISITORCLASS::VisitUnimplemented}, \ + {"sm3tt2a_vvv4_crypto3_imm2"_h, &VISITORCLASS::VisitUnimplemented}, \ + {"sm3tt2b_vvv_crypto3_imm2"_h, &VISITORCLASS::VisitUnimplemented}, \ + {"sm4ekey_vvv4_cryptosha512_3"_h, &VISITORCLASS::VisitUnimplemented}, \ + {"sm4e_vv4_cryptosha512_2"_h, &VISITORCLASS::VisitUnimplemented}, \ + {"st64b_64l_memop"_h, &VISITORCLASS::VisitUnimplemented}, \ + {"st64bv_64_memop"_h, &VISITORCLASS::VisitUnimplemented}, \ + {"st64bv0_64_memop"_h, &VISITORCLASS::VisitUnimplemented}, \ + {"stgm_64bulk_ldsttags"_h, &VISITORCLASS::VisitUnimplemented}, \ + {"sttrb_32_ldst_unpriv"_h, &VISITORCLASS::VisitUnimplemented}, \ + {"sttrh_32_ldst_unpriv"_h, &VISITORCLASS::VisitUnimplemented}, \ + {"sttr_32_ldst_unpriv"_h, &VISITORCLASS::VisitUnimplemented}, \ + {"sttr_64_ldst_unpriv"_h, &VISITORCLASS::VisitUnimplemented}, \ + {"stzgm_64bulk_ldsttags"_h, &VISITORCLASS::VisitUnimplemented}, \ + {"tcancel_ex_exception"_h, &VISITORCLASS::VisitUnimplemented}, \ + {"tstart_br_systemresult"_h, &VISITORCLASS::VisitUnimplemented}, \ + {"ttest_br_systemresult"_h, &VISITORCLASS::VisitUnimplemented}, \ + {"wfet_only_systeminstrswithreg"_h, &VISITORCLASS::VisitUnimplemented}, \ + {"wfit_only_systeminstrswithreg"_h, &VISITORCLASS::VisitUnimplemented}, \ + {"xar_vvv2_crypto3_imm6"_h, &VISITORCLASS::VisitUnimplemented}, \ + {"bfcvt_z_p_z_s2bf"_h, &VISITORCLASS::VisitUnimplemented}, \ + {"bfcvtnt_z_p_z_s2bf"_h, &VISITORCLASS::VisitUnimplemented}, \ + {"bfdot_z_zzz"_h, &VISITORCLASS::VisitUnimplemented}, \ + {"bfdot_z_zzzi"_h, &VISITORCLASS::VisitUnimplemented}, \ + {"bfmlalb_z_zzz"_h, &VISITORCLASS::VisitUnimplemented}, \ + {"bfmlalb_z_zzzi"_h, 
&VISITORCLASS::VisitUnimplemented}, \ + {"bfmlalt_z_zzz"_h, &VISITORCLASS::VisitUnimplemented}, \ + {"bfmlalt_z_zzzi"_h, &VISITORCLASS::VisitUnimplemented}, \ + {"bfmmla_z_zzz"_h, &VISITORCLASS::VisitUnimplemented}, { \ + "unallocated"_h, &VISITORCLASS::VisitUnallocated \ + } + +#define SIM_AUD_VISITOR_MAP(VISITORCLASS) \ + {"autia1716_hi_hints"_h, &VISITORCLASS::VisitSystem}, \ + {"autiasp_hi_hints"_h, &VISITORCLASS::VisitSystem}, \ + {"autiaz_hi_hints"_h, &VISITORCLASS::VisitSystem}, \ + {"autib1716_hi_hints"_h, &VISITORCLASS::VisitSystem}, \ + {"autibsp_hi_hints"_h, &VISITORCLASS::VisitSystem}, \ + {"autibz_hi_hints"_h, &VISITORCLASS::VisitSystem}, \ + {"axflag_m_pstate"_h, &VISITORCLASS::VisitSystem}, \ + {"cfinv_m_pstate"_h, &VISITORCLASS::VisitSystem}, \ + {"csdb_hi_hints"_h, &VISITORCLASS::VisitSystem}, \ + {"dgh_hi_hints"_h, &VISITORCLASS::VisitSystem}, \ + {"esb_hi_hints"_h, &VISITORCLASS::VisitSystem}, \ + {"isb_bi_barriers"_h, &VISITORCLASS::VisitSystem}, \ + {"nop_hi_hints"_h, &VISITORCLASS::VisitSystem}, \ + {"pacia1716_hi_hints"_h, &VISITORCLASS::VisitSystem}, \ + {"paciasp_hi_hints"_h, &VISITORCLASS::VisitSystem}, \ + {"paciaz_hi_hints"_h, &VISITORCLASS::VisitSystem}, \ + {"pacib1716_hi_hints"_h, &VISITORCLASS::VisitSystem}, \ + {"pacibsp_hi_hints"_h, &VISITORCLASS::VisitSystem}, \ + {"pacibz_hi_hints"_h, &VISITORCLASS::VisitSystem}, \ + {"sev_hi_hints"_h, &VISITORCLASS::VisitSystem}, \ + {"sevl_hi_hints"_h, &VISITORCLASS::VisitSystem}, \ + {"ssbb_only_barriers"_h, &VISITORCLASS::VisitSystem}, \ + {"wfe_hi_hints"_h, &VISITORCLASS::VisitSystem}, \ + {"wfi_hi_hints"_h, &VISITORCLASS::VisitSystem}, \ + {"xaflag_m_pstate"_h, &VISITORCLASS::VisitSystem}, \ + {"xpaclri_hi_hints"_h, &VISITORCLASS::VisitSystem}, \ + {"yield_hi_hints"_h, &VISITORCLASS::VisitSystem}, \ + {"abs_asimdmisc_r"_h, &VISITORCLASS::VisitNEON2RegMisc}, \ + {"cls_asimdmisc_r"_h, &VISITORCLASS::VisitNEON2RegMisc}, \ + {"clz_asimdmisc_r"_h, &VISITORCLASS::VisitNEON2RegMisc}, \ + {"cmeq_asimdmisc_z"_h, &VISITORCLASS::VisitNEON2RegMisc}, \ + {"cmge_asimdmisc_z"_h, &VISITORCLASS::VisitNEON2RegMisc}, \ + {"cmgt_asimdmisc_z"_h, &VISITORCLASS::VisitNEON2RegMisc}, \ + {"cmle_asimdmisc_z"_h, &VISITORCLASS::VisitNEON2RegMisc}, \ + {"cmlt_asimdmisc_z"_h, &VISITORCLASS::VisitNEON2RegMisc}, \ + {"cnt_asimdmisc_r"_h, &VISITORCLASS::VisitNEON2RegMisc}, \ + {"fabs_asimdmisc_r"_h, &VISITORCLASS::VisitNEON2RegMisc}, \ + {"fcmeq_asimdmisc_fz"_h, &VISITORCLASS::VisitNEON2RegMisc}, \ + {"fcmge_asimdmisc_fz"_h, &VISITORCLASS::VisitNEON2RegMisc}, \ + {"fcmgt_asimdmisc_fz"_h, &VISITORCLASS::VisitNEON2RegMisc}, \ + {"fcmle_asimdmisc_fz"_h, &VISITORCLASS::VisitNEON2RegMisc}, \ + {"fcmlt_asimdmisc_fz"_h, &VISITORCLASS::VisitNEON2RegMisc}, \ + {"fcvtas_asimdmisc_r"_h, &VISITORCLASS::VisitNEON2RegMisc}, \ + {"fcvtau_asimdmisc_r"_h, &VISITORCLASS::VisitNEON2RegMisc}, \ + {"fcvtl_asimdmisc_l"_h, &VISITORCLASS::VisitNEON2RegMisc}, \ + {"fcvtms_asimdmisc_r"_h, &VISITORCLASS::VisitNEON2RegMisc}, \ + {"fcvtmu_asimdmisc_r"_h, &VISITORCLASS::VisitNEON2RegMisc}, \ + {"fcvtns_asimdmisc_r"_h, &VISITORCLASS::VisitNEON2RegMisc}, \ + {"fcvtnu_asimdmisc_r"_h, &VISITORCLASS::VisitNEON2RegMisc}, \ + {"fcvtn_asimdmisc_n"_h, &VISITORCLASS::VisitNEON2RegMisc}, \ + {"fcvtps_asimdmisc_r"_h, &VISITORCLASS::VisitNEON2RegMisc}, \ + {"fcvtpu_asimdmisc_r"_h, &VISITORCLASS::VisitNEON2RegMisc}, \ + {"fcvtxn_asimdmisc_n"_h, &VISITORCLASS::VisitNEON2RegMisc}, \ + {"fcvtzs_asimdmisc_r"_h, &VISITORCLASS::VisitNEON2RegMisc}, \ + {"fcvtzu_asimdmisc_r"_h, 
&VISITORCLASS::VisitNEON2RegMisc}, \ + {"fneg_asimdmisc_r"_h, &VISITORCLASS::VisitNEON2RegMisc}, \ + {"frecpe_asimdmisc_r"_h, &VISITORCLASS::VisitNEON2RegMisc}, \ + {"frint32x_asimdmisc_r"_h, &VISITORCLASS::VisitNEON2RegMisc}, \ + {"frint32z_asimdmisc_r"_h, &VISITORCLASS::VisitNEON2RegMisc}, \ + {"frint64x_asimdmisc_r"_h, &VISITORCLASS::VisitNEON2RegMisc}, \ + {"frint64z_asimdmisc_r"_h, &VISITORCLASS::VisitNEON2RegMisc}, \ + {"frinta_asimdmisc_r"_h, &VISITORCLASS::VisitNEON2RegMisc}, \ + {"frinti_asimdmisc_r"_h, &VISITORCLASS::VisitNEON2RegMisc}, \ + {"frintm_asimdmisc_r"_h, &VISITORCLASS::VisitNEON2RegMisc}, \ + {"frintn_asimdmisc_r"_h, &VISITORCLASS::VisitNEON2RegMisc}, \ + {"frintp_asimdmisc_r"_h, &VISITORCLASS::VisitNEON2RegMisc}, \ + {"frintx_asimdmisc_r"_h, &VISITORCLASS::VisitNEON2RegMisc}, \ + {"frintz_asimdmisc_r"_h, &VISITORCLASS::VisitNEON2RegMisc}, \ + {"frsqrte_asimdmisc_r"_h, &VISITORCLASS::VisitNEON2RegMisc}, \ + {"fsqrt_asimdmisc_r"_h, &VISITORCLASS::VisitNEON2RegMisc}, \ + {"neg_asimdmisc_r"_h, &VISITORCLASS::VisitNEON2RegMisc}, \ + {"not_asimdmisc_r"_h, &VISITORCLASS::VisitNEON2RegMisc}, \ + {"rbit_asimdmisc_r"_h, &VISITORCLASS::VisitNEON2RegMisc}, \ + {"rev16_asimdmisc_r"_h, &VISITORCLASS::VisitNEON2RegMisc}, \ + {"rev32_asimdmisc_r"_h, &VISITORCLASS::VisitNEON2RegMisc}, \ + {"rev64_asimdmisc_r"_h, &VISITORCLASS::VisitNEON2RegMisc}, \ + {"sadalp_asimdmisc_p"_h, &VISITORCLASS::VisitNEON2RegMisc}, \ + {"saddlp_asimdmisc_p"_h, &VISITORCLASS::VisitNEON2RegMisc}, \ + {"scvtf_asimdmisc_r"_h, &VISITORCLASS::VisitNEON2RegMisc}, \ + {"shll_asimdmisc_s"_h, &VISITORCLASS::VisitNEON2RegMisc}, \ + {"sqabs_asimdmisc_r"_h, &VISITORCLASS::VisitNEON2RegMisc}, \ + {"sqneg_asimdmisc_r"_h, &VISITORCLASS::VisitNEON2RegMisc}, \ + {"sqxtn_asimdmisc_n"_h, &VISITORCLASS::VisitNEON2RegMisc}, \ + {"sqxtun_asimdmisc_n"_h, &VISITORCLASS::VisitNEON2RegMisc}, \ + {"suqadd_asimdmisc_r"_h, &VISITORCLASS::VisitNEON2RegMisc}, \ + {"uadalp_asimdmisc_p"_h, &VISITORCLASS::VisitNEON2RegMisc}, \ + {"uaddlp_asimdmisc_p"_h, &VISITORCLASS::VisitNEON2RegMisc}, \ + {"ucvtf_asimdmisc_r"_h, &VISITORCLASS::VisitNEON2RegMisc}, \ + {"uqxtn_asimdmisc_n"_h, &VISITORCLASS::VisitNEON2RegMisc}, \ + {"urecpe_asimdmisc_r"_h, &VISITORCLASS::VisitNEON2RegMisc}, \ + {"ursqrte_asimdmisc_r"_h, &VISITORCLASS::VisitNEON2RegMisc}, \ + {"usqadd_asimdmisc_r"_h, &VISITORCLASS::VisitNEON2RegMisc}, \ + {"xtn_asimdmisc_n"_h, &VISITORCLASS::VisitNEON2RegMisc}, \ + {"mla_asimdsame_only"_h, &VISITORCLASS::VisitNEON3Same}, \ + {"mls_asimdsame_only"_h, &VISITORCLASS::VisitNEON3Same}, \ + {"mul_asimdsame_only"_h, &VISITORCLASS::VisitNEON3Same}, \ + {"saba_asimdsame_only"_h, &VISITORCLASS::VisitNEON3Same}, \ + {"sabd_asimdsame_only"_h, &VISITORCLASS::VisitNEON3Same}, \ + {"shadd_asimdsame_only"_h, &VISITORCLASS::VisitNEON3Same}, \ + {"shsub_asimdsame_only"_h, &VISITORCLASS::VisitNEON3Same}, \ + {"smaxp_asimdsame_only"_h, &VISITORCLASS::VisitNEON3Same}, \ + {"smax_asimdsame_only"_h, &VISITORCLASS::VisitNEON3Same}, \ + {"sminp_asimdsame_only"_h, &VISITORCLASS::VisitNEON3Same}, \ + {"smin_asimdsame_only"_h, &VISITORCLASS::VisitNEON3Same}, \ + {"srhadd_asimdsame_only"_h, &VISITORCLASS::VisitNEON3Same}, \ + {"uaba_asimdsame_only"_h, &VISITORCLASS::VisitNEON3Same}, \ + {"uabd_asimdsame_only"_h, &VISITORCLASS::VisitNEON3Same}, \ + {"uhadd_asimdsame_only"_h, &VISITORCLASS::VisitNEON3Same}, \ + {"uhsub_asimdsame_only"_h, &VISITORCLASS::VisitNEON3Same}, \ + {"umaxp_asimdsame_only"_h, &VISITORCLASS::VisitNEON3Same}, \ + {"umax_asimdsame_only"_h, 
&VISITORCLASS::VisitNEON3Same}, \ + {"uminp_asimdsame_only"_h, &VISITORCLASS::VisitNEON3Same}, \ + {"umin_asimdsame_only"_h, &VISITORCLASS::VisitNEON3Same}, \ + {"urhadd_asimdsame_only"_h, &VISITORCLASS::VisitNEON3Same}, \ + {"and_asimdsame_only"_h, &VISITORCLASS::VisitNEON3Same}, \ + {"bic_asimdsame_only"_h, &VISITORCLASS::VisitNEON3Same}, \ + {"bif_asimdsame_only"_h, &VISITORCLASS::VisitNEON3Same}, \ + {"bit_asimdsame_only"_h, &VISITORCLASS::VisitNEON3Same}, \ + {"bsl_asimdsame_only"_h, &VISITORCLASS::VisitNEON3Same}, \ + {"eor_asimdsame_only"_h, &VISITORCLASS::VisitNEON3Same}, \ + {"orr_asimdsame_only"_h, &VISITORCLASS::VisitNEON3Same}, \ + {"orn_asimdsame_only"_h, &VISITORCLASS::VisitNEON3Same}, \ + {"pmul_asimdsame_only"_h, &VISITORCLASS::VisitNEON3Same}, \ + {"fmlal2_asimdsame_f"_h, &VISITORCLASS::VisitNEON3Same}, \ + {"fmlal_asimdsame_f"_h, &VISITORCLASS::VisitNEON3Same}, \ + {"fmlsl2_asimdsame_f"_h, &VISITORCLASS::VisitNEON3Same}, \ + {"fmlsl_asimdsame_f"_h, &VISITORCLASS::VisitNEON3Same}, \ + {"ushll_asimdshf_l"_h, &VISITORCLASS::VisitNEONShiftImmediate}, \ + {"sshll_asimdshf_l"_h, &VISITORCLASS::VisitNEONShiftImmediate}, \ + {"shrn_asimdshf_n"_h, &VISITORCLASS::VisitNEONShiftImmediate}, \ + {"rshrn_asimdshf_n"_h, &VISITORCLASS::VisitNEONShiftImmediate}, \ + {"sqshrn_asimdshf_n"_h, &VISITORCLASS::VisitNEONShiftImmediate}, \ + {"sqrshrn_asimdshf_n"_h, &VISITORCLASS::VisitNEONShiftImmediate}, \ + {"sqshrun_asimdshf_n"_h, &VISITORCLASS::VisitNEONShiftImmediate}, \ + {"sqrshrun_asimdshf_n"_h, &VISITORCLASS::VisitNEONShiftImmediate}, \ + {"uqshrn_asimdshf_n"_h, &VISITORCLASS::VisitNEONShiftImmediate}, \ + {"uqrshrn_asimdshf_n"_h, &VISITORCLASS::VisitNEONShiftImmediate}, \ + {"sri_asimdshf_r"_h, &VISITORCLASS::VisitNEONShiftImmediate}, \ + {"srshr_asimdshf_r"_h, &VISITORCLASS::VisitNEONShiftImmediate}, \ + {"srsra_asimdshf_r"_h, &VISITORCLASS::VisitNEONShiftImmediate}, \ + {"sshr_asimdshf_r"_h, &VISITORCLASS::VisitNEONShiftImmediate}, \ + {"ssra_asimdshf_r"_h, &VISITORCLASS::VisitNEONShiftImmediate}, \ + {"urshr_asimdshf_r"_h, &VISITORCLASS::VisitNEONShiftImmediate}, \ + {"ursra_asimdshf_r"_h, &VISITORCLASS::VisitNEONShiftImmediate}, \ + {"ushr_asimdshf_r"_h, &VISITORCLASS::VisitNEONShiftImmediate}, \ + {"usra_asimdshf_r"_h, &VISITORCLASS::VisitNEONShiftImmediate}, \ + {"scvtf_asimdshf_c"_h, &VISITORCLASS::VisitNEONShiftImmediate}, \ + {"ucvtf_asimdshf_c"_h, &VISITORCLASS::VisitNEONShiftImmediate}, \ + {"fcvtzs_asimdshf_c"_h, &VISITORCLASS::VisitNEONShiftImmediate}, \ + {"fcvtzu_asimdshf_c"_h, &VISITORCLASS::VisitNEONShiftImmediate}, \ + {"sqdmlal_asisdelem_l"_h, \ + &VISITORCLASS::VisitNEONScalarByIndexedElement}, \ + {"sqdmlsl_asisdelem_l"_h, \ + &VISITORCLASS::VisitNEONScalarByIndexedElement}, \ + {"sqdmull_asisdelem_l"_h, \ + &VISITORCLASS::VisitNEONScalarByIndexedElement}, \ + {"fmla_asisdelem_rh_h"_h, \ + &VISITORCLASS::VisitNEONScalarByIndexedElement}, \ + {"fmla_asisdelem_r_sd"_h, \ + &VISITORCLASS::VisitNEONScalarByIndexedElement}, \ + {"fmls_asisdelem_rh_h"_h, \ + &VISITORCLASS::VisitNEONScalarByIndexedElement}, \ + {"fmls_asisdelem_r_sd"_h, \ + &VISITORCLASS::VisitNEONScalarByIndexedElement}, \ + {"fmulx_asisdelem_rh_h"_h, \ + &VISITORCLASS::VisitNEONScalarByIndexedElement}, \ + {"fmulx_asisdelem_r_sd"_h, \ + &VISITORCLASS::VisitNEONScalarByIndexedElement}, \ + {"fmul_asisdelem_rh_h"_h, \ + &VISITORCLASS::VisitNEONScalarByIndexedElement}, \ + {"fmul_asisdelem_r_sd"_h, \ + &VISITORCLASS::VisitNEONScalarByIndexedElement}, \ + {"fabd_asisdsame_only"_h, 
&VISITORCLASS::VisitNEONScalar3Same}, \ + {"facge_asisdsame_only"_h, &VISITORCLASS::VisitNEONScalar3Same}, \ + {"facgt_asisdsame_only"_h, &VISITORCLASS::VisitNEONScalar3Same}, \ + {"fcmeq_asisdsame_only"_h, &VISITORCLASS::VisitNEONScalar3Same}, \ + {"fcmge_asisdsame_only"_h, &VISITORCLASS::VisitNEONScalar3Same}, \ + {"fcmgt_asisdsame_only"_h, &VISITORCLASS::VisitNEONScalar3Same}, \ + {"fmulx_asisdsame_only"_h, &VISITORCLASS::VisitNEONScalar3Same}, \ + {"frecps_asisdsame_only"_h, &VISITORCLASS::VisitNEONScalar3Same}, \ + {"frsqrts_asisdsame_only"_h, &VISITORCLASS::VisitNEONScalar3Same}, \ + {"cmeq_asisdsame_only"_h, &VISITORCLASS::VisitNEONScalar3Same}, \ + {"cmge_asisdsame_only"_h, &VISITORCLASS::VisitNEONScalar3Same}, \ + {"cmgt_asisdsame_only"_h, &VISITORCLASS::VisitNEONScalar3Same}, \ + {"cmhi_asisdsame_only"_h, &VISITORCLASS::VisitNEONScalar3Same}, \ + {"cmhs_asisdsame_only"_h, &VISITORCLASS::VisitNEONScalar3Same}, \ + {"cmtst_asisdsame_only"_h, &VISITORCLASS::VisitNEONScalar3Same}, \ + {"add_asisdsame_only"_h, &VISITORCLASS::VisitNEONScalar3Same}, \ + {"sub_asisdsame_only"_h, &VISITORCLASS::VisitNEONScalar3Same}, \ + {"sqrdmlah_asisdsame2_only"_h, \ + &VISITORCLASS::VisitNEONScalar3SameExtra}, \ + {"sqrdmlsh_asisdsame2_only"_h, \ + &VISITORCLASS::VisitNEONScalar3SameExtra}, \ + {"fmaxnmv_asimdall_only_h"_h, &VISITORCLASS::VisitNEONAcrossLanes}, \ + {"fmaxv_asimdall_only_h"_h, &VISITORCLASS::VisitNEONAcrossLanes}, \ + {"fminnmv_asimdall_only_h"_h, &VISITORCLASS::VisitNEONAcrossLanes}, \ + {"fminv_asimdall_only_h"_h, &VISITORCLASS::VisitNEONAcrossLanes}, \ + {"fmaxnmv_asimdall_only_sd"_h, &VISITORCLASS::VisitNEONAcrossLanes}, \ + {"fminnmv_asimdall_only_sd"_h, &VISITORCLASS::VisitNEONAcrossLanes}, \ + {"fmaxv_asimdall_only_sd"_h, &VISITORCLASS::VisitNEONAcrossLanes}, \ + {"fminv_asimdall_only_sd"_h, &VISITORCLASS::VisitNEONAcrossLanes}, \ + {"shl_asisdshf_r"_h, &VISITORCLASS::VisitNEONScalarShiftImmediate}, \ + {"sli_asisdshf_r"_h, &VISITORCLASS::VisitNEONScalarShiftImmediate}, \ + {"sri_asisdshf_r"_h, &VISITORCLASS::VisitNEONScalarShiftImmediate}, \ + {"srshr_asisdshf_r"_h, &VISITORCLASS::VisitNEONScalarShiftImmediate}, \ + {"srsra_asisdshf_r"_h, &VISITORCLASS::VisitNEONScalarShiftImmediate}, \ + {"sshr_asisdshf_r"_h, &VISITORCLASS::VisitNEONScalarShiftImmediate}, \ + {"ssra_asisdshf_r"_h, &VISITORCLASS::VisitNEONScalarShiftImmediate}, \ + {"urshr_asisdshf_r"_h, &VISITORCLASS::VisitNEONScalarShiftImmediate}, \ + {"ursra_asisdshf_r"_h, &VISITORCLASS::VisitNEONScalarShiftImmediate}, \ + {"ushr_asisdshf_r"_h, &VISITORCLASS::VisitNEONScalarShiftImmediate}, \ + {"usra_asisdshf_r"_h, &VISITORCLASS::VisitNEONScalarShiftImmediate}, \ + {"sqrshrn_asisdshf_n"_h, &VISITORCLASS::VisitNEONScalarShiftImmediate}, \ + {"sqrshrun_asisdshf_n"_h, &VISITORCLASS::VisitNEONScalarShiftImmediate}, \ + {"sqshrn_asisdshf_n"_h, &VISITORCLASS::VisitNEONScalarShiftImmediate}, \ + {"sqshrun_asisdshf_n"_h, &VISITORCLASS::VisitNEONScalarShiftImmediate}, \ + {"uqrshrn_asisdshf_n"_h, &VISITORCLASS::VisitNEONScalarShiftImmediate}, \ + {"uqshrn_asisdshf_n"_h, &VISITORCLASS::VisitNEONScalarShiftImmediate}, \ + {"cmeq_asisdmisc_z"_h, &VISITORCLASS::VisitNEONScalar2RegMisc}, \ + {"cmge_asisdmisc_z"_h, &VISITORCLASS::VisitNEONScalar2RegMisc}, \ + {"cmgt_asisdmisc_z"_h, &VISITORCLASS::VisitNEONScalar2RegMisc}, \ + {"cmle_asisdmisc_z"_h, &VISITORCLASS::VisitNEONScalar2RegMisc}, \ + {"cmlt_asisdmisc_z"_h, &VISITORCLASS::VisitNEONScalar2RegMisc}, \ + {"abs_asisdmisc_r"_h, &VISITORCLASS::VisitNEONScalar2RegMisc}, \ + 
{"neg_asisdmisc_r"_h, &VISITORCLASS::VisitNEONScalar2RegMisc}, \ + {"fcmeq_asisdmisc_fz"_h, &VISITORCLASS::VisitNEONScalar2RegMisc}, \ + {"fcmge_asisdmisc_fz"_h, &VISITORCLASS::VisitNEONScalar2RegMisc}, \ + {"fcmgt_asisdmisc_fz"_h, &VISITORCLASS::VisitNEONScalar2RegMisc}, \ + {"fcmle_asisdmisc_fz"_h, &VISITORCLASS::VisitNEONScalar2RegMisc}, \ + {"fcmlt_asisdmisc_fz"_h, &VISITORCLASS::VisitNEONScalar2RegMisc}, \ + {"fcvtas_asisdmisc_r"_h, &VISITORCLASS::VisitNEONScalar2RegMisc}, \ + {"fcvtau_asisdmisc_r"_h, &VISITORCLASS::VisitNEONScalar2RegMisc}, \ + {"fcvtms_asisdmisc_r"_h, &VISITORCLASS::VisitNEONScalar2RegMisc}, \ + {"fcvtmu_asisdmisc_r"_h, &VISITORCLASS::VisitNEONScalar2RegMisc}, \ + {"fcvtns_asisdmisc_r"_h, &VISITORCLASS::VisitNEONScalar2RegMisc}, \ + {"fcvtnu_asisdmisc_r"_h, &VISITORCLASS::VisitNEONScalar2RegMisc}, \ + {"fcvtps_asisdmisc_r"_h, &VISITORCLASS::VisitNEONScalar2RegMisc}, \ + {"fcvtpu_asisdmisc_r"_h, &VISITORCLASS::VisitNEONScalar2RegMisc}, \ + {"fcvtxn_asisdmisc_n"_h, &VISITORCLASS::VisitNEONScalar2RegMisc}, \ + {"fcvtzs_asisdmisc_r"_h, &VISITORCLASS::VisitNEONScalar2RegMisc}, \ + {"fcvtzu_asisdmisc_r"_h, &VISITORCLASS::VisitNEONScalar2RegMisc}, \ + {"frecpe_asisdmisc_r"_h, &VISITORCLASS::VisitNEONScalar2RegMisc}, \ + {"frecpx_asisdmisc_r"_h, &VISITORCLASS::VisitNEONScalar2RegMisc}, \ + {"frsqrte_asisdmisc_r"_h, &VISITORCLASS::VisitNEONScalar2RegMisc}, \ + {"scvtf_asisdmisc_r"_h, &VISITORCLASS::VisitNEONScalar2RegMisc}, { \ + "ucvtf_asisdmisc_r"_h, &VISITORCLASS::VisitNEONScalar2RegMisc \ + } diff --git a/3rdparty/vixl/include/vixl/aarch64/disasm-aarch64.h b/3rdparty/vixl/include/vixl/aarch64/disasm-aarch64.h new file mode 100644 index 0000000000..cc941bb19e --- /dev/null +++ b/3rdparty/vixl/include/vixl/aarch64/disasm-aarch64.h @@ -0,0 +1,372 @@ +// Copyright 2015, VIXL authors +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// * Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// * Neither the name of ARM Limited nor the names of its contributors may be +// used to endorse or promote products derived from this software without +// specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS CONTRIBUTORS "AS IS" AND +// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE +// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +#ifndef VIXL_AARCH64_DISASM_AARCH64_H +#define VIXL_AARCH64_DISASM_AARCH64_H + +#include +#include +#include + +#include "../globals-vixl.h" +#include "../utils-vixl.h" + +#include "cpu-features-auditor-aarch64.h" +#include "decoder-aarch64.h" +#include "decoder-visitor-map-aarch64.h" +#include "instructions-aarch64.h" +#include "operands-aarch64.h" + +namespace vixl { +namespace aarch64 { + +class Disassembler : public DecoderVisitor { + public: + Disassembler(); + Disassembler(char* text_buffer, int buffer_size); + virtual ~Disassembler(); + char* GetOutput(); + + // Declare all Visitor functions. + virtual void Visit(Metadata* metadata, + const Instruction* instr) VIXL_OVERRIDE; + + protected: + virtual void ProcessOutput(const Instruction* instr); + + // Default output functions. The functions below implement a default way of + // printing elements in the disassembly. A sub-class can override these to + // customize the disassembly output. + + // Prints the name of a register. + // TODO: This currently doesn't allow renaming of V registers. + virtual void AppendRegisterNameToOutput(const Instruction* instr, + const CPURegister& reg); + + // Prints a PC-relative offset. This is used for example when disassembling + // branches to immediate offsets. + virtual void AppendPCRelativeOffsetToOutput(const Instruction* instr, + int64_t offset); + + // Prints an address, in the general case. It can be code or data. This is + // used for example to print the target address of an ADR instruction. + virtual void AppendCodeRelativeAddressToOutput(const Instruction* instr, + const void* addr); + + // Prints the address of some code. + // This is used for example to print the target address of a branch to an + // immediate offset. + // A sub-class can for example override this method to lookup the address and + // print an appropriate name. + virtual void AppendCodeRelativeCodeAddressToOutput(const Instruction* instr, + const void* addr); + + // Prints the address of some data. + // This is used for example to print the source address of a load literal + // instruction. + virtual void AppendCodeRelativeDataAddressToOutput(const Instruction* instr, + const void* addr); + + // Same as the above, but for addresses that are not relative to the code + // buffer. They are currently not used by VIXL. + virtual void AppendAddressToOutput(const Instruction* instr, + const void* addr); + virtual void AppendCodeAddressToOutput(const Instruction* instr, + const void* addr); + virtual void AppendDataAddressToOutput(const Instruction* instr, + const void* addr); + + public: + // Get/Set the offset that should be added to code addresses when printing + // code-relative addresses in the AppendCodeRelativeAddressToOutput() + // helpers. + // Below is an example of how a branch immediate instruction in memory at + // address 0xb010200 would disassemble with different offsets. 
+ // Base address | Disassembly + // 0x0 | 0xb010200: b #+0xcc (addr 0xb0102cc) + // 0x10000 | 0xb000200: b #+0xcc (addr 0xb0002cc) + // 0xb010200 | 0x0: b #+0xcc (addr 0xcc) + void MapCodeAddress(int64_t base_address, const Instruction* instr_address); + int64_t CodeRelativeAddress(const void* instr); + + private: +#define DECLARE(A) virtual void Visit##A(const Instruction* instr); + VISITOR_LIST(DECLARE) +#undef DECLARE + + using FormToVisitorFnMap = std::unordered_map< + uint32_t, + std::function>; + static const FormToVisitorFnMap* GetFormToVisitorFnMap(); + + std::string mnemonic_; + uint32_t form_hash_; + + void SetMnemonicFromForm(const std::string& form) { + if (form != "unallocated") { + VIXL_ASSERT(form.find_first_of('_') != std::string::npos); + mnemonic_ = form.substr(0, form.find_first_of('_')); + } + } + + void Disassemble_PdT_PgZ_ZnT_ZmT(const Instruction* instr); + void Disassemble_ZdB_Zn1B_Zn2B_imm(const Instruction* instr); + void Disassemble_ZdB_ZnB_ZmB(const Instruction* instr); + void Disassemble_ZdD_PgM_ZnS(const Instruction* instr); + void Disassemble_ZdD_ZnD_ZmD(const Instruction* instr); + void Disassemble_ZdD_ZnD_ZmD_imm(const Instruction* instr); + void Disassemble_ZdD_ZnS_ZmS_imm(const Instruction* instr); + void Disassemble_ZdH_PgM_ZnS(const Instruction* instr); + void Disassemble_ZdH_ZnH_ZmH_imm(const Instruction* instr); + void Disassemble_ZdS_PgM_ZnD(const Instruction* instr); + void Disassemble_ZdS_PgM_ZnH(const Instruction* instr); + void Disassemble_ZdS_PgM_ZnS(const Instruction* instr); + void Disassemble_ZdS_ZnH_ZmH_imm(const Instruction* instr); + void Disassemble_ZdS_ZnS_ZmS(const Instruction* instr); + void Disassemble_ZdS_ZnS_ZmS_imm(const Instruction* instr); + void Disassemble_ZdT_PgM_ZnT(const Instruction* instr); + void Disassemble_ZdT_PgZ_ZnT_ZmT(const Instruction* instr); + void Disassemble_ZdT_Pg_Zn1T_Zn2T(const Instruction* instr); + void Disassemble_ZdT_Zn1T_Zn2T_ZmT(const Instruction* instr); + void Disassemble_ZdT_ZnT_ZmT(const Instruction* instr); + void Disassemble_ZdT_ZnT_ZmTb(const Instruction* instr); + void Disassemble_ZdT_ZnTb(const Instruction* instr); + void Disassemble_ZdT_ZnTb_ZmTb(const Instruction* instr); + void Disassemble_ZdaD_ZnD_ZmD_imm(const Instruction* instr); + void Disassemble_ZdaD_ZnH_ZmH_imm_const(const Instruction* instr); + void Disassemble_ZdaD_ZnS_ZmS_imm(const Instruction* instr); + void Disassemble_ZdaH_ZnH_ZmH_imm(const Instruction* instr); + void Disassemble_ZdaH_ZnH_ZmH_imm_const(const Instruction* instr); + void Disassemble_ZdaS_ZnB_ZmB_imm_const(const Instruction* instr); + void Disassemble_ZdaS_ZnH_ZmH(const Instruction* instr); + void Disassemble_ZdaS_ZnH_ZmH_imm(const Instruction* instr); + void Disassemble_ZdaS_ZnS_ZmS_imm(const Instruction* instr); + void Disassemble_ZdaS_ZnS_ZmS_imm_const(const Instruction* instr); + void Disassemble_ZdaT_PgM_ZnTb(const Instruction* instr); + void Disassemble_ZdaT_ZnT_ZmT(const Instruction* instr); + void Disassemble_ZdaT_ZnT_ZmT_const(const Instruction* instr); + void Disassemble_ZdaT_ZnT_const(const Instruction* instr); + void Disassemble_ZdaT_ZnTb_ZmTb(const Instruction* instr); + void Disassemble_ZdaT_ZnTb_ZmTb_const(const Instruction* instr); + void Disassemble_ZdnB_ZdnB(const Instruction* instr); + void Disassemble_ZdnB_ZdnB_ZmB(const Instruction* instr); + void Disassemble_ZdnS_ZdnS_ZmS(const Instruction* instr); + void Disassemble_ZdnT_PgM_ZdnT_ZmT(const Instruction* instr); + void Disassemble_ZdnT_PgM_ZdnT_const(const Instruction* instr); + void 
Disassemble_ZdnT_ZdnT_ZmT_const(const Instruction* instr); + void Disassemble_ZtD_PgZ_ZnD_Xm(const Instruction* instr); + void Disassemble_ZtD_Pg_ZnD_Xm(const Instruction* instr); + void Disassemble_ZtS_PgZ_ZnS_Xm(const Instruction* instr); + void Disassemble_ZtS_Pg_ZnS_Xm(const Instruction* instr); + void Disassemble_ZdaS_ZnB_ZmB(const Instruction* instr); + void Disassemble_Vd4S_Vn16B_Vm16B(const Instruction* instr); + + void DisassembleCpy(const Instruction* instr); + void DisassembleSet(const Instruction* instr); + void DisassembleMinMaxImm(const Instruction* instr); + + void DisassembleSVEShiftLeftImm(const Instruction* instr); + void DisassembleSVEShiftRightImm(const Instruction* instr); + void DisassembleSVEAddSubCarry(const Instruction* instr); + void DisassembleSVEAddSubHigh(const Instruction* instr); + void DisassembleSVEComplexIntAddition(const Instruction* instr); + void DisassembleSVEBitwiseTernary(const Instruction* instr); + void DisassembleSVEFlogb(const Instruction* instr); + void DisassembleSVEFPPair(const Instruction* instr); + + void DisassembleNoArgs(const Instruction* instr); + + void DisassembleNEONMulByElementLong(const Instruction* instr); + void DisassembleNEONDotProdByElement(const Instruction* instr); + void DisassembleNEONFPMulByElement(const Instruction* instr); + void DisassembleNEONHalfFPMulByElement(const Instruction* instr); + void DisassembleNEONFPMulByElementLong(const Instruction* instr); + void DisassembleNEONComplexMulByElement(const Instruction* instr); + void DisassembleNEON2RegLogical(const Instruction* instr); + void DisassembleNEON2RegExtract(const Instruction* instr); + void DisassembleNEON2RegAddlp(const Instruction* instr); + void DisassembleNEON2RegCompare(const Instruction* instr); + void DisassembleNEON2RegFPCompare(const Instruction* instr); + void DisassembleNEON2RegFPConvert(const Instruction* instr); + void DisassembleNEON2RegFP(const Instruction* instr); + void DisassembleNEON3SameLogical(const Instruction* instr); + void DisassembleNEON3SameFHM(const Instruction* instr); + void DisassembleNEON3SameNoD(const Instruction* instr); + void DisassembleNEONShiftLeftLongImm(const Instruction* instr); + void DisassembleNEONShiftRightImm(const Instruction* instr); + void DisassembleNEONShiftRightNarrowImm(const Instruction* instr); + void DisassembleNEONScalarSatMulLongIndex(const Instruction* instr); + void DisassembleNEONFPScalarMulIndex(const Instruction* instr); + void DisassembleNEONFPScalar3Same(const Instruction* instr); + void DisassembleNEONScalar3SameOnlyD(const Instruction* instr); + void DisassembleNEONFPAcrossLanes(const Instruction* instr); + void DisassembleNEONFP16AcrossLanes(const Instruction* instr); + void DisassembleNEONScalarShiftImmOnlyD(const Instruction* instr); + void DisassembleNEONScalarShiftRightNarrowImm(const Instruction* instr); + void DisassembleNEONScalar2RegMiscOnlyD(const Instruction* instr); + void DisassembleNEONFPScalar2RegMisc(const Instruction* instr); + + void DisassembleMTELoadTag(const Instruction* instr); + void DisassembleMTEStoreTag(const Instruction* instr); + void DisassembleMTEStoreTagPair(const Instruction* instr); + + void Disassemble_XdSP_XnSP_Xm(const Instruction* instr); + void Disassemble_XdSP_XnSP_uimm6_uimm4(const Instruction* instr); + void Disassemble_Xd_XnSP_Xm(const Instruction* instr); + void Disassemble_Xd_XnSP_XmSP(const Instruction* instr); + + void Format(const Instruction* instr, + const char* mnemonic, + const char* format0, + const char* format1 = NULL); + void 
FormatWithDecodedMnemonic(const Instruction* instr, + const char* format0, + const char* format1 = NULL); + + void Substitute(const Instruction* instr, const char* string); + int SubstituteField(const Instruction* instr, const char* format); + int SubstituteRegisterField(const Instruction* instr, const char* format); + int SubstitutePredicateRegisterField(const Instruction* instr, + const char* format); + int SubstituteImmediateField(const Instruction* instr, const char* format); + int SubstituteLiteralField(const Instruction* instr, const char* format); + int SubstituteBitfieldImmediateField(const Instruction* instr, + const char* format); + int SubstituteShiftField(const Instruction* instr, const char* format); + int SubstituteExtendField(const Instruction* instr, const char* format); + int SubstituteConditionField(const Instruction* instr, const char* format); + int SubstitutePCRelAddressField(const Instruction* instr, const char* format); + int SubstituteBranchTargetField(const Instruction* instr, const char* format); + int SubstituteLSRegOffsetField(const Instruction* instr, const char* format); + int SubstitutePrefetchField(const Instruction* instr, const char* format); + int SubstituteBarrierField(const Instruction* instr, const char* format); + int SubstituteSysOpField(const Instruction* instr, const char* format); + int SubstituteCrField(const Instruction* instr, const char* format); + int SubstituteIntField(const Instruction* instr, const char* format); + int SubstituteSVESize(const Instruction* instr, const char* format); + int SubstituteTernary(const Instruction* instr, const char* format); + + std::pair GetRegNumForField(const Instruction* instr, + char reg_prefix, + const char* field); + + bool RdIsZROrSP(const Instruction* instr) const { + return (instr->GetRd() == kZeroRegCode); + } + + bool RnIsZROrSP(const Instruction* instr) const { + return (instr->GetRn() == kZeroRegCode); + } + + bool RmIsZROrSP(const Instruction* instr) const { + return (instr->GetRm() == kZeroRegCode); + } + + bool RaIsZROrSP(const Instruction* instr) const { + return (instr->GetRa() == kZeroRegCode); + } + + bool IsMovzMovnImm(unsigned reg_size, uint64_t value); + + int64_t code_address_offset() const { return code_address_offset_; } + + protected: + void ResetOutput(); + void AppendToOutput(const char* string, ...) PRINTF_CHECK(2, 3); + + void set_code_address_offset(int64_t code_address_offset) { + code_address_offset_ = code_address_offset; + } + + char* buffer_; + uint32_t buffer_pos_; + uint32_t buffer_size_; + bool own_buffer_; + + int64_t code_address_offset_; +}; + + +class PrintDisassembler : public Disassembler { + public: + explicit PrintDisassembler(FILE* stream) + : cpu_features_auditor_(NULL), + cpu_features_prefix_("// Needs: "), + cpu_features_suffix_(""), + signed_addresses_(false), + stream_(stream) {} + + // Convenience helpers for quick disassembly, without having to manually + // create a decoder. + void DisassembleBuffer(const Instruction* start, uint64_t size); + void DisassembleBuffer(const Instruction* start, const Instruction* end); + void Disassemble(const Instruction* instr); + + // If a CPUFeaturesAuditor is specified, it will be used to annotate + // disassembly. The CPUFeaturesAuditor is expected to visit the instructions + // _before_ the disassembler, such that the CPUFeatures information is + // available when the disassembler is called. 
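// Editor's note (illustration only, not part of the vendored header): a hedged
// usage sketch of the ordering described above, kept entirely in comments so
// the class declaration stays intact. It assumes the Decoder and
// CPUFeaturesAuditor declared in decoder-aarch64.h and
// cpu-features-auditor-aarch64.h, and the auditor constructor shown here is an
// assumption; `code` is a hypothetical pointer to A64 instructions. The
// auditor attaches to the decoder first, so it sees each instruction before
// the disassembler does and the annotations are ready when ProcessOutput runs.
//
//   vixl::aarch64::Decoder decoder;
//   vixl::aarch64::CPUFeaturesAuditor auditor(&decoder,
//                                             vixl::CPUFeatures::All());
//   vixl::aarch64::PrintDisassembler disasm(stdout);
//   disasm.RegisterCPUFeaturesAuditor(&auditor);
//   decoder.AppendVisitor(&disasm);
//   decoder.Decode(
//       reinterpret_cast<const vixl::aarch64::Instruction*>(code));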
+ void RegisterCPUFeaturesAuditor(CPUFeaturesAuditor* auditor) { + cpu_features_auditor_ = auditor; + } + + // Set the prefix to appear before the CPU features annotations. + void SetCPUFeaturesPrefix(const char* prefix) { + VIXL_ASSERT(prefix != NULL); + cpu_features_prefix_ = prefix; + } + + // Set the suffix to appear after the CPU features annotations. + void SetCPUFeaturesSuffix(const char* suffix) { + VIXL_ASSERT(suffix != NULL); + cpu_features_suffix_ = suffix; + } + + // By default, addresses are printed as simple, unsigned 64-bit hex values. + // + // With `PrintSignedAddresses(true)`: + // - negative addresses are printed as "-0x1234...", + // - positive addresses have a leading space, like " 0x1234...", to maintain + // alignment. + // + // This is most useful in combination with Disassembler::MapCodeAddress(...). + void PrintSignedAddresses(bool s) { signed_addresses_ = s; } + + protected: + virtual void ProcessOutput(const Instruction* instr) VIXL_OVERRIDE; + + CPUFeaturesAuditor* cpu_features_auditor_; + const char* cpu_features_prefix_; + const char* cpu_features_suffix_; + bool signed_addresses_; + + private: + FILE* stream_; +}; +} // namespace aarch64 +} // namespace vixl + +#endif // VIXL_AARCH64_DISASM_AARCH64_H diff --git a/3rdparty/vixl/include/vixl/aarch64/instructions-aarch64.h b/3rdparty/vixl/include/vixl/aarch64/instructions-aarch64.h new file mode 100644 index 0000000000..0834a039b5 --- /dev/null +++ b/3rdparty/vixl/include/vixl/aarch64/instructions-aarch64.h @@ -0,0 +1,1148 @@ +// Copyright 2015, VIXL authors +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// * Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// * Neither the name of ARM Limited nor the names of its contributors may be +// used to endorse or promote products derived from this software without +// specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS CONTRIBUTORS "AS IS" AND +// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE +// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +#ifndef VIXL_AARCH64_INSTRUCTIONS_AARCH64_H_ +#define VIXL_AARCH64_INSTRUCTIONS_AARCH64_H_ + +#include "../globals-vixl.h" +#include "../utils-vixl.h" + +#include "constants-aarch64.h" + +#ifdef __clang__ +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wdeprecated-enum-enum-conversion" +#endif + +namespace vixl { +namespace aarch64 { +// ISA constants. 
-------------------------------------------------------------- + +typedef uint32_t Instr; +const unsigned kInstructionSize = 4; +const unsigned kInstructionSizeLog2 = 2; +const unsigned kLiteralEntrySize = 4; +const unsigned kLiteralEntrySizeLog2 = 2; +const unsigned kMaxLoadLiteralRange = 1 * MBytes; + +// This is the nominal page size (as used by the adrp instruction); the actual +// size of the memory pages allocated by the kernel is likely to differ. +const unsigned kPageSize = 4 * KBytes; +const unsigned kPageSizeLog2 = 12; + +const unsigned kBRegSize = 8; +const unsigned kBRegSizeLog2 = 3; +const unsigned kBRegSizeInBytes = kBRegSize / 8; +const unsigned kBRegSizeInBytesLog2 = kBRegSizeLog2 - 3; +const unsigned kHRegSize = 16; +const unsigned kHRegSizeLog2 = 4; +const unsigned kHRegSizeInBytes = kHRegSize / 8; +const unsigned kHRegSizeInBytesLog2 = kHRegSizeLog2 - 3; +const unsigned kWRegSize = 32; +const unsigned kWRegSizeLog2 = 5; +const unsigned kWRegSizeInBytes = kWRegSize / 8; +const unsigned kWRegSizeInBytesLog2 = kWRegSizeLog2 - 3; +const unsigned kXRegSize = 64; +const unsigned kXRegSizeLog2 = 6; +const unsigned kXRegSizeInBytes = kXRegSize / 8; +const unsigned kXRegSizeInBytesLog2 = kXRegSizeLog2 - 3; +const unsigned kSRegSize = 32; +const unsigned kSRegSizeLog2 = 5; +const unsigned kSRegSizeInBytes = kSRegSize / 8; +const unsigned kSRegSizeInBytesLog2 = kSRegSizeLog2 - 3; +const unsigned kDRegSize = 64; +const unsigned kDRegSizeLog2 = 6; +const unsigned kDRegSizeInBytes = kDRegSize / 8; +const unsigned kDRegSizeInBytesLog2 = kDRegSizeLog2 - 3; +const unsigned kQRegSize = 128; +const unsigned kQRegSizeLog2 = 7; +const unsigned kQRegSizeInBytes = kQRegSize / 8; +const unsigned kQRegSizeInBytesLog2 = kQRegSizeLog2 - 3; +const uint64_t kWRegMask = UINT64_C(0xffffffff); +const uint64_t kXRegMask = UINT64_C(0xffffffffffffffff); +const uint64_t kHRegMask = UINT64_C(0xffff); +const uint64_t kSRegMask = UINT64_C(0xffffffff); +const uint64_t kDRegMask = UINT64_C(0xffffffffffffffff); +const uint64_t kHSignMask = UINT64_C(0x8000); +const uint64_t kSSignMask = UINT64_C(0x80000000); +const uint64_t kDSignMask = UINT64_C(0x8000000000000000); +const uint64_t kWSignMask = UINT64_C(0x80000000); +const uint64_t kXSignMask = UINT64_C(0x8000000000000000); +const uint64_t kByteMask = UINT64_C(0xff); +const uint64_t kHalfWordMask = UINT64_C(0xffff); +const uint64_t kWordMask = UINT64_C(0xffffffff); +const uint64_t kXMaxUInt = UINT64_C(0xffffffffffffffff); +const uint64_t kWMaxUInt = UINT64_C(0xffffffff); +const uint64_t kHMaxUInt = UINT64_C(0xffff); +// Define k*MinInt with "-k*MaxInt - 1", because the hexadecimal representation +// (e.g. "INT32_C(0x80000000)") has implementation-defined behaviour. 
+const int64_t kXMaxInt = INT64_C(0x7fffffffffffffff); +const int64_t kXMinInt = -kXMaxInt - 1; +const int32_t kWMaxInt = INT32_C(0x7fffffff); +const int32_t kWMinInt = -kWMaxInt - 1; +const int16_t kHMaxInt = INT16_C(0x7fff); +const int16_t kHMinInt = -kHMaxInt - 1; +const unsigned kFpRegCode = 29; +const unsigned kLinkRegCode = 30; +const unsigned kSpRegCode = 31; +const unsigned kZeroRegCode = 31; +const unsigned kSPRegInternalCode = 63; +const unsigned kRegCodeMask = 0x1f; + +const unsigned kAtomicAccessGranule = 16; + +const unsigned kAddressTagOffset = 56; +const unsigned kAddressTagWidth = 8; +const uint64_t kAddressTagMask = ((UINT64_C(1) << kAddressTagWidth) - 1) + << kAddressTagOffset; +VIXL_STATIC_ASSERT(kAddressTagMask == UINT64_C(0xff00000000000000)); + +const uint64_t kTTBRMask = UINT64_C(1) << 55; + +// We can't define a static kZRegSize because the size depends on the +// implementation. However, it is sometimes useful to know the minimum and +// maximum possible sizes. +const unsigned kZRegMinSize = 128; +const unsigned kZRegMinSizeLog2 = 7; +const unsigned kZRegMinSizeInBytes = kZRegMinSize / 8; +const unsigned kZRegMinSizeInBytesLog2 = kZRegMinSizeLog2 - 3; +const unsigned kZRegMaxSize = 2048; +const unsigned kZRegMaxSizeLog2 = 11; +const unsigned kZRegMaxSizeInBytes = kZRegMaxSize / 8; +const unsigned kZRegMaxSizeInBytesLog2 = kZRegMaxSizeLog2 - 3; + +// The P register size depends on the Z register size. +const unsigned kZRegBitsPerPRegBit = kBitsPerByte; +const unsigned kZRegBitsPerPRegBitLog2 = 3; +const unsigned kPRegMinSize = kZRegMinSize / kZRegBitsPerPRegBit; +const unsigned kPRegMinSizeLog2 = kZRegMinSizeLog2 - 3; +const unsigned kPRegMinSizeInBytes = kPRegMinSize / 8; +const unsigned kPRegMinSizeInBytesLog2 = kPRegMinSizeLog2 - 3; +const unsigned kPRegMaxSize = kZRegMaxSize / kZRegBitsPerPRegBit; +const unsigned kPRegMaxSizeLog2 = kZRegMaxSizeLog2 - 3; +const unsigned kPRegMaxSizeInBytes = kPRegMaxSize / 8; +const unsigned kPRegMaxSizeInBytesLog2 = kPRegMaxSizeLog2 - 3; + +const unsigned kMTETagGranuleInBytes = 16; +const unsigned kMTETagGranuleInBytesLog2 = 4; +const unsigned kMTETagWidth = 4; + +// Make these moved float constants backwards compatible +// with explicit vixl::aarch64:: namespace references. +using vixl::kDoubleMantissaBits; +using vixl::kDoubleExponentBits; +using vixl::kFloatMantissaBits; +using vixl::kFloatExponentBits; +using vixl::kFloat16MantissaBits; +using vixl::kFloat16ExponentBits; + +using vixl::kFP16PositiveInfinity; +using vixl::kFP16NegativeInfinity; +using vixl::kFP32PositiveInfinity; +using vixl::kFP32NegativeInfinity; +using vixl::kFP64PositiveInfinity; +using vixl::kFP64NegativeInfinity; + +using vixl::kFP16DefaultNaN; +using vixl::kFP32DefaultNaN; +using vixl::kFP64DefaultNaN; + +unsigned CalcLSDataSize(LoadStoreOp op); +unsigned CalcLSPairDataSize(LoadStorePairOp op); + +enum ImmBranchType { + UnknownBranchType = 0, + CondBranchType = 1, + UncondBranchType = 2, + CompareBranchType = 3, + TestBranchType = 4 +}; + +enum AddrMode { Offset, PreIndex, PostIndex }; + +enum Reg31Mode { Reg31IsStackPointer, Reg31IsZeroRegister }; + +enum VectorFormat { + kFormatUndefined = 0xffffffff, + kFormat8B = NEON_8B, + kFormat16B = NEON_16B, + kFormat4H = NEON_4H, + kFormat8H = NEON_8H, + kFormat2S = NEON_2S, + kFormat4S = NEON_4S, + kFormat1D = NEON_1D, + kFormat2D = NEON_2D, + + // Scalar formats. 
We add the scalar bit to distinguish between scalar and + // vector enumerations; the bit is always set in the encoding of scalar ops + // and always clear for vector ops. Although kFormatD and kFormat1D appear + // to be the same, their meaning is subtly different. The first is a scalar + // operation, the second a vector operation that only affects one lane. + kFormatB = NEON_B | NEONScalar, + kFormatH = NEON_H | NEONScalar, + kFormatS = NEON_S | NEONScalar, + kFormatD = NEON_D | NEONScalar, + + // An artificial value, used to distinguish from NEON format category. + kFormatSVE = 0x0000fffd, + // Artificial values. Q and O lane sizes aren't encoded in the usual size + // field. + kFormatSVEQ = 0x00080000, + kFormatSVEO = 0x00040000, + + // Vector element width of SVE register with the unknown lane count since + // the vector length is implementation dependent. + kFormatVnB = SVE_B | kFormatSVE, + kFormatVnH = SVE_H | kFormatSVE, + kFormatVnS = SVE_S | kFormatSVE, + kFormatVnD = SVE_D | kFormatSVE, + kFormatVnQ = kFormatSVEQ | kFormatSVE, + kFormatVnO = kFormatSVEO | kFormatSVE, + + // An artificial value, used by simulator trace tests and a few oddball + // instructions (such as FMLAL). + kFormat2H = 0xfffffffe +}; + +// Instructions. --------------------------------------------------------------- + +class Instruction { + public: + Instr GetInstructionBits() const { + return *(reinterpret_cast<const Instr*>(this)); + } + VIXL_DEPRECATED("GetInstructionBits", Instr InstructionBits() const) { + return GetInstructionBits(); + } + + void SetInstructionBits(Instr new_instr) { + *(reinterpret_cast<Instr*>(this)) = new_instr; + } + + int ExtractBit(int pos) const { return (GetInstructionBits() >> pos) & 1; } + VIXL_DEPRECATED("ExtractBit", int Bit(int pos) const) { + return ExtractBit(pos); + } + + uint32_t ExtractBits(int msb, int lsb) const { + return ExtractUnsignedBitfield32(msb, lsb, GetInstructionBits()); + } + VIXL_DEPRECATED("ExtractBits", uint32_t Bits(int msb, int lsb) const) { + return ExtractBits(msb, lsb); + } + + // Compress bit extraction operation from Hacker's Delight. + // https://github.com/hcs0/Hackers-Delight/blob/master/compress.c.txt + uint32_t Compress(uint32_t mask) const { + uint32_t mk, mp, mv, t; + uint32_t x = GetInstructionBits() & mask; // Clear irrelevant bits. + mk = ~mask << 1; // We will count 0's to right. + for (int i = 0; i < 5; i++) { + mp = mk ^ (mk << 1); // Parallel suffix. + mp = mp ^ (mp << 2); + mp = mp ^ (mp << 4); + mp = mp ^ (mp << 8); + mp = mp ^ (mp << 16); + mv = mp & mask; // Bits to move. + mask = (mask ^ mv) | (mv >> (1 << i)); // Compress mask. + t = x & mv; + x = (x ^ t) | (t >> (1 << i)); // Compress x. + mk = mk & ~mp; + } + return x; + } + + template <uint32_t M> + uint32_t ExtractBits() const { + return Compress(M); + } + + uint32_t ExtractBitsAbsent() const { + VIXL_UNREACHABLE(); + return 0; + } + + template <uint32_t M, uint32_t V> + uint32_t IsMaskedValue() const { + return (Mask(M) == V) ?
1 : 0;
+  }
+
+  uint32_t IsMaskedValueAbsent() const {
+    VIXL_UNREACHABLE();
+    return 0;
+  }
+
+  int32_t ExtractSignedBits(int msb, int lsb) const {
+    int32_t bits = *(reinterpret_cast<const int32_t*>(this));
+    return ExtractSignedBitfield32(msb, lsb, bits);
+  }
+  VIXL_DEPRECATED("ExtractSignedBits",
+                  int32_t SignedBits(int msb, int lsb) const) {
+    return ExtractSignedBits(msb, lsb);
+  }
+
+  Instr Mask(uint32_t mask) const {
+    VIXL_ASSERT(mask != 0);
+    return GetInstructionBits() & mask;
+  }
+
+#define DEFINE_GETTER(Name, HighBit, LowBit, Func)                  \
+  int32_t Get##Name() const { return this->Func(HighBit, LowBit); } \
+  VIXL_DEPRECATED("Get" #Name, int32_t Name() const) { return Get##Name(); }
+  INSTRUCTION_FIELDS_LIST(DEFINE_GETTER)
+#undef DEFINE_GETTER
+
+  template <int msb, int lsb>
+  int32_t GetRx() const {
+    // We don't have any register fields wider than five bits, so the result
+    // will always fit into an int32_t.
+    VIXL_ASSERT((msb - lsb + 1) <= 5);
+    return this->ExtractBits(msb, lsb);
+  }
+
+  VectorFormat GetSVEVectorFormat(int field_lsb = 22) const {
+    VIXL_ASSERT((field_lsb >= 0) && (field_lsb <= 30));
+    uint32_t instr = ExtractUnsignedBitfield32(field_lsb + 1,
+                                               field_lsb,
+                                               GetInstructionBits())
+                     << 22;
+    switch (instr & SVESizeFieldMask) {
+      case SVE_B:
+        return kFormatVnB;
+      case SVE_H:
+        return kFormatVnH;
+      case SVE_S:
+        return kFormatVnS;
+      case SVE_D:
+        return kFormatVnD;
+    }
+    VIXL_UNREACHABLE();
+    return kFormatUndefined;
+  }
+
+  // ImmPCRel is a compound field (not present in INSTRUCTION_FIELDS_LIST),
+  // formed from ImmPCRelLo and ImmPCRelHi.
+  int GetImmPCRel() const {
+    uint32_t hi = static_cast<uint32_t>(GetImmPCRelHi());
+    uint32_t lo = GetImmPCRelLo();
+    uint32_t offset = (hi << ImmPCRelLo_width) | lo;
+    int width = ImmPCRelLo_width + ImmPCRelHi_width;
+    return ExtractSignedBitfield32(width - 1, 0, offset);
+  }
+  VIXL_DEPRECATED("GetImmPCRel", int ImmPCRel() const) { return GetImmPCRel(); }
+
+  // ImmLSPAC is a compound field (not present in INSTRUCTION_FIELDS_LIST),
+  // formed from ImmLSPACLo and ImmLSPACHi.
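+  // (Editorial note, illustrative) For the pointer-authenticated loads that
+  // use this field (e.g. LDRAA), the accessor below reconstructs the byte
+  // offset as SignExtend(hi:lo) << 3, i.e. the encoded value scaled by the
+  // 8-byte access size.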
+ int GetImmLSPAC() const { + uint32_t hi = static_cast(GetImmLSPACHi()); + uint32_t lo = GetImmLSPACLo(); + uint32_t offset = (hi << ImmLSPACLo_width) | lo; + int width = ImmLSPACLo_width + ImmLSPACHi_width; + return ExtractSignedBitfield32(width - 1, 0, offset) << 3; + } + + uint64_t GetImmLogical() const; + VIXL_DEPRECATED("GetImmLogical", uint64_t ImmLogical() const) { + return GetImmLogical(); + } + uint64_t GetSVEImmLogical() const; + int GetSVEBitwiseImmLaneSizeInBytesLog2() const; + uint64_t DecodeImmBitMask(int32_t n, + int32_t imm_s, + int32_t imm_r, + int32_t size) const; + + std::pair GetSVEPermuteIndexAndLaneSizeLog2() const; + + std::pair GetSVEMulZmAndIndex() const; + std::pair GetSVEMulLongZmAndIndex() const; + + std::pair GetSVEImmShiftAndLaneSizeLog2(bool is_predicated) const; + + int GetSVEExtractImmediate() const; + + int GetSVEMsizeFromDtype(bool is_signed, int dtype_h_lsb = 23) const; + + int GetSVEEsizeFromDtype(bool is_signed, int dtype_l_lsb = 21) const; + + + unsigned GetImmNEONabcdefgh() const; + VIXL_DEPRECATED("GetImmNEONabcdefgh", unsigned ImmNEONabcdefgh() const) { + return GetImmNEONabcdefgh(); + } + + Float16 GetImmFP16() const; + + float GetImmFP32() const; + VIXL_DEPRECATED("GetImmFP32", float ImmFP32() const) { return GetImmFP32(); } + + double GetImmFP64() const; + VIXL_DEPRECATED("GetImmFP64", double ImmFP64() const) { return GetImmFP64(); } + + Float16 GetImmNEONFP16() const; + + float GetImmNEONFP32() const; + VIXL_DEPRECATED("GetImmNEONFP32", float ImmNEONFP32() const) { + return GetImmNEONFP32(); + } + + double GetImmNEONFP64() const; + VIXL_DEPRECATED("GetImmNEONFP64", double ImmNEONFP64() const) { + return GetImmNEONFP64(); + } + + Float16 GetSVEImmFP16() const { return Imm8ToFloat16(ExtractBits(12, 5)); } + + float GetSVEImmFP32() const { return Imm8ToFP32(ExtractBits(12, 5)); } + + double GetSVEImmFP64() const { return Imm8ToFP64(ExtractBits(12, 5)); } + + static Float16 Imm8ToFloat16(uint32_t imm8); + static float Imm8ToFP32(uint32_t imm8); + static double Imm8ToFP64(uint32_t imm8); + + unsigned GetSizeLS() const { + return CalcLSDataSize(static_cast(Mask(LoadStoreMask))); + } + VIXL_DEPRECATED("GetSizeLS", unsigned SizeLS() const) { return GetSizeLS(); } + + unsigned GetSizeLSPair() const { + return CalcLSPairDataSize( + static_cast(Mask(LoadStorePairMask))); + } + VIXL_DEPRECATED("GetSizeLSPair", unsigned SizeLSPair() const) { + return GetSizeLSPair(); + } + + int GetNEONLSIndex(int access_size_shift) const { + int64_t q = GetNEONQ(); + int64_t s = GetNEONS(); + int64_t size = GetNEONLSSize(); + int64_t index = (q << 3) | (s << 2) | size; + return static_cast(index >> access_size_shift); + } + VIXL_DEPRECATED("GetNEONLSIndex", + int NEONLSIndex(int access_size_shift) const) { + return GetNEONLSIndex(access_size_shift); + } + + // Helpers. 
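+  // (Editorial note, illustrative) The predicates below are typically used to
+  // classify instructions when walking a code buffer, e.g.:
+  //
+  //   if (instr->IsLoadOrStore()) {
+  //     // ... inspect the addressing mode ...
+  //   } else if (instr->IsImmBranch()) {
+  //     const Instruction* target = instr->GetImmPCOffsetTarget();
+  //   }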
+ bool IsCondBranchImm() const { + return Mask(ConditionalBranchFMask) == ConditionalBranchFixed; + } + + bool IsUncondBranchImm() const { + return Mask(UnconditionalBranchFMask) == UnconditionalBranchFixed; + } + + bool IsCompareBranch() const { + return Mask(CompareBranchFMask) == CompareBranchFixed; + } + + bool IsTestBranch() const { return Mask(TestBranchFMask) == TestBranchFixed; } + + bool IsImmBranch() const { return GetBranchType() != UnknownBranchType; } + + bool IsPCRelAddressing() const { + return Mask(PCRelAddressingFMask) == PCRelAddressingFixed; + } + + bool IsLogicalImmediate() const { + return Mask(LogicalImmediateFMask) == LogicalImmediateFixed; + } + + bool IsAddSubImmediate() const { + return Mask(AddSubImmediateFMask) == AddSubImmediateFixed; + } + + bool IsAddSubExtended() const { + return Mask(AddSubExtendedFMask) == AddSubExtendedFixed; + } + + bool IsLoadOrStore() const { + return Mask(LoadStoreAnyFMask) == LoadStoreAnyFixed; + } + + // True if `this` is valid immediately after the provided movprfx instruction. + bool CanTakeSVEMovprfx(uint32_t form_hash, Instruction const* movprfx) const; + bool CanTakeSVEMovprfx(const char* form, Instruction const* movprfx) const; + + bool IsLoad() const; + bool IsStore() const; + + bool IsLoadLiteral() const { + // This includes PRFM_lit. + return Mask(LoadLiteralFMask) == LoadLiteralFixed; + } + + bool IsMovn() const { + return (Mask(MoveWideImmediateMask) == MOVN_x) || + (Mask(MoveWideImmediateMask) == MOVN_w); + } + + bool IsException() const { return Mask(ExceptionFMask) == ExceptionFixed; } + + bool IsPAuth() const { return Mask(SystemPAuthFMask) == SystemPAuthFixed; } + + bool IsBti() const { + if (Mask(SystemHintFMask) == SystemHintFixed) { + int imm_hint = GetImmHint(); + switch (imm_hint) { + case BTI: + case BTI_c: + case BTI_j: + case BTI_jc: + return true; + } + } + return false; + } + + bool IsMOPSPrologueOf(const Instruction* instr, uint32_t mops_type) const { + VIXL_ASSERT((mops_type == "set"_h) || (mops_type == "setg"_h) || + (mops_type == "cpy"_h)); + const int op_lsb = (mops_type == "cpy"_h) ? 22 : 14; + return GetInstructionBits() == instr->Mask(~(0x3U << op_lsb)); + } + + bool IsMOPSMainOf(const Instruction* instr, uint32_t mops_type) const { + VIXL_ASSERT((mops_type == "set"_h) || (mops_type == "setg"_h) || + (mops_type == "cpy"_h)); + const int op_lsb = (mops_type == "cpy"_h) ? 22 : 14; + return GetInstructionBits() == + (instr->Mask(~(0x3U << op_lsb)) | (0x1 << op_lsb)); + } + + bool IsMOPSEpilogueOf(const Instruction* instr, uint32_t mops_type) const { + VIXL_ASSERT((mops_type == "set"_h) || (mops_type == "setg"_h) || + (mops_type == "cpy"_h)); + const int op_lsb = (mops_type == "cpy"_h) ? 22 : 14; + return GetInstructionBits() == + (instr->Mask(~(0x3U << op_lsb)) | (0x2 << op_lsb)); + } + + template + bool IsConsistentMOPSTriplet() const { + VIXL_STATIC_ASSERT((mops_type == "set"_h) || (mops_type == "setg"_h) || + (mops_type == "cpy"_h)); + + int64_t isize = static_cast(kInstructionSize); + const Instruction* prev2 = GetInstructionAtOffset(-2 * isize); + const Instruction* prev1 = GetInstructionAtOffset(-1 * isize); + const Instruction* next1 = GetInstructionAtOffset(1 * isize); + const Instruction* next2 = GetInstructionAtOffset(2 * isize); + + // Use the encoding of the current instruction to determine the expected + // adjacent instructions. NB. this doesn't check if the nearby instructions + // are MOPS-type, but checks that they form a consistent triplet if they + // are. 
For example, 'mov x0, #0; mov x0, #512; mov x0, #1024' is a + // consistent triplet, but they are not MOPS instructions. + const int op_lsb = (mops_type == "cpy"_h) ? 22 : 14; + const uint32_t kMOPSOpfield = 0x3 << op_lsb; + const uint32_t kMOPSPrologue = 0; + const uint32_t kMOPSMain = 0x1 << op_lsb; + const uint32_t kMOPSEpilogue = 0x2 << op_lsb; + switch (Mask(kMOPSOpfield)) { + case kMOPSPrologue: + return next1->IsMOPSMainOf(this, mops_type) && + next2->IsMOPSEpilogueOf(this, mops_type); + case kMOPSMain: + return prev1->IsMOPSPrologueOf(this, mops_type) && + next1->IsMOPSEpilogueOf(this, mops_type); + case kMOPSEpilogue: + return prev2->IsMOPSPrologueOf(this, mops_type) && + prev1->IsMOPSMainOf(this, mops_type); + default: + VIXL_ABORT_WITH_MSG("Undefined MOPS operation\n"); + } + } + + static int GetImmBranchRangeBitwidth(ImmBranchType branch_type); + VIXL_DEPRECATED( + "GetImmBranchRangeBitwidth", + static int ImmBranchRangeBitwidth(ImmBranchType branch_type)) { + return GetImmBranchRangeBitwidth(branch_type); + } + + static int32_t GetImmBranchForwardRange(ImmBranchType branch_type); + VIXL_DEPRECATED( + "GetImmBranchForwardRange", + static int32_t ImmBranchForwardRange(ImmBranchType branch_type)) { + return GetImmBranchForwardRange(branch_type); + } + + static bool IsValidImmPCOffset(ImmBranchType branch_type, int64_t offset); + + // Indicate whether Rd can be the stack pointer or the zero register. This + // does not check that the instruction actually has an Rd field. + Reg31Mode GetRdMode() const { + // The following instructions use sp or wsp as Rd: + // Add/sub (immediate) when not setting the flags. + // Add/sub (extended) when not setting the flags. + // Logical (immediate) when not setting the flags. + // Otherwise, r31 is the zero register. + if (IsAddSubImmediate() || IsAddSubExtended()) { + if (Mask(AddSubSetFlagsBit)) { + return Reg31IsZeroRegister; + } else { + return Reg31IsStackPointer; + } + } + if (IsLogicalImmediate()) { + // Of the logical (immediate) instructions, only ANDS (and its aliases) + // can set the flags. The others can all write into sp. + // Note that some logical operations are not available to + // immediate-operand instructions, so we have to combine two masks here. + if (Mask(LogicalImmediateMask & LogicalOpMask) == ANDS) { + return Reg31IsZeroRegister; + } else { + return Reg31IsStackPointer; + } + } + return Reg31IsZeroRegister; + } + VIXL_DEPRECATED("GetRdMode", Reg31Mode RdMode() const) { return GetRdMode(); } + + // Indicate whether Rn can be the stack pointer or the zero register. This + // does not check that the instruction actually has an Rn field. + Reg31Mode GetRnMode() const { + // The following instructions use sp or wsp as Rn: + // All loads and stores. + // Add/sub (immediate). + // Add/sub (extended). + // Otherwise, r31 is the zero register. + if (IsLoadOrStore() || IsAddSubImmediate() || IsAddSubExtended()) { + return Reg31IsStackPointer; + } + return Reg31IsZeroRegister; + } + VIXL_DEPRECATED("GetRnMode", Reg31Mode RnMode() const) { return GetRnMode(); } + + ImmBranchType GetBranchType() const { + if (IsCondBranchImm()) { + return CondBranchType; + } else if (IsUncondBranchImm()) { + return UncondBranchType; + } else if (IsCompareBranch()) { + return CompareBranchType; + } else if (IsTestBranch()) { + return TestBranchType; + } else { + return UnknownBranchType; + } + } + VIXL_DEPRECATED("GetBranchType", ImmBranchType BranchType() const) { + return GetBranchType(); + } + + // Find the target of this instruction. 
'this' may be a branch or a + // PC-relative addressing instruction. + const Instruction* GetImmPCOffsetTarget() const; + VIXL_DEPRECATED("GetImmPCOffsetTarget", + const Instruction* ImmPCOffsetTarget() const) { + return GetImmPCOffsetTarget(); + } + + // Patch a PC-relative offset to refer to 'target'. 'this' may be a branch or + // a PC-relative addressing instruction. + void SetImmPCOffsetTarget(const Instruction* target); + // Patch a literal load instruction to load from 'source'. + void SetImmLLiteral(const Instruction* source); + + // The range of a load literal instruction, expressed as 'instr +- range'. + // The range is actually the 'positive' range; the branch instruction can + // target [instr - range - kInstructionSize, instr + range]. + static const int kLoadLiteralImmBitwidth = 19; + static const int kLoadLiteralRange = + (1 << kLoadLiteralImmBitwidth) / 2 - kInstructionSize; + + // Calculate the address of a literal referred to by a load-literal + // instruction, and return it as the specified type. + // + // The literal itself is safely mutable only if the backing buffer is safely + // mutable. + template + T GetLiteralAddress() const { + uint64_t base_raw = reinterpret_cast(this); + int64_t offset = GetImmLLiteral() * static_cast(kLiteralEntrySize); + uint64_t address_raw = base_raw + offset; + + // Cast the address using a C-style cast. A reinterpret_cast would be + // appropriate, but it can't cast one integral type to another. + T address = (T)(address_raw); + + // Assert that the address can be represented by the specified type. + VIXL_ASSERT((uint64_t)(address) == address_raw); + + return address; + } + template + VIXL_DEPRECATED("GetLiteralAddress", T LiteralAddress() const) { + return GetLiteralAddress(); + } + + uint32_t GetLiteral32() const { + uint32_t literal; + memcpy(&literal, GetLiteralAddress(), sizeof(literal)); + return literal; + } + VIXL_DEPRECATED("GetLiteral32", uint32_t Literal32() const) { + return GetLiteral32(); + } + + uint64_t GetLiteral64() const { + uint64_t literal; + memcpy(&literal, GetLiteralAddress(), sizeof(literal)); + return literal; + } + VIXL_DEPRECATED("GetLiteral64", uint64_t Literal64() const) { + return GetLiteral64(); + } + + float GetLiteralFP32() const { return RawbitsToFloat(GetLiteral32()); } + VIXL_DEPRECATED("GetLiteralFP32", float LiteralFP32() const) { + return GetLiteralFP32(); + } + + double GetLiteralFP64() const { return RawbitsToDouble(GetLiteral64()); } + VIXL_DEPRECATED("GetLiteralFP64", double LiteralFP64() const) { + return GetLiteralFP64(); + } + + Instruction* GetNextInstruction() { return this + kInstructionSize; } + const Instruction* GetNextInstruction() const { + return this + kInstructionSize; + } + VIXL_DEPRECATED("GetNextInstruction", + const Instruction* NextInstruction() const) { + return GetNextInstruction(); + } + + const Instruction* GetInstructionAtOffset(int64_t offset) const { + VIXL_ASSERT(IsWordAligned(this + offset)); + return this + offset; + } + VIXL_DEPRECATED("GetInstructionAtOffset", + const Instruction* InstructionAtOffset(int64_t offset) + const) { + return GetInstructionAtOffset(offset); + } + + template + static Instruction* Cast(T src) { + return reinterpret_cast(src); + } + + template + static const Instruction* CastConst(T src) { + return reinterpret_cast(src); + } + + private: + int GetImmBranch() const; + + void SetPCRelImmTarget(const Instruction* target); + void SetBranchImmTarget(const Instruction* target); +}; + + +// Functions for handling NEON and SVE vector format 
information. + +const int kMaxLanesPerVector = 16; + +VectorFormat VectorFormatHalfWidth(VectorFormat vform); +VectorFormat VectorFormatDoubleWidth(VectorFormat vform); +VectorFormat VectorFormatDoubleLanes(VectorFormat vform); +VectorFormat VectorFormatHalfLanes(VectorFormat vform); +VectorFormat ScalarFormatFromLaneSize(int lane_size_in_bits); +VectorFormat VectorFormatHalfWidthDoubleLanes(VectorFormat vform); +VectorFormat VectorFormatFillQ(VectorFormat vform); +VectorFormat ScalarFormatFromFormat(VectorFormat vform); +VectorFormat SVEFormatFromLaneSizeInBits(int lane_size_in_bits); +VectorFormat SVEFormatFromLaneSizeInBytes(int lane_size_in_bytes); +VectorFormat SVEFormatFromLaneSizeInBytesLog2(int lane_size_in_bytes_log_2); +unsigned RegisterSizeInBitsFromFormat(VectorFormat vform); +unsigned RegisterSizeInBytesFromFormat(VectorFormat vform); +bool IsSVEFormat(VectorFormat vform); +// TODO: Make the return types of these functions consistent. +unsigned LaneSizeInBitsFromFormat(VectorFormat vform); +int LaneSizeInBytesFromFormat(VectorFormat vform); +int LaneSizeInBytesLog2FromFormat(VectorFormat vform); +int LaneCountFromFormat(VectorFormat vform); +int MaxLaneCountFromFormat(VectorFormat vform); +bool IsVectorFormat(VectorFormat vform); +int64_t MaxIntFromFormat(VectorFormat vform); +int64_t MinIntFromFormat(VectorFormat vform); +uint64_t MaxUintFromFormat(VectorFormat vform); + + +// clang-format off +enum NEONFormat { + NF_UNDEF = 0, + NF_8B = 1, + NF_16B = 2, + NF_4H = 3, + NF_8H = 4, + NF_2S = 5, + NF_4S = 6, + NF_1D = 7, + NF_2D = 8, + NF_B = 9, + NF_H = 10, + NF_S = 11, + NF_D = 12 +}; +// clang-format on + +static const unsigned kNEONFormatMaxBits = 6; + +struct NEONFormatMap { + // The bit positions in the instruction to consider. + uint8_t bits[kNEONFormatMaxBits]; + + // Mapping from concatenated bits to format. + NEONFormat map[1 << kNEONFormatMaxBits]; +}; + +class NEONFormatDecoder { + public: + enum SubstitutionMode { kPlaceholder, kFormat }; + + // Construct a format decoder with increasingly specific format maps for each + // substitution. If no format map is specified, the default is the integer + // format map. + explicit NEONFormatDecoder(const Instruction* instr) { + instrbits_ = instr->GetInstructionBits(); + SetFormatMaps(IntegerFormatMap()); + } + NEONFormatDecoder(const Instruction* instr, const NEONFormatMap* format) { + instrbits_ = instr->GetInstructionBits(); + SetFormatMaps(format); + } + NEONFormatDecoder(const Instruction* instr, + const NEONFormatMap* format0, + const NEONFormatMap* format1) { + instrbits_ = instr->GetInstructionBits(); + SetFormatMaps(format0, format1); + } + NEONFormatDecoder(const Instruction* instr, + const NEONFormatMap* format0, + const NEONFormatMap* format1, + const NEONFormatMap* format2) { + instrbits_ = instr->GetInstructionBits(); + SetFormatMaps(format0, format1, format2); + } + + // Set the format mapping for all or individual substitutions. + void SetFormatMaps(const NEONFormatMap* format0, + const NEONFormatMap* format1 = NULL, + const NEONFormatMap* format2 = NULL) { + VIXL_ASSERT(format0 != NULL); + formats_[0] = format0; + formats_[1] = (format1 == NULL) ? formats_[0] : format1; + formats_[2] = (format2 == NULL) ? formats_[1] : format2; + } + void SetFormatMap(unsigned index, const NEONFormatMap* format) { + VIXL_ASSERT(index <= ArrayLength(formats_)); + VIXL_ASSERT(format != NULL); + formats_[index] = format; + } + + // Substitute %s in the input string with the placeholder string for each + // register, ie. 
"'B", "'H", etc. + const char* SubstitutePlaceholders(const char* string) { + return Substitute(string, kPlaceholder, kPlaceholder, kPlaceholder); + } + + // Substitute %s in the input string with a new string based on the + // substitution mode. + const char* Substitute(const char* string, + SubstitutionMode mode0 = kFormat, + SubstitutionMode mode1 = kFormat, + SubstitutionMode mode2 = kFormat) { + const char* subst0 = GetSubstitute(0, mode0); + const char* subst1 = GetSubstitute(1, mode1); + const char* subst2 = GetSubstitute(2, mode2); + + if ((subst0 == NULL) || (subst1 == NULL) || (subst2 == NULL)) { + return NULL; + } + + snprintf(form_buffer_, + sizeof(form_buffer_), + string, + subst0, + subst1, + subst2); + return form_buffer_; + } + + // Append a "2" to a mnemonic string based on the state of the Q bit. + const char* Mnemonic(const char* mnemonic) { + if ((mnemonic != NULL) && (instrbits_ & NEON_Q) != 0) { + snprintf(mne_buffer_, sizeof(mne_buffer_), "%s2", mnemonic); + return mne_buffer_; + } + return mnemonic; + } + + VectorFormat GetVectorFormat(int format_index = 0) { + return GetVectorFormat(formats_[format_index]); + } + + VectorFormat GetVectorFormat(const NEONFormatMap* format_map) { + static const VectorFormat vform[] = {kFormatUndefined, + kFormat8B, + kFormat16B, + kFormat4H, + kFormat8H, + kFormat2S, + kFormat4S, + kFormat1D, + kFormat2D, + kFormatB, + kFormatH, + kFormatS, + kFormatD}; + VIXL_ASSERT(GetNEONFormat(format_map) < ArrayLength(vform)); + return vform[GetNEONFormat(format_map)]; + } + + // Built in mappings for common cases. + + // The integer format map uses three bits (Q, size<1:0>) to encode the + // "standard" set of NEON integer vector formats. + static const NEONFormatMap* IntegerFormatMap() { + static const NEONFormatMap map = + {{23, 22, 30}, + {NF_8B, NF_16B, NF_4H, NF_8H, NF_2S, NF_4S, NF_UNDEF, NF_2D}}; + return ↦ + } + + // The long integer format map uses two bits (size<1:0>) to encode the + // long set of NEON integer vector formats. These are used in narrow, wide + // and long operations. + static const NEONFormatMap* LongIntegerFormatMap() { + static const NEONFormatMap map = {{23, 22}, {NF_8H, NF_4S, NF_2D}}; + return ↦ + } + + // The FP format map uses two bits (Q, size<0>) to encode the NEON FP vector + // formats: NF_2S, NF_4S, NF_2D. + static const NEONFormatMap* FPFormatMap() { + // The FP format map assumes two bits (Q, size<0>) are used to encode the + // NEON FP vector formats: NF_2S, NF_4S, NF_2D. + static const NEONFormatMap map = {{22, 30}, + {NF_2S, NF_4S, NF_UNDEF, NF_2D}}; + return ↦ + } + + // The FP16 format map uses one bit (Q) to encode the NEON vector format: + // NF_4H, NF_8H. + static const NEONFormatMap* FP16FormatMap() { + static const NEONFormatMap map = {{30}, {NF_4H, NF_8H}}; + return ↦ + } + + // The load/store format map uses three bits (Q, 11, 10) to encode the + // set of NEON vector formats. + static const NEONFormatMap* LoadStoreFormatMap() { + static const NEONFormatMap map = + {{11, 10, 30}, + {NF_8B, NF_16B, NF_4H, NF_8H, NF_2S, NF_4S, NF_1D, NF_2D}}; + return ↦ + } + + // The logical format map uses one bit (Q) to encode the NEON vector format: + // NF_8B, NF_16B. + static const NEONFormatMap* LogicalFormatMap() { + static const NEONFormatMap map = {{30}, {NF_8B, NF_16B}}; + return ↦ + } + + // The triangular format map uses between two and five bits to encode the NEON + // vector format: + // xxx10->8B, xxx11->16B, xx100->4H, xx101->8H + // x1000->2S, x1001->4S, 10001->2D, all others undefined. 
+ static const NEONFormatMap* TriangularFormatMap() { + static const NEONFormatMap map = + {{19, 18, 17, 16, 30}, + {NF_UNDEF, NF_UNDEF, NF_8B, NF_16B, NF_4H, NF_8H, NF_8B, NF_16B, + NF_2S, NF_4S, NF_8B, NF_16B, NF_4H, NF_8H, NF_8B, NF_16B, + NF_UNDEF, NF_2D, NF_8B, NF_16B, NF_4H, NF_8H, NF_8B, NF_16B, + NF_2S, NF_4S, NF_8B, NF_16B, NF_4H, NF_8H, NF_8B, NF_16B}}; + return ↦ + } + + // The shift immediate map uses between two and five bits to encode the NEON + // vector format: + // 00010->8B, 00011->16B, 001x0->4H, 001x1->8H, + // 01xx0->2S, 01xx1->4S, 1xxx1->2D, all others undefined. + static const NEONFormatMap* ShiftImmFormatMap() { + static const NEONFormatMap map = {{22, 21, 20, 19, 30}, + {NF_UNDEF, NF_UNDEF, NF_8B, NF_16B, + NF_4H, NF_8H, NF_4H, NF_8H, + NF_2S, NF_4S, NF_2S, NF_4S, + NF_2S, NF_4S, NF_2S, NF_4S, + NF_UNDEF, NF_2D, NF_UNDEF, NF_2D, + NF_UNDEF, NF_2D, NF_UNDEF, NF_2D, + NF_UNDEF, NF_2D, NF_UNDEF, NF_2D, + NF_UNDEF, NF_2D, NF_UNDEF, NF_2D}}; + return ↦ + } + + // The shift long/narrow immediate map uses between two and four bits to + // encode the NEON vector format: + // 0001->8H, 001x->4S, 01xx->2D, all others undefined. + static const NEONFormatMap* ShiftLongNarrowImmFormatMap() { + static const NEONFormatMap map = + {{22, 21, 20, 19}, + {NF_UNDEF, NF_8H, NF_4S, NF_4S, NF_2D, NF_2D, NF_2D, NF_2D}}; + return ↦ + } + + // The scalar format map uses two bits (size<1:0>) to encode the NEON scalar + // formats: NF_B, NF_H, NF_S, NF_D. + static const NEONFormatMap* ScalarFormatMap() { + static const NEONFormatMap map = {{23, 22}, {NF_B, NF_H, NF_S, NF_D}}; + return ↦ + } + + // The long scalar format map uses two bits (size<1:0>) to encode the longer + // NEON scalar formats: NF_H, NF_S, NF_D. + static const NEONFormatMap* LongScalarFormatMap() { + static const NEONFormatMap map = {{23, 22}, {NF_H, NF_S, NF_D}}; + return ↦ + } + + // The FP scalar format map assumes one bit (size<0>) is used to encode the + // NEON FP scalar formats: NF_S, NF_D. + static const NEONFormatMap* FPScalarFormatMap() { + static const NEONFormatMap map = {{22}, {NF_S, NF_D}}; + return ↦ + } + + // The FP scalar pairwise format map assumes two bits (U, size<0>) are used to + // encode the NEON FP scalar formats: NF_H, NF_S, NF_D. + static const NEONFormatMap* FPScalarPairwiseFormatMap() { + static const NEONFormatMap map = {{29, 22}, {NF_H, NF_UNDEF, NF_S, NF_D}}; + return ↦ + } + + // The triangular scalar format map uses between one and four bits to encode + // the NEON FP scalar formats: + // xxx1->B, xx10->H, x100->S, 1000->D, all others undefined. + static const NEONFormatMap* TriangularScalarFormatMap() { + static const NEONFormatMap map = {{19, 18, 17, 16}, + {NF_UNDEF, + NF_B, + NF_H, + NF_B, + NF_S, + NF_B, + NF_H, + NF_B, + NF_D, + NF_B, + NF_H, + NF_B, + NF_S, + NF_B, + NF_H, + NF_B}}; + return ↦ + } + + private: + // Get a pointer to a string that represents the format or placeholder for + // the specified substitution index, based on the format map and instruction. + const char* GetSubstitute(int index, SubstitutionMode mode) { + if (mode == kFormat) { + return NEONFormatAsString(GetNEONFormat(formats_[index])); + } + VIXL_ASSERT(mode == kPlaceholder); + return NEONFormatAsPlaceholder(GetNEONFormat(formats_[index])); + } + + // Get the NEONFormat enumerated value for bits obtained from the + // instruction based on the specified format mapping. 
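+  // (Editorial note, illustrative) For example, with IntegerFormatMap() a
+  // size field (bits 23:22) of zero and Q (bit 30) set concatenate to the
+  // index 0b001, selecting NF_16B.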
+ NEONFormat GetNEONFormat(const NEONFormatMap* format_map) { + return format_map->map[PickBits(format_map->bits)]; + } + + // Convert a NEONFormat into a string. + static const char* NEONFormatAsString(NEONFormat format) { + // clang-format off + static const char* formats[] = { + NULL, + "8b", "16b", "4h", "8h", "2s", "4s", "1d", "2d", + "b", "h", "s", "d" + }; + // clang-format on + VIXL_ASSERT(format < ArrayLength(formats)); + return formats[format]; + } + + // Convert a NEONFormat into a register placeholder string. + static const char* NEONFormatAsPlaceholder(NEONFormat format) { + VIXL_ASSERT((format == NF_B) || (format == NF_H) || (format == NF_S) || + (format == NF_D) || (format == NF_UNDEF)); + // clang-format off + static const char* formats[] = { + NULL, + NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, + "'B", "'H", "'S", "'D" + }; + // clang-format on + return formats[format]; + } + + // Select bits from instrbits_ defined by the bits array, concatenate them, + // and return the value. + uint8_t PickBits(const uint8_t bits[]) { + uint8_t result = 0; + for (unsigned b = 0; b < kNEONFormatMaxBits; b++) { + if (bits[b] == 0) break; + result <<= 1; + result |= ((instrbits_ & (1 << bits[b])) == 0) ? 0 : 1; + } + return result; + } + + Instr instrbits_; + const NEONFormatMap* formats_[3]; + char form_buffer_[64]; + char mne_buffer_[16]; +}; +} // namespace aarch64 +} // namespace vixl + +#ifdef __clang__ +#pragma clang diagnostic pop +#endif + +#endif // VIXL_AARCH64_INSTRUCTIONS_AARCH64_H_ diff --git a/3rdparty/vixl/include/vixl/aarch64/macro-assembler-aarch64.h b/3rdparty/vixl/include/vixl/aarch64/macro-assembler-aarch64.h new file mode 100644 index 0000000000..2219bb8a1e --- /dev/null +++ b/3rdparty/vixl/include/vixl/aarch64/macro-assembler-aarch64.h @@ -0,0 +1,8809 @@ +// Copyright 2015, VIXL authors +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// * Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// * Neither the name of ARM Limited nor the names of its contributors may be +// used to endorse or promote products derived from this software without +// specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS CONTRIBUTORS "AS IS" AND +// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE +// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
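+// (Editorial note) Illustrative sketch of typical MacroAssembler usage with a
+// caller-provided code buffer; the buffer size and the generated sequence are
+// arbitrary examples, not part of upstream VIXL:
+//
+//   byte buffer[1024];
+//   MacroAssembler masm(buffer, sizeof(buffer));
+//   masm.Mov(x0, 42);
+//   masm.Ret();
+//   masm.FinalizeCode();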
+ +#ifndef VIXL_AARCH64_MACRO_ASSEMBLER_AARCH64_H_ +#define VIXL_AARCH64_MACRO_ASSEMBLER_AARCH64_H_ + +#include +#include + +#include "../code-generation-scopes-vixl.h" +#include "../globals-vixl.h" +#include "../macro-assembler-interface.h" + +#include "assembler-aarch64.h" +// Required for runtime call support. +// TODO: Break this dependency. We should be able to separate out the necessary +// parts so that we don't need to include the whole simulator header. +#include "simulator-aarch64.h" +// Required in order to generate debugging instructions for the simulator. This +// is needed regardless of whether the simulator is included or not, since +// generating simulator specific instructions is controlled at runtime. +#include "simulator-constants-aarch64.h" + + +#define LS_MACRO_LIST(V) \ + V(Ldrb, Register&, rt, LDRB_w) \ + V(Strb, Register&, rt, STRB_w) \ + V(Ldrsb, Register&, rt, rt.Is64Bits() ? LDRSB_x : LDRSB_w) \ + V(Ldrh, Register&, rt, LDRH_w) \ + V(Strh, Register&, rt, STRH_w) \ + V(Ldrsh, Register&, rt, rt.Is64Bits() ? LDRSH_x : LDRSH_w) \ + V(Ldr, CPURegister&, rt, LoadOpFor(rt)) \ + V(Str, CPURegister&, rt, StoreOpFor(rt)) \ + V(Ldrsw, Register&, rt, LDRSW_x) + + +#define LSPAIR_MACRO_LIST(V) \ + V(Ldp, CPURegister&, rt, rt2, LoadPairOpFor(rt, rt2)) \ + V(Stp, CPURegister&, rt, rt2, StorePairOpFor(rt, rt2)) \ + V(Ldpsw, Register&, rt, rt2, LDPSW_x) + +namespace vixl { +namespace aarch64 { + +// Forward declaration +class MacroAssembler; +class UseScratchRegisterScope; + +class Pool { + public: + explicit Pool(MacroAssembler* masm) + : checkpoint_(kNoCheckpointRequired), masm_(masm) { + Reset(); + } + + void Reset() { + checkpoint_ = kNoCheckpointRequired; + monitor_ = 0; + } + + void Block() { monitor_++; } + void Release(); + bool IsBlocked() const { return monitor_ != 0; } + + static const ptrdiff_t kNoCheckpointRequired = PTRDIFF_MAX; + + void SetNextCheckpoint(ptrdiff_t checkpoint); + ptrdiff_t GetCheckpoint() const { return checkpoint_; } + VIXL_DEPRECATED("GetCheckpoint", ptrdiff_t checkpoint() const) { + return GetCheckpoint(); + } + + enum EmitOption { kBranchRequired, kNoBranchRequired }; + + protected: + // Next buffer offset at which a check is required for this pool. + ptrdiff_t checkpoint_; + // Indicates whether the emission of this pool is blocked. + int monitor_; + // The MacroAssembler using this pool. + MacroAssembler* masm_; +}; + + +class LiteralPool : public Pool { + public: + explicit LiteralPool(MacroAssembler* masm); + ~LiteralPool() VIXL_NEGATIVE_TESTING_ALLOW_EXCEPTION; + void Reset(); + + void AddEntry(RawLiteral* literal); + bool IsEmpty() const { return entries_.empty(); } + size_t GetSize() const; + VIXL_DEPRECATED("GetSize", size_t Size() const) { return GetSize(); } + + size_t GetMaxSize() const; + VIXL_DEPRECATED("GetMaxSize", size_t MaxSize() const) { return GetMaxSize(); } + + size_t GetOtherPoolsMaxSize() const; + VIXL_DEPRECATED("GetOtherPoolsMaxSize", size_t OtherPoolsMaxSize() const) { + return GetOtherPoolsMaxSize(); + } + + void CheckEmitFor(size_t amount, EmitOption option = kBranchRequired); + // Check whether we need to emit the literal pool in order to be able to + // safely emit a branch with a given range. 
+ void CheckEmitForBranch(size_t range); + void Emit(EmitOption option = kNoBranchRequired); + + void SetNextRecommendedCheckpoint(ptrdiff_t offset); + ptrdiff_t GetNextRecommendedCheckpoint(); + VIXL_DEPRECATED("GetNextRecommendedCheckpoint", + ptrdiff_t NextRecommendedCheckpoint()) { + return GetNextRecommendedCheckpoint(); + } + + void UpdateFirstUse(ptrdiff_t use_position); + + void DeleteOnDestruction(RawLiteral* literal) { + deleted_on_destruction_.push_back(literal); + } + + // Recommended not exact since the pool can be blocked for short periods. + static const ptrdiff_t kRecommendedLiteralPoolRange = 128 * KBytes; + + private: + std::vector entries_; + size_t size_; + ptrdiff_t first_use_; + // The parent class `Pool` provides a `checkpoint_`, which is the buffer + // offset before which a check *must* occur. This recommended checkpoint + // indicates when we would like to start emitting the constant pool. The + // MacroAssembler can, but does not have to, check the buffer when the + // checkpoint is reached. + ptrdiff_t recommended_checkpoint_; + + std::vector deleted_on_destruction_; +}; + + +inline size_t LiteralPool::GetSize() const { + // Account for the pool header. + return size_ + kInstructionSize; +} + + +inline size_t LiteralPool::GetMaxSize() const { + // Account for the potential branch over the pool. + return GetSize() + kInstructionSize; +} + + +inline ptrdiff_t LiteralPool::GetNextRecommendedCheckpoint() { + return first_use_ + kRecommendedLiteralPoolRange; +} + + +class VeneerPool : public Pool { + public: + explicit VeneerPool(MacroAssembler* masm) : Pool(masm) {} + + void Reset(); + + void Block() { monitor_++; } + void Release(); + bool IsBlocked() const { return monitor_ != 0; } + bool IsEmpty() const { return unresolved_branches_.IsEmpty(); } + + class BranchInfo { + public: + BranchInfo() + : first_unreacheable_pc_(0), + pc_offset_(0), + label_(NULL), + branch_type_(UnknownBranchType) {} + BranchInfo(ptrdiff_t offset, Label* label, ImmBranchType branch_type) + : pc_offset_(offset), label_(label), branch_type_(branch_type) { + first_unreacheable_pc_ = + pc_offset_ + Instruction::GetImmBranchForwardRange(branch_type_); + } + + static bool IsValidComparison(const BranchInfo& branch_1, + const BranchInfo& branch_2) { + // BranchInfo are always compared against against other objects with + // the same branch type. + if (branch_1.branch_type_ != branch_2.branch_type_) { + return false; + } + // Since we should never have two branch infos with the same offsets, it + // first looks like we should check that offsets are different. However + // the operators may also be used to *search* for a branch info in the + // set. + bool same_offsets = (branch_1.pc_offset_ == branch_2.pc_offset_); + return (!same_offsets || ((branch_1.label_ == branch_2.label_) && + (branch_1.first_unreacheable_pc_ == + branch_2.first_unreacheable_pc_))); + } + + // We must provide comparison operators to work with InvalSet. 
+ bool operator==(const BranchInfo& other) const { + VIXL_ASSERT(IsValidComparison(*this, other)); + return pc_offset_ == other.pc_offset_; + } + bool operator<(const BranchInfo& other) const { + VIXL_ASSERT(IsValidComparison(*this, other)); + return pc_offset_ < other.pc_offset_; + } + bool operator<=(const BranchInfo& other) const { + VIXL_ASSERT(IsValidComparison(*this, other)); + return pc_offset_ <= other.pc_offset_; + } + bool operator>(const BranchInfo& other) const { + VIXL_ASSERT(IsValidComparison(*this, other)); + return pc_offset_ > other.pc_offset_; + } + + // First instruction position that is not reachable by the branch using a + // positive branch offset. + ptrdiff_t first_unreacheable_pc_; + // Offset of the branch in the code generation buffer. + ptrdiff_t pc_offset_; + // The label branched to. + Label* label_; + ImmBranchType branch_type_; + }; + + bool BranchTypeUsesVeneers(ImmBranchType type) { + return (type != UnknownBranchType) && (type != UncondBranchType); + } + + void RegisterUnresolvedBranch(ptrdiff_t branch_pos, + Label* label, + ImmBranchType branch_type); + void DeleteUnresolvedBranchInfoForLabel(Label* label); + + bool ShouldEmitVeneer(int64_t first_unreacheable_pc, size_t amount); + bool ShouldEmitVeneers(size_t amount) { + return ShouldEmitVeneer(unresolved_branches_.GetFirstLimit(), amount); + } + + void CheckEmitFor(size_t amount, EmitOption option = kBranchRequired); + void Emit(EmitOption option, size_t margin); + + // The code size generated for a veneer. Currently one branch instruction. + // This is for code size checking purposes, and can be extended in the future + // for example if we decide to add nops between the veneers. + static const int kVeneerCodeSize = 1 * kInstructionSize; + // The maximum size of code other than veneers that can be generated when + // emitting a veneer pool. Currently there can be an additional branch to jump + // over the pool. 
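+  // (Editorial note, illustrative) Consequently, with N unresolved branches
+  // pending, emitting the veneer pool requires at most
+  //
+  //   kPoolNonVeneerCodeSize + N * kVeneerCodeSize
+  //
+  // bytes; this is the bound that GetMaxSize() below computes.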
+ static const int kPoolNonVeneerCodeSize = 1 * kInstructionSize; + + void UpdateNextCheckPoint() { SetNextCheckpoint(GetNextCheckPoint()); } + + int GetNumberOfPotentialVeneers() const { + return static_cast(unresolved_branches_.GetSize()); + } + VIXL_DEPRECATED("GetNumberOfPotentialVeneers", + int NumberOfPotentialVeneers() const) { + return GetNumberOfPotentialVeneers(); + } + + size_t GetMaxSize() const { + return kPoolNonVeneerCodeSize + + unresolved_branches_.GetSize() * kVeneerCodeSize; + } + VIXL_DEPRECATED("GetMaxSize", size_t MaxSize() const) { return GetMaxSize(); } + + size_t GetOtherPoolsMaxSize() const; + VIXL_DEPRECATED("GetOtherPoolsMaxSize", size_t OtherPoolsMaxSize() const) { + return GetOtherPoolsMaxSize(); + } + + static const int kNPreallocatedInfos = 4; + static const ptrdiff_t kInvalidOffset = PTRDIFF_MAX; + static const size_t kReclaimFrom = 128; + static const size_t kReclaimFactor = 16; + + private: + typedef InvalSet + BranchInfoTypedSetBase; + typedef InvalSetIterator BranchInfoTypedSetIterBase; + + class BranchInfoTypedSet : public BranchInfoTypedSetBase { + public: + BranchInfoTypedSet() : BranchInfoTypedSetBase() {} + + ptrdiff_t GetFirstLimit() { + if (empty()) { + return kInvalidOffset; + } + return GetMinElementKey(); + } + VIXL_DEPRECATED("GetFirstLimit", ptrdiff_t FirstLimit()) { + return GetFirstLimit(); + } + }; + + class BranchInfoTypedSetIterator : public BranchInfoTypedSetIterBase { + public: + BranchInfoTypedSetIterator() : BranchInfoTypedSetIterBase(NULL) {} + explicit BranchInfoTypedSetIterator(BranchInfoTypedSet* typed_set) + : BranchInfoTypedSetIterBase(typed_set) {} + + // TODO: Remove these and use the STL-like interface instead. + using BranchInfoTypedSetIterBase::Advance; + using BranchInfoTypedSetIterBase::Current; + }; + + class BranchInfoSet { + public: + void insert(BranchInfo branch_info) { + ImmBranchType type = branch_info.branch_type_; + VIXL_ASSERT(IsValidBranchType(type)); + typed_set_[BranchIndexFromType(type)].insert(branch_info); + } + + void erase(BranchInfo branch_info) { + if (IsValidBranchType(branch_info.branch_type_)) { + int index = + BranchInfoSet::BranchIndexFromType(branch_info.branch_type_); + typed_set_[index].erase(branch_info); + } + } + + size_t GetSize() const { + size_t res = 0; + for (int i = 0; i < kNumberOfTrackedBranchTypes; i++) { + res += typed_set_[i].size(); + } + return res; + } + VIXL_DEPRECATED("GetSize", size_t size() const) { return GetSize(); } + + bool IsEmpty() const { + for (int i = 0; i < kNumberOfTrackedBranchTypes; i++) { + if (!typed_set_[i].empty()) { + return false; + } + } + return true; + } + VIXL_DEPRECATED("IsEmpty", bool empty() const) { return IsEmpty(); } + + ptrdiff_t GetFirstLimit() { + ptrdiff_t res = kInvalidOffset; + for (int i = 0; i < kNumberOfTrackedBranchTypes; i++) { + res = std::min(res, typed_set_[i].GetFirstLimit()); + } + return res; + } + VIXL_DEPRECATED("GetFirstLimit", ptrdiff_t FirstLimit()) { + return GetFirstLimit(); + } + + void Reset() { + for (int i = 0; i < kNumberOfTrackedBranchTypes; i++) { + typed_set_[i].clear(); + } + } + + static ImmBranchType BranchTypeFromIndex(int index) { + switch (index) { + case 0: + return CondBranchType; + case 1: + return CompareBranchType; + case 2: + return TestBranchType; + default: + VIXL_UNREACHABLE(); + return UnknownBranchType; + } + } + static int BranchIndexFromType(ImmBranchType branch_type) { + switch (branch_type) { + case CondBranchType: + return 0; + case CompareBranchType: + return 1; + case TestBranchType: + 
return 2; + default: + VIXL_UNREACHABLE(); + return 0; + } + } + + bool IsValidBranchType(ImmBranchType branch_type) { + return (branch_type != UnknownBranchType) && + (branch_type != UncondBranchType); + } + + private: + static const int kNumberOfTrackedBranchTypes = 3; + BranchInfoTypedSet typed_set_[kNumberOfTrackedBranchTypes]; + + friend class VeneerPool; + friend class BranchInfoSetIterator; + }; + + class BranchInfoSetIterator { + public: + explicit BranchInfoSetIterator(BranchInfoSet* set) : set_(set) { + for (int i = 0; i < BranchInfoSet::kNumberOfTrackedBranchTypes; i++) { + new (&sub_iterator_[i]) + BranchInfoTypedSetIterator(&(set_->typed_set_[i])); + } + } + + VeneerPool::BranchInfo* Current() { + for (int i = 0; i < BranchInfoSet::kNumberOfTrackedBranchTypes; i++) { + if (!sub_iterator_[i].Done()) { + return sub_iterator_[i].Current(); + } + } + VIXL_UNREACHABLE(); + return NULL; + } + + void Advance() { + VIXL_ASSERT(!Done()); + for (int i = 0; i < BranchInfoSet::kNumberOfTrackedBranchTypes; i++) { + if (!sub_iterator_[i].Done()) { + sub_iterator_[i].Advance(); + return; + } + } + VIXL_UNREACHABLE(); + } + + bool Done() const { + for (int i = 0; i < BranchInfoSet::kNumberOfTrackedBranchTypes; i++) { + if (!sub_iterator_[i].Done()) return false; + } + return true; + } + + void AdvanceToNextType() { + VIXL_ASSERT(!Done()); + for (int i = 0; i < BranchInfoSet::kNumberOfTrackedBranchTypes; i++) { + if (!sub_iterator_[i].Done()) { + sub_iterator_[i].Finish(); + return; + } + } + VIXL_UNREACHABLE(); + } + + void DeleteCurrentAndAdvance() { + for (int i = 0; i < BranchInfoSet::kNumberOfTrackedBranchTypes; i++) { + if (!sub_iterator_[i].Done()) { + sub_iterator_[i].DeleteCurrentAndAdvance(); + return; + } + } + } + + private: + BranchInfoSet* set_; + BranchInfoTypedSetIterator + sub_iterator_[BranchInfoSet::kNumberOfTrackedBranchTypes]; + }; + + ptrdiff_t GetNextCheckPoint() { + if (unresolved_branches_.IsEmpty()) { + return kNoCheckpointRequired; + } else { + return unresolved_branches_.GetFirstLimit(); + } + } + VIXL_DEPRECATED("GetNextCheckPoint", ptrdiff_t NextCheckPoint()) { + return GetNextCheckPoint(); + } + + // Information about unresolved (forward) branches. + BranchInfoSet unresolved_branches_; +}; + + +// Helper for common Emission checks. +// The macro-instruction maps to a single instruction. +class SingleEmissionCheckScope : public EmissionCheckScope { + public: + explicit SingleEmissionCheckScope(MacroAssemblerInterface* masm) + : EmissionCheckScope(masm, kInstructionSize) {} +}; + + +// The macro instruction is a "typical" macro-instruction. Typical macro- +// instruction only emit a few instructions, a few being defined as 8 here. +class MacroEmissionCheckScope : public EmissionCheckScope { + public: + explicit MacroEmissionCheckScope(MacroAssemblerInterface* masm) + : EmissionCheckScope(masm, kTypicalMacroInstructionMaxSize) {} + + private: + static const size_t kTypicalMacroInstructionMaxSize = 8 * kInstructionSize; +}; + + +// This scope simplifies the handling of the SVE `movprfx` instruction. +// +// If dst.Aliases(src): +// - Start an ExactAssemblyScope(masm, kInstructionSize). +// Otherwise: +// - Start an ExactAssemblyScope(masm, 2 * kInstructionSize). +// - Generate a suitable `movprfx` instruction. +// +// In both cases, the ExactAssemblyScope is left with enough remaining space for +// exactly one destructive instruction. 
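+// (Editorial note) Illustrative sketch of the intended use inside a macro that
+// implements a three-operand SVE operation on top of a destructive instruction
+// (`xyz` stands for some destructive assembler method, not a real name):
+//
+//   MovprfxHelperScope guard(this, zd, zn);  // may emit "movprfx zd, zn"
+//   xyz(zd, zd, zm);                         // exactly one destructive insn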
+class MovprfxHelperScope : public ExactAssemblyScope { + public: + inline MovprfxHelperScope(MacroAssembler* masm, + const ZRegister& dst, + const ZRegister& src); + + inline MovprfxHelperScope(MacroAssembler* masm, + const ZRegister& dst, + const PRegister& pg, + const ZRegister& src); + + // TODO: Implement constructors that examine _all_ sources. If `dst` aliases + // any other source register, we can't use `movprfx`. This isn't obviously + // useful, but the MacroAssembler should not generate invalid code for it. + // Valid behaviour can be implemented using `mov`. + // + // The best way to handle this in an instruction-agnostic way is probably to + // use variadic templates. + + private: + inline bool ShouldGenerateMovprfx(const ZRegister& dst, + const ZRegister& src) { + VIXL_ASSERT(AreSameLaneSize(dst, src)); + return !dst.Aliases(src); + } + + inline bool ShouldGenerateMovprfx(const ZRegister& dst, + const PRegister& pg, + const ZRegister& src) { + VIXL_ASSERT(pg.IsMerging() || pg.IsZeroing()); + // We need to emit movprfx in two cases: + // 1. To give a predicated merging unary instruction zeroing predication. + // 2. To make destructive instructions constructive. + // + // There are no predicated zeroing instructions that can take movprfx, so we + // will never generate an unnecessary movprfx with this logic. + return pg.IsZeroing() || ShouldGenerateMovprfx(dst, src); + } +}; + + +enum BranchType { + // Copies of architectural conditions. + // The associated conditions can be used in place of those, the code will + // take care of reinterpreting them with the correct type. + integer_eq = eq, + integer_ne = ne, + integer_hs = hs, + integer_lo = lo, + integer_mi = mi, + integer_pl = pl, + integer_vs = vs, + integer_vc = vc, + integer_hi = hi, + integer_ls = ls, + integer_ge = ge, + integer_lt = lt, + integer_gt = gt, + integer_le = le, + integer_al = al, + integer_nv = nv, + + // These two are *different* from the architectural codes al and nv. + // 'always' is used to generate unconditional branches. + // 'never' is used to not generate a branch (generally as the inverse + // branch type of 'always). + always, + never, + // cbz and cbnz + reg_zero, + reg_not_zero, + // tbz and tbnz + reg_bit_clear, + reg_bit_set, + + // Aliases. + kBranchTypeFirstCondition = eq, + kBranchTypeLastCondition = nv, + kBranchTypeFirstUsingReg = reg_zero, + kBranchTypeFirstUsingBit = reg_bit_clear, + + // SVE branch conditions. + integer_none = eq, + integer_any = ne, + integer_nlast = cs, + integer_last = cc, + integer_first = mi, + integer_nfrst = pl, + integer_pmore = hi, + integer_plast = ls, + integer_tcont = ge, + integer_tstop = lt +}; + + +enum DiscardMoveMode { kDontDiscardForSameWReg, kDiscardForSameWReg }; + +// The macro assembler supports moving automatically pre-shifted immediates for +// arithmetic and logical instructions, and then applying a post shift in the +// instruction to undo the modification, in order to reduce the code emitted for +// an operation. For example: +// +// Add(x0, x0, 0x1f7de) => movz x16, 0xfbef; add x0, x0, x16, lsl #1. +// +// This optimisation can be only partially applied when the stack pointer is an +// operand or destination, so this enumeration is used to control the shift. +enum PreShiftImmMode { + kNoShift, // Don't pre-shift. + kLimitShiftForSP, // Limit pre-shift for add/sub extend use. + kAnyShift // Allow any pre-shift. +}; + +enum FPMacroNaNPropagationOption { + // The default option. 
This generates a run-time error in macros that respect + // this option. + NoFPMacroNaNPropagationSelected, + // For example, Fmin(result, NaN(a), NaN(b)) always selects NaN(a) if both + // NaN(a) and NaN(b) are both quiet, or both are signalling, at the + // cost of extra code generation in some cases. + StrictNaNPropagation, + // For example, Fmin(result, NaN(a), NaN(b)) selects either NaN, but using the + // fewest instructions. + FastNaNPropagation +}; + +class MacroAssembler : public Assembler, public MacroAssemblerInterface { + public: + MacroAssembler(byte* buffer, + size_t capacity, + PositionIndependentCodeOption pic = PositionIndependentCode); + ~MacroAssembler(); + + enum FinalizeOption { + kFallThrough, // There may be more code to execute after calling Finalize. + kUnreachable // Anything generated after calling Finalize is unreachable. + }; + + virtual vixl::internal::AssemblerBase* AsAssemblerBase() VIXL_OVERRIDE { + return this; + } + + // TODO(pools): implement these functions. + virtual void EmitPoolHeader() VIXL_OVERRIDE {} + virtual void EmitPoolFooter() VIXL_OVERRIDE {} + virtual void EmitPaddingBytes(int n) VIXL_OVERRIDE { USE(n); } + virtual void EmitNopBytes(int n) VIXL_OVERRIDE { USE(n); } + + // Start generating code from the beginning of the buffer, discarding any code + // and data that has already been emitted into the buffer. + // + // In order to avoid any accidental transfer of state, Reset ASSERTs that the + // constant pool is not blocked. + void Reset(); + + // Finalize a code buffer of generated instructions. This function must be + // called before executing or copying code from the buffer. By default, + // anything generated after this should not be reachable (the last instruction + // generated is an unconditional branch). If you need to generate more code, + // then set `option` to kFallThrough. + void FinalizeCode(FinalizeOption option = kUnreachable); + + + // Constant generation helpers. + // These functions return the number of instructions required to move the + // immediate into the destination register. Also, if the masm pointer is + // non-null, it generates the code to do so. + // The two features are implemented using one function to avoid duplication of + // the logic. + // The function can be used to evaluate the cost of synthesizing an + // instruction using 'mov immediate' instructions. A user might prefer loading + // a constant using the literal pool instead of using multiple 'mov immediate' + // instructions. + static int MoveImmediateHelper(MacroAssembler* masm, + const Register& rd, + uint64_t imm); + + + // Logical macros. + void And(const Register& rd, const Register& rn, const Operand& operand); + void Ands(const Register& rd, const Register& rn, const Operand& operand); + void Bic(const Register& rd, const Register& rn, const Operand& operand); + void Bics(const Register& rd, const Register& rn, const Operand& operand); + void Orr(const Register& rd, const Register& rn, const Operand& operand); + void Orn(const Register& rd, const Register& rn, const Operand& operand); + void Eor(const Register& rd, const Register& rn, const Operand& operand); + void Eon(const Register& rd, const Register& rn, const Operand& operand); + void Tst(const Register& rn, const Operand& operand); + void LogicalMacro(const Register& rd, + const Register& rn, + const Operand& operand, + LogicalOp op); + + // Add and sub macros. 
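+  // (Editorial note, illustrative) For example, Adds(x0, x1, 0x12345678)
+  // cannot use the 12-bit (optionally shifted) add/sub immediate encoding, so
+  // the macro first synthesises the constant into a scratch register and then
+  // emits the flag-setting add in its register form.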
+ void Add(const Register& rd, + const Register& rn, + const Operand& operand, + FlagsUpdate S = LeaveFlags); + void Adds(const Register& rd, const Register& rn, const Operand& operand); + void Sub(const Register& rd, + const Register& rn, + const Operand& operand, + FlagsUpdate S = LeaveFlags); + void Subs(const Register& rd, const Register& rn, const Operand& operand); + void Cmn(const Register& rn, const Operand& operand); + void Cmp(const Register& rn, const Operand& operand); + void Neg(const Register& rd, const Operand& operand); + void Negs(const Register& rd, const Operand& operand); + + void AddSubMacro(const Register& rd, + const Register& rn, + const Operand& operand, + FlagsUpdate S, + AddSubOp op); + + // Add/sub with carry macros. + void Adc(const Register& rd, const Register& rn, const Operand& operand); + void Adcs(const Register& rd, const Register& rn, const Operand& operand); + void Sbc(const Register& rd, const Register& rn, const Operand& operand); + void Sbcs(const Register& rd, const Register& rn, const Operand& operand); + void Ngc(const Register& rd, const Operand& operand); + void Ngcs(const Register& rd, const Operand& operand); + void AddSubWithCarryMacro(const Register& rd, + const Register& rn, + const Operand& operand, + FlagsUpdate S, + AddSubWithCarryOp op); + + void Rmif(const Register& xn, unsigned shift, StatusFlags flags); + void Setf8(const Register& wn); + void Setf16(const Register& wn); + + // Move macros. + void Mov(const Register& rd, uint64_t imm); + void Mov(const Register& rd, + const Operand& operand, + DiscardMoveMode discard_mode = kDontDiscardForSameWReg); + void Mvn(const Register& rd, uint64_t imm) { + Mov(rd, (rd.GetSizeInBits() == kXRegSize) ? ~imm : (~imm & kWRegMask)); + } + void Mvn(const Register& rd, const Operand& operand); + + // Try to move an immediate into the destination register in a single + // instruction. Returns true for success, and updates the contents of dst. + // Returns false, otherwise. + bool TryOneInstrMoveImmediate(const Register& dst, uint64_t imm); + + // Move an immediate into register dst, and return an Operand object for + // use with a subsequent instruction that accepts a shift. The value moved + // into dst is not necessarily equal to imm; it may have had a shifting + // operation applied to it that will be subsequently undone by the shift + // applied in the Operand. + Operand MoveImmediateForShiftedOp(const Register& dst, + uint64_t imm, + PreShiftImmMode mode); + + void Move(const GenericOperand& dst, const GenericOperand& src); + + // Synthesises the address represented by a MemOperand into a register. + void ComputeAddress(const Register& dst, const MemOperand& mem_op); + + // Conditional macros. + void Ccmp(const Register& rn, + const Operand& operand, + StatusFlags nzcv, + Condition cond); + void Ccmn(const Register& rn, + const Operand& operand, + StatusFlags nzcv, + Condition cond); + void ConditionalCompareMacro(const Register& rn, + const Operand& operand, + StatusFlags nzcv, + Condition cond, + ConditionalCompareOp op); + + // On return, the boolean values pointed to will indicate whether `left` and + // `right` should be synthesised in a temporary register. + static void GetCselSynthesisInformation(const Register& rd, + const Operand& left, + const Operand& right, + bool* should_synthesise_left, + bool* should_synthesise_right) { + // Note that the helper does not need to look at the condition. 
+ CselHelper(NULL, + rd, + left, + right, + eq, + should_synthesise_left, + should_synthesise_right); + } + + void Csel(const Register& rd, + const Operand& left, + const Operand& right, + Condition cond) { + CselHelper(this, rd, left, right, cond); + } + +// Load/store macros. +#define DECLARE_FUNCTION(FN, REGTYPE, REG, OP) \ + void FN(const REGTYPE REG, const MemOperand& addr); + LS_MACRO_LIST(DECLARE_FUNCTION) +#undef DECLARE_FUNCTION + + void LoadStoreMacro(const CPURegister& rt, + const MemOperand& addr, + LoadStoreOp op); + +#define DECLARE_FUNCTION(FN, REGTYPE, REG, REG2, OP) \ + void FN(const REGTYPE REG, const REGTYPE REG2, const MemOperand& addr); + LSPAIR_MACRO_LIST(DECLARE_FUNCTION) +#undef DECLARE_FUNCTION + + void LoadStorePairMacro(const CPURegister& rt, + const CPURegister& rt2, + const MemOperand& addr, + LoadStorePairOp op); + + void Prfm(PrefetchOperation op, const MemOperand& addr); + + // Push or pop up to 4 registers of the same width to or from the stack, + // using the current stack pointer as set by SetStackPointer. + // + // If an argument register is 'NoReg', all further arguments are also assumed + // to be 'NoReg', and are thus not pushed or popped. + // + // Arguments are ordered such that "Push(a, b);" is functionally equivalent + // to "Push(a); Push(b);". + // + // It is valid to push the same register more than once, and there is no + // restriction on the order in which registers are specified. + // + // It is not valid to pop into the same register more than once in one + // operation, not even into the zero register. + // + // If the current stack pointer (as set by SetStackPointer) is sp, then it + // must be aligned to 16 bytes on entry and the total size of the specified + // registers must also be a multiple of 16 bytes. + // + // Even if the current stack pointer is not the system stack pointer (sp), + // Push (and derived methods) will still modify the system stack pointer in + // order to comply with ABI rules about accessing memory below the system + // stack pointer. + // + // Other than the registers passed into Pop, the stack pointer and (possibly) + // the system stack pointer, these methods do not modify any other registers. + void Push(const CPURegister& src0, + const CPURegister& src1 = NoReg, + const CPURegister& src2 = NoReg, + const CPURegister& src3 = NoReg); + void Pop(const CPURegister& dst0, + const CPURegister& dst1 = NoReg, + const CPURegister& dst2 = NoReg, + const CPURegister& dst3 = NoReg); + + // Alternative forms of Push and Pop, taking a RegList or CPURegList that + // specifies the registers that are to be pushed or popped. Higher-numbered + // registers are associated with higher memory addresses (as in the A32 push + // and pop instructions). + // + // (Push|Pop)SizeRegList allow you to specify the register size as a + // parameter. Only kXRegSize, kWRegSize, kDRegSize and kSRegSize are + // supported. + // + // Otherwise, (Push|Pop)(CPU|X|W|D|S)RegList is preferred. 
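+  // (Editorial note) Illustrative sketch, assuming x19 and x20 are to be
+  // preserved around a code sequence (GetBit() forms the RegList bit for a
+  // register); not part of upstream VIXL:
+  //
+  //   masm.PushXRegList(x19.GetBit() | x20.GetBit());
+  //   // ... code that clobbers x19 and x20 ...
+  //   masm.PopXRegList(x19.GetBit() | x20.GetBit());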
+ void PushCPURegList(CPURegList registers); + void PopCPURegList(CPURegList registers); + + void PushSizeRegList( + RegList registers, + unsigned reg_size, + CPURegister::RegisterType type = CPURegister::kRegister) { + PushCPURegList(CPURegList(type, reg_size, registers)); + } + void PopSizeRegList(RegList registers, + unsigned reg_size, + CPURegister::RegisterType type = CPURegister::kRegister) { + PopCPURegList(CPURegList(type, reg_size, registers)); + } + void PushXRegList(RegList regs) { PushSizeRegList(regs, kXRegSize); } + void PopXRegList(RegList regs) { PopSizeRegList(regs, kXRegSize); } + void PushWRegList(RegList regs) { PushSizeRegList(regs, kWRegSize); } + void PopWRegList(RegList regs) { PopSizeRegList(regs, kWRegSize); } + void PushDRegList(RegList regs) { + PushSizeRegList(regs, kDRegSize, CPURegister::kVRegister); + } + void PopDRegList(RegList regs) { + PopSizeRegList(regs, kDRegSize, CPURegister::kVRegister); + } + void PushSRegList(RegList regs) { + PushSizeRegList(regs, kSRegSize, CPURegister::kVRegister); + } + void PopSRegList(RegList regs) { + PopSizeRegList(regs, kSRegSize, CPURegister::kVRegister); + } + + // Push the specified register 'count' times. + void PushMultipleTimes(int count, Register src); + + // Poke 'src' onto the stack. The offset is in bytes. + // + // If the current stack pointer (as set by SetStackPointer) is sp, then sp + // must be aligned to 16 bytes. + void Poke(const Register& src, const Operand& offset); + + // Peek at a value on the stack, and put it in 'dst'. The offset is in bytes. + // + // If the current stack pointer (as set by SetStackPointer) is sp, then sp + // must be aligned to 16 bytes. + void Peek(const Register& dst, const Operand& offset); + + // Alternative forms of Peek and Poke, taking a RegList or CPURegList that + // specifies the registers that are to be pushed or popped. Higher-numbered + // registers are associated with higher memory addresses. + // + // (Peek|Poke)SizeRegList allow you to specify the register size as a + // parameter. Only kXRegSize, kWRegSize, kDRegSize and kSRegSize are + // supported. + // + // Otherwise, (Peek|Poke)(CPU|X|W|D|S)RegList is preferred. 
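+  //
+  // A minimal sketch of Peek/Poke (assuming a MacroAssembler named `masm`);
+  // unlike Push/Pop, they leave the stack pointer where it is and access
+  // memory at the given byte offset from StackPointer():
+  //
+  //   masm.Claim(16);      // reserve 16 bytes of stack space
+  //   masm.Poke(x0, 0);    // store x0 at [StackPointer(), #0]
+  //   masm.Peek(x1, 0);    // reload the same value into x1
+  //   masm.Drop(16);       // release the space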
+ void PeekCPURegList(CPURegList registers, int64_t offset) { + LoadCPURegList(registers, MemOperand(StackPointer(), offset)); + } + void PokeCPURegList(CPURegList registers, int64_t offset) { + StoreCPURegList(registers, MemOperand(StackPointer(), offset)); + } + + void PeekSizeRegList( + RegList registers, + int64_t offset, + unsigned reg_size, + CPURegister::RegisterType type = CPURegister::kRegister) { + PeekCPURegList(CPURegList(type, reg_size, registers), offset); + } + void PokeSizeRegList( + RegList registers, + int64_t offset, + unsigned reg_size, + CPURegister::RegisterType type = CPURegister::kRegister) { + PokeCPURegList(CPURegList(type, reg_size, registers), offset); + } + void PeekXRegList(RegList regs, int64_t offset) { + PeekSizeRegList(regs, offset, kXRegSize); + } + void PokeXRegList(RegList regs, int64_t offset) { + PokeSizeRegList(regs, offset, kXRegSize); + } + void PeekWRegList(RegList regs, int64_t offset) { + PeekSizeRegList(regs, offset, kWRegSize); + } + void PokeWRegList(RegList regs, int64_t offset) { + PokeSizeRegList(regs, offset, kWRegSize); + } + void PeekDRegList(RegList regs, int64_t offset) { + PeekSizeRegList(regs, offset, kDRegSize, CPURegister::kVRegister); + } + void PokeDRegList(RegList regs, int64_t offset) { + PokeSizeRegList(regs, offset, kDRegSize, CPURegister::kVRegister); + } + void PeekSRegList(RegList regs, int64_t offset) { + PeekSizeRegList(regs, offset, kSRegSize, CPURegister::kVRegister); + } + void PokeSRegList(RegList regs, int64_t offset) { + PokeSizeRegList(regs, offset, kSRegSize, CPURegister::kVRegister); + } + + + // Claim or drop stack space without actually accessing memory. + // + // If the current stack pointer (as set by SetStackPointer) is sp, then it + // must be aligned to 16 bytes and the size claimed or dropped must be a + // multiple of 16 bytes. + void Claim(const Operand& size); + void Drop(const Operand& size); + + // As above, but for multiples of the SVE vector length. + void ClaimVL(int64_t multiplier) { + // We never need to worry about sp alignment because the VL is always a + // multiple of 16. + VIXL_STATIC_ASSERT((kZRegMinSizeInBytes % 16) == 0); + VIXL_ASSERT(multiplier >= 0); + Addvl(sp, sp, -multiplier); + } + void DropVL(int64_t multiplier) { + VIXL_STATIC_ASSERT((kZRegMinSizeInBytes % 16) == 0); + VIXL_ASSERT(multiplier >= 0); + Addvl(sp, sp, multiplier); + } + + // Preserve the callee-saved registers (as defined by AAPCS64). + // + // Higher-numbered registers are pushed before lower-numbered registers, and + // thus get higher addresses. + // Floating-point registers are pushed before general-purpose registers, and + // thus get higher addresses. + // + // This method must not be called unless StackPointer() is sp, and it is + // aligned to 16 bytes. + void PushCalleeSavedRegisters(); + + // Restore the callee-saved registers (as defined by AAPCS64). + // + // Higher-numbered registers are popped after lower-numbered registers, and + // thus come from higher addresses. + // Floating-point registers are popped after general-purpose registers, and + // thus come from higher addresses. + // + // This method must not be called unless StackPointer() is sp, and it is + // aligned to 16 bytes. + void PopCalleeSavedRegisters(); + + void LoadCPURegList(CPURegList registers, const MemOperand& src); + void StoreCPURegList(CPURegList registers, const MemOperand& dst); + + // Remaining instructions are simple pass-through calls to the assembler. 
+  void Adr(const Register& rd, Label* label) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    VIXL_ASSERT(!rd.IsZero());
+    SingleEmissionCheckScope guard(this);
+    adr(rd, label);
+  }
+  void Adrp(const Register& rd, Label* label) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    VIXL_ASSERT(!rd.IsZero());
+    SingleEmissionCheckScope guard(this);
+    adrp(rd, label);
+  }
+  void Asr(const Register& rd, const Register& rn, unsigned shift) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    VIXL_ASSERT(!rd.IsZero());
+    VIXL_ASSERT(!rn.IsZero());
+    SingleEmissionCheckScope guard(this);
+    asr(rd, rn, shift);
+  }
+  void Asr(const Register& rd, const Register& rn, const Register& rm) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    VIXL_ASSERT(!rd.IsZero());
+    VIXL_ASSERT(!rn.IsZero());
+    VIXL_ASSERT(!rm.IsZero());
+    SingleEmissionCheckScope guard(this);
+    asrv(rd, rn, rm);
+  }
+
+  // Branch type inversion relies on these relations.
+  VIXL_STATIC_ASSERT((reg_zero == (reg_not_zero ^ 1)) &&
+                     (reg_bit_clear == (reg_bit_set ^ 1)) &&
+                     (always == (never ^ 1)));
+
+  BranchType InvertBranchType(BranchType type) {
+    if (kBranchTypeFirstCondition <= type && type <= kBranchTypeLastCondition) {
+      return static_cast<BranchType>(
+          InvertCondition(static_cast<Condition>(type)));
+    } else {
+      return static_cast<BranchType>(type ^ 1);
+    }
+  }
+
+  void B(Label* label, BranchType type, Register reg = NoReg, int bit = -1);
+
+  void B(Label* label);
+  void B(Label* label, Condition cond);
+  void B(Condition cond, Label* label) { B(label, cond); }
+  void Bfm(const Register& rd,
+           const Register& rn,
+           unsigned immr,
+           unsigned imms) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    VIXL_ASSERT(!rd.IsZero());
+    VIXL_ASSERT(!rn.IsZero());
+    SingleEmissionCheckScope guard(this);
+    bfm(rd, rn, immr, imms);
+  }
+  void Bfi(const Register& rd,
+           const Register& rn,
+           unsigned lsb,
+           unsigned width) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    VIXL_ASSERT(!rd.IsZero());
+    VIXL_ASSERT(!rn.IsZero());
+    SingleEmissionCheckScope guard(this);
+    bfi(rd, rn, lsb, width);
+  }
+  void Bfc(const Register& rd, unsigned lsb, unsigned width) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    VIXL_ASSERT(!rd.IsZero());
+    SingleEmissionCheckScope guard(this);
+    bfc(rd, lsb, width);
+  }
+  void Bfxil(const Register& rd,
+             const Register& rn,
+             unsigned lsb,
+             unsigned width) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    VIXL_ASSERT(!rd.IsZero());
+    VIXL_ASSERT(!rn.IsZero());
+    SingleEmissionCheckScope guard(this);
+    bfxil(rd, rn, lsb, width);
+  }
+  void Bind(Label* label, BranchTargetIdentifier id = EmitBTI_none);
+  // Bind a label to a specified offset from the start of the buffer.
+ void BindToOffset(Label* label, ptrdiff_t offset); + void Bl(Label* label) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + bl(label); + } + void Blr(const Register& xn) { + VIXL_ASSERT(allow_macro_instructions_); + VIXL_ASSERT(!xn.IsZero()); + SingleEmissionCheckScope guard(this); + blr(xn); + } + void Br(const Register& xn) { + VIXL_ASSERT(allow_macro_instructions_); + VIXL_ASSERT(!xn.IsZero()); + SingleEmissionCheckScope guard(this); + br(xn); + } + void Braaz(const Register& xn) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + braaz(xn); + } + void Brabz(const Register& xn) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + brabz(xn); + } + void Blraaz(const Register& xn) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + blraaz(xn); + } + void Blrabz(const Register& xn) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + blrabz(xn); + } + void Retaa() { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + retaa(); + } + void Retab() { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + retab(); + } + void Braa(const Register& xn, const Register& xm) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + braa(xn, xm); + } + void Brab(const Register& xn, const Register& xm) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + brab(xn, xm); + } + void Blraa(const Register& xn, const Register& xm) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + blraa(xn, xm); + } + void Blrab(const Register& xn, const Register& xm) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + blrab(xn, xm); + } + void Brk(int code = 0) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + brk(code); + } + void Cbnz(const Register& rt, Label* label); + void Cbz(const Register& rt, Label* label); + void Cinc(const Register& rd, const Register& rn, Condition cond) { + VIXL_ASSERT(allow_macro_instructions_); + VIXL_ASSERT(!rd.IsZero()); + VIXL_ASSERT(!rn.IsZero()); + SingleEmissionCheckScope guard(this); + cinc(rd, rn, cond); + } + void Cinv(const Register& rd, const Register& rn, Condition cond) { + VIXL_ASSERT(allow_macro_instructions_); + VIXL_ASSERT(!rd.IsZero()); + VIXL_ASSERT(!rn.IsZero()); + SingleEmissionCheckScope guard(this); + cinv(rd, rn, cond); + } + +#define PAUTH_SYSTEM_MODES(V) \ + V(az) \ + V(bz) \ + V(asp) \ + V(bsp) + +#define DEFINE_MACRO_ASM_FUNCS(SUFFIX) \ + void Paci##SUFFIX() { \ + VIXL_ASSERT(allow_macro_instructions_); \ + SingleEmissionCheckScope guard(this); \ + paci##SUFFIX(); \ + } \ + void Auti##SUFFIX() { \ + VIXL_ASSERT(allow_macro_instructions_); \ + SingleEmissionCheckScope guard(this); \ + auti##SUFFIX(); \ + } + + PAUTH_SYSTEM_MODES(DEFINE_MACRO_ASM_FUNCS) +#undef DEFINE_MACRO_ASM_FUNCS + + // The 1716 pac and aut instructions encourage people to use x16 and x17 + // directly, perhaps without realising that this is forbidden. For example: + // + // UseScratchRegisterScope temps(&masm); + // Register temp = temps.AcquireX(); // temp will be x16 + // __ Mov(x17, ptr); + // __ Mov(x16, modifier); // Will override temp! + // __ Pacia1716(); + // + // To work around this issue, you must exclude x16 and x17 from the scratch + // register list. 
You may need to replace them with other registers: + // + // UseScratchRegisterScope temps(&masm); + // temps.Exclude(x16, x17); + // temps.Include(x10, x11); + // __ Mov(x17, ptr); + // __ Mov(x16, modifier); + // __ Pacia1716(); + void Pacia1716() { + VIXL_ASSERT(allow_macro_instructions_); + VIXL_ASSERT(!GetScratchRegisterList()->IncludesAliasOf(x16)); + VIXL_ASSERT(!GetScratchRegisterList()->IncludesAliasOf(x17)); + SingleEmissionCheckScope guard(this); + pacia1716(); + } + void Pacib1716() { + VIXL_ASSERT(allow_macro_instructions_); + VIXL_ASSERT(!GetScratchRegisterList()->IncludesAliasOf(x16)); + VIXL_ASSERT(!GetScratchRegisterList()->IncludesAliasOf(x17)); + SingleEmissionCheckScope guard(this); + pacib1716(); + } + void Autia1716() { + VIXL_ASSERT(allow_macro_instructions_); + VIXL_ASSERT(!GetScratchRegisterList()->IncludesAliasOf(x16)); + VIXL_ASSERT(!GetScratchRegisterList()->IncludesAliasOf(x17)); + SingleEmissionCheckScope guard(this); + autia1716(); + } + void Autib1716() { + VIXL_ASSERT(allow_macro_instructions_); + VIXL_ASSERT(!GetScratchRegisterList()->IncludesAliasOf(x16)); + VIXL_ASSERT(!GetScratchRegisterList()->IncludesAliasOf(x17)); + SingleEmissionCheckScope guard(this); + autib1716(); + } + void Xpaclri() { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + xpaclri(); + } + void Clrex() { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + clrex(); + } + void Cls(const Register& rd, const Register& rn) { + VIXL_ASSERT(allow_macro_instructions_); + VIXL_ASSERT(!rd.IsZero()); + VIXL_ASSERT(!rn.IsZero()); + SingleEmissionCheckScope guard(this); + cls(rd, rn); + } + void Clz(const Register& rd, const Register& rn) { + VIXL_ASSERT(allow_macro_instructions_); + VIXL_ASSERT(!rd.IsZero()); + VIXL_ASSERT(!rn.IsZero()); + SingleEmissionCheckScope guard(this); + clz(rd, rn); + } + void Cneg(const Register& rd, const Register& rn, Condition cond) { + VIXL_ASSERT(allow_macro_instructions_); + VIXL_ASSERT(!rd.IsZero()); + VIXL_ASSERT(!rn.IsZero()); + SingleEmissionCheckScope guard(this); + cneg(rd, rn, cond); + } + void Esb() { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + esb(); + } + void Csdb() { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + csdb(); + } + void Cset(const Register& rd, Condition cond) { + VIXL_ASSERT(allow_macro_instructions_); + VIXL_ASSERT(!rd.IsZero()); + SingleEmissionCheckScope guard(this); + cset(rd, cond); + } + void Csetm(const Register& rd, Condition cond) { + VIXL_ASSERT(allow_macro_instructions_); + VIXL_ASSERT(!rd.IsZero()); + SingleEmissionCheckScope guard(this); + csetm(rd, cond); + } + void Csinc(const Register& rd, + const Register& rn, + const Register& rm, + Condition cond) { + VIXL_ASSERT(allow_macro_instructions_); + VIXL_ASSERT(!rd.IsZero()); + VIXL_ASSERT((cond != al) && (cond != nv)); + SingleEmissionCheckScope guard(this); + csinc(rd, rn, rm, cond); + } + void Csinv(const Register& rd, + const Register& rn, + const Register& rm, + Condition cond) { + VIXL_ASSERT(allow_macro_instructions_); + VIXL_ASSERT(!rd.IsZero()); + VIXL_ASSERT((cond != al) && (cond != nv)); + SingleEmissionCheckScope guard(this); + csinv(rd, rn, rm, cond); + } + void Csneg(const Register& rd, + const Register& rn, + const Register& rm, + Condition cond) { + VIXL_ASSERT(allow_macro_instructions_); + VIXL_ASSERT(!rd.IsZero()); + VIXL_ASSERT((cond != al) && (cond != nv)); + SingleEmissionCheckScope guard(this); + csneg(rd, rn, rm, 
cond); + } + void Dmb(BarrierDomain domain, BarrierType type) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + dmb(domain, type); + } + void Dsb(BarrierDomain domain, BarrierType type) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + dsb(domain, type); + } + void Extr(const Register& rd, + const Register& rn, + const Register& rm, + unsigned lsb) { + VIXL_ASSERT(allow_macro_instructions_); + VIXL_ASSERT(!rd.IsZero()); + VIXL_ASSERT(!rn.IsZero()); + VIXL_ASSERT(!rm.IsZero()); + SingleEmissionCheckScope guard(this); + extr(rd, rn, rm, lsb); + } + void Fadd(const VRegister& vd, const VRegister& vn, const VRegister& vm) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + fadd(vd, vn, vm); + } + void Fccmp(const VRegister& vn, + const VRegister& vm, + StatusFlags nzcv, + Condition cond, + FPTrapFlags trap = DisableTrap) { + VIXL_ASSERT(allow_macro_instructions_); + VIXL_ASSERT((cond != al) && (cond != nv)); + SingleEmissionCheckScope guard(this); + FPCCompareMacro(vn, vm, nzcv, cond, trap); + } + void Fccmpe(const VRegister& vn, + const VRegister& vm, + StatusFlags nzcv, + Condition cond) { + Fccmp(vn, vm, nzcv, cond, EnableTrap); + } + void Fcmp(const VRegister& vn, + const VRegister& vm, + FPTrapFlags trap = DisableTrap) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + FPCompareMacro(vn, vm, trap); + } + void Fcmp(const VRegister& vn, double value, FPTrapFlags trap = DisableTrap); + void Fcmpe(const VRegister& vn, double value); + void Fcmpe(const VRegister& vn, const VRegister& vm) { + Fcmp(vn, vm, EnableTrap); + } + void Fcsel(const VRegister& vd, + const VRegister& vn, + const VRegister& vm, + Condition cond) { + VIXL_ASSERT(allow_macro_instructions_); + VIXL_ASSERT((cond != al) && (cond != nv)); + SingleEmissionCheckScope guard(this); + fcsel(vd, vn, vm, cond); + } + void Fcvt(const VRegister& vd, const VRegister& vn) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + fcvt(vd, vn); + } + void Fcvtl(const VRegister& vd, const VRegister& vn) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + fcvtl(vd, vn); + } + void Fcvtl2(const VRegister& vd, const VRegister& vn) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + fcvtl2(vd, vn); + } + void Fcvtn(const VRegister& vd, const VRegister& vn) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + fcvtn(vd, vn); + } + void Fcvtn2(const VRegister& vd, const VRegister& vn) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + fcvtn2(vd, vn); + } + void Fcvtxn(const VRegister& vd, const VRegister& vn) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + fcvtxn(vd, vn); + } + void Fcvtxn2(const VRegister& vd, const VRegister& vn) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + fcvtxn2(vd, vn); + } + void Fcvtas(const Register& rd, const VRegister& vn) { + VIXL_ASSERT(allow_macro_instructions_); + VIXL_ASSERT(!rd.IsZero()); + SingleEmissionCheckScope guard(this); + fcvtas(rd, vn); + } + void Fcvtau(const Register& rd, const VRegister& vn) { + VIXL_ASSERT(allow_macro_instructions_); + VIXL_ASSERT(!rd.IsZero()); + SingleEmissionCheckScope guard(this); + fcvtau(rd, vn); + } + void Fcvtms(const Register& rd, const VRegister& vn) { + VIXL_ASSERT(allow_macro_instructions_); + 
VIXL_ASSERT(!rd.IsZero()); + SingleEmissionCheckScope guard(this); + fcvtms(rd, vn); + } + void Fcvtmu(const Register& rd, const VRegister& vn) { + VIXL_ASSERT(allow_macro_instructions_); + VIXL_ASSERT(!rd.IsZero()); + SingleEmissionCheckScope guard(this); + fcvtmu(rd, vn); + } + void Fcvtns(const Register& rd, const VRegister& vn) { + VIXL_ASSERT(allow_macro_instructions_); + VIXL_ASSERT(!rd.IsZero()); + SingleEmissionCheckScope guard(this); + fcvtns(rd, vn); + } + void Fcvtnu(const Register& rd, const VRegister& vn) { + VIXL_ASSERT(allow_macro_instructions_); + VIXL_ASSERT(!rd.IsZero()); + SingleEmissionCheckScope guard(this); + fcvtnu(rd, vn); + } + void Fcvtps(const Register& rd, const VRegister& vn) { + VIXL_ASSERT(allow_macro_instructions_); + VIXL_ASSERT(!rd.IsZero()); + SingleEmissionCheckScope guard(this); + fcvtps(rd, vn); + } + void Fcvtpu(const Register& rd, const VRegister& vn) { + VIXL_ASSERT(allow_macro_instructions_); + VIXL_ASSERT(!rd.IsZero()); + SingleEmissionCheckScope guard(this); + fcvtpu(rd, vn); + } + void Fcvtzs(const Register& rd, const VRegister& vn, int fbits = 0) { + VIXL_ASSERT(allow_macro_instructions_); + VIXL_ASSERT(!rd.IsZero()); + SingleEmissionCheckScope guard(this); + fcvtzs(rd, vn, fbits); + } + void Fjcvtzs(const Register& rd, const VRegister& vn) { + VIXL_ASSERT(allow_macro_instructions_); + VIXL_ASSERT(!rd.IsZero()); + SingleEmissionCheckScope guard(this); + fjcvtzs(rd, vn); + } + void Fcvtzu(const Register& rd, const VRegister& vn, int fbits = 0) { + VIXL_ASSERT(allow_macro_instructions_); + VIXL_ASSERT(!rd.IsZero()); + SingleEmissionCheckScope guard(this); + fcvtzu(rd, vn, fbits); + } + void Fdiv(const VRegister& vd, const VRegister& vn, const VRegister& vm) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + fdiv(vd, vn, vm); + } + void Fmax(const VRegister& vd, const VRegister& vn, const VRegister& vm) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + fmax(vd, vn, vm); + } + void Fmaxnm(const VRegister& vd, const VRegister& vn, const VRegister& vm) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + fmaxnm(vd, vn, vm); + } + void Fmin(const VRegister& vd, const VRegister& vn, const VRegister& vm) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + fmin(vd, vn, vm); + } + void Fminnm(const VRegister& vd, const VRegister& vn, const VRegister& vm) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + fminnm(vd, vn, vm); + } + void Fmov(const VRegister& vd, const VRegister& vn) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + // TODO: Use DiscardMoveMode to allow this move to be elided if vd.Is(vn). 
+    fmov(vd, vn);
+  }
+  void Fmov(const VRegister& vd, const Register& rn) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    VIXL_ASSERT(!rn.IsZero());
+    SingleEmissionCheckScope guard(this);
+    fmov(vd, rn);
+  }
+  void Fmov(const VRegister& vd, int index, const Register& rn) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    if (vd.Is1D() && (index == 0)) {
+      mov(vd, index, rn);
+    } else {
+      fmov(vd, index, rn);
+    }
+  }
+  void Fmov(const Register& rd, const VRegister& vn, int index) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    if (vn.Is1D() && (index == 0)) {
+      mov(rd, vn, index);
+    } else {
+      fmov(rd, vn, index);
+    }
+  }
+
+  // Provide explicit double and float interfaces for FP immediate moves, rather
+  // than relying on implicit C++ casts. This allows signalling NaNs to be
+  // preserved when the immediate matches the format of vd. Most systems convert
+  // signalling NaNs to quiet NaNs when converting between float and double.
+  void Fmov(VRegister vd, double imm);
+  void Fmov(VRegister vd, float imm);
+  void Fmov(VRegister vd, const Float16 imm);
+  // Provide a template to allow other types to be converted automatically.
+  template <typename T>
+  void Fmov(VRegister vd, T imm) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    Fmov(vd, static_cast<double>(imm));
+  }
+  void Fmov(Register rd, VRegister vn) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    VIXL_ASSERT(!rd.IsZero());
+    SingleEmissionCheckScope guard(this);
+    fmov(rd, vn);
+  }
+  void Fmul(const VRegister& vd, const VRegister& vn, const VRegister& vm) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    fmul(vd, vn, vm);
+  }
+  void Fnmul(const VRegister& vd, const VRegister& vn, const VRegister& vm) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    fnmul(vd, vn, vm);
+  }
+  void Fmadd(const VRegister& vd,
+             const VRegister& vn,
+             const VRegister& vm,
+             const VRegister& va) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    fmadd(vd, vn, vm, va);
+  }
+  void Fmsub(const VRegister& vd,
+             const VRegister& vn,
+             const VRegister& vm,
+             const VRegister& va) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    fmsub(vd, vn, vm, va);
+  }
+  void Fnmadd(const VRegister& vd,
+              const VRegister& vn,
+              const VRegister& vm,
+              const VRegister& va) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    fnmadd(vd, vn, vm, va);
+  }
+  void Fnmsub(const VRegister& vd,
+              const VRegister& vn,
+              const VRegister& vm,
+              const VRegister& va) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    fnmsub(vd, vn, vm, va);
+  }
+  void Fsub(const VRegister& vd, const VRegister& vn, const VRegister& vm) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    fsub(vd, vn, vm);
+  }
+  void Hint(SystemHint code) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    hint(code);
+  }
+  void Hint(int imm7) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    hint(imm7);
+  }
+  void Hlt(int code) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    hlt(code);
+  }
+  void Isb() {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    isb();
+  }
+  void Ldar(const Register& rt, const MemOperand& src) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+ ldar(rt, src); + } + void Ldarb(const Register& rt, const MemOperand& src) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + ldarb(rt, src); + } + void Ldarh(const Register& rt, const MemOperand& src) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + ldarh(rt, src); + } + void Ldlar(const Register& rt, const MemOperand& src) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + ldlar(rt, src); + } + void Ldlarb(const Register& rt, const MemOperand& src) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + ldlarb(rt, src); + } + void Ldlarh(const Register& rt, const MemOperand& src) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + ldlarh(rt, src); + } + void Ldaxp(const Register& rt, const Register& rt2, const MemOperand& src) { + VIXL_ASSERT(allow_macro_instructions_); + VIXL_ASSERT(!rt.Aliases(rt2)); + SingleEmissionCheckScope guard(this); + ldaxp(rt, rt2, src); + } + void Ldaxr(const Register& rt, const MemOperand& src) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + ldaxr(rt, src); + } + void Ldaxrb(const Register& rt, const MemOperand& src) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + ldaxrb(rt, src); + } + void Ldaxrh(const Register& rt, const MemOperand& src) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + ldaxrh(rt, src); + } + +// clang-format off +#define COMPARE_AND_SWAP_SINGLE_MACRO_LIST(V) \ + V(cas, Cas) \ + V(casa, Casa) \ + V(casl, Casl) \ + V(casal, Casal) \ + V(casb, Casb) \ + V(casab, Casab) \ + V(caslb, Caslb) \ + V(casalb, Casalb) \ + V(cash, Cash) \ + V(casah, Casah) \ + V(caslh, Caslh) \ + V(casalh, Casalh) +// clang-format on + +#define DEFINE_MACRO_ASM_FUNC(ASM, MASM) \ + void MASM(const Register& rs, const Register& rt, const MemOperand& src) { \ + VIXL_ASSERT(allow_macro_instructions_); \ + SingleEmissionCheckScope guard(this); \ + ASM(rs, rt, src); \ + } + COMPARE_AND_SWAP_SINGLE_MACRO_LIST(DEFINE_MACRO_ASM_FUNC) +#undef DEFINE_MACRO_ASM_FUNC + + +// clang-format off +#define COMPARE_AND_SWAP_PAIR_MACRO_LIST(V) \ + V(casp, Casp) \ + V(caspa, Caspa) \ + V(caspl, Caspl) \ + V(caspal, Caspal) +// clang-format on + +#define DEFINE_MACRO_ASM_FUNC(ASM, MASM) \ + void MASM(const Register& rs, \ + const Register& rs2, \ + const Register& rt, \ + const Register& rt2, \ + const MemOperand& src) { \ + VIXL_ASSERT(allow_macro_instructions_); \ + SingleEmissionCheckScope guard(this); \ + ASM(rs, rs2, rt, rt2, src); \ + } + COMPARE_AND_SWAP_PAIR_MACRO_LIST(DEFINE_MACRO_ASM_FUNC) +#undef DEFINE_MACRO_ASM_FUNC + +// These macros generate all the variations of the atomic memory operations, +// e.g. ldadd, ldadda, ldaddb, staddl, etc. 
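+//
+// As an illustration (a sketch of the expansion, not additional API surface),
+// the load-variant definitions below produce one wrapper per operation and
+// per mode, for example:
+//
+//   void Ldaddal(const Register& rs, const Register& rt,
+//                const MemOperand& src) {
+//     VIXL_ASSERT(allow_macro_instructions_);
+//     SingleEmissionCheckScope guard(this);
+//     ldaddal(rs, rt, src);
+//   }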
+ +// clang-format off +#define ATOMIC_MEMORY_SIMPLE_MACRO_LIST(V, DEF, MASM_PRE, ASM_PRE) \ + V(DEF, MASM_PRE##add, ASM_PRE##add) \ + V(DEF, MASM_PRE##clr, ASM_PRE##clr) \ + V(DEF, MASM_PRE##eor, ASM_PRE##eor) \ + V(DEF, MASM_PRE##set, ASM_PRE##set) \ + V(DEF, MASM_PRE##smax, ASM_PRE##smax) \ + V(DEF, MASM_PRE##smin, ASM_PRE##smin) \ + V(DEF, MASM_PRE##umax, ASM_PRE##umax) \ + V(DEF, MASM_PRE##umin, ASM_PRE##umin) + +#define ATOMIC_MEMORY_STORE_MACRO_MODES(V, MASM, ASM) \ + V(MASM, ASM) \ + V(MASM##l, ASM##l) \ + V(MASM##b, ASM##b) \ + V(MASM##lb, ASM##lb) \ + V(MASM##h, ASM##h) \ + V(MASM##lh, ASM##lh) + +#define ATOMIC_MEMORY_LOAD_MACRO_MODES(V, MASM, ASM) \ + ATOMIC_MEMORY_STORE_MACRO_MODES(V, MASM, ASM) \ + V(MASM##a, ASM##a) \ + V(MASM##al, ASM##al) \ + V(MASM##ab, ASM##ab) \ + V(MASM##alb, ASM##alb) \ + V(MASM##ah, ASM##ah) \ + V(MASM##alh, ASM##alh) +// clang-format on + +#define DEFINE_MACRO_LOAD_ASM_FUNC(MASM, ASM) \ + void MASM(const Register& rs, const Register& rt, const MemOperand& src) { \ + VIXL_ASSERT(allow_macro_instructions_); \ + SingleEmissionCheckScope guard(this); \ + ASM(rs, rt, src); \ + } +#define DEFINE_MACRO_STORE_ASM_FUNC(MASM, ASM) \ + void MASM(const Register& rs, const MemOperand& src) { \ + VIXL_ASSERT(allow_macro_instructions_); \ + SingleEmissionCheckScope guard(this); \ + ASM(rs, src); \ + } + + ATOMIC_MEMORY_SIMPLE_MACRO_LIST(ATOMIC_MEMORY_LOAD_MACRO_MODES, + DEFINE_MACRO_LOAD_ASM_FUNC, + Ld, + ld) + ATOMIC_MEMORY_SIMPLE_MACRO_LIST(ATOMIC_MEMORY_STORE_MACRO_MODES, + DEFINE_MACRO_STORE_ASM_FUNC, + St, + st) + +#define DEFINE_MACRO_SWP_ASM_FUNC(MASM, ASM) \ + void MASM(const Register& rs, const Register& rt, const MemOperand& src) { \ + VIXL_ASSERT(allow_macro_instructions_); \ + SingleEmissionCheckScope guard(this); \ + ASM(rs, rt, src); \ + } + + ATOMIC_MEMORY_LOAD_MACRO_MODES(DEFINE_MACRO_SWP_ASM_FUNC, Swp, swp) + +#undef DEFINE_MACRO_LOAD_ASM_FUNC +#undef DEFINE_MACRO_STORE_ASM_FUNC +#undef DEFINE_MACRO_SWP_ASM_FUNC + + void Ldaprb(const Register& rt, const MemOperand& src) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + VIXL_ASSERT(src.IsImmediateOffset()); + if (src.GetOffset() == 0) { + ldaprb(rt, src); + } else { + ldapurb(rt, src); + } + } + + void Ldapursb(const Register& rt, const MemOperand& src) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + ldapursb(rt, src); + } + + void Ldaprh(const Register& rt, const MemOperand& src) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + VIXL_ASSERT(src.IsImmediateOffset()); + if (src.GetOffset() == 0) { + ldaprh(rt, src); + } else { + ldapurh(rt, src); + } + } + + void Ldapursh(const Register& rt, const MemOperand& src) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + ldapursh(rt, src); + } + + void Ldapr(const Register& rt, const MemOperand& src) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + VIXL_ASSERT(src.IsImmediateOffset()); + if (src.GetOffset() == 0) { + ldapr(rt, src); + } else { + ldapur(rt, src); + } + } + + void Ldapursw(const Register& rt, const MemOperand& src) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + ldapursw(rt, src); + } + + void Ldnp(const CPURegister& rt, + const CPURegister& rt2, + const MemOperand& src) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + ldnp(rt, rt2, src); + } + // Provide both double and float interfaces 
for FP immediate loads, rather
+  // than relying on implicit C++ casts. This allows signalling NaNs to be
+  // preserved when the immediate matches the format of fd. Most systems convert
+  // signalling NaNs to quiet NaNs when converting between float and double.
+  void Ldr(const VRegister& vt, double imm) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    RawLiteral* literal;
+    if (vt.IsD()) {
+      literal = new Literal<double>(imm,
+                                    &literal_pool_,
+                                    RawLiteral::kDeletedOnPlacementByPool);
+    } else {
+      literal = new Literal<float>(static_cast<float>(imm),
+                                   &literal_pool_,
+                                   RawLiteral::kDeletedOnPlacementByPool);
+    }
+    ldr(vt, literal);
+  }
+  void Ldr(const VRegister& vt, float imm) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    RawLiteral* literal;
+    if (vt.IsS()) {
+      literal = new Literal<float>(imm,
+                                   &literal_pool_,
+                                   RawLiteral::kDeletedOnPlacementByPool);
+    } else {
+      literal = new Literal<double>(static_cast<double>(imm),
+                                    &literal_pool_,
+                                    RawLiteral::kDeletedOnPlacementByPool);
+    }
+    ldr(vt, literal);
+  }
+  void Ldr(const VRegister& vt, uint64_t high64, uint64_t low64) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    VIXL_ASSERT(vt.IsQ());
+    SingleEmissionCheckScope guard(this);
+    ldr(vt,
+        new Literal<uint64_t>(high64,
+                              low64,
+                              &literal_pool_,
+                              RawLiteral::kDeletedOnPlacementByPool));
+  }
+  void Ldr(const Register& rt, uint64_t imm) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    VIXL_ASSERT(!rt.IsZero());
+    SingleEmissionCheckScope guard(this);
+    RawLiteral* literal;
+    if (rt.Is64Bits()) {
+      literal = new Literal<uint64_t>(imm,
+                                      &literal_pool_,
+                                      RawLiteral::kDeletedOnPlacementByPool);
+    } else {
+      VIXL_ASSERT(rt.Is32Bits());
+      VIXL_ASSERT(IsUint32(imm) || IsInt32(imm));
+      literal = new Literal<uint32_t>(static_cast<uint32_t>(imm),
+                                      &literal_pool_,
+                                      RawLiteral::kDeletedOnPlacementByPool);
+    }
+    ldr(rt, literal);
+  }
+  void Ldrsw(const Register& rt, uint32_t imm) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    VIXL_ASSERT(!rt.IsZero());
+    SingleEmissionCheckScope guard(this);
+    ldrsw(rt,
+          new Literal<uint32_t>(imm,
+                                &literal_pool_,
+                                RawLiteral::kDeletedOnPlacementByPool));
+  }
+  void Ldr(const CPURegister& rt, RawLiteral* literal) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    ldr(rt, literal);
+  }
+  void Ldrsw(const Register& rt, RawLiteral* literal) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    ldrsw(rt, literal);
+  }
+  void Ldxp(const Register& rt, const Register& rt2, const MemOperand& src) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    VIXL_ASSERT(!rt.Aliases(rt2));
+    SingleEmissionCheckScope guard(this);
+    ldxp(rt, rt2, src);
+  }
+  void Ldxr(const Register& rt, const MemOperand& src) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    ldxr(rt, src);
+  }
+  void Ldxrb(const Register& rt, const MemOperand& src) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    ldxrb(rt, src);
+  }
+  void Ldxrh(const Register& rt, const MemOperand& src) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    SingleEmissionCheckScope guard(this);
+    ldxrh(rt, src);
+  }
+  void Lsl(const Register& rd, const Register& rn, unsigned shift) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    VIXL_ASSERT(!rd.IsZero());
+    VIXL_ASSERT(!rn.IsZero());
+    SingleEmissionCheckScope guard(this);
+    lsl(rd, rn, shift);
+  }
+  void Lsl(const Register& rd, const Register& rn, const Register& rm) {
+    VIXL_ASSERT(allow_macro_instructions_);
+    VIXL_ASSERT(!rd.IsZero());
+    VIXL_ASSERT(!rn.IsZero()); +
VIXL_ASSERT(!rm.IsZero()); + SingleEmissionCheckScope guard(this); + lslv(rd, rn, rm); + } + void Lsr(const Register& rd, const Register& rn, unsigned shift) { + VIXL_ASSERT(allow_macro_instructions_); + VIXL_ASSERT(!rd.IsZero()); + VIXL_ASSERT(!rn.IsZero()); + SingleEmissionCheckScope guard(this); + lsr(rd, rn, shift); + } + void Lsr(const Register& rd, const Register& rn, const Register& rm) { + VIXL_ASSERT(allow_macro_instructions_); + VIXL_ASSERT(!rd.IsZero()); + VIXL_ASSERT(!rn.IsZero()); + VIXL_ASSERT(!rm.IsZero()); + SingleEmissionCheckScope guard(this); + lsrv(rd, rn, rm); + } + void Ldraa(const Register& xt, const MemOperand& src) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + ldraa(xt, src); + } + void Ldrab(const Register& xt, const MemOperand& src) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + ldrab(xt, src); + } + void Madd(const Register& rd, + const Register& rn, + const Register& rm, + const Register& ra) { + VIXL_ASSERT(allow_macro_instructions_); + VIXL_ASSERT(!rd.IsZero()); + VIXL_ASSERT(!rn.IsZero()); + VIXL_ASSERT(!rm.IsZero()); + VIXL_ASSERT(!ra.IsZero()); + SingleEmissionCheckScope guard(this); + madd(rd, rn, rm, ra); + } + void Mneg(const Register& rd, const Register& rn, const Register& rm) { + VIXL_ASSERT(allow_macro_instructions_); + VIXL_ASSERT(!rd.IsZero()); + VIXL_ASSERT(!rn.IsZero()); + VIXL_ASSERT(!rm.IsZero()); + SingleEmissionCheckScope guard(this); + mneg(rd, rn, rm); + } + void Mov(const Register& rd, + const Register& rn, + DiscardMoveMode discard_mode = kDontDiscardForSameWReg) { + VIXL_ASSERT(allow_macro_instructions_); + // Emit a register move only if the registers are distinct, or if they are + // not X registers. + // + // Note that mov(w0, w0) is not a no-op because it clears the top word of + // x0. A flag is provided (kDiscardForSameWReg) if a move between the same W + // registers is not required to clear the top word of the X register. In + // this case, the instruction is discarded. + // + // If the sp is an operand, add #0 is emitted, otherwise, orr #0. 
+ if (!rd.Is(rn) || + (rd.Is32Bits() && (discard_mode == kDontDiscardForSameWReg))) { + SingleEmissionCheckScope guard(this); + mov(rd, rn); + } + } + void Movk(const Register& rd, uint64_t imm, int shift = -1) { + VIXL_ASSERT(allow_macro_instructions_); + VIXL_ASSERT(!rd.IsZero()); + SingleEmissionCheckScope guard(this); + movk(rd, imm, shift); + } + void Mrs(const Register& rt, SystemRegister sysreg) { + VIXL_ASSERT(allow_macro_instructions_); + VIXL_ASSERT(!rt.IsZero()); + SingleEmissionCheckScope guard(this); + mrs(rt, sysreg); + } + void Msr(SystemRegister sysreg, const Register& rt) { + VIXL_ASSERT(allow_macro_instructions_); + VIXL_ASSERT(!rt.IsZero()); + SingleEmissionCheckScope guard(this); + msr(sysreg, rt); + } + void Cfinv() { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + cfinv(); + } + void Axflag() { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + axflag(); + } + void Xaflag() { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + xaflag(); + } + void Sys(int op1, int crn, int crm, int op2, const Register& rt = xzr) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + sys(op1, crn, crm, op2, rt); + } + void Dc(DataCacheOp op, const Register& rt) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + dc(op, rt); + } + void Ic(InstructionCacheOp op, const Register& rt) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + ic(op, rt); + } + void Msub(const Register& rd, + const Register& rn, + const Register& rm, + const Register& ra) { + VIXL_ASSERT(allow_macro_instructions_); + VIXL_ASSERT(!rd.IsZero()); + VIXL_ASSERT(!rn.IsZero()); + VIXL_ASSERT(!rm.IsZero()); + VIXL_ASSERT(!ra.IsZero()); + SingleEmissionCheckScope guard(this); + msub(rd, rn, rm, ra); + } + void Mul(const Register& rd, const Register& rn, const Register& rm) { + VIXL_ASSERT(allow_macro_instructions_); + VIXL_ASSERT(!rd.IsZero()); + VIXL_ASSERT(!rn.IsZero()); + VIXL_ASSERT(!rm.IsZero()); + SingleEmissionCheckScope guard(this); + mul(rd, rn, rm); + } + void Nop() { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + nop(); + } + void Rbit(const Register& rd, const Register& rn) { + VIXL_ASSERT(allow_macro_instructions_); + VIXL_ASSERT(!rd.IsZero()); + VIXL_ASSERT(!rn.IsZero()); + SingleEmissionCheckScope guard(this); + rbit(rd, rn); + } + void Ret(const Register& xn = lr) { + VIXL_ASSERT(allow_macro_instructions_); + VIXL_ASSERT(!xn.IsZero()); + SingleEmissionCheckScope guard(this); + ret(xn); + } + void Rev(const Register& rd, const Register& rn) { + VIXL_ASSERT(allow_macro_instructions_); + VIXL_ASSERT(!rd.IsZero()); + VIXL_ASSERT(!rn.IsZero()); + SingleEmissionCheckScope guard(this); + rev(rd, rn); + } + void Rev16(const Register& rd, const Register& rn) { + VIXL_ASSERT(allow_macro_instructions_); + VIXL_ASSERT(!rd.IsZero()); + VIXL_ASSERT(!rn.IsZero()); + SingleEmissionCheckScope guard(this); + rev16(rd, rn); + } + void Rev32(const Register& rd, const Register& rn) { + VIXL_ASSERT(allow_macro_instructions_); + VIXL_ASSERT(!rd.IsZero()); + VIXL_ASSERT(!rn.IsZero()); + SingleEmissionCheckScope guard(this); + rev32(rd, rn); + } + void Rev64(const Register& rd, const Register& rn) { + VIXL_ASSERT(allow_macro_instructions_); + VIXL_ASSERT(!rd.IsZero()); + VIXL_ASSERT(!rn.IsZero()); + SingleEmissionCheckScope guard(this); + rev64(rd, rn); + } + +#define PAUTH_MASM_VARIATIONS(V) \ + 
V(Paci, paci) \ + V(Pacd, pacd) \ + V(Auti, auti) \ + V(Autd, autd) + +#define DEFINE_MACRO_ASM_FUNCS(MASM_PRE, ASM_PRE) \ + void MASM_PRE##a(const Register& xd, const Register& xn) { \ + VIXL_ASSERT(allow_macro_instructions_); \ + SingleEmissionCheckScope guard(this); \ + ASM_PRE##a(xd, xn); \ + } \ + void MASM_PRE##za(const Register& xd) { \ + VIXL_ASSERT(allow_macro_instructions_); \ + SingleEmissionCheckScope guard(this); \ + ASM_PRE##za(xd); \ + } \ + void MASM_PRE##b(const Register& xd, const Register& xn) { \ + VIXL_ASSERT(allow_macro_instructions_); \ + SingleEmissionCheckScope guard(this); \ + ASM_PRE##b(xd, xn); \ + } \ + void MASM_PRE##zb(const Register& xd) { \ + VIXL_ASSERT(allow_macro_instructions_); \ + SingleEmissionCheckScope guard(this); \ + ASM_PRE##zb(xd); \ + } + + PAUTH_MASM_VARIATIONS(DEFINE_MACRO_ASM_FUNCS) +#undef DEFINE_MACRO_ASM_FUNCS + + void Pacga(const Register& xd, const Register& xn, const Register& xm) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + pacga(xd, xn, xm); + } + + void Xpaci(const Register& xd) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + xpaci(xd); + } + + void Xpacd(const Register& xd) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + xpacd(xd); + } + void Ror(const Register& rd, const Register& rs, unsigned shift) { + VIXL_ASSERT(allow_macro_instructions_); + VIXL_ASSERT(!rd.IsZero()); + VIXL_ASSERT(!rs.IsZero()); + SingleEmissionCheckScope guard(this); + ror(rd, rs, shift); + } + void Ror(const Register& rd, const Register& rn, const Register& rm) { + VIXL_ASSERT(allow_macro_instructions_); + VIXL_ASSERT(!rd.IsZero()); + VIXL_ASSERT(!rn.IsZero()); + VIXL_ASSERT(!rm.IsZero()); + SingleEmissionCheckScope guard(this); + rorv(rd, rn, rm); + } + void Sbfiz(const Register& rd, + const Register& rn, + unsigned lsb, + unsigned width) { + VIXL_ASSERT(allow_macro_instructions_); + VIXL_ASSERT(!rd.IsZero()); + VIXL_ASSERT(!rn.IsZero()); + SingleEmissionCheckScope guard(this); + sbfiz(rd, rn, lsb, width); + } + void Sbfm(const Register& rd, + const Register& rn, + unsigned immr, + unsigned imms) { + VIXL_ASSERT(allow_macro_instructions_); + VIXL_ASSERT(!rd.IsZero()); + VIXL_ASSERT(!rn.IsZero()); + SingleEmissionCheckScope guard(this); + sbfm(rd, rn, immr, imms); + } + void Sbfx(const Register& rd, + const Register& rn, + unsigned lsb, + unsigned width) { + VIXL_ASSERT(allow_macro_instructions_); + VIXL_ASSERT(!rd.IsZero()); + VIXL_ASSERT(!rn.IsZero()); + SingleEmissionCheckScope guard(this); + sbfx(rd, rn, lsb, width); + } + void Scvtf(const VRegister& vd, const Register& rn, int fbits = 0) { + VIXL_ASSERT(allow_macro_instructions_); + VIXL_ASSERT(!rn.IsZero()); + SingleEmissionCheckScope guard(this); + scvtf(vd, rn, fbits); + } + void Sdiv(const Register& rd, const Register& rn, const Register& rm) { + VIXL_ASSERT(allow_macro_instructions_); + VIXL_ASSERT(!rd.IsZero()); + VIXL_ASSERT(!rn.IsZero()); + VIXL_ASSERT(!rm.IsZero()); + SingleEmissionCheckScope guard(this); + sdiv(rd, rn, rm); + } + void Smaddl(const Register& rd, + const Register& rn, + const Register& rm, + const Register& ra) { + VIXL_ASSERT(allow_macro_instructions_); + VIXL_ASSERT(!rd.IsZero()); + VIXL_ASSERT(!rn.IsZero()); + VIXL_ASSERT(!rm.IsZero()); + VIXL_ASSERT(!ra.IsZero()); + SingleEmissionCheckScope guard(this); + smaddl(rd, rn, rm, ra); + } + void Smsubl(const Register& rd, + const Register& rn, + const Register& rm, + const Register& ra) { + 
VIXL_ASSERT(allow_macro_instructions_); + VIXL_ASSERT(!rd.IsZero()); + VIXL_ASSERT(!rn.IsZero()); + VIXL_ASSERT(!rm.IsZero()); + VIXL_ASSERT(!ra.IsZero()); + SingleEmissionCheckScope guard(this); + smsubl(rd, rn, rm, ra); + } + void Smull(const Register& rd, const Register& rn, const Register& rm) { + VIXL_ASSERT(allow_macro_instructions_); + VIXL_ASSERT(!rd.IsZero()); + VIXL_ASSERT(!rn.IsZero()); + VIXL_ASSERT(!rm.IsZero()); + SingleEmissionCheckScope guard(this); + smull(rd, rn, rm); + } + void Smulh(const Register& xd, const Register& xn, const Register& xm) { + VIXL_ASSERT(allow_macro_instructions_); + VIXL_ASSERT(!xd.IsZero()); + VIXL_ASSERT(!xn.IsZero()); + VIXL_ASSERT(!xm.IsZero()); + SingleEmissionCheckScope guard(this); + smulh(xd, xn, xm); + } + void Stlr(const Register& rt, const MemOperand& dst) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + VIXL_ASSERT(dst.IsImmediateOffset()); + if (dst.GetOffset() == 0) { + stlr(rt, dst); + } else { + stlur(rt, dst); + } + } + void Stlrb(const Register& rt, const MemOperand& dst) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + VIXL_ASSERT(dst.IsImmediateOffset()); + if (dst.GetOffset() == 0) { + stlrb(rt, dst); + } else { + stlurb(rt, dst); + } + } + void Stlrh(const Register& rt, const MemOperand& dst) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + VIXL_ASSERT(dst.IsImmediateOffset()); + if (dst.GetOffset() == 0) { + stlrh(rt, dst); + } else { + stlurh(rt, dst); + } + } + void Stllr(const Register& rt, const MemOperand& dst) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + stllr(rt, dst); + } + void Stllrb(const Register& rt, const MemOperand& dst) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + stllrb(rt, dst); + } + void Stllrh(const Register& rt, const MemOperand& dst) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + stllrh(rt, dst); + } + void Stlxp(const Register& rs, + const Register& rt, + const Register& rt2, + const MemOperand& dst) { + VIXL_ASSERT(allow_macro_instructions_); + VIXL_ASSERT(!rs.Aliases(dst.GetBaseRegister())); + VIXL_ASSERT(!rs.Aliases(rt)); + VIXL_ASSERT(!rs.Aliases(rt2)); + SingleEmissionCheckScope guard(this); + stlxp(rs, rt, rt2, dst); + } + void Stlxr(const Register& rs, const Register& rt, const MemOperand& dst) { + VIXL_ASSERT(allow_macro_instructions_); + VIXL_ASSERT(!rs.Aliases(dst.GetBaseRegister())); + VIXL_ASSERT(!rs.Aliases(rt)); + SingleEmissionCheckScope guard(this); + stlxr(rs, rt, dst); + } + void Stlxrb(const Register& rs, const Register& rt, const MemOperand& dst) { + VIXL_ASSERT(allow_macro_instructions_); + VIXL_ASSERT(!rs.Aliases(dst.GetBaseRegister())); + VIXL_ASSERT(!rs.Aliases(rt)); + SingleEmissionCheckScope guard(this); + stlxrb(rs, rt, dst); + } + void Stlxrh(const Register& rs, const Register& rt, const MemOperand& dst) { + VIXL_ASSERT(allow_macro_instructions_); + VIXL_ASSERT(!rs.Aliases(dst.GetBaseRegister())); + VIXL_ASSERT(!rs.Aliases(rt)); + SingleEmissionCheckScope guard(this); + stlxrh(rs, rt, dst); + } + void Stnp(const CPURegister& rt, + const CPURegister& rt2, + const MemOperand& dst) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + stnp(rt, rt2, dst); + } + void Stxp(const Register& rs, + const Register& rt, + const Register& rt2, + const MemOperand& dst) { + VIXL_ASSERT(allow_macro_instructions_); + 
VIXL_ASSERT(!rs.Aliases(dst.GetBaseRegister())); + VIXL_ASSERT(!rs.Aliases(rt)); + VIXL_ASSERT(!rs.Aliases(rt2)); + SingleEmissionCheckScope guard(this); + stxp(rs, rt, rt2, dst); + } + void Stxr(const Register& rs, const Register& rt, const MemOperand& dst) { + VIXL_ASSERT(allow_macro_instructions_); + VIXL_ASSERT(!rs.Aliases(dst.GetBaseRegister())); + VIXL_ASSERT(!rs.Aliases(rt)); + SingleEmissionCheckScope guard(this); + stxr(rs, rt, dst); + } + void Stxrb(const Register& rs, const Register& rt, const MemOperand& dst) { + VIXL_ASSERT(allow_macro_instructions_); + VIXL_ASSERT(!rs.Aliases(dst.GetBaseRegister())); + VIXL_ASSERT(!rs.Aliases(rt)); + SingleEmissionCheckScope guard(this); + stxrb(rs, rt, dst); + } + void Stxrh(const Register& rs, const Register& rt, const MemOperand& dst) { + VIXL_ASSERT(allow_macro_instructions_); + VIXL_ASSERT(!rs.Aliases(dst.GetBaseRegister())); + VIXL_ASSERT(!rs.Aliases(rt)); + SingleEmissionCheckScope guard(this); + stxrh(rs, rt, dst); + } + void Svc(int code) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + svc(code); + } + void Sxtb(const Register& rd, const Register& rn) { + VIXL_ASSERT(allow_macro_instructions_); + VIXL_ASSERT(!rd.IsZero()); + VIXL_ASSERT(!rn.IsZero()); + SingleEmissionCheckScope guard(this); + sxtb(rd, rn); + } + void Sxth(const Register& rd, const Register& rn) { + VIXL_ASSERT(allow_macro_instructions_); + VIXL_ASSERT(!rd.IsZero()); + VIXL_ASSERT(!rn.IsZero()); + SingleEmissionCheckScope guard(this); + sxth(rd, rn); + } + void Sxtw(const Register& rd, const Register& rn) { + VIXL_ASSERT(allow_macro_instructions_); + VIXL_ASSERT(!rd.IsZero()); + VIXL_ASSERT(!rn.IsZero()); + SingleEmissionCheckScope guard(this); + sxtw(rd, rn); + } + void Tbl(const VRegister& vd, const VRegister& vn, const VRegister& vm) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + tbl(vd, vn, vm); + } + void Tbl(const VRegister& vd, + const VRegister& vn, + const VRegister& vn2, + const VRegister& vm) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + tbl(vd, vn, vn2, vm); + } + void Tbl(const VRegister& vd, + const VRegister& vn, + const VRegister& vn2, + const VRegister& vn3, + const VRegister& vm) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + tbl(vd, vn, vn2, vn3, vm); + } + void Tbl(const VRegister& vd, + const VRegister& vn, + const VRegister& vn2, + const VRegister& vn3, + const VRegister& vn4, + const VRegister& vm) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + tbl(vd, vn, vn2, vn3, vn4, vm); + } + void Tbx(const VRegister& vd, const VRegister& vn, const VRegister& vm) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + tbx(vd, vn, vm); + } + void Tbx(const VRegister& vd, + const VRegister& vn, + const VRegister& vn2, + const VRegister& vm) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + tbx(vd, vn, vn2, vm); + } + void Tbx(const VRegister& vd, + const VRegister& vn, + const VRegister& vn2, + const VRegister& vn3, + const VRegister& vm) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + tbx(vd, vn, vn2, vn3, vm); + } + void Tbx(const VRegister& vd, + const VRegister& vn, + const VRegister& vn2, + const VRegister& vn3, + const VRegister& vn4, + const VRegister& vm) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + tbx(vd, vn, vn2, 
vn3, vn4, vm); + } + void Tbnz(const Register& rt, unsigned bit_pos, Label* label); + void Tbz(const Register& rt, unsigned bit_pos, Label* label); + void Ubfiz(const Register& rd, + const Register& rn, + unsigned lsb, + unsigned width) { + VIXL_ASSERT(allow_macro_instructions_); + VIXL_ASSERT(!rd.IsZero()); + VIXL_ASSERT(!rn.IsZero()); + SingleEmissionCheckScope guard(this); + ubfiz(rd, rn, lsb, width); + } + void Ubfm(const Register& rd, + const Register& rn, + unsigned immr, + unsigned imms) { + VIXL_ASSERT(allow_macro_instructions_); + VIXL_ASSERT(!rd.IsZero()); + VIXL_ASSERT(!rn.IsZero()); + SingleEmissionCheckScope guard(this); + ubfm(rd, rn, immr, imms); + } + void Ubfx(const Register& rd, + const Register& rn, + unsigned lsb, + unsigned width) { + VIXL_ASSERT(allow_macro_instructions_); + VIXL_ASSERT(!rd.IsZero()); + VIXL_ASSERT(!rn.IsZero()); + SingleEmissionCheckScope guard(this); + ubfx(rd, rn, lsb, width); + } + void Ucvtf(const VRegister& vd, const Register& rn, int fbits = 0) { + VIXL_ASSERT(allow_macro_instructions_); + VIXL_ASSERT(!rn.IsZero()); + SingleEmissionCheckScope guard(this); + ucvtf(vd, rn, fbits); + } + void Udiv(const Register& rd, const Register& rn, const Register& rm) { + VIXL_ASSERT(allow_macro_instructions_); + VIXL_ASSERT(!rd.IsZero()); + VIXL_ASSERT(!rn.IsZero()); + VIXL_ASSERT(!rm.IsZero()); + SingleEmissionCheckScope guard(this); + udiv(rd, rn, rm); + } + void Umaddl(const Register& rd, + const Register& rn, + const Register& rm, + const Register& ra) { + VIXL_ASSERT(allow_macro_instructions_); + VIXL_ASSERT(!rd.IsZero()); + VIXL_ASSERT(!rn.IsZero()); + VIXL_ASSERT(!rm.IsZero()); + VIXL_ASSERT(!ra.IsZero()); + SingleEmissionCheckScope guard(this); + umaddl(rd, rn, rm, ra); + } + void Umull(const Register& rd, const Register& rn, const Register& rm) { + VIXL_ASSERT(allow_macro_instructions_); + VIXL_ASSERT(!rd.IsZero()); + VIXL_ASSERT(!rn.IsZero()); + VIXL_ASSERT(!rm.IsZero()); + SingleEmissionCheckScope guard(this); + umull(rd, rn, rm); + } + void Umulh(const Register& xd, const Register& xn, const Register& xm) { + VIXL_ASSERT(allow_macro_instructions_); + VIXL_ASSERT(!xd.IsZero()); + VIXL_ASSERT(!xn.IsZero()); + VIXL_ASSERT(!xm.IsZero()); + SingleEmissionCheckScope guard(this); + umulh(xd, xn, xm); + } + void Umsubl(const Register& rd, + const Register& rn, + const Register& rm, + const Register& ra) { + VIXL_ASSERT(allow_macro_instructions_); + VIXL_ASSERT(!rd.IsZero()); + VIXL_ASSERT(!rn.IsZero()); + VIXL_ASSERT(!rm.IsZero()); + VIXL_ASSERT(!ra.IsZero()); + SingleEmissionCheckScope guard(this); + umsubl(rd, rn, rm, ra); + } + void Unreachable() { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + if (generate_simulator_code_) { + hlt(kUnreachableOpcode); + } else { + // Use the architecturally-defined UDF instruction to abort on hardware, + // because using HLT and BRK tends to make the process difficult to debug. 
+ udf(kUnreachableOpcode); + } + } + void Uxtb(const Register& rd, const Register& rn) { + VIXL_ASSERT(allow_macro_instructions_); + VIXL_ASSERT(!rd.IsZero()); + VIXL_ASSERT(!rn.IsZero()); + SingleEmissionCheckScope guard(this); + uxtb(rd, rn); + } + void Uxth(const Register& rd, const Register& rn) { + VIXL_ASSERT(allow_macro_instructions_); + VIXL_ASSERT(!rd.IsZero()); + VIXL_ASSERT(!rn.IsZero()); + SingleEmissionCheckScope guard(this); + uxth(rd, rn); + } + void Uxtw(const Register& rd, const Register& rn) { + VIXL_ASSERT(allow_macro_instructions_); + VIXL_ASSERT(!rd.IsZero()); + VIXL_ASSERT(!rn.IsZero()); + SingleEmissionCheckScope guard(this); + uxtw(rd, rn); + } + + void Addg(const Register& xd, + const Register& xn, + int offset, + int tag_offset) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + addg(xd, xn, offset, tag_offset); + } + void Gmi(const Register& xd, const Register& xn, const Register& xm) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + gmi(xd, xn, xm); + } + void Irg(const Register& xd, const Register& xn, const Register& xm = xzr) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + irg(xd, xn, xm); + } + void Subg(const Register& xd, + const Register& xn, + int offset, + int tag_offset) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + subg(xd, xn, offset, tag_offset); + } + void Subp(const Register& xd, const Register& xn, const Register& xm) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + subp(xd, xn, xm); + } + void Subps(const Register& xd, const Register& xn, const Register& xm) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + subps(xd, xn, xm); + } + void Cmpp(const Register& xn, const Register& xm) { Subps(xzr, xn, xm); } + +// NEON 3 vector register instructions. 
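+//
+// As an illustration, each entry in the list below expands (via the
+// DEFINE_MACRO_ASM_FUNC that follows it) to a wrapper of the form:
+//
+//   void Add(const VRegister& vd, const VRegister& vn, const VRegister& vm) {
+//     VIXL_ASSERT(allow_macro_instructions_);
+//     SingleEmissionCheckScope guard(this);
+//     add(vd, vn, vm);
+//   }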
+#define NEON_3VREG_MACRO_LIST(V) \ + V(add, Add) \ + V(addhn, Addhn) \ + V(addhn2, Addhn2) \ + V(addp, Addp) \ + V(and_, And) \ + V(bic, Bic) \ + V(bif, Bif) \ + V(bit, Bit) \ + V(bsl, Bsl) \ + V(cmeq, Cmeq) \ + V(cmge, Cmge) \ + V(cmgt, Cmgt) \ + V(cmhi, Cmhi) \ + V(cmhs, Cmhs) \ + V(cmtst, Cmtst) \ + V(eor, Eor) \ + V(fabd, Fabd) \ + V(facge, Facge) \ + V(facgt, Facgt) \ + V(faddp, Faddp) \ + V(fcmeq, Fcmeq) \ + V(fcmge, Fcmge) \ + V(fcmgt, Fcmgt) \ + V(fmaxnmp, Fmaxnmp) \ + V(fmaxp, Fmaxp) \ + V(fminnmp, Fminnmp) \ + V(fminp, Fminp) \ + V(fmla, Fmla) \ + V(fmlal, Fmlal) \ + V(fmlal2, Fmlal2) \ + V(fmls, Fmls) \ + V(fmlsl, Fmlsl) \ + V(fmlsl2, Fmlsl2) \ + V(fmulx, Fmulx) \ + V(frecps, Frecps) \ + V(frsqrts, Frsqrts) \ + V(mla, Mla) \ + V(mls, Mls) \ + V(mul, Mul) \ + V(orn, Orn) \ + V(orr, Orr) \ + V(pmul, Pmul) \ + V(pmull, Pmull) \ + V(pmull2, Pmull2) \ + V(raddhn, Raddhn) \ + V(raddhn2, Raddhn2) \ + V(rsubhn, Rsubhn) \ + V(rsubhn2, Rsubhn2) \ + V(saba, Saba) \ + V(sabal, Sabal) \ + V(sabal2, Sabal2) \ + V(sabd, Sabd) \ + V(sabdl, Sabdl) \ + V(sabdl2, Sabdl2) \ + V(saddl, Saddl) \ + V(saddl2, Saddl2) \ + V(saddw, Saddw) \ + V(saddw2, Saddw2) \ + V(shadd, Shadd) \ + V(shsub, Shsub) \ + V(smax, Smax) \ + V(smaxp, Smaxp) \ + V(smin, Smin) \ + V(sminp, Sminp) \ + V(smlal, Smlal) \ + V(smlal2, Smlal2) \ + V(smlsl, Smlsl) \ + V(smlsl2, Smlsl2) \ + V(smull, Smull) \ + V(smull2, Smull2) \ + V(sqadd, Sqadd) \ + V(sqdmlal, Sqdmlal) \ + V(sqdmlal2, Sqdmlal2) \ + V(sqdmlsl, Sqdmlsl) \ + V(sqdmlsl2, Sqdmlsl2) \ + V(sqdmulh, Sqdmulh) \ + V(sqdmull, Sqdmull) \ + V(sqdmull2, Sqdmull2) \ + V(sqrdmulh, Sqrdmulh) \ + V(sdot, Sdot) \ + V(sqrdmlah, Sqrdmlah) \ + V(udot, Udot) \ + V(sqrdmlsh, Sqrdmlsh) \ + V(sqrshl, Sqrshl) \ + V(sqshl, Sqshl) \ + V(sqsub, Sqsub) \ + V(srhadd, Srhadd) \ + V(srshl, Srshl) \ + V(sshl, Sshl) \ + V(ssubl, Ssubl) \ + V(ssubl2, Ssubl2) \ + V(ssubw, Ssubw) \ + V(ssubw2, Ssubw2) \ + V(sub, Sub) \ + V(subhn, Subhn) \ + V(subhn2, Subhn2) \ + V(trn1, Trn1) \ + V(trn2, Trn2) \ + V(uaba, Uaba) \ + V(uabal, Uabal) \ + V(uabal2, Uabal2) \ + V(uabd, Uabd) \ + V(uabdl, Uabdl) \ + V(uabdl2, Uabdl2) \ + V(uaddl, Uaddl) \ + V(uaddl2, Uaddl2) \ + V(uaddw, Uaddw) \ + V(uaddw2, Uaddw2) \ + V(uhadd, Uhadd) \ + V(uhsub, Uhsub) \ + V(umax, Umax) \ + V(umaxp, Umaxp) \ + V(umin, Umin) \ + V(uminp, Uminp) \ + V(umlal, Umlal) \ + V(umlal2, Umlal2) \ + V(umlsl, Umlsl) \ + V(umlsl2, Umlsl2) \ + V(umull, Umull) \ + V(umull2, Umull2) \ + V(uqadd, Uqadd) \ + V(uqrshl, Uqrshl) \ + V(uqshl, Uqshl) \ + V(uqsub, Uqsub) \ + V(urhadd, Urhadd) \ + V(urshl, Urshl) \ + V(ushl, Ushl) \ + V(usubl, Usubl) \ + V(usubl2, Usubl2) \ + V(usubw, Usubw) \ + V(usubw2, Usubw2) \ + V(uzp1, Uzp1) \ + V(uzp2, Uzp2) \ + V(zip1, Zip1) \ + V(zip2, Zip2) \ + V(smmla, Smmla) \ + V(ummla, Ummla) \ + V(usmmla, Usmmla) \ + V(usdot, Usdot) + +#define DEFINE_MACRO_ASM_FUNC(ASM, MASM) \ + void MASM(const VRegister& vd, const VRegister& vn, const VRegister& vm) { \ + VIXL_ASSERT(allow_macro_instructions_); \ + SingleEmissionCheckScope guard(this); \ + ASM(vd, vn, vm); \ + } + NEON_3VREG_MACRO_LIST(DEFINE_MACRO_ASM_FUNC) +#undef DEFINE_MACRO_ASM_FUNC + +// NEON 2 vector register instructions. 
+#define NEON_2VREG_MACRO_LIST(V) \ + V(abs, Abs) \ + V(addp, Addp) \ + V(addv, Addv) \ + V(cls, Cls) \ + V(clz, Clz) \ + V(cnt, Cnt) \ + V(fabs, Fabs) \ + V(faddp, Faddp) \ + V(fcvtas, Fcvtas) \ + V(fcvtau, Fcvtau) \ + V(fcvtms, Fcvtms) \ + V(fcvtmu, Fcvtmu) \ + V(fcvtns, Fcvtns) \ + V(fcvtnu, Fcvtnu) \ + V(fcvtps, Fcvtps) \ + V(fcvtpu, Fcvtpu) \ + V(fmaxnmp, Fmaxnmp) \ + V(fmaxnmv, Fmaxnmv) \ + V(fmaxp, Fmaxp) \ + V(fmaxv, Fmaxv) \ + V(fminnmp, Fminnmp) \ + V(fminnmv, Fminnmv) \ + V(fminp, Fminp) \ + V(fminv, Fminv) \ + V(fneg, Fneg) \ + V(frecpe, Frecpe) \ + V(frecpx, Frecpx) \ + V(frint32x, Frint32x) \ + V(frint32z, Frint32z) \ + V(frint64x, Frint64x) \ + V(frint64z, Frint64z) \ + V(frinta, Frinta) \ + V(frinti, Frinti) \ + V(frintm, Frintm) \ + V(frintn, Frintn) \ + V(frintp, Frintp) \ + V(frintx, Frintx) \ + V(frintz, Frintz) \ + V(frsqrte, Frsqrte) \ + V(fsqrt, Fsqrt) \ + V(mov, Mov) \ + V(mvn, Mvn) \ + V(neg, Neg) \ + V(not_, Not) \ + V(rbit, Rbit) \ + V(rev16, Rev16) \ + V(rev32, Rev32) \ + V(rev64, Rev64) \ + V(sadalp, Sadalp) \ + V(saddlp, Saddlp) \ + V(saddlv, Saddlv) \ + V(smaxv, Smaxv) \ + V(sminv, Sminv) \ + V(sqabs, Sqabs) \ + V(sqneg, Sqneg) \ + V(sqxtn, Sqxtn) \ + V(sqxtn2, Sqxtn2) \ + V(sqxtun, Sqxtun) \ + V(sqxtun2, Sqxtun2) \ + V(suqadd, Suqadd) \ + V(sxtl, Sxtl) \ + V(sxtl2, Sxtl2) \ + V(uadalp, Uadalp) \ + V(uaddlp, Uaddlp) \ + V(uaddlv, Uaddlv) \ + V(umaxv, Umaxv) \ + V(uminv, Uminv) \ + V(uqxtn, Uqxtn) \ + V(uqxtn2, Uqxtn2) \ + V(urecpe, Urecpe) \ + V(ursqrte, Ursqrte) \ + V(usqadd, Usqadd) \ + V(uxtl, Uxtl) \ + V(uxtl2, Uxtl2) \ + V(xtn, Xtn) \ + V(xtn2, Xtn2) + +#define DEFINE_MACRO_ASM_FUNC(ASM, MASM) \ + void MASM(const VRegister& vd, const VRegister& vn) { \ + VIXL_ASSERT(allow_macro_instructions_); \ + SingleEmissionCheckScope guard(this); \ + ASM(vd, vn); \ + } + NEON_2VREG_MACRO_LIST(DEFINE_MACRO_ASM_FUNC) +#undef DEFINE_MACRO_ASM_FUNC + +// NEON 2 vector register with immediate instructions. +#define NEON_2VREG_FPIMM_MACRO_LIST(V) \ + V(fcmeq, Fcmeq) \ + V(fcmge, Fcmge) \ + V(fcmgt, Fcmgt) \ + V(fcmle, Fcmle) \ + V(fcmlt, Fcmlt) + +#define DEFINE_MACRO_ASM_FUNC(ASM, MASM) \ + void MASM(const VRegister& vd, const VRegister& vn, double imm) { \ + VIXL_ASSERT(allow_macro_instructions_); \ + SingleEmissionCheckScope guard(this); \ + ASM(vd, vn, imm); \ + } + NEON_2VREG_FPIMM_MACRO_LIST(DEFINE_MACRO_ASM_FUNC) +#undef DEFINE_MACRO_ASM_FUNC + +// NEON by element instructions. 
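+ // The by-element wrappers below take an additional lane index that selects
+ // which element of vm feeds the operation. Sketch (the scalar form used for
+ // vm and the lane index are illustrative; the underlying assembler enforces
+ // the exact format and index rules):
+ //
+ //   masm.Fmul(v0.V4S(), v1.V4S(), v2.S(), 3);   // fmul v0.4s, v1.4s, v2.s[3]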
+#define NEON_BYELEMENT_MACRO_LIST(V) \
+ V(fmul, Fmul) \
+ V(fmla, Fmla) \
+ V(fmlal, Fmlal) \
+ V(fmlal2, Fmlal2) \
+ V(fmls, Fmls) \
+ V(fmlsl, Fmlsl) \
+ V(fmlsl2, Fmlsl2) \
+ V(fmulx, Fmulx) \
+ V(mul, Mul) \
+ V(mla, Mla) \
+ V(mls, Mls) \
+ V(sqdmulh, Sqdmulh) \
+ V(sqrdmulh, Sqrdmulh) \
+ V(sdot, Sdot) \
+ V(sqrdmlah, Sqrdmlah) \
+ V(udot, Udot) \
+ V(sqrdmlsh, Sqrdmlsh) \
+ V(sqdmull, Sqdmull) \
+ V(sqdmull2, Sqdmull2) \
+ V(sqdmlal, Sqdmlal) \
+ V(sqdmlal2, Sqdmlal2) \
+ V(sqdmlsl, Sqdmlsl) \
+ V(sqdmlsl2, Sqdmlsl2) \
+ V(smull, Smull) \
+ V(smull2, Smull2) \
+ V(smlal, Smlal) \
+ V(smlal2, Smlal2) \
+ V(smlsl, Smlsl) \
+ V(smlsl2, Smlsl2) \
+ V(umull, Umull) \
+ V(umull2, Umull2) \
+ V(umlal, Umlal) \
+ V(umlal2, Umlal2) \
+ V(umlsl, Umlsl) \
+ V(umlsl2, Umlsl2) \
+ V(sudot, Sudot) \
+ V(usdot, Usdot)
+
+
+#define DEFINE_MACRO_ASM_FUNC(ASM, MASM) \
+ void MASM(const VRegister& vd, \
+ const VRegister& vn, \
+ const VRegister& vm, \
+ int vm_index) { \
+ VIXL_ASSERT(allow_macro_instructions_); \
+ SingleEmissionCheckScope guard(this); \
+ ASM(vd, vn, vm, vm_index); \
+ }
+ NEON_BYELEMENT_MACRO_LIST(DEFINE_MACRO_ASM_FUNC)
+#undef DEFINE_MACRO_ASM_FUNC
+
+#define NEON_2VREG_SHIFT_MACRO_LIST(V) \
+ V(rshrn, Rshrn) \
+ V(rshrn2, Rshrn2) \
+ V(shl, Shl) \
+ V(shll, Shll) \
+ V(shll2, Shll2) \
+ V(shrn, Shrn) \
+ V(shrn2, Shrn2) \
+ V(sli, Sli) \
+ V(sqrshrn, Sqrshrn) \
+ V(sqrshrn2, Sqrshrn2) \
+ V(sqrshrun, Sqrshrun) \
+ V(sqrshrun2, Sqrshrun2) \
+ V(sqshl, Sqshl) \
+ V(sqshlu, Sqshlu) \
+ V(sqshrn, Sqshrn) \
+ V(sqshrn2, Sqshrn2) \
+ V(sqshrun, Sqshrun) \
+ V(sqshrun2, Sqshrun2) \
+ V(sri, Sri) \
+ V(srshr, Srshr) \
+ V(srsra, Srsra) \
+ V(sshr, Sshr) \
+ V(ssra, Ssra) \
+ V(uqrshrn, Uqrshrn) \
+ V(uqrshrn2, Uqrshrn2) \
+ V(uqshl, Uqshl) \
+ V(uqshrn, Uqshrn) \
+ V(uqshrn2, Uqshrn2) \
+ V(urshr, Urshr) \
+ V(ursra, Ursra) \
+ V(ushr, Ushr) \
+ V(usra, Usra)
+
+#define DEFINE_MACRO_ASM_FUNC(ASM, MASM) \
+ void MASM(const VRegister& vd, const VRegister& vn, int shift) { \
+ VIXL_ASSERT(allow_macro_instructions_); \
+ SingleEmissionCheckScope guard(this); \
+ ASM(vd, vn, shift); \
+ }
+ NEON_2VREG_SHIFT_MACRO_LIST(DEFINE_MACRO_ASM_FUNC)
+#undef DEFINE_MACRO_ASM_FUNC
+
+#define NEON_2VREG_SHIFT_LONG_MACRO_LIST(V) \
+ V(shll, sshll, Sshll) \
+ V(shll, ushll, Ushll) \
+ V(shll2, sshll2, Sshll2) \
+ V(shll2, ushll2, Ushll2)
+
+#define DEFINE_MACRO_ASM_FUNC(ASM1, ASM2, MASM) \
+ void MASM(const VRegister& vd, const VRegister& vn, int shift) { \
+ VIXL_ASSERT(allow_macro_instructions_); \
+ SingleEmissionCheckScope guard(this); \
+ if (vn.GetLaneSizeInBits() == static_cast<unsigned>(shift)) { \
+ ASM1(vd, vn, shift); \
+ } else { \
+ ASM2(vd, vn, shift); \
+ } \
+ }
+ NEON_2VREG_SHIFT_LONG_MACRO_LIST(DEFINE_MACRO_ASM_FUNC)
+#undef DEFINE_MACRO_ASM_FUNC
+
+// SVE 3 vector register instructions.
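+ // SVE_3VREG_COMMUTATIVE_MACRO_LIST covers predicated, destructive operations
+ // whose source operands commute. The generated wrappers exploit that to
+ // avoid an extra move: if zd aliases zn the instruction is emitted directly,
+ // if zd aliases zm the operands are swapped, and otherwise a predicated
+ // movprfx is emitted first. Sketch of the no-alias case:
+ //
+ //   masm.Add(z0.VnS(), p0.Merging(), z1.VnS(), z2.VnS());
+ //   // typically expands to:
+ //   //   movprfx z0.s, p0/m, z1.s
+ //   //   add     z0.s, p0/m, z0.s, z2.s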
+#define SVE_3VREG_COMMUTATIVE_MACRO_LIST(V) \ + V(add, Add) \ + V(and_, And) \ + V(eor, Eor) \ + V(mul, Mul) \ + V(orr, Orr) \ + V(sabd, Sabd) \ + V(shadd, Shadd) \ + V(smax, Smax) \ + V(smin, Smin) \ + V(smulh, Smulh) \ + V(sqadd, Sqadd) \ + V(srhadd, Srhadd) \ + V(uabd, Uabd) \ + V(uhadd, Uhadd) \ + V(umax, Umax) \ + V(umin, Umin) \ + V(umulh, Umulh) \ + V(uqadd, Uqadd) \ + V(urhadd, Urhadd) + +#define DEFINE_MACRO_ASM_FUNC(ASM, MASM) \ + void MASM(const ZRegister& zd, \ + const PRegisterM& pg, \ + const ZRegister& zn, \ + const ZRegister& zm) { \ + VIXL_ASSERT(allow_macro_instructions_); \ + if (zd.Aliases(zn)) { \ + SingleEmissionCheckScope guard(this); \ + ASM(zd, pg, zd, zm); \ + } else if (zd.Aliases(zm)) { \ + SingleEmissionCheckScope guard(this); \ + ASM(zd, pg, zd, zn); \ + } else { \ + MovprfxHelperScope guard(this, zd, pg, zn); \ + ASM(zd, pg, zd, zm); \ + } \ + } + SVE_3VREG_COMMUTATIVE_MACRO_LIST(DEFINE_MACRO_ASM_FUNC) +#undef DEFINE_MACRO_ASM_FUNC + + void Bic(const VRegister& vd, const int imm8, const int left_shift = 0) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + bic(vd, imm8, left_shift); + } + void Cmeq(const VRegister& vd, const VRegister& vn, int imm) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + cmeq(vd, vn, imm); + } + void Cmge(const VRegister& vd, const VRegister& vn, int imm) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + cmge(vd, vn, imm); + } + void Cmgt(const VRegister& vd, const VRegister& vn, int imm) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + cmgt(vd, vn, imm); + } + void Cmle(const VRegister& vd, const VRegister& vn, int imm) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + cmle(vd, vn, imm); + } + void Cmlt(const VRegister& vd, const VRegister& vn, int imm) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + cmlt(vd, vn, imm); + } + void Dup(const VRegister& vd, const VRegister& vn, int index) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + dup(vd, vn, index); + } + void Dup(const VRegister& vd, const Register& rn) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + dup(vd, rn); + } + void Ext(const VRegister& vd, + const VRegister& vn, + const VRegister& vm, + int index) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + ext(vd, vn, vm, index); + } + void Fcadd(const VRegister& vd, + const VRegister& vn, + const VRegister& vm, + int rot) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + fcadd(vd, vn, vm, rot); + } + void Fcmla(const VRegister& vd, + const VRegister& vn, + const VRegister& vm, + int vm_index, + int rot) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + fcmla(vd, vn, vm, vm_index, rot); + } + void Fcmla(const VRegister& vd, + const VRegister& vn, + const VRegister& vm, + int rot) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + fcmla(vd, vn, vm, rot); + } + void Ins(const VRegister& vd, + int vd_index, + const VRegister& vn, + int vn_index) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + ins(vd, vd_index, vn, vn_index); + } + void Ins(const VRegister& vd, int vd_index, const Register& rn) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope 
guard(this); + ins(vd, vd_index, rn); + } + void Ld1(const VRegister& vt, const MemOperand& src) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + ld1(vt, src); + } + void Ld1(const VRegister& vt, const VRegister& vt2, const MemOperand& src) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + ld1(vt, vt2, src); + } + void Ld1(const VRegister& vt, + const VRegister& vt2, + const VRegister& vt3, + const MemOperand& src) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + ld1(vt, vt2, vt3, src); + } + void Ld1(const VRegister& vt, + const VRegister& vt2, + const VRegister& vt3, + const VRegister& vt4, + const MemOperand& src) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + ld1(vt, vt2, vt3, vt4, src); + } + void Ld1(const VRegister& vt, int lane, const MemOperand& src) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + ld1(vt, lane, src); + } + void Ld1r(const VRegister& vt, const MemOperand& src) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + ld1r(vt, src); + } + void Ld2(const VRegister& vt, const VRegister& vt2, const MemOperand& src) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + ld2(vt, vt2, src); + } + void Ld2(const VRegister& vt, + const VRegister& vt2, + int lane, + const MemOperand& src) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + ld2(vt, vt2, lane, src); + } + void Ld2r(const VRegister& vt, const VRegister& vt2, const MemOperand& src) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + ld2r(vt, vt2, src); + } + void Ld3(const VRegister& vt, + const VRegister& vt2, + const VRegister& vt3, + const MemOperand& src) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + ld3(vt, vt2, vt3, src); + } + void Ld3(const VRegister& vt, + const VRegister& vt2, + const VRegister& vt3, + int lane, + const MemOperand& src) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + ld3(vt, vt2, vt3, lane, src); + } + void Ld3r(const VRegister& vt, + const VRegister& vt2, + const VRegister& vt3, + const MemOperand& src) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + ld3r(vt, vt2, vt3, src); + } + void Ld4(const VRegister& vt, + const VRegister& vt2, + const VRegister& vt3, + const VRegister& vt4, + const MemOperand& src) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + ld4(vt, vt2, vt3, vt4, src); + } + void Ld4(const VRegister& vt, + const VRegister& vt2, + const VRegister& vt3, + const VRegister& vt4, + int lane, + const MemOperand& src) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + ld4(vt, vt2, vt3, vt4, lane, src); + } + void Ld4r(const VRegister& vt, + const VRegister& vt2, + const VRegister& vt3, + const VRegister& vt4, + const MemOperand& src) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + ld4r(vt, vt2, vt3, vt4, src); + } + void Mov(const VRegister& vd, + int vd_index, + const VRegister& vn, + int vn_index) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + mov(vd, vd_index, vn, vn_index); + } + void Mov(const VRegister& vd, const VRegister& vn, int index) { + VIXL_ASSERT(allow_macro_instructions_); + 
SingleEmissionCheckScope guard(this); + mov(vd, vn, index); + } + void Mov(const VRegister& vd, int vd_index, const Register& rn) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + mov(vd, vd_index, rn); + } + void Mov(const Register& rd, const VRegister& vn, int vn_index) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + mov(rd, vn, vn_index); + } + void Movi(const VRegister& vd, + uint64_t imm, + Shift shift = LSL, + int shift_amount = 0); + void Movi(const VRegister& vd, uint64_t hi, uint64_t lo); + void Mvni(const VRegister& vd, + const int imm8, + Shift shift = LSL, + const int shift_amount = 0) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + mvni(vd, imm8, shift, shift_amount); + } + void Orr(const VRegister& vd, const int imm8, const int left_shift = 0) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + orr(vd, imm8, left_shift); + } + void Scvtf(const VRegister& vd, const VRegister& vn, int fbits = 0) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + scvtf(vd, vn, fbits); + } + void Ucvtf(const VRegister& vd, const VRegister& vn, int fbits = 0) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + ucvtf(vd, vn, fbits); + } + void Fcvtzs(const VRegister& vd, const VRegister& vn, int fbits = 0) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + fcvtzs(vd, vn, fbits); + } + void Fcvtzu(const VRegister& vd, const VRegister& vn, int fbits = 0) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + fcvtzu(vd, vn, fbits); + } + void St1(const VRegister& vt, const MemOperand& dst) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + st1(vt, dst); + } + void St1(const VRegister& vt, const VRegister& vt2, const MemOperand& dst) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + st1(vt, vt2, dst); + } + void St1(const VRegister& vt, + const VRegister& vt2, + const VRegister& vt3, + const MemOperand& dst) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + st1(vt, vt2, vt3, dst); + } + void St1(const VRegister& vt, + const VRegister& vt2, + const VRegister& vt3, + const VRegister& vt4, + const MemOperand& dst) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + st1(vt, vt2, vt3, vt4, dst); + } + void St1(const VRegister& vt, int lane, const MemOperand& dst) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + st1(vt, lane, dst); + } + void St2(const VRegister& vt, const VRegister& vt2, const MemOperand& dst) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + st2(vt, vt2, dst); + } + void St3(const VRegister& vt, + const VRegister& vt2, + const VRegister& vt3, + const MemOperand& dst) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + st3(vt, vt2, vt3, dst); + } + void St4(const VRegister& vt, + const VRegister& vt2, + const VRegister& vt3, + const VRegister& vt4, + const MemOperand& dst) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + st4(vt, vt2, vt3, vt4, dst); + } + void St2(const VRegister& vt, + const VRegister& vt2, + int lane, + const MemOperand& dst) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + 
st2(vt, vt2, lane, dst); + } + void St3(const VRegister& vt, + const VRegister& vt2, + const VRegister& vt3, + int lane, + const MemOperand& dst) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + st3(vt, vt2, vt3, lane, dst); + } + void St4(const VRegister& vt, + const VRegister& vt2, + const VRegister& vt3, + const VRegister& vt4, + int lane, + const MemOperand& dst) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + st4(vt, vt2, vt3, vt4, lane, dst); + } + void Smov(const Register& rd, const VRegister& vn, int vn_index) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + smov(rd, vn, vn_index); + } + void Umov(const Register& rd, const VRegister& vn, int vn_index) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + umov(rd, vn, vn_index); + } + void Crc32b(const Register& rd, const Register& rn, const Register& rm) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + crc32b(rd, rn, rm); + } + void Crc32h(const Register& rd, const Register& rn, const Register& rm) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + crc32h(rd, rn, rm); + } + void Crc32w(const Register& rd, const Register& rn, const Register& rm) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + crc32w(rd, rn, rm); + } + void Crc32x(const Register& rd, const Register& rn, const Register& rm) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + crc32x(rd, rn, rm); + } + void Crc32cb(const Register& rd, const Register& rn, const Register& rm) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + crc32cb(rd, rn, rm); + } + void Crc32ch(const Register& rd, const Register& rn, const Register& rm) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + crc32ch(rd, rn, rm); + } + void Crc32cw(const Register& rd, const Register& rn, const Register& rm) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + crc32cw(rd, rn, rm); + } + void Crc32cx(const Register& rd, const Register& rn, const Register& rm) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + crc32cx(rd, rn, rm); + } + + // Scalable Vector Extensions. + void Abs(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + abs(zd, pg, zn); + } + void Add(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + add(zd, zn, zm); + } + void Add(const ZRegister& zd, const ZRegister& zn, IntegerOperand imm) { + VIXL_ASSERT(allow_macro_instructions_); + AddSubHelper(kAddImmediate, zd, zn, imm); + } + void Addpl(const Register& xd, const Register& xn, int64_t multiplier); + void Addvl(const Register& xd, const Register& xn, int64_t multiplier); + // Note that unlike the core ISA, SVE's `adr` is not PC-relative. 
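+ // It computes one address per lane from its vector operands, for example
+ // "adr z0.d, [z1.d, z2.d, lsl #2]". A sketch of the equivalent macro call,
+ // assuming the vector-plus-vector SVEMemOperand form with an SVE_LSL
+ // modifier:
+ //
+ //   masm.Adr(z0.VnD(), SVEMemOperand(z1.VnD(), z2.VnD(), SVE_LSL, 2));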
+ void Adr(const ZRegister& zd, const SVEMemOperand& addr) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + adr(zd, addr); + } + void And(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const PRegisterWithLaneSize& pn, + const PRegisterWithLaneSize& pm) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + and_(pd, pg, pn, pm); + } + void And(const ZRegister& zd, const ZRegister& zn, uint64_t imm) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + if (IsImmLogical(imm, zd.GetLaneSizeInBits())) { + and_(zd, zn, imm); + } else { + // TODO: Synthesise the immediate once 'Mov' is implemented. + VIXL_UNIMPLEMENTED(); + } + } + void And(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm) { + VIXL_ASSERT(allow_macro_instructions_); + VIXL_ASSERT(AreSameLaneSize(zd, zn, zm)); + SingleEmissionCheckScope guard(this); + and_(zd.VnD(), zn.VnD(), zm.VnD()); + } + void Ands(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const PRegisterWithLaneSize& pn, + const PRegisterWithLaneSize& pm) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + ands(pd, pg, pn, pm); + } + void Andv(const VRegister& vd, const PRegister& pg, const ZRegister& zn) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + andv(vd, pg, zn); + } + void Asr(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + int shift) { + VIXL_ASSERT(allow_macro_instructions_); + MovprfxHelperScope guard(this, zd, pg, zn); + asr(zd, pg, zd, shift); + } + void Asr(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm); + void Asr(const ZRegister& zd, const ZRegister& zn, int shift) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + asr(zd, zn, shift); + } + void Asr(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + asr(zd, zn, zm); + } + void Asrd(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + int shift) { + VIXL_ASSERT(allow_macro_instructions_); + MovprfxHelperScope guard(this, zd, pg, zn); + asrd(zd, pg, zd, shift); + } + void Bic(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm); + void Bic(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const PRegisterWithLaneSize& pn, + const PRegisterWithLaneSize& pm) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + bic(pd, pg, pn, pm); + } + void Bic(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm) { + VIXL_ASSERT(allow_macro_instructions_); + VIXL_ASSERT(AreSameLaneSize(zd, zn, zm)); + SingleEmissionCheckScope guard(this); + bic(zd.VnD(), zn.VnD(), zm.VnD()); + } + void Bic(const ZRegister& zd, const ZRegister& zn, uint64_t imm) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + if (IsImmLogical(imm, zd.GetLaneSizeInBits())) { + bic(zd, zn, imm); + } else { + // TODO: Synthesise the immediate once 'Mov' is implemented. 
+ VIXL_UNIMPLEMENTED(); + } + } + void Bics(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const PRegisterWithLaneSize& pn, + const PRegisterWithLaneSize& pm) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + bics(pd, pg, pn, pm); + } + void Brka(const PRegisterWithLaneSize& pd, + const PRegister& pg, + const PRegisterWithLaneSize& pn) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + brka(pd, pg, pn); + } + void Brkas(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const PRegisterWithLaneSize& pn) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + brkas(pd, pg, pn); + } + void Brkb(const PRegisterWithLaneSize& pd, + const PRegister& pg, + const PRegisterWithLaneSize& pn) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + brkb(pd, pg, pn); + } + void Brkbs(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const PRegisterWithLaneSize& pn) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + brkbs(pd, pg, pn); + } + void Brkn(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const PRegisterWithLaneSize& pn, + const PRegisterWithLaneSize& pm) { + VIXL_ASSERT(allow_macro_instructions_); + if (!pd.Aliases(pm)) { + Mov(pd, pm); + } + SingleEmissionCheckScope guard(this); + brkn(pd, pg, pn, pd); + } + void Brkns(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const PRegisterWithLaneSize& pn, + const PRegisterWithLaneSize& pm) { + VIXL_ASSERT(allow_macro_instructions_); + if (!pd.Aliases(pm)) { + Mov(pd, pm); + } + SingleEmissionCheckScope guard(this); + brkns(pd, pg, pn, pd); + } + void Brkpa(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const PRegisterWithLaneSize& pn, + const PRegisterWithLaneSize& pm) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + brkpa(pd, pg, pn, pm); + } + void Brkpas(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const PRegisterWithLaneSize& pn, + const PRegisterWithLaneSize& pm) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + brkpas(pd, pg, pn, pm); + } + void Brkpb(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const PRegisterWithLaneSize& pn, + const PRegisterWithLaneSize& pm) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + brkpb(pd, pg, pn, pm); + } + void Brkpbs(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const PRegisterWithLaneSize& pn, + const PRegisterWithLaneSize& pm) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + brkpbs(pd, pg, pn, pm); + } + void Clasta(const Register& rd, + const PRegister& pg, + const Register& rn, + const ZRegister& zm) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + clasta(rd, pg, rn, zm); + } + void Clasta(const VRegister& vd, + const PRegister& pg, + const VRegister& vn, + const ZRegister& zm) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + clasta(vd, pg, vn, zm); + } + void Clasta(const ZRegister& zd, + const PRegister& pg, + const ZRegister& zn, + const ZRegister& zm); + void Clastb(const Register& rd, + const PRegister& pg, + const Register& rn, + const ZRegister& zm) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + clastb(rd, pg, rn, zm); + } + void Clastb(const VRegister& vd, + const PRegister& pg, 
+ const VRegister& vn,
+ const ZRegister& zm) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ clastb(vd, pg, vn, zm);
+ }
+ void Clastb(const ZRegister& zd,
+ const PRegister& pg,
+ const ZRegister& zn,
+ const ZRegister& zm);
+ void Cls(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ cls(zd, pg, zn);
+ }
+ void Clz(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ clz(zd, pg, zn);
+ }
+ void Cmpeq(const PRegisterWithLaneSize& pd,
+ const PRegisterZ& pg,
+ const ZRegister& zn,
+ const ZRegister& zm) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ cmpeq(pd, pg, zn, zm);
+ }
+ void Cmpeq(const PRegisterWithLaneSize& pd,
+ const PRegisterZ& pg,
+ const ZRegister& zn,
+ IntegerOperand imm) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ int imm5;
+ if (imm.TryEncodeAsIntNForLane<5>(zn, &imm5)) {
+ SingleEmissionCheckScope guard(this);
+ cmpeq(pd, pg, zn, imm5);
+ } else {
+ CompareHelper(eq, pd, pg, zn, imm);
+ }
+ }
+ void Cmpge(const PRegisterWithLaneSize& pd,
+ const PRegisterZ& pg,
+ const ZRegister& zn,
+ const ZRegister& zm) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ cmpge(pd, pg, zn, zm);
+ }
+ void Cmpge(const PRegisterWithLaneSize& pd,
+ const PRegisterZ& pg,
+ const ZRegister& zn,
+ IntegerOperand imm) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ int imm5;
+ if (imm.TryEncodeAsIntNForLane<5>(zn, &imm5)) {
+ SingleEmissionCheckScope guard(this);
+ cmpge(pd, pg, zn, imm5);
+ } else {
+ CompareHelper(ge, pd, pg, zn, imm);
+ }
+ }
+ void Cmpgt(const PRegisterWithLaneSize& pd,
+ const PRegisterZ& pg,
+ const ZRegister& zn,
+ const ZRegister& zm) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ cmpgt(pd, pg, zn, zm);
+ }
+ void Cmpgt(const PRegisterWithLaneSize& pd,
+ const PRegisterZ& pg,
+ const ZRegister& zn,
+ IntegerOperand imm) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ int imm5;
+ if (imm.TryEncodeAsIntNForLane<5>(zn, &imm5)) {
+ SingleEmissionCheckScope guard(this);
+ cmpgt(pd, pg, zn, imm5);
+ } else {
+ CompareHelper(gt, pd, pg, zn, imm);
+ }
+ }
+ void Cmphi(const PRegisterWithLaneSize& pd,
+ const PRegisterZ& pg,
+ const ZRegister& zn,
+ const ZRegister& zm) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ cmphi(pd, pg, zn, zm);
+ }
+ void Cmphi(const PRegisterWithLaneSize& pd,
+ const PRegisterZ& pg,
+ const ZRegister& zn,
+ IntegerOperand imm) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ if (imm.IsUintN(7)) {
+ SingleEmissionCheckScope guard(this);
+ cmphi(pd, pg, zn, static_cast<unsigned>(imm.AsUintN(7)));
+ } else {
+ CompareHelper(hi, pd, pg, zn, imm);
+ }
+ }
+ void Cmphs(const PRegisterWithLaneSize& pd,
+ const PRegisterZ& pg,
+ const ZRegister& zn,
+ const ZRegister& zm) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ cmphs(pd, pg, zn, zm);
+ }
+ void Cmphs(const PRegisterWithLaneSize& pd,
+ const PRegisterZ& pg,
+ const ZRegister& zn,
+ IntegerOperand imm) {
+ if (imm.IsUintN(7)) {
+ SingleEmissionCheckScope guard(this);
+ cmphs(pd, pg, zn, static_cast<unsigned>(imm.AsUintN(7)));
+ } else {
+ CompareHelper(hs, pd, pg, zn, imm);
+ }
+ }
+ void Cmple(const PRegisterWithLaneSize& pd,
+ const PRegisterZ& pg,
+ const ZRegister& zn,
+ const ZRegister& zm) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ cmple(pd, pg, zn, zm);
+ }
+ void Cmple(const PRegisterWithLaneSize& pd,
+ const PRegisterZ& pg,
+ const ZRegister& zn,
+ IntegerOperand imm) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ int imm5;
+ if (imm.TryEncodeAsIntNForLane<5>(zn, &imm5)) {
+ SingleEmissionCheckScope guard(this);
+ cmple(pd, pg, zn, imm5);
+ } else {
+ CompareHelper(le, pd, pg, zn, imm);
+ }
+ }
+ void Cmplo(const PRegisterWithLaneSize& pd,
+ const PRegisterZ& pg,
+ const ZRegister& zn,
+ const ZRegister& zm) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ cmplo(pd, pg, zn, zm);
+ }
+ void Cmplo(const PRegisterWithLaneSize& pd,
+ const PRegisterZ& pg,
+ const ZRegister& zn,
+ IntegerOperand imm) {
+ if (imm.IsUintN(7)) {
+ SingleEmissionCheckScope guard(this);
+ cmplo(pd, pg, zn, static_cast<unsigned>(imm.AsUintN(7)));
+ } else {
+ CompareHelper(lo, pd, pg, zn, imm);
+ }
+ }
+ void Cmpls(const PRegisterWithLaneSize& pd,
+ const PRegisterZ& pg,
+ const ZRegister& zn,
+ const ZRegister& zm) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ cmpls(pd, pg, zn, zm);
+ }
+ void Cmpls(const PRegisterWithLaneSize& pd,
+ const PRegisterZ& pg,
+ const ZRegister& zn,
+ IntegerOperand imm) {
+ if (imm.IsUintN(7)) {
+ SingleEmissionCheckScope guard(this);
+ cmpls(pd, pg, zn, static_cast<unsigned>(imm.AsUintN(7)));
+ } else {
+ CompareHelper(ls, pd, pg, zn, imm);
+ }
+ }
+ void Cmplt(const PRegisterWithLaneSize& pd,
+ const PRegisterZ& pg,
+ const ZRegister& zn,
+ const ZRegister& zm) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ cmplt(pd, pg, zn, zm);
+ }
+ void Cmplt(const PRegisterWithLaneSize& pd,
+ const PRegisterZ& pg,
+ const ZRegister& zn,
+ IntegerOperand imm) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ int imm5;
+ if (imm.TryEncodeAsIntNForLane<5>(zn, &imm5)) {
+ SingleEmissionCheckScope guard(this);
+ cmplt(pd, pg, zn, imm5);
+ } else {
+ CompareHelper(lt, pd, pg, zn, imm);
+ }
+ }
+ void Cmpne(const PRegisterWithLaneSize& pd,
+ const PRegisterZ& pg,
+ const ZRegister& zn,
+ const ZRegister& zm) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ cmpne(pd, pg, zn, zm);
+ }
+ void Cmpne(const PRegisterWithLaneSize& pd,
+ const PRegisterZ& pg,
+ const ZRegister& zn,
+ IntegerOperand imm) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ int imm5;
+ if (imm.TryEncodeAsIntNForLane<5>(zn, &imm5)) {
+ SingleEmissionCheckScope guard(this);
+ cmpne(pd, pg, zn, imm5);
+ } else {
+ CompareHelper(ne, pd, pg, zn, imm);
+ }
+ }
+ void Cnot(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ cnot(zd, pg, zn);
+ }
+ void Cnt(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ cnt(zd, pg, zn);
+ }
+ void Cntb(const Register& rd, int pattern = SVE_ALL, int multiplier = 1) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ cntb(rd, pattern, multiplier);
+ }
+ void Cntd(const Register& rd, int pattern = SVE_ALL, int multiplier = 1) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this);
+ cntd(rd, pattern, multiplier);
+ }
+ void Cnth(const Register& rd, int pattern = SVE_ALL, int multiplier = 1) {
+ VIXL_ASSERT(allow_macro_instructions_);
+ SingleEmissionCheckScope guard(this); +
cnth(rd, pattern, multiplier); + } + void Cntp(const Register& rd, + const PRegister& pg, + const PRegisterWithLaneSize& pn) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + // The `cntp` instruction architecturally takes an X register, but the + // result will always be in the range [0, kPRegMaxSize] (and therefore + // always fits in a W register), so we can accept a W-sized rd here. + cntp(rd.X(), pg, pn); + } + void Cntw(const Register& rd, int pattern = SVE_ALL, int multiplier = 1) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + cntw(rd, pattern, multiplier); + } + void Compact(const ZRegister& zd, const PRegister& pg, const ZRegister& zn) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + compact(zd, pg, zn); + } + void Cpy(const ZRegister& zd, const PRegister& pg, IntegerOperand imm); + void Cpy(const ZRegister& zd, const PRegisterM& pg, const Register& rn) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + cpy(zd, pg, rn); + } + void Cpy(const ZRegister& zd, const PRegisterM& pg, const VRegister& vn) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + cpy(zd, pg, vn); + } + void Ctermeq(const Register& rn, const Register& rm) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + ctermeq(rn, rm); + } + void Ctermne(const Register& rn, const Register& rm) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + ctermne(rn, rm); + } + void Decb(const Register& rdn, int pattern = SVE_ALL, int multiplier = 1) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + decb(rdn, pattern, multiplier); + } + void Decd(const Register& rdn, int pattern = SVE_ALL, int multiplier = 1) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + decd(rdn, pattern, multiplier); + } + void Decd(const ZRegister& zdn, int pattern = SVE_ALL, int multiplier = 1) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + decd(zdn, pattern, multiplier); + } + void Dech(const Register& rdn, int pattern = SVE_ALL, int multiplier = 1) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + dech(rdn, pattern, multiplier); + } + void Dech(const ZRegister& zdn, int pattern = SVE_ALL, int multiplier = 1) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + dech(zdn, pattern, multiplier); + } + void Decp(const Register& rdn, const PRegisterWithLaneSize& pg) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + decp(rdn, pg); + } + void Decp(const ZRegister& zd, const PRegister& pg, const ZRegister& zn) { + VIXL_ASSERT(allow_macro_instructions_); + VIXL_ASSERT(AreSameFormat(zd, zn)); + // `decp` writes every lane, so use an unpredicated movprfx. 
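+ // (There is nothing for a merging prefix to preserve here: `decp` counts
+ // the active lanes of pg but updates every lane of its destination.)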
+ MovprfxHelperScope guard(this, zd, zn); + decp(zd, pg); + } + void Decp(const ZRegister& zdn, const PRegister& pg) { Decp(zdn, pg, zdn); } + void Decw(const Register& rdn, int pattern = SVE_ALL, int multiplier = 1) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + decw(rdn, pattern, multiplier); + } + void Decw(const ZRegister& zdn, int pattern = SVE_ALL, int multiplier = 1) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + decw(zdn, pattern, multiplier); + } + void Dup(const ZRegister& zd, const Register& xn) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + dup(zd, xn); + } + void Dup(const ZRegister& zd, const ZRegister& zn, int index) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + dup(zd, zn, index); + } + void Dup(const ZRegister& zd, IntegerOperand imm); + void Eon(const ZRegister& zd, const ZRegister& zn, uint64_t imm) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + if (IsImmLogical(imm, zd.GetLaneSizeInBits())) { + eon(zd, zn, imm); + } else { + // TODO: Synthesise the immediate once 'Mov' is implemented. + VIXL_UNIMPLEMENTED(); + } + } + void Eor(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const PRegisterWithLaneSize& pn, + const PRegisterWithLaneSize& pm) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + eor(pd, pg, pn, pm); + } + void Eor(const ZRegister& zd, const ZRegister& zn, uint64_t imm) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + if (IsImmLogical(imm, zd.GetLaneSizeInBits())) { + eor(zd, zn, imm); + } else { + // TODO: Synthesise the immediate once 'Mov' is implemented. 
+ VIXL_UNIMPLEMENTED(); + } + } + void Eor(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm) { + VIXL_ASSERT(allow_macro_instructions_); + VIXL_ASSERT(AreSameLaneSize(zd, zn, zm)); + SingleEmissionCheckScope guard(this); + eor(zd.VnD(), zn.VnD(), zm.VnD()); + } + void Eors(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const PRegisterWithLaneSize& pn, + const PRegisterWithLaneSize& pm) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + eors(pd, pg, pn, pm); + } + void Eorv(const VRegister& vd, const PRegister& pg, const ZRegister& zn) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + eorv(vd, pg, zn); + } + void Ext(const ZRegister& zd, + const ZRegister& zn, + const ZRegister& zm, + unsigned offset) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + ext(zd, zn, zm, offset); + } + void Fabd(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm, + FPMacroNaNPropagationOption nan_option); + void Fabs(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + fabs(zd, pg, zn); + } + void Facge(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const ZRegister& zn, + const ZRegister& zm) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + facge(pd, pg, zn, zm); + } + void Facgt(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const ZRegister& zn, + const ZRegister& zm) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + facgt(pd, pg, zn, zm); + } + void Facle(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const ZRegister& zn, + const ZRegister& zm) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + facge(pd, pg, zm, zn); + } + void Faclt(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const ZRegister& zn, + const ZRegister& zm) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + facgt(pd, pg, zm, zn); + } + void Fadd(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + double imm) { + VIXL_ASSERT(allow_macro_instructions_); + MovprfxHelperScope guard(this, zd, pg, zn); + fadd(zd, pg, zd, imm); + } + void Fadd(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm, + FPMacroNaNPropagationOption nan_option); + void Fadd(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + fadd(zd, zn, zm); + } + void Fadda(const VRegister& vd, + const PRegister& pg, + const VRegister& vn, + const ZRegister& zm) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + fadda(vd, pg, vn, zm); + } + void Faddv(const VRegister& vd, const PRegister& pg, const ZRegister& zn) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + faddv(vd, pg, zn); + } + void Fcadd(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm, + int rot); + void Fcmeq(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const ZRegister& zn, + double zero) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + if (zero == 0.0) { + fcmeq(pd, pg, zn, zero); + } else { + // TODO: Synthesise other immediates. 
+ VIXL_UNIMPLEMENTED(); + } + } + void Fcmeq(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const ZRegister& zn, + const ZRegister& zm) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + fcmeq(pd, pg, zn, zm); + } + void Fcmge(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const ZRegister& zn, + double zero) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + if (zero == 0.0) { + fcmge(pd, pg, zn, zero); + } else { + // TODO: Synthesise other immediates. + VIXL_UNIMPLEMENTED(); + } + } + void Fcmge(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const ZRegister& zn, + const ZRegister& zm) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + fcmge(pd, pg, zn, zm); + } + void Fcmgt(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const ZRegister& zn, + double zero) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + if (zero == 0.0) { + fcmgt(pd, pg, zn, zero); + } else { + // TODO: Synthesise other immediates. + VIXL_UNIMPLEMENTED(); + } + } + void Fcmgt(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const ZRegister& zn, + const ZRegister& zm) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + fcmgt(pd, pg, zn, zm); + } + void Fcmla(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& za, + const ZRegister& zn, + const ZRegister& zm, + int rot); + void Fcmla(const ZRegister& zda, + const ZRegister& zn, + const ZRegister& zm, + int index, + int rot) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + fcmla(zda, zn, zm, index, rot); + } + void Fcmle(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const ZRegister& zn, + double zero) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + if (zero == 0.0) { + fcmle(pd, pg, zn, zero); + } else { + // TODO: Synthesise other immediates. + VIXL_UNIMPLEMENTED(); + } + } + void Fcmle(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const ZRegister& zn, + const ZRegister& zm) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + fcmge(pd, pg, zm, zn); + } + void Fcmlt(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const ZRegister& zn, + double zero) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + if (zero == 0.0) { + fcmlt(pd, pg, zn, zero); + } else { + // TODO: Synthesise other immediates. + VIXL_UNIMPLEMENTED(); + } + } + void Fcmlt(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const ZRegister& zn, + const ZRegister& zm) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + fcmgt(pd, pg, zm, zn); + } + void Fcmne(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const ZRegister& zn, + double zero) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + if (zero == 0.0) { + fcmne(pd, pg, zn, zero); + } else { + // TODO: Synthesise other immediates. 
+ VIXL_UNIMPLEMENTED(); + } + } + void Fcmne(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const ZRegister& zn, + const ZRegister& zm) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + fcmne(pd, pg, zn, zm); + } + void Fcmuo(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const ZRegister& zn, + const ZRegister& zm) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + fcmuo(pd, pg, zn, zm); + } + void Fcpy(const ZRegister& zd, const PRegisterM& pg, double imm); + void Fcpy(const ZRegister& zd, const PRegisterM& pg, float imm); + void Fcpy(const ZRegister& zd, const PRegisterM& pg, Float16 imm); + void Fcvt(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + fcvt(zd, pg, zn); + } + void Fcvt(const ZRegister& zd, const PRegisterZ& pg, const ZRegister& zn) { + VIXL_ASSERT(allow_macro_instructions_); + // The element type in this predicated movprfx is determined by the larger + // type between the source and destination. + int lane_size = std::max(zd.GetLaneSizeInBits(), zn.GetLaneSizeInBits()); + MovprfxHelperScope guard(this, + zd.WithLaneSize(lane_size), + pg, + zn.WithLaneSize(lane_size)); + fcvt(zd, pg.Merging(), zn); + } + void Fcvtzs(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + fcvtzs(zd, pg, zn); + } + void Fcvtzu(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + fcvtzu(zd, pg, zn); + } + void Fdiv(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm); + void Fdup(const ZRegister& zd, double imm); + void Fdup(const ZRegister& zd, float imm); + void Fdup(const ZRegister& zd, Float16 imm); + void Fexpa(const ZRegister& zd, const ZRegister& zn) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + fexpa(zd, zn); + } + void Fmad(const ZRegister& zdn, + const PRegisterM& pg, + const ZRegister& zm, + const ZRegister& za) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + fmad(zdn, pg, zm, za); + } + void Fmax(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + double imm) { + VIXL_ASSERT(allow_macro_instructions_); + MovprfxHelperScope guard(this, zd, pg, zn); + fmax(zd, pg, zd, imm); + } + void Fmax( + const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm, + FPMacroNaNPropagationOption nan_option = NoFPMacroNaNPropagationSelected); + void Fmaxnm(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + double imm) { + VIXL_ASSERT(allow_macro_instructions_); + MovprfxHelperScope guard(this, zd, pg, zn); + fmaxnm(zd, pg, zd, imm); + } + void Fmaxnm(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm, + FPMacroNaNPropagationOption nan_option); + void Fmaxnmv(const VRegister& vd, const PRegister& pg, const ZRegister& zn) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + fmaxnmv(vd, pg, zn); + } + void Fmaxv(const VRegister& vd, const PRegister& pg, const ZRegister& zn) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + fmaxv(vd, pg, zn); + } + void Fmin(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + double imm) 
{ + VIXL_ASSERT(allow_macro_instructions_); + MovprfxHelperScope guard(this, zd, pg, zn); + fmin(zd, pg, zd, imm); + } + void Fmin( + const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm, + FPMacroNaNPropagationOption nan_option = NoFPMacroNaNPropagationSelected); + void Fminnm(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + double imm) { + VIXL_ASSERT(allow_macro_instructions_); + MovprfxHelperScope guard(this, zd, pg, zn); + fminnm(zd, pg, zd, imm); + } + void Fminnm(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm, + FPMacroNaNPropagationOption nan_option); + void Fminnmv(const VRegister& vd, const PRegister& pg, const ZRegister& zn) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + fminnmv(vd, pg, zn); + } + void Fminv(const VRegister& vd, const PRegister& pg, const ZRegister& zn) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + fminv(vd, pg, zn); + } + // zd = za + (zn * zm) + void Fmla( + const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& za, + const ZRegister& zn, + const ZRegister& zm, + FPMacroNaNPropagationOption nan_option = NoFPMacroNaNPropagationSelected); + void Fmla(const ZRegister& zd, + const ZRegister& za, + const ZRegister& zn, + const ZRegister& zm, + int index); + // zd = za - (zn * zm) + void Fmls( + const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& za, + const ZRegister& zn, + const ZRegister& zm, + FPMacroNaNPropagationOption nan_option = NoFPMacroNaNPropagationSelected); + void Fmls(const ZRegister& zd, + const ZRegister& za, + const ZRegister& zn, + const ZRegister& zm, + int index); + void Fmov(const ZRegister& zd, double imm) { + VIXL_ASSERT(allow_macro_instructions_); + Fdup(zd, imm); + } + void Fmov(const ZRegister& zd, float imm) { + VIXL_ASSERT(allow_macro_instructions_); + Fdup(zd, imm); + } + void Fmov(const ZRegister& zd, Float16 imm) { + VIXL_ASSERT(allow_macro_instructions_); + Fdup(zd, imm); + } + void Fmov(const ZRegister& zd, const PRegisterM& pg, double imm) { + VIXL_ASSERT(allow_macro_instructions_); + Fcpy(zd, pg, imm); + } + void Fmov(const ZRegister& zd, const PRegisterM& pg, float imm) { + VIXL_ASSERT(allow_macro_instructions_); + Fcpy(zd, pg, imm); + } + void Fmov(const ZRegister& zd, const PRegisterM& pg, Float16 imm) { + VIXL_ASSERT(allow_macro_instructions_); + Fcpy(zd, pg, imm); + } + void Fmsb(const ZRegister& zdn, + const PRegisterM& pg, + const ZRegister& zm, + const ZRegister& za) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + fmsb(zdn, pg, zm, za); + } + void Fmul(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + double imm) { + VIXL_ASSERT(allow_macro_instructions_); + MovprfxHelperScope guard(this, zd, pg, zn); + fmul(zd, pg, zd, imm); + } + void Fmul(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm, + FPMacroNaNPropagationOption nan_option); + void Fmul(const ZRegister& zd, + const ZRegister& zn, + const ZRegister& zm, + unsigned index) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + fmul(zd, zn, zm, index); + } + void Fmul(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + fmul(zd, zn, zm); + } + void Fmulx(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm, + 
FPMacroNaNPropagationOption nan_option); + void Fneg(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + fneg(zd, pg, zn); + } + void Fnmla( + const ZRegister& zda, + const PRegisterM& pg, + const ZRegister& za, + const ZRegister& zn, + const ZRegister& zm, + FPMacroNaNPropagationOption nan_option = NoFPMacroNaNPropagationSelected); + void Fnmls( + const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& za, + const ZRegister& zn, + const ZRegister& zm, + FPMacroNaNPropagationOption nan_option = NoFPMacroNaNPropagationSelected); + void Frecpe(const ZRegister& zd, const ZRegister& zn) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + frecpe(zd, zn); + } + void Frecps(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + frecps(zd, zn, zm); + } + void Frecpx(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + frecpx(zd, pg, zn); + } + void Frecpx(const ZRegister& zd, const PRegisterZ& pg, const ZRegister& zn) { + VIXL_ASSERT(allow_macro_instructions_); + MovprfxHelperScope guard(this, zd, pg, zn); + frecpx(zd, pg.Merging(), zn); + } + void Frinta(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + frinta(zd, pg, zn); + } + void Frinta(const ZRegister& zd, const PRegisterZ& pg, const ZRegister& zn) { + VIXL_ASSERT(allow_macro_instructions_); + MovprfxHelperScope guard(this, zd, pg, zn); + frinta(zd, pg.Merging(), zn); + } + void Frinti(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + frinti(zd, pg, zn); + } + void Frinti(const ZRegister& zd, const PRegisterZ& pg, const ZRegister& zn) { + VIXL_ASSERT(allow_macro_instructions_); + MovprfxHelperScope guard(this, zd, pg, zn); + frinti(zd, pg.Merging(), zn); + } + void Frintm(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + frintm(zd, pg, zn); + } + void Frintm(const ZRegister& zd, const PRegisterZ& pg, const ZRegister& zn) { + VIXL_ASSERT(allow_macro_instructions_); + MovprfxHelperScope guard(this, zd, pg, zn); + frintm(zd, pg.Merging(), zn); + } + void Frintn(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + frintn(zd, pg, zn); + } + void Frintn(const ZRegister& zd, const PRegisterZ& pg, const ZRegister& zn) { + VIXL_ASSERT(allow_macro_instructions_); + MovprfxHelperScope guard(this, zd, pg, zn); + frintn(zd, pg.Merging(), zn); + } + void Frintp(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + frintp(zd, pg, zn); + } + void Frintp(const ZRegister& zd, const PRegisterZ& pg, const ZRegister& zn) { + VIXL_ASSERT(allow_macro_instructions_); + MovprfxHelperScope guard(this, zd, pg, zn); + frintp(zd, pg.Merging(), zn); + } + void Frintx(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + frintx(zd, pg, zn); + } + void Frintx(const 
ZRegister& zd, const PRegisterZ& pg, const ZRegister& zn) { + VIXL_ASSERT(allow_macro_instructions_); + MovprfxHelperScope guard(this, zd, pg, zn); + frintx(zd, pg.Merging(), zn); + } + void Frintz(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + frintz(zd, pg, zn); + } + void Frintz(const ZRegister& zd, const PRegisterZ& pg, const ZRegister& zn) { + VIXL_ASSERT(allow_macro_instructions_); + MovprfxHelperScope guard(this, zd, pg, zn); + frintz(zd, pg.Merging(), zn); + } + void Frsqrte(const ZRegister& zd, const ZRegister& zn) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + frsqrte(zd, zn); + } + void Frsqrts(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + frsqrts(zd, zn, zm); + } + void Fscale(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm); + void Fsqrt(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + fsqrt(zd, pg, zn); + } + void Fsqrt(const ZRegister& zd, const PRegisterZ& pg, const ZRegister& zn) { + VIXL_ASSERT(allow_macro_instructions_); + MovprfxHelperScope guard(this, zd, pg, zn); + fsqrt(zd, pg.Merging(), zn); + } + void Fsub(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + double imm) { + VIXL_ASSERT(allow_macro_instructions_); + MovprfxHelperScope guard(this, zd, pg, zn); + fsub(zd, pg, zd, imm); + } + void Fsub(const ZRegister& zd, + const PRegisterM& pg, + double imm, + const ZRegister& zn) { + VIXL_ASSERT(allow_macro_instructions_); + MovprfxHelperScope guard(this, zd, pg, zn); + fsubr(zd, pg, zd, imm); + } + void Fsub(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm); + void Fsub(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + fsub(zd, zn, zm); + } + void Ftmad(const ZRegister& zd, + const ZRegister& zn, + const ZRegister& zm, + int imm3); + void Ftsmul(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + ftsmul(zd, zn, zm); + } + void Ftssel(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + ftssel(zd, zn, zm); + } + void Incb(const Register& rdn, int pattern = SVE_ALL, int multiplier = 1) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + incb(rdn, pattern, multiplier); + } + void Incd(const Register& rdn, int pattern = SVE_ALL, int multiplier = 1) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + incd(rdn, pattern, multiplier); + } + void Incd(const ZRegister& zdn, int pattern = SVE_ALL, int multiplier = 1) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + incd(zdn, pattern, multiplier); + } + void Inch(const Register& rdn, int pattern = SVE_ALL, int multiplier = 1) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + inch(rdn, pattern, multiplier); + } + void Inch(const ZRegister& zdn, int pattern = SVE_ALL, int multiplier = 1) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope 
guard(this); + inch(zdn, pattern, multiplier); + } + void Incp(const Register& rdn, const PRegisterWithLaneSize& pg) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + incp(rdn, pg); + } + void Incp(const ZRegister& zd, const PRegister& pg, const ZRegister& zn) { + VIXL_ASSERT(allow_macro_instructions_); + VIXL_ASSERT(AreSameFormat(zd, zn)); + // `incp` writes every lane, so use an unpredicated movprfx. + MovprfxHelperScope guard(this, zd, zn); + incp(zd, pg); + } + void Incp(const ZRegister& zdn, const PRegister& pg) { Incp(zdn, pg, zdn); } + void Incw(const Register& rdn, int pattern = SVE_ALL, int multiplier = 1) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + incw(rdn, pattern, multiplier); + } + void Incw(const ZRegister& zdn, int pattern = SVE_ALL, int multiplier = 1) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + incw(zdn, pattern, multiplier); + } + void Index(const ZRegister& zd, const Operand& start, const Operand& step); + void Insr(const ZRegister& zdn, const Register& rm) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + insr(zdn, rm); + } + void Insr(const ZRegister& zdn, const VRegister& vm) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + insr(zdn, vm); + } + void Insr(const ZRegister& zdn, IntegerOperand imm); + void Lasta(const Register& rd, const PRegister& pg, const ZRegister& zn) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + lasta(rd, pg, zn); + } + void Lasta(const VRegister& vd, const PRegister& pg, const ZRegister& zn) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + lasta(vd, pg, zn); + } + void Lastb(const Register& rd, const PRegister& pg, const ZRegister& zn) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + lastb(rd, pg, zn); + } + void Lastb(const VRegister& vd, const PRegister& pg, const ZRegister& zn) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + lastb(vd, pg, zn); + } + void Ld1b(const ZRegister& zt, + const PRegisterZ& pg, + const SVEMemOperand& addr); + void Ld1h(const ZRegister& zt, + const PRegisterZ& pg, + const SVEMemOperand& addr); + void Ld1w(const ZRegister& zt, + const PRegisterZ& pg, + const SVEMemOperand& addr); + void Ld1d(const ZRegister& zt, + const PRegisterZ& pg, + const SVEMemOperand& addr); + void Ld1rb(const ZRegister& zt, + const PRegisterZ& pg, + const SVEMemOperand& addr) { + VIXL_ASSERT(allow_macro_instructions_); + SVELoadBroadcastImmHelper(zt, + pg, + addr, + &MacroAssembler::ld1rb, + kBRegSizeInBytes); + } + void Ld1rh(const ZRegister& zt, + const PRegisterZ& pg, + const SVEMemOperand& addr) { + VIXL_ASSERT(allow_macro_instructions_); + SVELoadBroadcastImmHelper(zt, + pg, + addr, + &MacroAssembler::ld1rh, + kHRegSizeInBytes); + } + void Ld1rw(const ZRegister& zt, + const PRegisterZ& pg, + const SVEMemOperand& addr) { + VIXL_ASSERT(allow_macro_instructions_); + SVELoadBroadcastImmHelper(zt, + pg, + addr, + &MacroAssembler::ld1rw, + kSRegSizeInBytes); + } + void Ld1rd(const ZRegister& zt, + const PRegisterZ& pg, + const SVEMemOperand& addr) { + VIXL_ASSERT(allow_macro_instructions_); + SVELoadBroadcastImmHelper(zt, + pg, + addr, + &MacroAssembler::ld1rd, + kDRegSizeInBytes); + } + void Ld1rqb(const ZRegister& zt, + const PRegisterZ& pg, + const SVEMemOperand& addr); + void Ld1rqd(const ZRegister& zt, 
+ const PRegisterZ& pg, + const SVEMemOperand& addr); + void Ld1rqh(const ZRegister& zt, + const PRegisterZ& pg, + const SVEMemOperand& addr); + void Ld1rqw(const ZRegister& zt, + const PRegisterZ& pg, + const SVEMemOperand& addr); + void Ld1rob(const ZRegister& zt, + const PRegisterZ& pg, + const SVEMemOperand& addr); + void Ld1rod(const ZRegister& zt, + const PRegisterZ& pg, + const SVEMemOperand& addr); + void Ld1roh(const ZRegister& zt, + const PRegisterZ& pg, + const SVEMemOperand& addr); + void Ld1row(const ZRegister& zt, + const PRegisterZ& pg, + const SVEMemOperand& addr); + void Ld1rsb(const ZRegister& zt, + const PRegisterZ& pg, + const SVEMemOperand& addr) { + VIXL_ASSERT(allow_macro_instructions_); + SVELoadBroadcastImmHelper(zt, + pg, + addr, + &MacroAssembler::ld1rsb, + kBRegSizeInBytes); + } + void Ld1rsh(const ZRegister& zt, + const PRegisterZ& pg, + const SVEMemOperand& addr) { + VIXL_ASSERT(allow_macro_instructions_); + SVELoadBroadcastImmHelper(zt, + pg, + addr, + &MacroAssembler::ld1rsh, + kHRegSizeInBytes); + } + void Ld1rsw(const ZRegister& zt, + const PRegisterZ& pg, + const SVEMemOperand& addr) { + VIXL_ASSERT(allow_macro_instructions_); + SVELoadBroadcastImmHelper(zt, + pg, + addr, + &MacroAssembler::ld1rsw, + kSRegSizeInBytes); + } + void Ld1sb(const ZRegister& zt, + const PRegisterZ& pg, + const SVEMemOperand& addr); + void Ld1sh(const ZRegister& zt, + const PRegisterZ& pg, + const SVEMemOperand& addr); + void Ld1sw(const ZRegister& zt, + const PRegisterZ& pg, + const SVEMemOperand& addr); + void Ld2b(const ZRegister& zt1, + const ZRegister& zt2, + const PRegisterZ& pg, + const SVEMemOperand& addr) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + ld2b(zt1, zt2, pg, addr); + } + void Ld2h(const ZRegister& zt1, + const ZRegister& zt2, + const PRegisterZ& pg, + const SVEMemOperand& addr) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + ld2h(zt1, zt2, pg, addr); + } + void Ld2w(const ZRegister& zt1, + const ZRegister& zt2, + const PRegisterZ& pg, + const SVEMemOperand& addr) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + ld2w(zt1, zt2, pg, addr); + } + void Ld2d(const ZRegister& zt1, + const ZRegister& zt2, + const PRegisterZ& pg, + const SVEMemOperand& addr) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + ld2d(zt1, zt2, pg, addr); + } + void Ld3b(const ZRegister& zt1, + const ZRegister& zt2, + const ZRegister& zt3, + const PRegisterZ& pg, + const SVEMemOperand& addr) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + ld3b(zt1, zt2, zt3, pg, addr); + } + void Ld3h(const ZRegister& zt1, + const ZRegister& zt2, + const ZRegister& zt3, + const PRegisterZ& pg, + const SVEMemOperand& addr) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + ld3h(zt1, zt2, zt3, pg, addr); + } + void Ld3w(const ZRegister& zt1, + const ZRegister& zt2, + const ZRegister& zt3, + const PRegisterZ& pg, + const SVEMemOperand& addr) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + ld3w(zt1, zt2, zt3, pg, addr); + } + void Ld3d(const ZRegister& zt1, + const ZRegister& zt2, + const ZRegister& zt3, + const PRegisterZ& pg, + const SVEMemOperand& addr) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + ld3d(zt1, zt2, zt3, pg, addr); + } + void Ld4b(const ZRegister& zt1, + const ZRegister& zt2, + const ZRegister& 
zt3, + const ZRegister& zt4, + const PRegisterZ& pg, + const SVEMemOperand& addr) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + ld4b(zt1, zt2, zt3, zt4, pg, addr); + } + void Ld4h(const ZRegister& zt1, + const ZRegister& zt2, + const ZRegister& zt3, + const ZRegister& zt4, + const PRegisterZ& pg, + const SVEMemOperand& addr) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + ld4h(zt1, zt2, zt3, zt4, pg, addr); + } + void Ld4w(const ZRegister& zt1, + const ZRegister& zt2, + const ZRegister& zt3, + const ZRegister& zt4, + const PRegisterZ& pg, + const SVEMemOperand& addr) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + ld4w(zt1, zt2, zt3, zt4, pg, addr); + } + void Ld4d(const ZRegister& zt1, + const ZRegister& zt2, + const ZRegister& zt3, + const ZRegister& zt4, + const PRegisterZ& pg, + const SVEMemOperand& addr) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + ld4d(zt1, zt2, zt3, zt4, pg, addr); + } + void Ldff1b(const ZRegister& zt, + const PRegisterZ& pg, + const SVEMemOperand& addr); + void Ldff1h(const ZRegister& zt, + const PRegisterZ& pg, + const SVEMemOperand& addr); + void Ldff1w(const ZRegister& zt, + const PRegisterZ& pg, + const SVEMemOperand& addr); + void Ldff1d(const ZRegister& zt, + const PRegisterZ& pg, + const SVEMemOperand& addr); + void Ldff1sb(const ZRegister& zt, + const PRegisterZ& pg, + const SVEMemOperand& addr); + void Ldff1sh(const ZRegister& zt, + const PRegisterZ& pg, + const SVEMemOperand& addr); + void Ldff1sw(const ZRegister& zt, + const PRegisterZ& pg, + const SVEMemOperand& addr); + void Ldff1b(const ZRegister& zt, + const PRegisterZ& pg, + const Register& xn, + const ZRegister& zm) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + ldff1b(zt, pg, xn, zm); + } + void Ldff1b(const ZRegister& zt, + const PRegisterZ& pg, + const ZRegister& zn, + int imm5) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + ldff1b(zt, pg, zn, imm5); + } + void Ldff1d(const ZRegister& zt, + const PRegisterZ& pg, + const Register& xn, + const ZRegister& zm) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + ldff1d(zt, pg, xn, zm); + } + void Ldff1d(const ZRegister& zt, + const PRegisterZ& pg, + const ZRegister& zn, + int imm5) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + ldff1d(zt, pg, zn, imm5); + } + void Ldff1h(const ZRegister& zt, + const PRegisterZ& pg, + const Register& xn, + const ZRegister& zm) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + ldff1h(zt, pg, xn, zm); + } + void Ldff1h(const ZRegister& zt, + const PRegisterZ& pg, + const ZRegister& zn, + int imm5) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + ldff1h(zt, pg, zn, imm5); + } + void Ldff1sb(const ZRegister& zt, + const PRegisterZ& pg, + const Register& xn, + const ZRegister& zm) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + ldff1sb(zt, pg, xn, zm); + } + void Ldff1sb(const ZRegister& zt, + const PRegisterZ& pg, + const ZRegister& zn, + int imm5) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + ldff1sb(zt, pg, zn, imm5); + } + void Ldff1sh(const ZRegister& zt, + const PRegisterZ& pg, + const Register& xn, + const ZRegister& zm) { + VIXL_ASSERT(allow_macro_instructions_); 
+ SingleEmissionCheckScope guard(this); + ldff1sh(zt, pg, xn, zm); + } + void Ldff1sh(const ZRegister& zt, + const PRegisterZ& pg, + const ZRegister& zn, + int imm5) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + ldff1sh(zt, pg, zn, imm5); + } + void Ldff1sw(const ZRegister& zt, + const PRegisterZ& pg, + const Register& xn, + const ZRegister& zm) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + ldff1sw(zt, pg, xn, zm); + } + void Ldff1sw(const ZRegister& zt, + const PRegisterZ& pg, + const ZRegister& zn, + int imm5) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + ldff1sw(zt, pg, zn, imm5); + } + void Ldff1w(const ZRegister& zt, + const PRegisterZ& pg, + const Register& xn, + const ZRegister& zm) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + ldff1w(zt, pg, xn, zm); + } + void Ldff1w(const ZRegister& zt, + const PRegisterZ& pg, + const ZRegister& zn, + int imm5) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + ldff1w(zt, pg, zn, imm5); + } + void Ldnf1b(const ZRegister& zt, + const PRegisterZ& pg, + const SVEMemOperand& addr) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + ldnf1b(zt, pg, addr); + } + void Ldnf1d(const ZRegister& zt, + const PRegisterZ& pg, + const SVEMemOperand& addr) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + ldnf1d(zt, pg, addr); + } + void Ldnf1h(const ZRegister& zt, + const PRegisterZ& pg, + const SVEMemOperand& addr) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + ldnf1h(zt, pg, addr); + } + void Ldnf1sb(const ZRegister& zt, + const PRegisterZ& pg, + const SVEMemOperand& addr) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + ldnf1sb(zt, pg, addr); + } + void Ldnf1sh(const ZRegister& zt, + const PRegisterZ& pg, + const SVEMemOperand& addr) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + ldnf1sh(zt, pg, addr); + } + void Ldnf1sw(const ZRegister& zt, + const PRegisterZ& pg, + const SVEMemOperand& addr) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + ldnf1sw(zt, pg, addr); + } + void Ldnf1w(const ZRegister& zt, + const PRegisterZ& pg, + const SVEMemOperand& addr) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + ldnf1w(zt, pg, addr); + } + void Ldnt1b(const ZRegister& zt, + const PRegisterZ& pg, + const SVEMemOperand& addr); + void Ldnt1d(const ZRegister& zt, + const PRegisterZ& pg, + const SVEMemOperand& addr); + void Ldnt1h(const ZRegister& zt, + const PRegisterZ& pg, + const SVEMemOperand& addr); + void Ldnt1w(const ZRegister& zt, + const PRegisterZ& pg, + const SVEMemOperand& addr); + void Ldr(const CPURegister& rt, const SVEMemOperand& addr) { + VIXL_ASSERT(allow_macro_instructions_); + SVELoadStoreScalarImmHelper(rt, addr, &MacroAssembler::ldr); + } + void Lsl(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + int shift) { + VIXL_ASSERT(allow_macro_instructions_); + MovprfxHelperScope guard(this, zd, pg, zn); + lsl(zd, pg, zd, shift); + } + void Lsl(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm); + void Lsl(const ZRegister& zd, const ZRegister& zn, int shift) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + lsl(zd, zn, 
shift); + } + void Lsl(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + lsl(zd, zn, zm); + } + void Lsr(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + int shift) { + VIXL_ASSERT(allow_macro_instructions_); + MovprfxHelperScope guard(this, zd, pg, zn); + lsr(zd, pg, zd, shift); + } + void Lsr(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm); + void Lsr(const ZRegister& zd, const ZRegister& zn, int shift) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + lsr(zd, zn, shift); + } + void Lsr(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + lsr(zd, zn, zm); + } + void Mov(const PRegister& pd, const PRegister& pn) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + mov(pd.VnB(), pn.VnB()); + } + void Mov(const PRegisterWithLaneSize& pd, + const PRegisterM& pg, + const PRegisterWithLaneSize& pn) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + mov(pd, pg, pn); + } + void Mov(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const PRegisterWithLaneSize& pn) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + mov(pd, pg, pn); + } + void Mov(const ZRegister& zd, const Register& xn) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + mov(zd, xn); + } + + void Mov(const ZRegister& zd, const VRegister& vn) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + mov(zd, vn); + } + + void Mov(const ZRegister& zd, const ZRegister& zn) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + mov(zd, zn); + } + void Mov(const ZRegister& zd, const ZRegister& zn, unsigned index) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + mov(zd, zn, index); + } + void Mov(const ZRegister& zd, const PRegister& pg, IntegerOperand imm) { + VIXL_ASSERT(allow_macro_instructions_); + Cpy(zd, pg, imm); + } + // TODO: support zeroing predicated moves using movprfx. 
+ void Mov(const ZRegister& zd, const PRegisterM& pg, const Register& rn) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + mov(zd, pg, rn); + } + void Mov(const ZRegister& zd, const PRegisterM& pg, const VRegister& vn) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + mov(zd, pg, vn); + } + void Mov(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + mov(zd, pg, zn); + } + void Mov(const ZRegister& zd, IntegerOperand imm) { + VIXL_ASSERT(allow_macro_instructions_); + Dup(zd, imm); + } + void Movs(const PRegister& pd, const PRegister& pn) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + movs(pd, pn); + } + void Movs(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const PRegisterWithLaneSize& pn) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + movs(pd, pg, pn); + } + // zd = za + (zn * zm) + void Mla(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& za, + const ZRegister& zn, + const ZRegister& zm); + // zd = za - (zn * zm) + void Mls(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& za, + const ZRegister& zn, + const ZRegister& zm); + void Mul(const ZRegister& zd, const ZRegister& zn, IntegerOperand imm); + void Nand(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const PRegisterWithLaneSize& pn, + const PRegisterWithLaneSize& pm) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + nand(pd, pg, pn, pm); + } + void Nands(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const PRegisterWithLaneSize& pn, + const PRegisterWithLaneSize& pm) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + nands(pd, pg, pn, pm); + } + // There is no instruction with this form, but we can implement it using + // `subr`. 
+ void Neg(const ZRegister& zd, const ZRegister& zn) { + VIXL_ASSERT(allow_macro_instructions_); + MovprfxHelperScope guard(this, zd, zn); + subr(zd, zd, 0); + } + void Neg(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + neg(zd, pg, zn); + } + void Nor(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const PRegisterWithLaneSize& pn, + const PRegisterWithLaneSize& pm) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + nor(pd, pg, pn, pm); + } + void Nors(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const PRegisterWithLaneSize& pn, + const PRegisterWithLaneSize& pm) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + nors(pd, pg, pn, pm); + } + void Not(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const PRegisterWithLaneSize& pn) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + not_(pd, pg, pn); + } + void Not(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + not_(zd, pg, zn); + } + void Nots(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const PRegisterWithLaneSize& pn) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + nots(pd, pg, pn); + } + void Orn(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const PRegisterWithLaneSize& pn, + const PRegisterWithLaneSize& pm) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + orn(pd, pg, pn, pm); + } + void Orn(const ZRegister& zd, const ZRegister& zn, uint64_t imm) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + if (IsImmLogical(imm, zd.GetLaneSizeInBits())) { + orn(zd, zn, imm); + } else { + // TODO: Synthesise the immediate once 'Mov' is implemented. + VIXL_UNIMPLEMENTED(); + } + } + void Orns(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const PRegisterWithLaneSize& pn, + const PRegisterWithLaneSize& pm) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + orns(pd, pg, pn, pm); + } + void Orr(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const PRegisterWithLaneSize& pn, + const PRegisterWithLaneSize& pm) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + orr(pd, pg, pn, pm); + } + void Orr(const ZRegister& zd, const ZRegister& zn, uint64_t imm) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + if (IsImmLogical(imm, zd.GetLaneSizeInBits())) { + orr(zd, zn, imm); + } else { + // TODO: Synthesise the immediate once 'Mov' is implemented. 
+ VIXL_UNIMPLEMENTED(); + } + } + void Orr(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm) { + VIXL_ASSERT(allow_macro_instructions_); + VIXL_ASSERT(AreSameLaneSize(zd, zn, zm)); + SingleEmissionCheckScope guard(this); + orr(zd.VnD(), zn.VnD(), zm.VnD()); + } + void Orrs(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const PRegisterWithLaneSize& pn, + const PRegisterWithLaneSize& pm) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + orrs(pd, pg, pn, pm); + } + void Orv(const VRegister& vd, const PRegister& pg, const ZRegister& zn) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + orv(vd, pg, zn); + } + void Pfalse(const PRegister& pd) { + VIXL_ASSERT(allow_macro_instructions_); + VIXL_ASSERT(pd.IsUnqualified()); + SingleEmissionCheckScope guard(this); + // No matter what the lane size is, overall this operation just writes zeros + // throughout the register. + pfalse(pd.VnB()); + } + void Pfirst(const PRegisterWithLaneSize& pd, + const PRegister& pg, + const PRegisterWithLaneSize& pn); + void Pnext(const PRegisterWithLaneSize& pd, + const PRegister& pg, + const PRegisterWithLaneSize& pn); + void Prfb(PrefetchOperation prfop, + const PRegister& pg, + const SVEMemOperand addr) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + prfb(prfop, pg, addr); + } + void Prfh(PrefetchOperation prfop, + const PRegister& pg, + const SVEMemOperand addr) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + prfh(prfop, pg, addr); + } + void Prfw(PrefetchOperation prfop, + const PRegister& pg, + const SVEMemOperand addr) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + prfw(prfop, pg, addr); + } + void Prfd(PrefetchOperation prfop, + const PRegister& pg, + const SVEMemOperand addr) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + prfd(prfop, pg, addr); + } + void Ptest(const PRegister& pg, const PRegisterWithLaneSize& pn) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + ptest(pg, pn); + } + void Ptrue(const PRegisterWithLaneSize& pd, + SVEPredicateConstraint pattern, + FlagsUpdate s); + void Ptrue(const PRegisterWithLaneSize& pd, + SVEPredicateConstraint pattern = SVE_ALL) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + ptrue(pd, pattern); + } + void Ptrues(const PRegisterWithLaneSize& pd, + SVEPredicateConstraint pattern = SVE_ALL) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + ptrues(pd, pattern); + } + void Punpkhi(const PRegisterWithLaneSize& pd, + const PRegisterWithLaneSize& pn) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + punpkhi(pd, pn); + } + void Punpklo(const PRegisterWithLaneSize& pd, + const PRegisterWithLaneSize& pn) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + punpklo(pd, pn); + } + void Rbit(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + rbit(zd, pg, zn); + } + void Rdffr(const PRegister& pd) { + VIXL_ASSERT(allow_macro_instructions_); + // Although this is essentially just a move, it writes every bit and so can + // only support b-sized lane because other lane sizes would implicitly clear + // bits in `pd`.
+ VIXL_ASSERT(!pd.HasLaneSize() || pd.IsLaneSizeB()); + VIXL_ASSERT(pd.IsUnqualified()); + SingleEmissionCheckScope guard(this); + rdffr(pd.VnB()); + } + void Rdffr(const PRegisterWithLaneSize& pd, const PRegisterZ& pg) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + rdffr(pd, pg); + } + void Rdffrs(const PRegisterWithLaneSize& pd, const PRegisterZ& pg) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + rdffrs(pd, pg); + } + // Note that there is no `rdpl` instruction, but this macro emulates it (for + // symmetry with `Rdvl`). + void Rdpl(const Register& xd, int64_t multiplier) { + VIXL_ASSERT(allow_macro_instructions_); + Addpl(xd, xzr, multiplier); + } + void Rdvl(const Register& xd, int64_t multiplier) { + VIXL_ASSERT(allow_macro_instructions_); + Addvl(xd, xzr, multiplier); + } + void Rev(const PRegisterWithLaneSize& pd, const PRegisterWithLaneSize& pn) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + rev(pd, pn); + } + void Rev(const ZRegister& zd, const ZRegister& zn) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + rev(zd, zn); + } + void Revb(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + revb(zd, pg, zn); + } + void Revh(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + revh(zd, pg, zn); + } + void Revw(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + revw(zd, pg, zn); + } + void Saddv(const VRegister& dd, const PRegister& pg, const ZRegister& zn) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + saddv(dd, pg, zn); + } + void Scvtf(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + scvtf(zd, pg, zn); + } + void Sdiv(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm); + void Sdot(const ZRegister& zd, + const ZRegister& za, + const ZRegister& zn, + const ZRegister& zm); + void Sdot(const ZRegister& zd, + const ZRegister& za, + const ZRegister& zn, + const ZRegister& zm, + int index); + void Sel(const PRegisterWithLaneSize& pd, + const PRegister& pg, + const PRegisterWithLaneSize& pn, + const PRegisterWithLaneSize& pm) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + sel(pd, pg, pn, pm); + } + void Sel(const ZRegister& zd, + const PRegister& pg, + const ZRegister& zn, + const ZRegister& zm) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + sel(zd, pg, zn, zm); + } + void Setffr() { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + setffr(); + } + void Smax(const ZRegister& zd, const ZRegister& zn, IntegerOperand imm); + void Smaxv(const VRegister& vd, const PRegister& pg, const ZRegister& zn) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + smaxv(vd, pg, zn); + } + void Smin(const ZRegister& zd, const ZRegister& zn, IntegerOperand imm); + void Sminv(const VRegister& vd, const PRegister& pg, const ZRegister& zn) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + sminv(vd, pg, zn); + } + void 
Splice(const ZRegister& zd, + const PRegister& pg, + const ZRegister& zn, + const ZRegister& zm); + void Sqadd(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + sqadd(zd, zn, zm); + } + void Sqadd(const ZRegister& zd, const ZRegister& zn, IntegerOperand imm) { + VIXL_ASSERT(allow_macro_instructions_); + VIXL_ASSERT(imm.IsUint8() || + (imm.IsUint16() && ((imm.AsUint16() & 0xff) == 0))); + MovprfxHelperScope guard(this, zd, zn); + sqadd(zd, zd, imm.AsUint16()); + } + void Sqdecb(const Register& xd, + const Register& wn, + int pattern = SVE_ALL, + int multiplier = 1) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + sqdecb(xd, wn, pattern, multiplier); + } + void Sqdecb(const Register& rdn, int pattern = SVE_ALL, int multiplier = 1) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + sqdecb(rdn, pattern, multiplier); + } + void Sqdecd(const Register& xd, + const Register& wn, + int pattern = SVE_ALL, + int multiplier = 1) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + sqdecd(xd, wn, pattern, multiplier); + } + void Sqdecd(const Register& rdn, int pattern = SVE_ALL, int multiplier = 1) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + sqdecd(rdn, pattern, multiplier); + } + void Sqdecd(const ZRegister& zdn, int pattern = SVE_ALL, int multiplier = 1) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + sqdecd(zdn, pattern, multiplier); + } + void Sqdech(const Register& xd, + const Register& wn, + int pattern = SVE_ALL, + int multiplier = 1) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + sqdech(xd, wn, pattern, multiplier); + } + void Sqdech(const Register& rdn, int pattern = SVE_ALL, int multiplier = 1) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + sqdech(rdn, pattern, multiplier); + } + void Sqdech(const ZRegister& zdn, int pattern = SVE_ALL, int multiplier = 1) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + sqdech(zdn, pattern, multiplier); + } + void Sqdecp(const Register& xdn, + const PRegisterWithLaneSize& pg, + const Register& wdn) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + sqdecp(xdn, pg, wdn); + } + void Sqdecp(const Register& xdn, const PRegisterWithLaneSize& pg) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + sqdecp(xdn, pg); + } + void Sqdecp(const ZRegister& zd, const PRegister& pg, const ZRegister& zn) { + VIXL_ASSERT(allow_macro_instructions_); + VIXL_ASSERT(AreSameFormat(zd, zn)); + // `sqdecp` writes every lane, so use an unpredicated movprfx. 
+ MovprfxHelperScope guard(this, zd, zn); + sqdecp(zd, pg); + } + void Sqdecp(const ZRegister& zdn, const PRegister& pg) { + Sqdecp(zdn, pg, zdn); + } + void Sqdecw(const Register& xd, + const Register& wn, + int pattern = SVE_ALL, + int multiplier = 1) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + sqdecw(xd, wn, pattern, multiplier); + } + void Sqdecw(const Register& rdn, int pattern = SVE_ALL, int multiplier = 1) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + sqdecw(rdn, pattern, multiplier); + } + void Sqdecw(const ZRegister& zdn, int pattern = SVE_ALL, int multiplier = 1) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + sqdecw(zdn, pattern, multiplier); + } + void Sqincb(const Register& xd, + const Register& wn, + int pattern = SVE_ALL, + int multiplier = 1) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + sqincb(xd, wn, pattern, multiplier); + } + void Sqincb(const Register& rdn, int pattern = SVE_ALL, int multiplier = 1) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + sqincb(rdn, pattern, multiplier); + } + void Sqincd(const Register& xd, + const Register& wn, + int pattern = SVE_ALL, + int multiplier = 1) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + sqincd(xd, wn, pattern, multiplier); + } + void Sqincd(const Register& rdn, int pattern = SVE_ALL, int multiplier = 1) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + sqincd(rdn, pattern, multiplier); + } + void Sqincd(const ZRegister& zdn, int pattern = SVE_ALL, int multiplier = 1) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + sqincd(zdn, pattern, multiplier); + } + void Sqinch(const Register& xd, + const Register& wn, + int pattern = SVE_ALL, + int multiplier = 1) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + sqinch(xd, wn, pattern, multiplier); + } + void Sqinch(const Register& rdn, int pattern = SVE_ALL, int multiplier = 1) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + sqinch(rdn, pattern, multiplier); + } + void Sqinch(const ZRegister& zdn, int pattern = SVE_ALL, int multiplier = 1) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + sqinch(zdn, pattern, multiplier); + } + void Sqincp(const Register& xdn, + const PRegisterWithLaneSize& pg, + const Register& wdn) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + sqincp(xdn, pg, wdn); + } + void Sqincp(const Register& xdn, const PRegisterWithLaneSize& pg) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + sqincp(xdn, pg); + } + void Sqincp(const ZRegister& zd, const PRegister& pg, const ZRegister& zn) { + VIXL_ASSERT(allow_macro_instructions_); + VIXL_ASSERT(AreSameFormat(zd, zn)); + // `sqincp` writes every lane, so use an unpredicated movprfx. 
+ MovprfxHelperScope guard(this, zd, zn); + sqincp(zd, pg); + } + void Sqincp(const ZRegister& zdn, const PRegister& pg) { + Sqincp(zdn, pg, zdn); + } + void Sqincw(const Register& xd, + const Register& wn, + int pattern = SVE_ALL, + int multiplier = 1) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + sqincw(xd, wn, pattern, multiplier); + } + void Sqincw(const Register& rdn, int pattern = SVE_ALL, int multiplier = 1) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + sqincw(rdn, pattern, multiplier); + } + void Sqincw(const ZRegister& zdn, int pattern = SVE_ALL, int multiplier = 1) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + sqincw(zdn, pattern, multiplier); + } + void Sqsub(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + sqsub(zd, zn, zm); + } + void Sqsub(const ZRegister& zd, const ZRegister& zn, IntegerOperand imm) { + VIXL_ASSERT(allow_macro_instructions_); + VIXL_ASSERT(imm.IsUint8() || + (imm.IsUint16() && ((imm.AsUint16() & 0xff) == 0))); + MovprfxHelperScope guard(this, zd, zn); + sqsub(zd, zd, imm.AsUint16()); + } + void St1b(const ZRegister& zt, + const PRegister& pg, + const SVEMemOperand& addr); + void St1h(const ZRegister& zt, + const PRegister& pg, + const SVEMemOperand& addr); + void St1w(const ZRegister& zt, + const PRegister& pg, + const SVEMemOperand& addr); + void St1d(const ZRegister& zt, + const PRegister& pg, + const SVEMemOperand& addr); + void St2b(const ZRegister& zt1, + const ZRegister& zt2, + const PRegister& pg, + const SVEMemOperand& addr) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + st2b(zt1, zt2, pg, addr); + } + void St2h(const ZRegister& zt1, + const ZRegister& zt2, + const PRegister& pg, + const SVEMemOperand& addr) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + st2h(zt1, zt2, pg, addr); + } + void St2w(const ZRegister& zt1, + const ZRegister& zt2, + const PRegister& pg, + const SVEMemOperand& addr) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + st2w(zt1, zt2, pg, addr); + } + void St2d(const ZRegister& zt1, + const ZRegister& zt2, + const PRegister& pg, + const SVEMemOperand& addr) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + st2d(zt1, zt2, pg, addr); + } + void St3b(const ZRegister& zt1, + const ZRegister& zt2, + const ZRegister& zt3, + const PRegister& pg, + const SVEMemOperand& addr) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + st3b(zt1, zt2, zt3, pg, addr); + } + void St3h(const ZRegister& zt1, + const ZRegister& zt2, + const ZRegister& zt3, + const PRegister& pg, + const SVEMemOperand& addr) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + st3h(zt1, zt2, zt3, pg, addr); + } + void St3w(const ZRegister& zt1, + const ZRegister& zt2, + const ZRegister& zt3, + const PRegister& pg, + const SVEMemOperand& addr) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + st3w(zt1, zt2, zt3, pg, addr); + } + void St3d(const ZRegister& zt1, + const ZRegister& zt2, + const ZRegister& zt3, + const PRegister& pg, + const SVEMemOperand& addr) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + st3d(zt1, zt2, zt3, pg, addr); + } + void St4b(const ZRegister& 
zt1, + const ZRegister& zt2, + const ZRegister& zt3, + const ZRegister& zt4, + const PRegister& pg, + const SVEMemOperand& addr) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + st4b(zt1, zt2, zt3, zt4, pg, addr); + } + void St4h(const ZRegister& zt1, + const ZRegister& zt2, + const ZRegister& zt3, + const ZRegister& zt4, + const PRegister& pg, + const SVEMemOperand& addr) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + st4h(zt1, zt2, zt3, zt4, pg, addr); + } + void St4w(const ZRegister& zt1, + const ZRegister& zt2, + const ZRegister& zt3, + const ZRegister& zt4, + const PRegister& pg, + const SVEMemOperand& addr) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + st4w(zt1, zt2, zt3, zt4, pg, addr); + } + void St4d(const ZRegister& zt1, + const ZRegister& zt2, + const ZRegister& zt3, + const ZRegister& zt4, + const PRegister& pg, + const SVEMemOperand& addr) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + st4d(zt1, zt2, zt3, zt4, pg, addr); + } + void Stnt1b(const ZRegister& zt, + const PRegister& pg, + const SVEMemOperand& addr); + void Stnt1d(const ZRegister& zt, + const PRegister& pg, + const SVEMemOperand& addr); + void Stnt1h(const ZRegister& zt, + const PRegister& pg, + const SVEMemOperand& addr); + void Stnt1w(const ZRegister& zt, + const PRegister& pg, + const SVEMemOperand& addr); + void Str(const CPURegister& rt, const SVEMemOperand& addr) { + VIXL_ASSERT(allow_macro_instructions_); + SVELoadStoreScalarImmHelper(rt, addr, &MacroAssembler::str); + } + void Sub(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm); + void Sub(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + sub(zd, zn, zm); + } + void Sub(const ZRegister& zd, const ZRegister& zn, IntegerOperand imm) { + VIXL_ASSERT(allow_macro_instructions_); + AddSubHelper(kSubImmediate, zd, zn, imm); + } + void Sub(const ZRegister& zd, IntegerOperand imm, const ZRegister& zm); + void Sunpkhi(const ZRegister& zd, const ZRegister& zn) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + sunpkhi(zd, zn); + } + void Sunpklo(const ZRegister& zd, const ZRegister& zn) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + sunpklo(zd, zn); + } + void Sxtb(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + sxtb(zd, pg, zn); + } + void Sxth(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + sxth(zd, pg, zn); + } + void Sxtw(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + sxtw(zd, pg, zn); + } + void Tbl(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + tbl(zd, zn, zm); + } + void Trn1(const PRegisterWithLaneSize& pd, + const PRegisterWithLaneSize& pn, + const PRegisterWithLaneSize& pm) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + trn1(pd, pn, pm); + } + void Trn1(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm) { + 
VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + trn1(zd, zn, zm); + } + void Trn2(const PRegisterWithLaneSize& pd, + const PRegisterWithLaneSize& pn, + const PRegisterWithLaneSize& pm) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + trn2(pd, pn, pm); + } + void Trn2(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + trn2(zd, zn, zm); + } + void Uaddv(const VRegister& dd, const PRegister& pg, const ZRegister& zn) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + uaddv(dd, pg, zn); + } + void Ucvtf(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + ucvtf(zd, pg, zn); + } + void Udiv(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm); + void Udot(const ZRegister& zd, + const ZRegister& za, + const ZRegister& zn, + const ZRegister& zm); + void Udot(const ZRegister& zd, + const ZRegister& za, + const ZRegister& zn, + const ZRegister& zm, + int index); + void Umax(const ZRegister& zd, const ZRegister& zn, IntegerOperand imm); + void Umaxv(const VRegister& vd, const PRegister& pg, const ZRegister& zn) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + umaxv(vd, pg, zn); + } + void Umin(const ZRegister& zd, const ZRegister& zn, IntegerOperand imm); + void Uminv(const VRegister& vd, const PRegister& pg, const ZRegister& zn) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + uminv(vd, pg, zn); + } + void Uqadd(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + uqadd(zd, zn, zm); + } + void Uqadd(const ZRegister& zd, const ZRegister& zn, IntegerOperand imm) { + VIXL_ASSERT(allow_macro_instructions_); + VIXL_ASSERT(imm.IsUint8() || + (imm.IsUint16() && ((imm.AsUint16() & 0xff) == 0))); + MovprfxHelperScope guard(this, zd, zn); + uqadd(zd, zd, imm.AsUint16()); + } + void Uqdecb(const Register& rdn, int pattern = SVE_ALL, int multiplier = 1) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + uqdecb(rdn, pattern, multiplier); + } + void Uqdecd(const Register& rdn, int pattern = SVE_ALL, int multiplier = 1) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + uqdecd(rdn, pattern, multiplier); + } + void Uqdecd(const ZRegister& zdn, int pattern = SVE_ALL, int multiplier = 1) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + uqdecd(zdn, pattern, multiplier); + } + void Uqdech(const Register& rdn, int pattern = SVE_ALL, int multiplier = 1) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + uqdech(rdn, pattern, multiplier); + } + void Uqdech(const ZRegister& zdn, int pattern = SVE_ALL, int multiplier = 1) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + uqdech(zdn, pattern, multiplier); + } + // The saturation is based on the size of `rn`. The result is zero-extended + // into `rd`, which must be at least as big. 
+ void Uqdecp(const Register& rd, + const PRegisterWithLaneSize& pg, + const Register& rn) { + VIXL_ASSERT(allow_macro_instructions_); + VIXL_ASSERT(rd.Aliases(rn)); + VIXL_ASSERT(rd.GetSizeInBytes() >= rn.GetSizeInBytes()); + SingleEmissionCheckScope guard(this); + if (rn.Is64Bits()) { + uqdecp(rd, pg); + } else { + // Convert <Xd> into <Wd>, to make this more consistent with Sqdecp. + uqdecp(rd.W(), pg); + } + } + void Uqdecp(const Register& rdn, const PRegisterWithLaneSize& pg) { + Uqdecp(rdn, pg, rdn); + } + void Uqdecp(const ZRegister& zd, const PRegister& pg, const ZRegister& zn) { + VIXL_ASSERT(allow_macro_instructions_); + VIXL_ASSERT(AreSameFormat(zd, zn)); + // `sqdecp` writes every lane, so use an unpredicated movprfx. + MovprfxHelperScope guard(this, zd, zn); + uqdecp(zd, pg); + } + void Uqdecp(const ZRegister& zdn, const PRegister& pg) { + Uqdecp(zdn, pg, zdn); + } + void Uqdecw(const Register& rdn, int pattern = SVE_ALL, int multiplier = 1) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + uqdecw(rdn, pattern, multiplier); + } + void Uqdecw(const ZRegister& zdn, int pattern = SVE_ALL, int multiplier = 1) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + uqdecw(zdn, pattern, multiplier); + } + void Uqincb(const Register& rdn, int pattern = SVE_ALL, int multiplier = 1) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + uqincb(rdn, pattern, multiplier); + } + void Uqincd(const Register& rdn, int pattern = SVE_ALL, int multiplier = 1) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + uqincd(rdn, pattern, multiplier); + } + void Uqincd(const ZRegister& zdn, int pattern = SVE_ALL, int multiplier = 1) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + uqincd(zdn, pattern, multiplier); + } + void Uqinch(const Register& rdn, int pattern = SVE_ALL, int multiplier = 1) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + uqinch(rdn, pattern, multiplier); + } + void Uqinch(const ZRegister& zdn, int pattern = SVE_ALL, int multiplier = 1) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + uqinch(zdn, pattern, multiplier); + } + // The saturation is based on the size of `rn`. The result is zero-extended + // into `rd`, which must be at least as big. + void Uqincp(const Register& rd, + const PRegisterWithLaneSize& pg, + const Register& rn) { + VIXL_ASSERT(allow_macro_instructions_); + VIXL_ASSERT(rd.Aliases(rn)); + VIXL_ASSERT(rd.GetSizeInBytes() >= rn.GetSizeInBytes()); + SingleEmissionCheckScope guard(this); + if (rn.Is64Bits()) { + uqincp(rd, pg); + } else { + // Convert <Xd> into <Wd>, to make this more consistent with Sqincp. + uqincp(rd.W(), pg); + } + } + void Uqincp(const Register& rdn, const PRegisterWithLaneSize& pg) { + Uqincp(rdn, pg, rdn); + } + void Uqincp(const ZRegister& zd, const PRegister& pg, const ZRegister& zn) { + VIXL_ASSERT(allow_macro_instructions_); + VIXL_ASSERT(AreSameFormat(zd, zn)); + // `sqincp` writes every lane, so use an unpredicated movprfx.
+ MovprfxHelperScope guard(this, zd, zn); + uqincp(zd, pg); + } + void Uqincp(const ZRegister& zdn, const PRegister& pg) { + Uqincp(zdn, pg, zdn); + } + void Uqincw(const Register& rdn, int pattern = SVE_ALL, int multiplier = 1) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + uqincw(rdn, pattern, multiplier); + } + void Uqincw(const ZRegister& zdn, int pattern = SVE_ALL, int multiplier = 1) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + uqincw(zdn, pattern, multiplier); + } + void Uqsub(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + uqsub(zd, zn, zm); + } + void Uqsub(const ZRegister& zd, const ZRegister& zn, IntegerOperand imm) { + VIXL_ASSERT(allow_macro_instructions_); + VIXL_ASSERT(imm.IsUint8() || + (imm.IsUint16() && ((imm.AsUint16() & 0xff) == 0))); + MovprfxHelperScope guard(this, zd, zn); + uqsub(zd, zd, imm.AsUint16()); + } + void Uunpkhi(const ZRegister& zd, const ZRegister& zn) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + uunpkhi(zd, zn); + } + void Uunpklo(const ZRegister& zd, const ZRegister& zn) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + uunpklo(zd, zn); + } + void Uxtb(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + uxtb(zd, pg, zn); + } + void Uxth(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + uxth(zd, pg, zn); + } + void Uxtw(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + uxtw(zd, pg, zn); + } + void Uzp1(const PRegisterWithLaneSize& pd, + const PRegisterWithLaneSize& pn, + const PRegisterWithLaneSize& pm) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + uzp1(pd, pn, pm); + } + void Uzp1(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + uzp1(zd, zn, zm); + } + void Uzp2(const PRegisterWithLaneSize& pd, + const PRegisterWithLaneSize& pn, + const PRegisterWithLaneSize& pm) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + uzp2(pd, pn, pm); + } + void Uzp2(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + uzp2(zd, zn, zm); + } + void Whilele(const PRegisterWithLaneSize& pd, + const Register& rn, + const Register& rm) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + whilele(pd, rn, rm); + } + void Whilelo(const PRegisterWithLaneSize& pd, + const Register& rn, + const Register& rm) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + whilelo(pd, rn, rm); + } + void Whilels(const PRegisterWithLaneSize& pd, + const Register& rn, + const Register& rm) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + whilels(pd, rn, rm); + } + void Whilelt(const PRegisterWithLaneSize& pd, + const Register& rn, + const Register& rm) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + whilelt(pd, rn, rm); + } + void Wrffr(const 
PRegister& pn) { + VIXL_ASSERT(allow_macro_instructions_); + // Although this is essentially just a move, it writes every bit and so can + // only support b-sized lane because other lane sizes would implicitly clear + // bits in `ffr`. + VIXL_ASSERT(!pn.HasLaneSize() || pn.IsLaneSizeB()); + VIXL_ASSERT(pn.IsUnqualified()); + SingleEmissionCheckScope guard(this); + wrffr(pn.VnB()); + } + void Zip1(const PRegisterWithLaneSize& pd, + const PRegisterWithLaneSize& pn, + const PRegisterWithLaneSize& pm) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + zip1(pd, pn, pm); + } + void Zip1(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + zip1(zd, zn, zm); + } + void Zip2(const PRegisterWithLaneSize& pd, + const PRegisterWithLaneSize& pn, + const PRegisterWithLaneSize& pm) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + zip2(pd, pn, pm); + } + void Zip2(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + zip2(zd, zn, zm); + } + + // SVE2 + void Adclb(const ZRegister& zd, + const ZRegister& za, + const ZRegister& zn, + const ZRegister& zm); + void Adclt(const ZRegister& zd, + const ZRegister& za, + const ZRegister& zn, + const ZRegister& zm); + void Addhnb(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + addhnb(zd, zn, zm); + } + void Addhnt(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + addhnt(zd, zn, zm); + } + void Addp(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm); + void Bcax(const ZRegister& zd, + const ZRegister& zn, + const ZRegister& zm, + const ZRegister& zk); + void Bdep(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + bdep(zd, zn, zm); + } + void Bext(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + bext(zd, zn, zm); + } + void Bgrp(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + bgrp(zd, zn, zm); + } + void Bsl(const ZRegister& zd, + const ZRegister& zn, + const ZRegister& zm, + const ZRegister& zk); + void Bsl1n(const ZRegister& zd, + const ZRegister& zn, + const ZRegister& zm, + const ZRegister& zk); + void Bsl2n(const ZRegister& zd, + const ZRegister& zn, + const ZRegister& zm, + const ZRegister& zk); + void Cadd(const ZRegister& zd, + const ZRegister& zn, + const ZRegister& zm, + int rot); + void Cdot(const ZRegister& zd, + const ZRegister& za, + const ZRegister& zn, + const ZRegister& zm, + int index, + int rot); + void Cdot(const ZRegister& zd, + const ZRegister& za, + const ZRegister& zn, + const ZRegister& zm, + int rot); + void Cmla(const ZRegister& zd, + const ZRegister& za, + const ZRegister& zn, + const ZRegister& zm, + int index, + int rot); + void Cmla(const ZRegister& zd, + const ZRegister& za, + const ZRegister& zn, + const ZRegister& zm, + int rot); + void Eor3(const ZRegister& zd, + const ZRegister& zn, + const ZRegister& zm, + const ZRegister& zk); + void Eorbt(const 
ZRegister& zd, const ZRegister& zn, const ZRegister& zm) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + eorbt(zd, zn, zm); + } + void Eortb(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + eortb(zd, zn, zm); + } + void Faddp(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm); + void Fcvtlt(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + fcvtlt(zd, pg, zn); + } + void Fcvtnt(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + fcvtnt(zd, pg, zn); + } + void Fcvtx(const ZRegister& zd, const PRegister& pg, const ZRegister& zn) { + VIXL_ASSERT(allow_macro_instructions_); + VIXL_ASSERT(zn.IsLaneSizeD()); + MovprfxHelperScope guard(this, zd.VnD(), pg, zd.VnD()); + fcvtx(zd, pg.Merging(), zn); + } + void Fcvtxnt(const ZRegister& zd, const PRegisterM& pg, const ZRegister& zn) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + fcvtxnt(zd, pg, zn); + } + void Flogb(const ZRegister& zd, const PRegister& pg, const ZRegister& zn) { + VIXL_ASSERT(allow_macro_instructions_); + MovprfxHelperScope guard(this, zd, pg, zd); + flogb(zd, pg.Merging(), zn); + } + void Fmaxnmp(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm); + void Fmaxp(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm); + void Fminnmp(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm); + void Fminp(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm); + void Fmlalb(const ZRegister& zd, + const ZRegister& za, + const ZRegister& zn, + const ZRegister& zm); + void Fmlalt(const ZRegister& zd, + const ZRegister& za, + const ZRegister& zn, + const ZRegister& zm); + void Fmlslb(const ZRegister& zd, + const ZRegister& za, + const ZRegister& zn, + const ZRegister& zm); + void Fmlslt(const ZRegister& zd, + const ZRegister& za, + const ZRegister& zn, + const ZRegister& zm); + void Fmlalb(const ZRegister& zd, + const ZRegister& za, + const ZRegister& zn, + const ZRegister& zm, + int index); + void Fmlalt(const ZRegister& zd, + const ZRegister& za, + const ZRegister& zn, + const ZRegister& zm, + int index); + void Fmlslb(const ZRegister& zd, + const ZRegister& za, + const ZRegister& zn, + const ZRegister& zm, + int index); + void Fmlslt(const ZRegister& zd, + const ZRegister& za, + const ZRegister& zn, + const ZRegister& zm, + int index); + void Histcnt(const ZRegister& zd, + const PRegisterZ& pg, + const ZRegister& zn, + const ZRegister& zm) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + histcnt(zd, pg, zn, zm); + } + void Histseg(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + histseg(zd, zn, zm); + } + void Ldnt1sb(const ZRegister& zt, + const PRegisterZ& pg, + const SVEMemOperand& addr) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + ldnt1sb(zt, pg, addr); + } + void Ldnt1sh(const ZRegister& zt, + const PRegisterZ& pg, + const SVEMemOperand& addr) { + VIXL_ASSERT(allow_macro_instructions_); + 
SingleEmissionCheckScope guard(this); + ldnt1sh(zt, pg, addr); + } + void Ldnt1sw(const ZRegister& zt, + const PRegisterZ& pg, + const SVEMemOperand& addr) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + ldnt1sw(zt, pg, addr); + } + void Match(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const ZRegister& zn, + const ZRegister& zm) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + match(pd, pg, zn, zm); + } + void Mla(const ZRegister& zd, + const ZRegister& za, + const ZRegister& zn, + const ZRegister& zm, + int index); + void Mls(const ZRegister& zd, + const ZRegister& za, + const ZRegister& zn, + const ZRegister& zm, + int index); + void Mul(const ZRegister& zd, + const ZRegister& zn, + const ZRegister& zm, + int index) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + mul(zd, zn, zm, index); + } + void Mul(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + mul(zd, zn, zm); + } + void Nbsl(const ZRegister& zd, + const ZRegister& zn, + const ZRegister& zm, + const ZRegister& zk); + void Nmatch(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const ZRegister& zn, + const ZRegister& zm) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + nmatch(pd, pg, zn, zm); + } + void Pmul(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + pmul(zd, zn, zm); + } + void Pmullb(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + pmullb(zd, zn, zm); + } + void Pmullt(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + pmullt(zd, zn, zm); + } + void Raddhnb(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + raddhnb(zd, zn, zm); + } + void Raddhnt(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + raddhnt(zd, zn, zm); + } + void Rshrnb(const ZRegister& zd, const ZRegister& zn, int shift) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + rshrnb(zd, zn, shift); + } + void Rshrnt(const ZRegister& zd, const ZRegister& zn, int shift) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + rshrnt(zd, zn, shift); + } + void Rsubhnb(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + rsubhnb(zd, zn, zm); + } + void Rsubhnt(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + rsubhnt(zd, zn, zm); + } + void Saba(const ZRegister& zd, + const ZRegister& za, + const ZRegister& zn, + const ZRegister& zm); + void Sabalb(const ZRegister& zd, + const ZRegister& za, + const ZRegister& zn, + const ZRegister& zm); + void Sabalt(const ZRegister& zd, + const ZRegister& za, + const ZRegister& zn, + const ZRegister& zm); + void Sabdlb(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm) { + VIXL_ASSERT(allow_macro_instructions_); 
+ SingleEmissionCheckScope guard(this); + sabdlb(zd, zn, zm); + } + void Sabdlt(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + sabdlt(zd, zn, zm); + } + void Sadalp(const ZRegister& zda, const PRegisterM& pg, const ZRegister& zn) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + sadalp(zda, pg, zn); + } + void Saddlb(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + saddlb(zd, zn, zm); + } + void Saddlbt(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + saddlbt(zd, zn, zm); + } + void Saddlt(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + saddlt(zd, zn, zm); + } + void Saddwb(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + saddwb(zd, zn, zm); + } + void Saddwt(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + saddwt(zd, zn, zm); + } + void Sbclb(const ZRegister& zd, + const ZRegister& za, + const ZRegister& zn, + const ZRegister& zm); + void Sbclt(const ZRegister& zd, + const ZRegister& za, + const ZRegister& zn, + const ZRegister& zm); + void Shrnb(const ZRegister& zd, const ZRegister& zn, int shift) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + shrnb(zd, zn, shift); + } + void Shrnt(const ZRegister& zd, const ZRegister& zn, int shift) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + shrnt(zd, zn, shift); + } + void Shsub(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm); + void Sli(const ZRegister& zd, const ZRegister& zn, int shift) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + sli(zd, zn, shift); + } + void Smaxp(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm); + void Sminp(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm); + void Smlalb(const ZRegister& zd, + const ZRegister& za, + const ZRegister& zn, + const ZRegister& zm, + int index); + void Smlalb(const ZRegister& zd, + const ZRegister& za, + const ZRegister& zn, + const ZRegister& zm); + void Smlalt(const ZRegister& zd, + const ZRegister& za, + const ZRegister& zn, + const ZRegister& zm, + int index); + void Smlalt(const ZRegister& zd, + const ZRegister& za, + const ZRegister& zn, + const ZRegister& zm); + void Smlslb(const ZRegister& zd, + const ZRegister& za, + const ZRegister& zn, + const ZRegister& zm, + int index); + void Smlslb(const ZRegister& zd, + const ZRegister& za, + const ZRegister& zn, + const ZRegister& zm); + void Smlslt(const ZRegister& zd, + const ZRegister& za, + const ZRegister& zn, + const ZRegister& zm, + int index); + void Smlslt(const ZRegister& zd, + const ZRegister& za, + const ZRegister& zn, + const ZRegister& zm); + void Smulh(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + smulh(zd, zn, zm); + } + void Smullb(const ZRegister& zd, + const 
ZRegister& zn, + const ZRegister& zm, + int index) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + smullb(zd, zn, zm, index); + } + void Smullb(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + smullb(zd, zn, zm); + } + void Smullt(const ZRegister& zd, + const ZRegister& zn, + const ZRegister& zm, + int index) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + smullt(zd, zn, zm, index); + } + void Smullt(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + smullt(zd, zn, zm); + } + void Sqabs(const ZRegister& zd, const PRegister& pg, const ZRegister& zn) { + VIXL_ASSERT(allow_macro_instructions_); + MovprfxHelperScope guard(this, zd, pg, zd); + sqabs(zd, pg.Merging(), zn); + } + void Sqcadd(const ZRegister& zd, + const ZRegister& zn, + const ZRegister& zm, + int rot); + void Sqdmlalb(const ZRegister& zd, + const ZRegister& za, + const ZRegister& zn, + const ZRegister& zm, + int index); + void Sqdmlalb(const ZRegister& zd, + const ZRegister& za, + const ZRegister& zn, + const ZRegister& zm); + void Sqdmlalbt(const ZRegister& zd, + const ZRegister& za, + const ZRegister& zn, + const ZRegister& zm); + void Sqdmlalt(const ZRegister& zd, + const ZRegister& za, + const ZRegister& zn, + const ZRegister& zm, + int index); + void Sqdmlalt(const ZRegister& zd, + const ZRegister& za, + const ZRegister& zn, + const ZRegister& zm); + void Sqdmlslb(const ZRegister& zd, + const ZRegister& za, + const ZRegister& zn, + const ZRegister& zm, + int index); + void Sqdmlslb(const ZRegister& zd, + const ZRegister& za, + const ZRegister& zn, + const ZRegister& zm); + void Sqdmlslbt(const ZRegister& zd, + const ZRegister& za, + const ZRegister& zn, + const ZRegister& zm); + void Sqdmlslt(const ZRegister& zd, + const ZRegister& za, + const ZRegister& zn, + const ZRegister& zm, + int index); + void Sqdmlslt(const ZRegister& zd, + const ZRegister& za, + const ZRegister& zn, + const ZRegister& zm); + void Sqdmulh(const ZRegister& zd, + const ZRegister& zn, + const ZRegister& zm, + int index) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + sqdmulh(zd, zn, zm, index); + } + void Sqdmulh(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + sqdmulh(zd, zn, zm); + } + void Sqdmullb(const ZRegister& zd, + const ZRegister& zn, + const ZRegister& zm, + int index) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + sqdmullb(zd, zn, zm, index); + } + void Sqdmullb(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + sqdmullb(zd, zn, zm); + } + void Sqdmullt(const ZRegister& zd, + const ZRegister& zn, + const ZRegister& zm, + int index) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + sqdmullt(zd, zn, zm, index); + } + void Sqdmullt(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + sqdmullt(zd, zn, zm); + } + void Sqneg(const ZRegister& zd, const PRegister& pg, const ZRegister& zn) { + VIXL_ASSERT(allow_macro_instructions_); + MovprfxHelperScope guard(this, zd, pg, zd); + 
sqneg(zd, pg.Merging(), zn); + } + void Sqrdcmlah(const ZRegister& zd, + const ZRegister& za, + const ZRegister& zn, + const ZRegister& zm, + int index, + int rot); + void Sqrdcmlah(const ZRegister& zd, + const ZRegister& za, + const ZRegister& zn, + const ZRegister& zm, + int rot); + void Sqrdmlah(const ZRegister& zd, + const ZRegister& za, + const ZRegister& zn, + const ZRegister& zm); + void Sqrdmlah(const ZRegister& zd, + const ZRegister& za, + const ZRegister& zn, + const ZRegister& zm, + int index); + void Sqrdmlsh(const ZRegister& zd, + const ZRegister& za, + const ZRegister& zn, + const ZRegister& zm); + void Sqrdmlsh(const ZRegister& zd, + const ZRegister& za, + const ZRegister& zn, + const ZRegister& zm, + int index); + void Sqrdmulh(const ZRegister& zd, + const ZRegister& zn, + const ZRegister& zm, + int index) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + sqrdmulh(zd, zn, zm, index); + } + void Sqrdmulh(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + sqrdmulh(zd, zn, zm); + } + void Sqrshl(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm); + void Sqrshrnb(const ZRegister& zd, const ZRegister& zn, int shift) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + sqrshrnb(zd, zn, shift); + } + void Sqrshrnt(const ZRegister& zd, const ZRegister& zn, int shift) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + sqrshrnt(zd, zn, shift); + } + void Sqrshrunb(const ZRegister& zd, const ZRegister& zn, int shift) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + sqrshrunb(zd, zn, shift); + } + void Sqrshrunt(const ZRegister& zd, const ZRegister& zn, int shift) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + sqrshrunt(zd, zn, shift); + } + void Sqshl(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + int shift) { + VIXL_ASSERT(allow_macro_instructions_); + MovprfxHelperScope guard(this, zd, pg, zn); + sqshl(zd, pg, zd, shift); + } + void Sqshl(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm); + void Sqshlu(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + int shift) { + VIXL_ASSERT(allow_macro_instructions_); + MovprfxHelperScope guard(this, zd, pg, zn); + sqshlu(zd, pg, zd, shift); + } + void Sqshrnb(const ZRegister& zd, const ZRegister& zn, int shift) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + sqshrnb(zd, zn, shift); + } + void Sqshrnt(const ZRegister& zd, const ZRegister& zn, int shift) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + sqshrnt(zd, zn, shift); + } + void Sqshrunb(const ZRegister& zd, const ZRegister& zn, int shift) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + sqshrunb(zd, zn, shift); + } + void Sqshrunt(const ZRegister& zd, const ZRegister& zn, int shift) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + sqshrunt(zd, zn, shift); + } + void Sqsub(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm); + void Sqxtnb(const ZRegister& zd, const ZRegister& zn) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + sqxtnb(zd, zn); + } + void Sqxtnt(const 
ZRegister& zd, const ZRegister& zn) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + sqxtnt(zd, zn); + } + void Sqxtunb(const ZRegister& zd, const ZRegister& zn) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + sqxtunb(zd, zn); + } + void Sqxtunt(const ZRegister& zd, const ZRegister& zn) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + sqxtunt(zd, zn); + } + void Sri(const ZRegister& zd, const ZRegister& zn, int shift) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + sri(zd, zn, shift); + } + void Srshl(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm); + void Srshr(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + int shift) { + VIXL_ASSERT(allow_macro_instructions_); + MovprfxHelperScope guard(this, zd, pg, zn); + srshr(zd, pg, zd, shift); + } + void Srsra(const ZRegister& zd, + const ZRegister& za, + const ZRegister& zn, + int shift); + void Sshllb(const ZRegister& zd, const ZRegister& zn, int shift) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + sshllb(zd, zn, shift); + } + void Sshllt(const ZRegister& zd, const ZRegister& zn, int shift) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + sshllt(zd, zn, shift); + } + void Ssra(const ZRegister& zd, + const ZRegister& za, + const ZRegister& zn, + int shift); + void Ssublb(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + ssublb(zd, zn, zm); + } + void Ssublbt(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + ssublbt(zd, zn, zm); + } + void Ssublt(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + ssublt(zd, zn, zm); + } + void Ssubltb(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + ssubltb(zd, zn, zm); + } + void Ssubwb(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + ssubwb(zd, zn, zm); + } + void Ssubwt(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + ssubwt(zd, zn, zm); + } + void Subhnb(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + subhnb(zd, zn, zm); + } + void Subhnt(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + subhnt(zd, zn, zm); + } + void Suqadd(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm); + void Tbl(const ZRegister& zd, + const ZRegister& zn1, + const ZRegister& zn2, + const ZRegister& zm) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + tbl(zd, zn1, zn2, zm); + } + void Tbx(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + tbx(zd, zn, zm); + } + void Uaba(const ZRegister& zd, + 
const ZRegister& za, + const ZRegister& zn, + const ZRegister& zm); + void Uabalb(const ZRegister& zd, + const ZRegister& za, + const ZRegister& zn, + const ZRegister& zm); + void Uabalt(const ZRegister& zd, + const ZRegister& za, + const ZRegister& zn, + const ZRegister& zm); + void Uabdlb(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + uabdlb(zd, zn, zm); + } + void Uabdlt(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + uabdlt(zd, zn, zm); + } + void Uadalp(const ZRegister& zda, const PRegisterM& pg, const ZRegister& zn) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + uadalp(zda, pg, zn); + } + void Uaddlb(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + uaddlb(zd, zn, zm); + } + void Uaddlt(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + uaddlt(zd, zn, zm); + } + void Uaddwb(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + uaddwb(zd, zn, zm); + } + void Uaddwt(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + uaddwt(zd, zn, zm); + } + void Uhsub(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm); + void Umaxp(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm); + void Uminp(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm); + void Umlalb(const ZRegister& zd, + const ZRegister& za, + const ZRegister& zn, + const ZRegister& zm, + int index); + void Umlalb(const ZRegister& zd, + const ZRegister& za, + const ZRegister& zn, + const ZRegister& zm); + void Umlalt(const ZRegister& zd, + const ZRegister& za, + const ZRegister& zn, + const ZRegister& zm, + int index); + void Umlalt(const ZRegister& zd, + const ZRegister& za, + const ZRegister& zn, + const ZRegister& zm); + void Umlslb(const ZRegister& zd, + const ZRegister& za, + const ZRegister& zn, + const ZRegister& zm, + int index); + void Umlslb(const ZRegister& zd, + const ZRegister& za, + const ZRegister& zn, + const ZRegister& zm); + void Umlslt(const ZRegister& zd, + const ZRegister& za, + const ZRegister& zn, + const ZRegister& zm, + int index); + void Umlslt(const ZRegister& zd, + const ZRegister& za, + const ZRegister& zn, + const ZRegister& zm); + void Umulh(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + umulh(zd, zn, zm); + } + void Umullb(const ZRegister& zd, + const ZRegister& zn, + const ZRegister& zm, + int index) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + umullb(zd, zn, zm, index); + } + void Umullb(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + umullb(zd, zn, zm); + } + void Umullt(const ZRegister& zd, + const ZRegister& zn, + const ZRegister& zm, + int index) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope 
guard(this); + umullt(zd, zn, zm, index); + } + void Umullt(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + umullt(zd, zn, zm); + } + void Uqrshl(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm); + void Uqrshrnb(const ZRegister& zd, const ZRegister& zn, int shift) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + uqrshrnb(zd, zn, shift); + } + void Uqrshrnt(const ZRegister& zd, const ZRegister& zn, int shift) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + uqrshrnt(zd, zn, shift); + } + void Uqshl(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + int shift) { + VIXL_ASSERT(allow_macro_instructions_); + MovprfxHelperScope guard(this, zd, pg, zn); + uqshl(zd, pg, zd, shift); + } + void Uqshl(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm); + void Uqshrnb(const ZRegister& zd, const ZRegister& zn, int shift) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + uqshrnb(zd, zn, shift); + } + void Uqshrnt(const ZRegister& zd, const ZRegister& zn, int shift) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + uqshrnt(zd, zn, shift); + } + void Uqsub(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm); + void Uqxtnb(const ZRegister& zd, const ZRegister& zn) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + uqxtnb(zd, zn); + } + void Uqxtnt(const ZRegister& zd, const ZRegister& zn) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + uqxtnt(zd, zn); + } + void Urecpe(const ZRegister& zd, const PRegister& pg, const ZRegister& zn) { + VIXL_ASSERT(allow_macro_instructions_); + MovprfxHelperScope guard(this, zd, pg, zd); + urecpe(zd, pg.Merging(), zn); + } + void Urshl(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm); + void Urshr(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + int shift) { + VIXL_ASSERT(allow_macro_instructions_); + MovprfxHelperScope guard(this, zd, pg, zn); + urshr(zd, pg, zd, shift); + } + void Ursqrte(const ZRegister& zd, const PRegister& pg, const ZRegister& zn) { + VIXL_ASSERT(allow_macro_instructions_); + MovprfxHelperScope guard(this, zd, pg, zd); + ursqrte(zd, pg.Merging(), zn); + } + void Ursra(const ZRegister& zd, + const ZRegister& za, + const ZRegister& zn, + int shift); + void Ushllb(const ZRegister& zd, const ZRegister& zn, int shift) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + ushllb(zd, zn, shift); + } + void Ushllt(const ZRegister& zd, const ZRegister& zn, int shift) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + ushllt(zd, zn, shift); + } + void Usqadd(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm); + void Usra(const ZRegister& zd, + const ZRegister& za, + const ZRegister& zn, + int shift); + void Usublb(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + usublb(zd, zn, zm); + } + void Usublt(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + 
usublt(zd, zn, zm); + } + void Usubwb(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + usubwb(zd, zn, zm); + } + void Usubwt(const ZRegister& zd, const ZRegister& zn, const ZRegister& zm) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + usubwt(zd, zn, zm); + } + void Whilege(const PRegisterWithLaneSize& pd, + const Register& rn, + const Register& rm) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + whilege(pd, rn, rm); + } + void Whilegt(const PRegisterWithLaneSize& pd, + const Register& rn, + const Register& rm) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + whilegt(pd, rn, rm); + } + void Whilehi(const PRegisterWithLaneSize& pd, + const Register& rn, + const Register& rm) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + whilehi(pd, rn, rm); + } + void Whilehs(const PRegisterWithLaneSize& pd, + const Register& rn, + const Register& rm) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + whilehs(pd, rn, rm); + } + void Whilerw(const PRegisterWithLaneSize& pd, + const Register& rn, + const Register& rm) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + whilerw(pd, rn, rm); + } + void Whilewr(const PRegisterWithLaneSize& pd, + const Register& rn, + const Register& rm) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + whilewr(pd, rn, rm); + } + void Xar(const ZRegister& zd, + const ZRegister& zn, + const ZRegister& zm, + int shift) { + VIXL_ASSERT(allow_macro_instructions_); + if (zd.Aliases(zm)) { + SingleEmissionCheckScope guard(this); + xar(zd, zm, zn, shift); + } else { + MovprfxHelperScope guard(this, zd, zn); + xar(zd, zd, zm, shift); + } + } + void Fmmla(const ZRegister& zd, + const ZRegister& za, + const ZRegister& zn, + const ZRegister& zm); + void Smmla(const ZRegister& zd, + const ZRegister& za, + const ZRegister& zn, + const ZRegister& zm); + void Ummla(const ZRegister& zd, + const ZRegister& za, + const ZRegister& zn, + const ZRegister& zm); + void Usmmla(const ZRegister& zd, + const ZRegister& za, + const ZRegister& zn, + const ZRegister& zm); + void Usdot(const ZRegister& zd, + const ZRegister& za, + const ZRegister& zn, + const ZRegister& zm); + void Usdot(const ZRegister& zd, + const ZRegister& za, + const ZRegister& zn, + const ZRegister& zm, + int index); + void Sudot(const ZRegister& zd, + const ZRegister& za, + const ZRegister& zn, + const ZRegister& zm, + int index); + + // MTE + void St2g(const Register& rt, const MemOperand& addr); + void Stg(const Register& rt, const MemOperand& addr); + void Stgp(const Register& rt1, const Register& rt2, const MemOperand& addr); + void Stz2g(const Register& rt, const MemOperand& addr); + void Stzg(const Register& rt, const MemOperand& addr); + void Ldg(const Register& rt, const MemOperand& addr); + + void Cpye(const Register& rd, const Register& rs, const Register& rn) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + cpye(rd, rs, rn); + } + + void Cpyen(const Register& rd, const Register& rs, const Register& rn) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + cpyen(rd, rs, rn); + } + + void Cpyern(const Register& rd, const Register& rs, const Register& rn) { + VIXL_ASSERT(allow_macro_instructions_); + 
SingleEmissionCheckScope guard(this); + cpyern(rd, rs, rn); + } + + void Cpyewn(const Register& rd, const Register& rs, const Register& rn) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + cpyewn(rd, rs, rn); + } + + void Cpyfe(const Register& rd, const Register& rs, const Register& rn) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + cpyfe(rd, rs, rn); + } + + void Cpyfen(const Register& rd, const Register& rs, const Register& rn) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + cpyfen(rd, rs, rn); + } + + void Cpyfern(const Register& rd, const Register& rs, const Register& rn) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + cpyfern(rd, rs, rn); + } + + void Cpyfewn(const Register& rd, const Register& rs, const Register& rn) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + cpyfewn(rd, rs, rn); + } + + void Cpyfm(const Register& rd, const Register& rs, const Register& rn) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + cpyfm(rd, rs, rn); + } + + void Cpyfmn(const Register& rd, const Register& rs, const Register& rn) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + cpyfmn(rd, rs, rn); + } + + void Cpyfmrn(const Register& rd, const Register& rs, const Register& rn) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + cpyfmrn(rd, rs, rn); + } + + void Cpyfmwn(const Register& rd, const Register& rs, const Register& rn) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + cpyfmwn(rd, rs, rn); + } + + void Cpyfp(const Register& rd, const Register& rs, const Register& rn) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + cpyfp(rd, rs, rn); + } + + void Cpyfpn(const Register& rd, const Register& rs, const Register& rn) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + cpyfpn(rd, rs, rn); + } + + void Cpyfprn(const Register& rd, const Register& rs, const Register& rn) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + cpyfprn(rd, rs, rn); + } + + void Cpyfpwn(const Register& rd, const Register& rs, const Register& rn) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + cpyfpwn(rd, rs, rn); + } + + void Cpym(const Register& rd, const Register& rs, const Register& rn) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + cpym(rd, rs, rn); + } + + void Cpymn(const Register& rd, const Register& rs, const Register& rn) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + cpymn(rd, rs, rn); + } + + void Cpymrn(const Register& rd, const Register& rs, const Register& rn) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + cpymrn(rd, rs, rn); + } + + void Cpymwn(const Register& rd, const Register& rs, const Register& rn) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + cpymwn(rd, rs, rn); + } + + void Cpyp(const Register& rd, const Register& rs, const Register& rn) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + cpyp(rd, rs, rn); + } + + void Cpypn(const Register& rd, const Register& rs, const Register& rn) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope 
guard(this); + cpypn(rd, rs, rn); + } + + void Cpyprn(const Register& rd, const Register& rs, const Register& rn) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + cpyprn(rd, rs, rn); + } + + void Cpypwn(const Register& rd, const Register& rs, const Register& rn) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + cpypwn(rd, rs, rn); + } + + void Sete(const Register& rd, const Register& rn, const Register& rs) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + sete(rd, rn, rs); + } + + void Seten(const Register& rd, const Register& rn, const Register& rs) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + seten(rd, rn, rs); + } + + void Setge(const Register& rd, const Register& rn, const Register& rs) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + setge(rd, rn, rs); + } + + void Setgen(const Register& rd, const Register& rn, const Register& rs) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + setgen(rd, rn, rs); + } + + void Setgm(const Register& rd, const Register& rn, const Register& rs) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + setgm(rd, rn, rs); + } + + void Setgmn(const Register& rd, const Register& rn, const Register& rs) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + setgmn(rd, rn, rs); + } + + void Setgp(const Register& rd, const Register& rn, const Register& rs) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + setgp(rd, rn, rs); + } + + void Setgpn(const Register& rd, const Register& rn, const Register& rs) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + setgpn(rd, rn, rs); + } + + void Setm(const Register& rd, const Register& rn, const Register& rs) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + setm(rd, rn, rs); + } + + void Setmn(const Register& rd, const Register& rn, const Register& rs) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + setmn(rd, rn, rs); + } + + void Setp(const Register& rd, const Register& rn, const Register& rs) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + setp(rd, rn, rs); + } + + void Setpn(const Register& rd, const Register& rn, const Register& rs) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + setpn(rd, rn, rs); + } + +// Macro assembler wrappers that package the MOPS instructions into a single +// call. 
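// For example (illustrative; `__` stands for the MacroAssembler, the register
// choices are arbitrary, and FEAT_MOPS must be available at run time):
//
//   // Copy x2 bytes from [x1] to [x0]; all three registers are updated.
//   __ Cpyf(x0, x1, x2);   // emits cpyfp, cpyfm, cpyfe
//
//   // Set x1 bytes at [x0] to the byte value in x2.
//   __ Set(x0, x1, x2);    // emits setp, setm, sete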
+#define MOPS_LIST(V) \ + V(Set, set, ) \ + V(Setn, set, n) \ + V(Setg, setg, ) \ + V(Setgn, setg, n) \ + V(Cpy, cpy, ) \ + V(Cpyn, cpy, n) \ + V(Cpyrn, cpy, rn) \ + V(Cpywn, cpy, wn) \ + V(Cpyf, cpyf, ) \ + V(Cpyfn, cpyf, n) \ + V(Cpyfrn, cpyf, rn) \ + V(Cpyfwn, cpyf, wn) + +#define DEFINE_MACRO_ASM_FUNC(MASM, ASMPREFIX, ASMSUFFIX) \ + void MASM(const Register& ra, const Register& rb, const Register& rc) { \ + ExactAssemblyScope scope(this, 3 * kInstructionSize); \ + ASMPREFIX##p##ASMSUFFIX(ra, rb, rc); \ + ASMPREFIX##m##ASMSUFFIX(ra, rb, rc); \ + ASMPREFIX##e##ASMSUFFIX(ra, rb, rc); \ + } + MOPS_LIST(DEFINE_MACRO_ASM_FUNC) +#undef DEFINE_MACRO_ASM_FUNC + + void Abs(const Register& rd, const Register& rn) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + abs(rd, rn); + } + + void Cnt(const Register& rd, const Register& rn) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + cnt(rd, rn); + } + + void Ctz(const Register& rd, const Register& rn) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + ctz(rd, rn); + } + + void Smax(const Register& rd, const Register& rn, const Operand& op); + void Smin(const Register& rd, const Register& rn, const Operand& op); + void Umax(const Register& rd, const Register& rn, const Operand& op); + void Umin(const Register& rd, const Register& rn, const Operand& op); + + template + Literal* CreateLiteralDestroyedWithPool(T value) { + return new Literal(value, + &literal_pool_, + RawLiteral::kDeletedOnPoolDestruction); + } + + template + Literal* CreateLiteralDestroyedWithPool(T high64, T low64) { + return new Literal(high64, + low64, + &literal_pool_, + RawLiteral::kDeletedOnPoolDestruction); + } + + // Push the system stack pointer (sp) down to allow the same to be done to + // the current stack pointer (according to StackPointer()). This must be + // called _before_ accessing the memory. + // + // This is necessary when pushing or otherwise adding things to the stack, to + // satisfy the AAPCS64 constraint that the memory below the system stack + // pointer is not accessed. + // + // This method asserts that StackPointer() is not sp, since the call does + // not make sense in that context. + // + // TODO: This method can only accept values of 'space' that can be encoded in + // one instruction. Refer to the implementation for details. + void BumpSystemStackPointer(const Operand& space); + + virtual bool AllowMacroInstructions() const VIXL_OVERRIDE { + return allow_macro_instructions_; + } + + virtual bool ArePoolsBlocked() const VIXL_OVERRIDE { + return IsLiteralPoolBlocked() && IsVeneerPoolBlocked(); + } + + void SetGenerateSimulatorCode(bool value) { + generate_simulator_code_ = value; + } + + bool GenerateSimulatorCode() const { return generate_simulator_code_; } + + size_t GetLiteralPoolSize() const { return literal_pool_.GetSize(); } + VIXL_DEPRECATED("GetLiteralPoolSize", size_t LiteralPoolSize() const) { + return GetLiteralPoolSize(); + } + + size_t GetLiteralPoolMaxSize() const { return literal_pool_.GetMaxSize(); } + VIXL_DEPRECATED("GetLiteralPoolMaxSize", size_t LiteralPoolMaxSize() const) { + return GetLiteralPoolMaxSize(); + } + + size_t GetVeneerPoolMaxSize() const { return veneer_pool_.GetMaxSize(); } + VIXL_DEPRECATED("GetVeneerPoolMaxSize", size_t VeneerPoolMaxSize() const) { + return GetVeneerPoolMaxSize(); + } + + // The number of unresolved branches that may require a veneer. 
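// For example (illustrative; `far_away` is a hypothetical Label): a
// conditional branch such as
//
//   __ Tbz(x0, 0, &far_away);
//
// has a limited immediate range, so if `far_away` is bound too far from the
// branch, the veneer pool emits an unconditional branch "trampoline" for it.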
+ int GetNumberOfPotentialVeneers() const { + return veneer_pool_.GetNumberOfPotentialVeneers(); + } + VIXL_DEPRECATED("GetNumberOfPotentialVeneers", + int NumberOfPotentialVeneers() const) { + return GetNumberOfPotentialVeneers(); + } + + ptrdiff_t GetNextCheckPoint() const { + ptrdiff_t next_checkpoint_for_pools = + std::min(literal_pool_.GetCheckpoint(), veneer_pool_.GetCheckpoint()); + return std::min(next_checkpoint_for_pools, + static_cast(GetBuffer().GetCapacity())); + } + VIXL_DEPRECATED("GetNextCheckPoint", ptrdiff_t NextCheckPoint()) { + return GetNextCheckPoint(); + } + + void EmitLiteralPool(LiteralPool::EmitOption option) { + if (!literal_pool_.IsEmpty()) literal_pool_.Emit(option); + + checkpoint_ = GetNextCheckPoint(); + recommended_checkpoint_ = literal_pool_.GetNextRecommendedCheckpoint(); + } + + void CheckEmitFor(size_t amount); + void EnsureEmitFor(size_t amount) { + ptrdiff_t offset = amount; + ptrdiff_t max_pools_size = + literal_pool_.GetMaxSize() + veneer_pool_.GetMaxSize(); + ptrdiff_t cursor = GetCursorOffset(); + if ((cursor >= recommended_checkpoint_) || + ((cursor + offset + max_pools_size) >= checkpoint_)) { + CheckEmitFor(amount); + } + } + + void CheckEmitPoolsFor(size_t amount); + virtual void EnsureEmitPoolsFor(size_t amount) VIXL_OVERRIDE { + ptrdiff_t offset = amount; + ptrdiff_t max_pools_size = + literal_pool_.GetMaxSize() + veneer_pool_.GetMaxSize(); + ptrdiff_t cursor = GetCursorOffset(); + if ((cursor >= recommended_checkpoint_) || + ((cursor + offset + max_pools_size) >= checkpoint_)) { + CheckEmitPoolsFor(amount); + } + } + + // Set the current stack pointer, but don't generate any code. + void SetStackPointer(const Register& stack_pointer) { + VIXL_ASSERT(!GetScratchRegisterList()->IncludesAliasOf(stack_pointer)); + sp_ = stack_pointer; + } + + // Return the current stack pointer, as set by SetStackPointer. + const Register& StackPointer() const { return sp_; } + + CPURegList* GetScratchRegisterList() { return &tmp_list_; } + VIXL_DEPRECATED("GetScratchRegisterList", CPURegList* TmpList()) { + return GetScratchRegisterList(); + } + + CPURegList* GetScratchVRegisterList() { return &v_tmp_list_; } + VIXL_DEPRECATED("GetScratchVRegisterList", CPURegList* FPTmpList()) { + return GetScratchVRegisterList(); + } + + CPURegList* GetScratchPRegisterList() { return &p_tmp_list_; } + + // Get or set the current (most-deeply-nested) UseScratchRegisterScope. + void SetCurrentScratchRegisterScope(UseScratchRegisterScope* scope) { + current_scratch_scope_ = scope; + } + UseScratchRegisterScope* GetCurrentScratchRegisterScope() { + return current_scratch_scope_; + } + + // Like printf, but print at run-time from generated code. + // + // The caller must ensure that arguments for floating-point placeholders + // (such as %e, %f or %g) are VRegisters in format 1S or 1D, and that + // arguments for integer placeholders are Registers. + // + // At the moment it is only possible to print the value of sp if it is the + // current stack pointer. Otherwise, the MacroAssembler will automatically + // update sp on every push (using BumpSystemStackPointer), so determining its + // value is difficult. + // + // Format placeholders that refer to more than one argument, or to a specific + // argument, are not supported. This includes formats like "%1$d" or "%.*d". + // + // This function automatically preserves caller-saved registers so that + // calling code can use Printf at any point without having to worry about + // corruption. 
The preservation mechanism generates a lot of code. If this is + // a problem, preserve the important registers manually and then call + // PrintfNoPreserve. Callee-saved registers are not used by Printf, and are + // implicitly preserved. + void Printf(const char* format, + CPURegister arg0 = NoCPUReg, + CPURegister arg1 = NoCPUReg, + CPURegister arg2 = NoCPUReg, + CPURegister arg3 = NoCPUReg); + + // Like Printf, but don't preserve any caller-saved registers, not even 'lr'. + // + // The return code from the system printf call will be returned in x0. + void PrintfNoPreserve(const char* format, + const CPURegister& arg0 = NoCPUReg, + const CPURegister& arg1 = NoCPUReg, + const CPURegister& arg2 = NoCPUReg, + const CPURegister& arg3 = NoCPUReg); + + // Trace control when running the debug simulator. + // + // For example: + // + // __ Trace(LOG_REGS, TRACE_ENABLE); + // Will add registers to the trace if it wasn't already the case. + // + // __ Trace(LOG_DISASM, TRACE_DISABLE); + // Will stop logging disassembly. It has no effect if the disassembly wasn't + // already being logged. + void Trace(TraceParameters parameters, TraceCommand command); + + // Log the requested data independently of what is being traced. + // + // For example: + // + // __ Log(LOG_FLAGS) + // Will output the flags. + void Log(TraceParameters parameters); + + // Enable or disable CPU features dynamically. This mechanism allows users to + // strictly check the use of CPU features in different regions of code. + void SetSimulatorCPUFeatures(const CPUFeatures& features); + void EnableSimulatorCPUFeatures(const CPUFeatures& features); + void DisableSimulatorCPUFeatures(const CPUFeatures& features); + void SaveSimulatorCPUFeatures(); + void RestoreSimulatorCPUFeatures(); + + LiteralPool* GetLiteralPool() { return &literal_pool_; } + +// Support for simulated runtime calls. + +// `CallRuntime` requires variadic templating, that is only available from +// C++11. +#if __cplusplus >= 201103L +#define VIXL_HAS_MACROASSEMBLER_RUNTIME_CALL_SUPPORT +#endif // #if __cplusplus >= 201103L + +#ifdef VIXL_HAS_MACROASSEMBLER_RUNTIME_CALL_SUPPORT + template + void CallRuntimeHelper(R (*function)(P...), RuntimeCallType call_type); + + template + void CallRuntime(R (*function)(P...)) { + CallRuntimeHelper(function, kCallRuntime); + } + + template + void TailCallRuntime(R (*function)(P...)) { + CallRuntimeHelper(function, kTailCallRuntime); + } +#endif // #ifdef VIXL_HAS_MACROASSEMBLER_RUNTIME_CALL_SUPPORT + + protected: + void BlockLiteralPool() { literal_pool_.Block(); } + void ReleaseLiteralPool() { literal_pool_.Release(); } + bool IsLiteralPoolBlocked() const { return literal_pool_.IsBlocked(); } + void BlockVeneerPool() { veneer_pool_.Block(); } + void ReleaseVeneerPool() { veneer_pool_.Release(); } + bool IsVeneerPoolBlocked() const { return veneer_pool_.IsBlocked(); } + + virtual void BlockPools() VIXL_OVERRIDE { + BlockLiteralPool(); + BlockVeneerPool(); + } + + virtual void ReleasePools() VIXL_OVERRIDE { + ReleaseLiteralPool(); + ReleaseVeneerPool(); + } + + // The scopes below need to able to block and release a particular pool. + // TODO: Consider removing those scopes or move them to + // code-generation-scopes-vixl.h. 
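// For example (illustrative; `__` stands for the MacroAssembler and the
// format string, values and registers are made up):
//
//   __ Mov(w0, 42);
//   __ Fmov(d0, 1.5);
//   __ Printf("w0 = %d, d0 = %g\n", w0, d0);
//
// Where runtime-call support is available, a C++ function can also be called
// from generated code; arguments and the result follow AAPCS64, so this
// hypothetical helper takes its argument in x0 and returns its result in x0:
//
//   int64_t MyHelper(int64_t x) { return x + 1; }
//   ...
//   __ Mov(x0, 41);
//   __ CallRuntime(MyHelper);   // on return, x0 holds 42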
+ friend class BlockPoolsScope; + friend class BlockLiteralPoolScope; + friend class BlockVeneerPoolScope; + + virtual void SetAllowMacroInstructions(bool value) VIXL_OVERRIDE { + allow_macro_instructions_ = value; + } + + // Helper used to query information about code generation and to generate + // code for `csel`. + // Here and for the related helpers below: + // - Code is generated when `masm` is not `NULL`. + // - On return and when set, `should_synthesise_left` and + // `should_synthesise_right` will indicate whether `left` and `right` + // should be synthesized in a temporary register. + static void CselHelper(MacroAssembler* masm, + const Register& rd, + Operand left, + Operand right, + Condition cond, + bool* should_synthesise_left = NULL, + bool* should_synthesise_right = NULL); + + // The helper returns `true` if it can handle the specified arguments. + // Also see comments for `CselHelper()`. + static bool CselSubHelperTwoImmediates(MacroAssembler* masm, + const Register& rd, + int64_t left, + int64_t right, + Condition cond, + bool* should_synthesise_left, + bool* should_synthesise_right); + + // See comments for `CselHelper()`. + static bool CselSubHelperTwoOrderedImmediates(MacroAssembler* masm, + const Register& rd, + int64_t left, + int64_t right, + Condition cond); + + // See comments for `CselHelper()`. + static void CselSubHelperRightSmallImmediate(MacroAssembler* masm, + UseScratchRegisterScope* temps, + const Register& rd, + const Operand& left, + const Operand& right, + Condition cond, + bool* should_synthesise_left); + + // Generate code to calculate the address represented by `addr` and write it + // into `xd`. This is used as a common fall-back for out-of-range load and + // store operands. + // + // The vl_divisor_log2 argument is used to scale the VL, for use with + // SVE_MUL_VL. + void CalculateSVEAddress(const Register& xd, + const SVEMemOperand& addr, + int vl_divisor_log2 = 0); + + void CalculateSVEAddress(const Register& xd, + const SVEMemOperand& addr, + const CPURegister& rt) { + VIXL_ASSERT(rt.IsPRegister() || rt.IsZRegister()); + int vl_divisor_log2 = rt.IsPRegister() ? kZRegBitsPerPRegBitLog2 : 0; + CalculateSVEAddress(xd, addr, vl_divisor_log2); + } + + void SetFPNaNPropagationOption(FPMacroNaNPropagationOption nan_option) { + fp_nan_propagation_ = nan_option; + } + + void ResolveFPNaNPropagationOption(FPMacroNaNPropagationOption* nan_option) { + // The input option has priority over the option that has set. + if (*nan_option == NoFPMacroNaNPropagationSelected) { + *nan_option = fp_nan_propagation_; + } + VIXL_ASSERT(*nan_option != NoFPMacroNaNPropagationSelected); + } + + private: + // The actual Push and Pop implementations. These don't generate any code + // other than that required for the push or pop. This allows + // (Push|Pop)CPURegList to bundle together setup code for a large block of + // registers. + // + // Note that size is per register, and is specified in bytes. + void PushHelper(int count, + int size, + const CPURegister& src0, + const CPURegister& src1, + const CPURegister& src2, + const CPURegister& src3); + void PopHelper(int count, + int size, + const CPURegister& dst0, + const CPURegister& dst1, + const CPURegister& dst2, + const CPURegister& dst3); + + void Movi16bitHelper(const VRegister& vd, uint64_t imm); + void Movi32bitHelper(const VRegister& vd, uint64_t imm); + void Movi64bitHelper(const VRegister& vd, uint64_t imm); + + // Perform necessary maintenance operations before a push or pop. 
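// For example (illustrative): `__ Csel(x0, x1, 0, eq)` should not need a
// scratch register, since the zero right-hand side can be represented by xzr
// and the macro can emit a single `csel x0, x1, xzr, eq`.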
+ // + // Note that size is per register, and is specified in bytes. + void PrepareForPush(int count, int size); + void PrepareForPop(int count, int size); + + // The actual implementation of load and store operations for CPURegList. + enum LoadStoreCPURegListAction { kLoad, kStore }; + void LoadStoreCPURegListHelper(LoadStoreCPURegListAction operation, + CPURegList registers, + const MemOperand& mem); + // Returns a MemOperand suitable for loading or storing a CPURegList at `dst`. + // This helper may allocate registers from `scratch_scope` and generate code + // to compute an intermediate address. The resulting MemOperand is only valid + // as long as `scratch_scope` remains valid. + MemOperand BaseMemOperandForLoadStoreCPURegList( + const CPURegList& registers, + const MemOperand& mem, + UseScratchRegisterScope* scratch_scope); + + bool LabelIsOutOfRange(Label* label, ImmBranchType branch_type) { + return !Instruction::IsValidImmPCOffset(branch_type, + label->GetLocation() - + GetCursorOffset()); + } + + void ConfigureSimulatorCPUFeaturesHelper(const CPUFeatures& features, + DebugHltOpcode action); + + void CompareHelper(Condition cond, + const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const ZRegister& zn, + IntegerOperand imm); + + // E.g. Ld1rb. + typedef void (Assembler::*SVELoadBroadcastFn)(const ZRegister& zt, + const PRegisterZ& pg, + const SVEMemOperand& addr); + + void SVELoadBroadcastImmHelper(const ZRegister& zt, + const PRegisterZ& pg, + const SVEMemOperand& addr, + SVELoadBroadcastFn fn, + int divisor); + + // E.g. ldr/str + typedef void (Assembler::*SVELoadStoreFn)(const CPURegister& rt, + const SVEMemOperand& addr); + + void SVELoadStoreScalarImmHelper(const CPURegister& rt, + const SVEMemOperand& addr, + SVELoadStoreFn fn); + + typedef void (Assembler::*SVELoad1Fn)(const ZRegister& zt, + const PRegisterZ& pg, + const SVEMemOperand& addr); + typedef void (Assembler::*SVEStore1Fn)(const ZRegister& zt, + const PRegister& pg, + const SVEMemOperand& addr); + + // Helper for predicated Z register loads with addressing modes not directly + // encodable in the instruction. The supported_modifier parameter indicates + // which offset modifier the calling instruction encoder supports (eg. + // SVE_MUL_VL). The ratio log2 of VL to memory access size is passed as + // vl_divisor_log2; pass -1 to indicate no dependency. 
+ template + void SVELoadStoreNTBroadcastQOHelper( + const ZRegister& zt, + const Tg& pg, + const SVEMemOperand& addr, + Tf fn, + int imm_bits, + int shift_amount, + SVEOffsetModifier supported_modifier = NO_SVE_OFFSET_MODIFIER, + int vl_divisor_log2 = 0); + + template + void SVELoadStore1Helper(int msize_in_bytes_log2, + const ZRegister& zt, + const Tg& pg, + const SVEMemOperand& addr, + Tf fn); + + template + void SVELoadFFHelper(int msize_in_bytes_log2, + const ZRegister& zt, + const PRegisterZ& pg, + const SVEMemOperand& addr, + Tf fn); + + typedef void (MacroAssembler::*IntWideImmMacroFn)(const ZRegister& zd, + const ZRegister& zn, + IntegerOperand imm); + + typedef void (Assembler::*IntWideImmShiftFn)(const ZRegister& zd, + const ZRegister& zn, + int imm, + int shift); + + typedef void (Assembler::*Int3ArithFn)(const ZRegister& zd, + const ZRegister& zn, + const ZRegister& zm); + + typedef void (Assembler::*Int4ArithFn)(const ZRegister& zd, + const ZRegister& za, + const ZRegister& zn, + const ZRegister& zm); + + typedef void (Assembler::*IntArithImmFn)(const ZRegister& zd, + const ZRegister& zn, + int imm); + + typedef void (Assembler::*ZZZImmFn)(const ZRegister& zd, + const ZRegister& zn, + const ZRegister& zm, + int imm); + + typedef void (MacroAssembler::*SVEArithPredicatedFn)(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm); + + void IntWideImmHelper(IntArithImmFn imm_fn, + SVEArithPredicatedFn reg_fn, + const ZRegister& zd, + const ZRegister& zn, + IntegerOperand imm, + bool is_signed_imm); + + enum AddSubHelperOption { kAddImmediate, kSubImmediate }; + + void AddSubHelper(AddSubHelperOption option, + const ZRegister& zd, + const ZRegister& zn, + IntegerOperand imm); + + // Try to emit an add- or sub-like instruction (imm_fn) with `imm`, or the + // corresponding sub- or add-like instruction (n_imm_fn) with a negated `imm`. + // A `movprfx` is automatically generated if one is required. If successful, + // return true. Otherwise, return false. + // + // This helper uses two's complement equivalences, for example treating 0xffff + // as -1 for H-sized lanes. + bool TrySingleAddSub(AddSubHelperOption option, + const ZRegister& zd, + const ZRegister& zn, + IntegerOperand imm); + + void AbsoluteDifferenceAccumulate(Int3ArithFn fn, + const ZRegister& zd, + const ZRegister& za, + const ZRegister& zn, + const ZRegister& zm); + + void FourRegDestructiveHelper(Int3ArithFn fn, + const ZRegister& zd, + const ZRegister& za, + const ZRegister& zn, + const ZRegister& zm); + + void FourRegDestructiveHelper(Int4ArithFn fn, + const ZRegister& zd, + const ZRegister& za, + const ZRegister& zn, + const ZRegister& zm); + + void SVEDotIndexHelper(ZZZImmFn fn, + const ZRegister& zd, + const ZRegister& za, + const ZRegister& zn, + const ZRegister& zm, + int index); + + // For noncommutative arithmetic operations. + void NoncommutativeArithmeticHelper(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm, + SVEArithPredicatedFn fn, + SVEArithPredicatedFn rev_fn); + + void FPCommutativeArithmeticHelper(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm, + SVEArithPredicatedFn fn, + FPMacroNaNPropagationOption nan_option); + + // Floating-point fused multiply-add vectors (predicated), writing addend. 
+ typedef void (Assembler::*SVEMulAddPredicatedZdaFn)(const ZRegister& zda, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm); + + // Floating-point fused multiply-add vectors (predicated), writing + // multiplicand. + typedef void (Assembler::*SVEMulAddPredicatedZdnFn)(const ZRegister& zdn, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm); + + void FPMulAddHelper(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& za, + const ZRegister& zn, + const ZRegister& zm, + SVEMulAddPredicatedZdaFn fn_zda, + SVEMulAddPredicatedZdnFn fn_zdn, + FPMacroNaNPropagationOption nan_option); + + typedef void (Assembler::*SVEMulAddIndexFn)(const ZRegister& zda, + const ZRegister& zn, + const ZRegister& zm, + int index); + + void FourRegOneImmDestructiveHelper(ZZZImmFn fn, + const ZRegister& zd, + const ZRegister& za, + const ZRegister& zn, + const ZRegister& zm, + int imm); + + void ShiftRightAccumulate(IntArithImmFn fn, + const ZRegister& zd, + const ZRegister& za, + const ZRegister& zn, + int imm); + + void ComplexAddition(ZZZImmFn fn, + const ZRegister& zd, + const ZRegister& zn, + const ZRegister& zm, + int rot); + + // Tell whether any of the macro instruction can be used. When false the + // MacroAssembler will assert if a method which can emit a variable number + // of instructions is called. + bool allow_macro_instructions_; + + // Indicates whether we should generate simulator or native code. + bool generate_simulator_code_; + + // The register to use as a stack pointer for stack operations. + Register sp_; + + // Scratch registers available for use by the MacroAssembler. + CPURegList tmp_list_; + CPURegList v_tmp_list_; + CPURegList p_tmp_list_; + + UseScratchRegisterScope* current_scratch_scope_; + + LiteralPool literal_pool_; + VeneerPool veneer_pool_; + + ptrdiff_t checkpoint_; + ptrdiff_t recommended_checkpoint_; + + FPMacroNaNPropagationOption fp_nan_propagation_; + + friend class Pool; + friend class LiteralPool; +}; + + +inline size_t VeneerPool::GetOtherPoolsMaxSize() const { + return masm_->GetLiteralPoolMaxSize(); +} + + +inline size_t LiteralPool::GetOtherPoolsMaxSize() const { + return masm_->GetVeneerPoolMaxSize(); +} + + +inline void LiteralPool::SetNextRecommendedCheckpoint(ptrdiff_t offset) { + masm_->recommended_checkpoint_ = + std::min(masm_->recommended_checkpoint_, offset); + recommended_checkpoint_ = offset; +} + +class InstructionAccurateScope : public ExactAssemblyScope { + public: + VIXL_DEPRECATED("ExactAssemblyScope", + InstructionAccurateScope(MacroAssembler* masm, + int64_t count, + SizePolicy size_policy = kExactSize)) + : ExactAssemblyScope(masm, count * kInstructionSize, size_policy) {} +}; + +class BlockLiteralPoolScope { + public: + explicit BlockLiteralPoolScope(MacroAssembler* masm) : masm_(masm) { + masm_->BlockLiteralPool(); + } + + ~BlockLiteralPoolScope() { masm_->ReleaseLiteralPool(); } + + private: + MacroAssembler* masm_; +}; + + +class BlockVeneerPoolScope { + public: + explicit BlockVeneerPoolScope(MacroAssembler* masm) : masm_(masm) { + masm_->BlockVeneerPool(); + } + + ~BlockVeneerPoolScope() { masm_->ReleaseVeneerPool(); } + + private: + MacroAssembler* masm_; +}; + + +class BlockPoolsScope { + public: + explicit BlockPoolsScope(MacroAssembler* masm) : masm_(masm) { + masm_->BlockPools(); + } + + ~BlockPoolsScope() { masm_->ReleasePools(); } + + private: + MacroAssembler* masm_; +}; + +MovprfxHelperScope::MovprfxHelperScope(MacroAssembler* masm, + const ZRegister& dst, + const ZRegister& src) + : 
ExactAssemblyScope(masm, + ShouldGenerateMovprfx(dst, src) + ? (2 * kInstructionSize) + : kInstructionSize) { + if (ShouldGenerateMovprfx(dst, src)) { + masm->movprfx(dst, src); + } +} + +MovprfxHelperScope::MovprfxHelperScope(MacroAssembler* masm, + const ZRegister& dst, + const PRegister& pg, + const ZRegister& src) + : ExactAssemblyScope(masm, + ShouldGenerateMovprfx(dst, pg, src) + ? (2 * kInstructionSize) + : kInstructionSize) { + if (ShouldGenerateMovprfx(dst, pg, src)) { + masm->movprfx(dst, pg, src); + } +} + +// This scope utility allows scratch registers to be managed safely. The +// MacroAssembler's GetScratch*RegisterList() are used as a pool of scratch +// registers. These registers can be allocated on demand, and will be returned +// at the end of the scope. +// +// When the scope ends, the MacroAssembler's lists will be restored to their +// original state, even if the lists were modified by some other means. +class UseScratchRegisterScope { + public: + // This constructor implicitly calls `Open` to initialise the scope (`masm` + // must not be `NULL`), so it is ready to use immediately after it has been + // constructed. + explicit UseScratchRegisterScope(MacroAssembler* masm) + : masm_(NULL), + parent_(NULL), + old_available_(0), + old_available_v_(0), + old_available_p_(0) { + Open(masm); + } + // This constructor does not implicitly initialise the scope. Instead, the + // user is required to explicitly call the `Open` function before using the + // scope. + UseScratchRegisterScope() + : masm_(NULL), + parent_(NULL), + old_available_(0), + old_available_v_(0), + old_available_p_(0) {} + + // This function performs the actual initialisation work. + void Open(MacroAssembler* masm); + + // The destructor always implicitly calls the `Close` function. + ~UseScratchRegisterScope() { Close(); } + + // This function performs the cleaning-up work. It must succeed even if the + // scope has not been opened. It is safe to call multiple times. + void Close(); + + + bool IsAvailable(const CPURegister& reg) const; + + // Take a register from the appropriate temps list. It will be returned + // automatically when the scope ends. + Register AcquireW() { + return AcquireFrom(masm_->GetScratchRegisterList()).W(); + } + Register AcquireX() { + return AcquireFrom(masm_->GetScratchRegisterList()).X(); + } + VRegister AcquireH() { + return AcquireFrom(masm_->GetScratchVRegisterList()).H(); + } + VRegister AcquireS() { + return AcquireFrom(masm_->GetScratchVRegisterList()).S(); + } + VRegister AcquireD() { + return AcquireFrom(masm_->GetScratchVRegisterList()).D(); + } + ZRegister AcquireZ() { + return AcquireFrom(masm_->GetScratchVRegisterList()).Z(); + } + PRegister AcquireP() { + // Prefer to allocate p8-p15 if we can, to leave p0-p7 available for use as + // governing predicates. + CPURegList* available = masm_->GetScratchPRegisterList(); + RegList preferred = ~kGoverningPRegisterMask; + if ((available->GetList() & preferred) != 0) { + return AcquireFrom(available, preferred).P(); + } + return AcquireFrom(available).P(); + } + // Acquire a P register suitable for use as a governing predicate in + // instructions which only accept p0-p7 for that purpose. 
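+  //
+  // A minimal usage sketch (assuming a MacroAssembler `masm` and the usual
+  // register aliases; which registers are handed out is up to the pool):
+  //
+  //   {
+  //     UseScratchRegisterScope temps(&masm);
+  //     PRegister pg = temps.AcquireGoverningP();  // One of p0-p7.
+  //     ZRegister ztmp = temps.AcquireZ().VnD();
+  //     // ... use pg as a governing predicate and ztmp as a scratch ...
+  //   }  // Both registers are returned to the scratch pool here.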
+ PRegister AcquireGoverningP() { + CPURegList* available = masm_->GetScratchPRegisterList(); + return AcquireFrom(available, kGoverningPRegisterMask).P(); + } + + Register AcquireRegisterOfSize(int size_in_bits); + Register AcquireSameSizeAs(const Register& reg) { + return AcquireRegisterOfSize(reg.GetSizeInBits()); + } + VRegister AcquireVRegisterOfSize(int size_in_bits); + VRegister AcquireSameSizeAs(const VRegister& reg) { + return AcquireVRegisterOfSize(reg.GetSizeInBits()); + } + CPURegister AcquireCPURegisterOfSize(int size_in_bits) { + return masm_->GetScratchRegisterList()->IsEmpty() + ? CPURegister(AcquireVRegisterOfSize(size_in_bits)) + : CPURegister(AcquireRegisterOfSize(size_in_bits)); + } + + // Acquire a register big enough to represent one lane of `vector`. + Register AcquireRegisterToHoldLane(const CPURegister& vector) { + VIXL_ASSERT(vector.GetLaneSizeInBits() <= kXRegSize); + return (vector.GetLaneSizeInBits() > kWRegSize) ? AcquireX() : AcquireW(); + } + + + // Explicitly release an acquired (or excluded) register, putting it back in + // the appropriate temps list. + void Release(const CPURegister& reg); + + + // Make the specified registers available as scratch registers for the + // duration of this scope. + void Include(const CPURegList& list); + void Include(const Register& reg1, + const Register& reg2 = NoReg, + const Register& reg3 = NoReg, + const Register& reg4 = NoReg); + void Include(const VRegister& reg1, + const VRegister& reg2 = NoVReg, + const VRegister& reg3 = NoVReg, + const VRegister& reg4 = NoVReg); + void Include(const CPURegister& reg1, + const CPURegister& reg2 = NoCPUReg, + const CPURegister& reg3 = NoCPUReg, + const CPURegister& reg4 = NoCPUReg); + + + // Make sure that the specified registers are not available in this scope. + // This can be used to prevent helper functions from using sensitive + // registers, for example. + void Exclude(const CPURegList& list); + void Exclude(const Register& reg1, + const Register& reg2 = NoReg, + const Register& reg3 = NoReg, + const Register& reg4 = NoReg); + void Exclude(const VRegister& reg1, + const VRegister& reg2 = NoVReg, + const VRegister& reg3 = NoVReg, + const VRegister& reg4 = NoVReg); + void Exclude(const CPURegister& reg1, + const CPURegister& reg2 = NoCPUReg, + const CPURegister& reg3 = NoCPUReg, + const CPURegister& reg4 = NoCPUReg); + + // Convenience for excluding registers that are part of Operands. This is + // useful for sequences like this: + // + // // Use 'rd' as a scratch, but only if it's not aliased by an input. + // temps.Include(rd); + // temps.Exclude(rn); + // temps.Exclude(operand); + // + // Otherwise, a conditional check is needed on the last 'Exclude'. + void Exclude(const Operand& operand) { + if (operand.IsShiftedRegister() || operand.IsExtendedRegister()) { + Exclude(operand.GetRegister()); + } else { + VIXL_ASSERT(operand.IsImmediate()); + } + } + + // Prevent any scratch registers from being used in this scope. 
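+  //
+  // For example (a sketch, assuming a MacroAssembler `masm`), a helper that
+  // must not allocate any temporaries can make accidental allocation fail
+  // loudly:
+  //
+  //   UseScratchRegisterScope temps(&masm);
+  //   temps.ExcludeAll();
+  //   // Any Acquire*() call in this scope would now trigger an assertion.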
+ void ExcludeAll(); + + private: + static CPURegister AcquireFrom(CPURegList* available, + RegList mask = ~static_cast(0)); + + static void ReleaseByCode(CPURegList* available, int code); + static void ReleaseByRegList(CPURegList* available, RegList regs); + static void IncludeByRegList(CPURegList* available, RegList exclude); + static void ExcludeByRegList(CPURegList* available, RegList exclude); + + CPURegList* GetAvailableListFor(CPURegister::RegisterBank bank); + + static const RegList kGoverningPRegisterMask = + (static_cast(1) << kNumberOfGoverningPRegisters) - 1; + + // The MacroAssembler maintains a list of available scratch registers, and + // also keeps track of the most recently-opened scope so that on destruction + // we can check that scopes do not outlive their parents. + MacroAssembler* masm_; + UseScratchRegisterScope* parent_; + + // The state of the available lists at the start of this scope. + RegList old_available_; // kRegister + RegList old_available_v_; // kVRegister / kZRegister + RegList old_available_p_; // kPRegister + + // Disallow copy constructor and operator=. + VIXL_NO_RETURN_IN_DEBUG_MODE UseScratchRegisterScope( + const UseScratchRegisterScope&) { + VIXL_UNREACHABLE(); + } + VIXL_NO_RETURN_IN_DEBUG_MODE void operator=(const UseScratchRegisterScope&) { + VIXL_UNREACHABLE(); + } +}; + + +// Like CPUFeaturesScope, but also generate Simulation pseudo-instructions to +// control a Simulator's CPUFeatures dynamically. +// +// One major difference from CPUFeaturesScope is that this scope cannot offer +// a writable "CPUFeatures* GetCPUFeatures()", because every write to the +// features needs a corresponding macro instruction. +class SimulationCPUFeaturesScope { + public: + template + explicit SimulationCPUFeaturesScope(MacroAssembler* masm, T... features) + : masm_(masm), cpu_features_scope_(masm, features...) { + masm_->SaveSimulatorCPUFeatures(); + masm_->EnableSimulatorCPUFeatures(CPUFeatures(features...)); + } + + ~SimulationCPUFeaturesScope() { masm_->RestoreSimulatorCPUFeatures(); } + + const CPUFeatures* GetCPUFeatures() const { + return cpu_features_scope_.GetCPUFeatures(); + } + + void SetCPUFeatures(const CPUFeatures& cpu_features) { + cpu_features_scope_.SetCPUFeatures(cpu_features); + masm_->SetSimulatorCPUFeatures(cpu_features); + } + + private: + MacroAssembler* masm_; + CPUFeaturesScope cpu_features_scope_; +}; + + +// Variadic templating is only available from C++11. +#ifdef VIXL_HAS_MACROASSEMBLER_RUNTIME_CALL_SUPPORT + +// `R` stands for 'return type', and `P` for 'parameter types'. 
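+//
+// A sketch of a typical runtime call, assuming runtime-call support is
+// compiled in and that the CallRuntime macro-instruction is available
+// (argument and result registers follow AAPCS64):
+//
+//   int32_t AddOne(int32_t x) { return x + 1; }
+//   ...
+//   __ Mov(w0, 41);
+//   __ CallRuntime(AddOne);  // On return, w0 holds 42.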
+template +void MacroAssembler::CallRuntimeHelper(R (*function)(P...), + RuntimeCallType call_type) { + if (generate_simulator_code_) { +#ifdef VIXL_HAS_SIMULATED_RUNTIME_CALL_SUPPORT + uintptr_t runtime_call_wrapper_address = reinterpret_cast( + &(Simulator::RuntimeCallStructHelper::Wrapper)); + uintptr_t function_address = reinterpret_cast(function); + + EmissionCheckScope guard(this, + kRuntimeCallLength, + CodeBufferCheckScope::kExactSize); + Label start; + bind(&start); + { + ExactAssemblyScope scope(this, kInstructionSize); + hlt(kRuntimeCallOpcode); + } + VIXL_ASSERT(GetSizeOfCodeGeneratedSince(&start) == + kRuntimeCallWrapperOffset); + dc(runtime_call_wrapper_address); + VIXL_ASSERT(GetSizeOfCodeGeneratedSince(&start) == + kRuntimeCallFunctionOffset); + dc(function_address); + VIXL_ASSERT(GetSizeOfCodeGeneratedSince(&start) == kRuntimeCallTypeOffset); + dc32(call_type); + VIXL_ASSERT(GetSizeOfCodeGeneratedSince(&start) == kRuntimeCallLength); +#else + VIXL_UNREACHABLE(); +#endif // #ifdef VIXL_HAS_SIMULATED_RUNTIME_CALL_SUPPORT + } else { + UseScratchRegisterScope temps(this); + Register temp = temps.AcquireX(); + Mov(temp, reinterpret_cast(function)); + if (call_type == kTailCallRuntime) { + Br(temp); + } else { + VIXL_ASSERT(call_type == kCallRuntime); + Blr(temp); + } + } +} + +#endif // #ifdef VIXL_HAS_MACROASSEMBLER_RUNTIME_CALL_SUPPORT + +} // namespace aarch64 + +// Required InvalSet template specialisations. +// TODO: These template specialisations should not live in this file. Move +// VeneerPool out of the aarch64 namespace in order to share its implementation +// later. +template <> +inline ptrdiff_t InvalSet:: + GetKey(const aarch64::VeneerPool::BranchInfo& branch_info) { + return branch_info.first_unreacheable_pc_; +} +template <> +inline void InvalSet:: + SetKey(aarch64::VeneerPool::BranchInfo* branch_info, ptrdiff_t key) { + branch_info->first_unreacheable_pc_ = key; +} + +} // namespace vixl + +#endif // VIXL_AARCH64_MACRO_ASSEMBLER_AARCH64_H_ diff --git a/3rdparty/vixl/include/vixl/aarch64/operands-aarch64.h b/3rdparty/vixl/include/vixl/aarch64/operands-aarch64.h new file mode 100644 index 0000000000..ba3df18133 --- /dev/null +++ b/3rdparty/vixl/include/vixl/aarch64/operands-aarch64.h @@ -0,0 +1,999 @@ +// Copyright 2016, VIXL authors +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// * Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// * Neither the name of ARM Limited nor the names of its contributors may be +// used to endorse or promote products derived from this software without +// specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS CONTRIBUTORS "AS IS" AND +// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +// DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE +// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +#ifndef VIXL_AARCH64_OPERANDS_AARCH64_H_ +#define VIXL_AARCH64_OPERANDS_AARCH64_H_ + +#include +#include + +#include "instructions-aarch64.h" +#include "registers-aarch64.h" + +namespace vixl { +namespace aarch64 { + +// Lists of registers. +class CPURegList { + public: + explicit CPURegList(CPURegister reg1, + CPURegister reg2 = NoCPUReg, + CPURegister reg3 = NoCPUReg, + CPURegister reg4 = NoCPUReg) + : list_(reg1.GetBit() | reg2.GetBit() | reg3.GetBit() | reg4.GetBit()), + size_(reg1.GetSizeInBits()), + type_(reg1.GetType()) { + VIXL_ASSERT(AreSameSizeAndType(reg1, reg2, reg3, reg4)); + VIXL_ASSERT(IsValid()); + } + + CPURegList(CPURegister::RegisterType type, unsigned size, RegList list) + : list_(list), size_(size), type_(type) { + VIXL_ASSERT(IsValid()); + } + + CPURegList(CPURegister::RegisterType type, + unsigned size, + unsigned first_reg, + unsigned last_reg) + : size_(size), type_(type) { + VIXL_ASSERT( + ((type == CPURegister::kRegister) && (last_reg < kNumberOfRegisters)) || + ((type == CPURegister::kVRegister) && + (last_reg < kNumberOfVRegisters))); + VIXL_ASSERT(last_reg >= first_reg); + list_ = (UINT64_C(1) << (last_reg + 1)) - 1; + list_ &= ~((UINT64_C(1) << first_reg) - 1); + VIXL_ASSERT(IsValid()); + } + + // Construct an empty CPURegList with the specified size and type. If `size` + // is CPURegister::kUnknownSize and the register type requires a size, a valid + // but unspecified default will be picked. + static CPURegList Empty(CPURegister::RegisterType type, + unsigned size = CPURegister::kUnknownSize) { + return CPURegList(type, GetDefaultSizeFor(type, size), 0); + } + + // Construct a CPURegList with all possible registers with the specified size + // and type. If `size` is CPURegister::kUnknownSize and the register type + // requires a size, a valid but unspecified default will be picked. + static CPURegList All(CPURegister::RegisterType type, + unsigned size = CPURegister::kUnknownSize) { + unsigned number_of_registers = (CPURegister::GetMaxCodeFor(type) + 1); + RegList list = (static_cast(1) << number_of_registers) - 1; + if (type == CPURegister::kRegister) { + // GetMaxCodeFor(kRegister) ignores SP, so explicitly include it. + list |= (static_cast(1) << kSPRegInternalCode); + } + return CPURegList(type, GetDefaultSizeFor(type, size), list); + } + + CPURegister::RegisterType GetType() const { + VIXL_ASSERT(IsValid()); + return type_; + } + VIXL_DEPRECATED("GetType", CPURegister::RegisterType type() const) { + return GetType(); + } + + CPURegister::RegisterBank GetBank() const { + return CPURegister::GetBankFor(GetType()); + } + + // Combine another CPURegList into this one. Registers that already exist in + // this list are left unchanged. The type and size of the registers in the + // 'other' list must match those in this list. 
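+  //
+  // For example (a sketch, using the usual X register aliases):
+  //
+  //   CPURegList list(x19, x20);
+  //   list.Combine(CPURegList(x21, x22));  // {x19, x20, x21, x22}
+  //   list.Remove(x20);                    // {x19, x21, x22}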
+ void Combine(const CPURegList& other) { + VIXL_ASSERT(IsValid()); + VIXL_ASSERT(other.GetType() == type_); + VIXL_ASSERT(other.GetRegisterSizeInBits() == size_); + list_ |= other.GetList(); + } + + // Remove every register in the other CPURegList from this one. Registers that + // do not exist in this list are ignored. The type and size of the registers + // in the 'other' list must match those in this list. + void Remove(const CPURegList& other) { + VIXL_ASSERT(IsValid()); + VIXL_ASSERT(other.GetType() == type_); + VIXL_ASSERT(other.GetRegisterSizeInBits() == size_); + list_ &= ~other.GetList(); + } + + // Variants of Combine and Remove which take a single register. + void Combine(const CPURegister& other) { + VIXL_ASSERT(other.GetType() == type_); + VIXL_ASSERT(other.GetSizeInBits() == size_); + Combine(other.GetCode()); + } + + void Remove(const CPURegister& other) { + VIXL_ASSERT(other.GetType() == type_); + VIXL_ASSERT(other.GetSizeInBits() == size_); + Remove(other.GetCode()); + } + + // Variants of Combine and Remove which take a single register by its code; + // the type and size of the register is inferred from this list. + void Combine(int code) { + VIXL_ASSERT(IsValid()); + VIXL_ASSERT(CPURegister(code, size_, type_).IsValid()); + list_ |= (UINT64_C(1) << code); + } + + void Remove(int code) { + VIXL_ASSERT(IsValid()); + VIXL_ASSERT(CPURegister(code, size_, type_).IsValid()); + list_ &= ~(UINT64_C(1) << code); + } + + static CPURegList Union(const CPURegList& list_1, const CPURegList& list_2) { + VIXL_ASSERT(list_1.type_ == list_2.type_); + VIXL_ASSERT(list_1.size_ == list_2.size_); + return CPURegList(list_1.type_, list_1.size_, list_1.list_ | list_2.list_); + } + static CPURegList Union(const CPURegList& list_1, + const CPURegList& list_2, + const CPURegList& list_3); + static CPURegList Union(const CPURegList& list_1, + const CPURegList& list_2, + const CPURegList& list_3, + const CPURegList& list_4); + + static CPURegList Intersection(const CPURegList& list_1, + const CPURegList& list_2) { + VIXL_ASSERT(list_1.type_ == list_2.type_); + VIXL_ASSERT(list_1.size_ == list_2.size_); + return CPURegList(list_1.type_, list_1.size_, list_1.list_ & list_2.list_); + } + static CPURegList Intersection(const CPURegList& list_1, + const CPURegList& list_2, + const CPURegList& list_3); + static CPURegList Intersection(const CPURegList& list_1, + const CPURegList& list_2, + const CPURegList& list_3, + const CPURegList& list_4); + + bool Overlaps(const CPURegList& other) const { + return (type_ == other.type_) && ((list_ & other.list_) != 0); + } + + RegList GetList() const { + VIXL_ASSERT(IsValid()); + return list_; + } + VIXL_DEPRECATED("GetList", RegList list() const) { return GetList(); } + + void SetList(RegList new_list) { + VIXL_ASSERT(IsValid()); + list_ = new_list; + } + VIXL_DEPRECATED("SetList", void set_list(RegList new_list)) { + return SetList(new_list); + } + + // Remove all callee-saved registers from the list. This can be useful when + // preparing registers for an AAPCS64 function call, for example. + void RemoveCalleeSaved(); + + // Find the register in this list that appears in `mask` with the lowest or + // highest code, remove it from the list and return it as a CPURegister. If + // the list is empty, leave it unchanged and return NoCPUReg. + CPURegister PopLowestIndex(RegList mask = ~static_cast(0)); + CPURegister PopHighestIndex(RegList mask = ~static_cast(0)); + + // AAPCS64 callee-saved registers. 
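+  //
+  // For example, a prologue/epilogue pair might spill and reload them like
+  // this (a sketch, assuming the MacroAssembler's PushCPURegList and
+  // PopCPURegList helpers):
+  //
+  //   CPURegList saved = CPURegList::GetCalleeSaved();
+  //   __ PushCPURegList(saved);
+  //   // ... function body ...
+  //   __ PopCPURegList(saved);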
+ static CPURegList GetCalleeSaved(unsigned size = kXRegSize); + static CPURegList GetCalleeSavedV(unsigned size = kDRegSize); + + // AAPCS64 caller-saved registers. Note that this includes lr. + // TODO(all): Determine how we handle d8-d15 being callee-saved, but the top + // 64-bits being caller-saved. + static CPURegList GetCallerSaved(unsigned size = kXRegSize); + static CPURegList GetCallerSavedV(unsigned size = kDRegSize); + + bool IsEmpty() const { + VIXL_ASSERT(IsValid()); + return list_ == 0; + } + + bool IncludesAliasOf(const CPURegister& other) const { + VIXL_ASSERT(IsValid()); + return (GetBank() == other.GetBank()) && IncludesAliasOf(other.GetCode()); + } + + bool IncludesAliasOf(int code) const { + VIXL_ASSERT(IsValid()); + return (((static_cast(1) << code) & list_) != 0); + } + + int GetCount() const { + VIXL_ASSERT(IsValid()); + return CountSetBits(list_); + } + VIXL_DEPRECATED("GetCount", int Count()) const { return GetCount(); } + + int GetRegisterSizeInBits() const { + VIXL_ASSERT(IsValid()); + return size_; + } + VIXL_DEPRECATED("GetRegisterSizeInBits", int RegisterSizeInBits() const) { + return GetRegisterSizeInBits(); + } + + int GetRegisterSizeInBytes() const { + int size_in_bits = GetRegisterSizeInBits(); + VIXL_ASSERT((size_in_bits % 8) == 0); + return size_in_bits / 8; + } + VIXL_DEPRECATED("GetRegisterSizeInBytes", int RegisterSizeInBytes() const) { + return GetRegisterSizeInBytes(); + } + + unsigned GetTotalSizeInBytes() const { + VIXL_ASSERT(IsValid()); + return GetRegisterSizeInBytes() * GetCount(); + } + VIXL_DEPRECATED("GetTotalSizeInBytes", unsigned TotalSizeInBytes() const) { + return GetTotalSizeInBytes(); + } + + private: + // If `size` is CPURegister::kUnknownSize and the type requires a known size, + // then return an arbitrary-but-valid size. + // + // Otherwise, the size is checked for validity and returned unchanged. + static unsigned GetDefaultSizeFor(CPURegister::RegisterType type, + unsigned size) { + if (size == CPURegister::kUnknownSize) { + if (type == CPURegister::kRegister) size = kXRegSize; + if (type == CPURegister::kVRegister) size = kQRegSize; + // All other types require kUnknownSize. + } + VIXL_ASSERT(CPURegister(0, size, type).IsValid()); + return size; + } + + RegList list_; + int size_; + CPURegister::RegisterType type_; + + bool IsValid() const; +}; + + +// AAPCS64 callee-saved registers. +extern const CPURegList kCalleeSaved; +extern const CPURegList kCalleeSavedV; + + +// AAPCS64 caller-saved registers. Note that this includes lr. +extern const CPURegList kCallerSaved; +extern const CPURegList kCallerSavedV; + +class IntegerOperand; + +// Operand. +class Operand { + public: + // # + // where is int64_t. + // This is allowed to be an implicit constructor because Operand is + // a wrapper class that doesn't normally perform any type conversion. + Operand(int64_t immediate); // NOLINT(runtime/explicit) + + Operand(IntegerOperand immediate); // NOLINT(runtime/explicit) + + // rm, { #} + // where is one of {LSL, LSR, ASR, ROR}. + // is uint6_t. + // This is allowed to be an implicit constructor because Operand is + // a wrapper class that doesn't normally perform any type conversion. + Operand(Register reg, + Shift shift = LSL, + unsigned shift_amount = 0); // NOLINT(runtime/explicit) + + // rm, { {#}} + // where is one of {UXTB, UXTH, UXTW, UXTX, SXTB, SXTH, SXTW, SXTX}. + // is uint2_t. 
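+  //
+  // For example (a sketch), each of these constructs a valid Operand:
+  //
+  //   Operand(42);           // #42
+  //   Operand(x1, LSL, 4);   // x1, LSL #4
+  //   Operand(w2, UXTW, 2);  // w2, UXTW #2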
+ explicit Operand(Register reg, Extend extend, unsigned shift_amount = 0); + + bool IsImmediate() const; + bool IsPlainRegister() const; + bool IsShiftedRegister() const; + bool IsExtendedRegister() const; + bool IsZero() const; + + // This returns an LSL shift (<= 4) operand as an equivalent extend operand, + // which helps in the encoding of instructions that use the stack pointer. + Operand ToExtendedRegister() const; + + int64_t GetImmediate() const { + VIXL_ASSERT(IsImmediate()); + return immediate_; + } + VIXL_DEPRECATED("GetImmediate", int64_t immediate() const) { + return GetImmediate(); + } + + int64_t GetEquivalentImmediate() const { + return IsZero() ? 0 : GetImmediate(); + } + + Register GetRegister() const { + VIXL_ASSERT(IsShiftedRegister() || IsExtendedRegister()); + return reg_; + } + VIXL_DEPRECATED("GetRegister", Register reg() const) { return GetRegister(); } + Register GetBaseRegister() const { return GetRegister(); } + + Shift GetShift() const { + VIXL_ASSERT(IsShiftedRegister()); + return shift_; + } + VIXL_DEPRECATED("GetShift", Shift shift() const) { return GetShift(); } + + Extend GetExtend() const { + VIXL_ASSERT(IsExtendedRegister()); + return extend_; + } + VIXL_DEPRECATED("GetExtend", Extend extend() const) { return GetExtend(); } + + unsigned GetShiftAmount() const { + VIXL_ASSERT(IsShiftedRegister() || IsExtendedRegister()); + return shift_amount_; + } + VIXL_DEPRECATED("GetShiftAmount", unsigned shift_amount() const) { + return GetShiftAmount(); + } + + private: + int64_t immediate_; + Register reg_; + Shift shift_; + Extend extend_; + unsigned shift_amount_; +}; + + +// MemOperand represents the addressing mode of a load or store instruction. +// In assembly syntax, MemOperands are normally denoted by one or more elements +// inside or around square brackets. +class MemOperand { + public: + // Creates an invalid `MemOperand`. + MemOperand(); + explicit MemOperand(Register base, + int64_t offset = 0, + AddrMode addrmode = Offset); + MemOperand(Register base, + Register regoffset, + Shift shift = LSL, + unsigned shift_amount = 0); + MemOperand(Register base, + Register regoffset, + Extend extend, + unsigned shift_amount = 0); + MemOperand(Register base, const Operand& offset, AddrMode addrmode = Offset); + + const Register& GetBaseRegister() const { return base_; } + + // If the MemOperand has a register offset, return it. (This also applies to + // pre- and post-index modes.) Otherwise, return NoReg. + const Register& GetRegisterOffset() const { return regoffset_; } + + // If the MemOperand has an immediate offset, return it. (This also applies to + // pre- and post-index modes.) Otherwise, return 0. + int64_t GetOffset() const { return offset_; } + + AddrMode GetAddrMode() const { return addrmode_; } + Shift GetShift() const { return shift_; } + Extend GetExtend() const { return extend_; } + + unsigned GetShiftAmount() const { + // Extend modes can also encode a shift for some instructions. + VIXL_ASSERT((GetShift() != NO_SHIFT) || (GetExtend() != NO_EXTEND)); + return shift_amount_; + } + + // True for MemOperands which represent something like [x0]. + // Currently, this will also return true for [x0, #0], because MemOperand has + // no way to distinguish the two. + bool IsPlainRegister() const; + + // True for MemOperands which represent something like [x0], or for compound + // MemOperands which are functionally equivalent, such as [x0, #0], [x0, xzr] + // or [x0, wzr, UXTW #3]. 
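+  //
+  // For example (a sketch):
+  //
+  //   MemOperand(x0).IsEquivalentToPlainRegister();       // true
+  //   MemOperand(x0, 0).IsEquivalentToPlainRegister();    // true
+  //   MemOperand(x0, xzr).IsEquivalentToPlainRegister();  // true
+  //   MemOperand(x0, 8).IsEquivalentToPlainRegister();    // false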
+ bool IsEquivalentToPlainRegister() const; + + // True for immediate-offset (but not indexed) MemOperands. + bool IsImmediateOffset() const; + // True for register-offset (but not indexed) MemOperands. + bool IsRegisterOffset() const; + // True for immediate or register pre-indexed MemOperands. + bool IsPreIndex() const; + // True for immediate or register post-indexed MemOperands. + bool IsPostIndex() const; + // True for immediate pre-indexed MemOperands, [reg, #imm]! + bool IsImmediatePreIndex() const; + // True for immediate post-indexed MemOperands, [reg], #imm + bool IsImmediatePostIndex() const; + + void AddOffset(int64_t offset); + + bool IsValid() const { + return base_.IsValid() && + ((addrmode_ == Offset) || (addrmode_ == PreIndex) || + (addrmode_ == PostIndex)) && + ((shift_ == NO_SHIFT) || (extend_ == NO_EXTEND)) && + ((offset_ == 0) || !regoffset_.IsValid()); + } + + bool Equals(const MemOperand& other) const { + return base_.Is(other.base_) && regoffset_.Is(other.regoffset_) && + (offset_ == other.offset_) && (addrmode_ == other.addrmode_) && + (shift_ == other.shift_) && (extend_ == other.extend_) && + (shift_amount_ == other.shift_amount_); + } + + private: + Register base_; + Register regoffset_; + int64_t offset_; + AddrMode addrmode_; + Shift shift_; + Extend extend_; + unsigned shift_amount_; +}; + +// SVE supports memory operands which don't make sense to the core ISA, such as +// scatter-gather forms, in which either the base or offset registers are +// vectors. This class exists to avoid complicating core-ISA code with +// SVE-specific behaviour. +// +// Note that SVE does not support any pre- or post-index modes. +class SVEMemOperand { + public: + // "vector-plus-immediate", like [z0.s, #21] + explicit SVEMemOperand(ZRegister base, uint64_t offset = 0) + : base_(base), + regoffset_(NoReg), + offset_(RawbitsToInt64(offset)), + mod_(NO_SVE_OFFSET_MODIFIER), + shift_amount_(0) { + VIXL_ASSERT(IsVectorPlusImmediate()); + VIXL_ASSERT(IsValid()); + } + + // "scalar-plus-immediate", like [x0], [x0, #42] or [x0, #42, MUL_VL] + // The only supported modifiers are NO_SVE_OFFSET_MODIFIER or SVE_MUL_VL. + // + // Note that VIXL cannot currently distinguish between `SVEMemOperand(x0)` and + // `SVEMemOperand(x0, 0)`. This is only significant in scalar-plus-scalar + // instructions where xm defaults to xzr. However, users should not rely on + // `SVEMemOperand(x0, 0)` being accepted in such cases. + explicit SVEMemOperand(Register base, + uint64_t offset = 0, + SVEOffsetModifier mod = NO_SVE_OFFSET_MODIFIER) + : base_(base), + regoffset_(NoReg), + offset_(RawbitsToInt64(offset)), + mod_(mod), + shift_amount_(0) { + VIXL_ASSERT(IsScalarPlusImmediate()); + VIXL_ASSERT(IsValid()); + } + + // "scalar-plus-scalar", like [x0, x1] + // "scalar-plus-vector", like [x0, z1.d] + SVEMemOperand(Register base, CPURegister offset) + : base_(base), + regoffset_(offset), + offset_(0), + mod_(NO_SVE_OFFSET_MODIFIER), + shift_amount_(0) { + VIXL_ASSERT(IsScalarPlusScalar() || IsScalarPlusVector()); + if (offset.IsZero()) VIXL_ASSERT(IsEquivalentToScalar()); + VIXL_ASSERT(IsValid()); + } + + // "scalar-plus-vector", like [x0, z1.d, UXTW] + // The type of `mod` can be any `SVEOffsetModifier` (other than LSL), or a + // corresponding `Extend` value. + template + SVEMemOperand(Register base, ZRegister offset, M mod) + : base_(base), + regoffset_(offset), + offset_(0), + mod_(GetSVEOffsetModifierFor(mod)), + shift_amount_(0) { + VIXL_ASSERT(mod_ != SVE_LSL); // LSL requires an explicit shift amount. 
+ VIXL_ASSERT(IsScalarPlusVector()); + VIXL_ASSERT(IsValid()); + } + + // "scalar-plus-scalar", like [x0, x1, LSL #1] + // "scalar-plus-vector", like [x0, z1.d, LSL #2] + // The type of `mod` can be any `SVEOffsetModifier`, or a corresponding + // `Shift` or `Extend` value. + template + SVEMemOperand(Register base, CPURegister offset, M mod, unsigned shift_amount) + : base_(base), + regoffset_(offset), + offset_(0), + mod_(GetSVEOffsetModifierFor(mod)), + shift_amount_(shift_amount) { + VIXL_ASSERT(IsValid()); + } + + // "vector-plus-scalar", like [z0.d, x0] + SVEMemOperand(ZRegister base, Register offset) + : base_(base), + regoffset_(offset), + offset_(0), + mod_(NO_SVE_OFFSET_MODIFIER), + shift_amount_(0) { + VIXL_ASSERT(IsValid()); + VIXL_ASSERT(IsVectorPlusScalar()); + } + + // "vector-plus-vector", like [z0.d, z1.d, UXTW] + template + SVEMemOperand(ZRegister base, + ZRegister offset, + M mod = NO_SVE_OFFSET_MODIFIER, + unsigned shift_amount = 0) + : base_(base), + regoffset_(offset), + offset_(0), + mod_(GetSVEOffsetModifierFor(mod)), + shift_amount_(shift_amount) { + VIXL_ASSERT(IsValid()); + VIXL_ASSERT(IsVectorPlusVector()); + } + + // True for SVEMemOperands which represent something like [x0]. + // This will also return true for [x0, #0], because there is no way + // to distinguish the two. + bool IsPlainScalar() const { + return IsScalarPlusImmediate() && (offset_ == 0); + } + + // True for SVEMemOperands which represent something like [x0], or for + // compound SVEMemOperands which are functionally equivalent, such as + // [x0, #0], [x0, xzr] or [x0, wzr, UXTW #3]. + bool IsEquivalentToScalar() const; + + // True for SVEMemOperands like [x0], [x0, #0], false for [x0, xzr] and + // similar. + bool IsPlainRegister() const; + + bool IsScalarPlusImmediate() const { + return base_.IsX() && regoffset_.IsNone() && + ((mod_ == NO_SVE_OFFSET_MODIFIER) || IsMulVl()); + } + + bool IsScalarPlusScalar() const { + // SVE offers no extend modes for scalar-plus-scalar, so both registers must + // be X registers. + return base_.IsX() && regoffset_.IsX() && + ((mod_ == NO_SVE_OFFSET_MODIFIER) || (mod_ == SVE_LSL)); + } + + bool IsScalarPlusVector() const { + // The modifier can be LSL or an an extend mode (UXTW or SXTW) here. Unlike + // in the core ISA, these extend modes do not imply an S-sized lane, so the + // modifier is independent from the lane size. The architecture describes + // [US]XTW with a D-sized lane as an "unpacked" offset. + return base_.IsX() && regoffset_.IsZRegister() && + (regoffset_.IsLaneSizeS() || regoffset_.IsLaneSizeD()) && !IsMulVl(); + } + + bool IsVectorPlusImmediate() const { + return base_.IsZRegister() && + (base_.IsLaneSizeS() || base_.IsLaneSizeD()) && + regoffset_.IsNone() && (mod_ == NO_SVE_OFFSET_MODIFIER); + } + + bool IsVectorPlusScalar() const { + return base_.IsZRegister() && regoffset_.IsX() && + (base_.IsLaneSizeS() || base_.IsLaneSizeD()); + } + + bool IsVectorPlusVector() const { + return base_.IsZRegister() && regoffset_.IsZRegister() && (offset_ == 0) && + AreSameFormat(base_, regoffset_) && + (base_.IsLaneSizeS() || base_.IsLaneSizeD()); + } + + bool IsContiguous() const { return !IsScatterGather(); } + bool IsScatterGather() const { + return base_.IsZRegister() || regoffset_.IsZRegister(); + } + + // TODO: If necessary, add helpers like `HasScalarBase()`. 
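+
+  // Examples of the addressing forms described above (a sketch, using the
+  // usual register aliases):
+  //
+  //   SVEMemOperand(x0);                  // scalar-plus-immediate, [x0]
+  //   SVEMemOperand(x0, 42, SVE_MUL_VL);  // [x0, #42, MUL VL]
+  //   SVEMemOperand(x0, x1, LSL, 2);      // scalar-plus-scalar, [x0, x1, LSL #2]
+  //   SVEMemOperand(x0, z1.VnD(), SXTW);  // scalar-plus-vector, [x0, z1.d, SXTW]
+  //   SVEMemOperand(z0.VnS(), 4);         // vector-plus-immediate, [z0.s, #4]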
+ + Register GetScalarBase() const { + VIXL_ASSERT(base_.IsX()); + return Register(base_); + } + + ZRegister GetVectorBase() const { + VIXL_ASSERT(base_.IsZRegister()); + VIXL_ASSERT(base_.HasLaneSize()); + return ZRegister(base_); + } + + Register GetScalarOffset() const { + VIXL_ASSERT(regoffset_.IsRegister()); + return Register(regoffset_); + } + + ZRegister GetVectorOffset() const { + VIXL_ASSERT(regoffset_.IsZRegister()); + VIXL_ASSERT(regoffset_.HasLaneSize()); + return ZRegister(regoffset_); + } + + int64_t GetImmediateOffset() const { + VIXL_ASSERT(regoffset_.IsNone()); + return offset_; + } + + SVEOffsetModifier GetOffsetModifier() const { return mod_; } + unsigned GetShiftAmount() const { return shift_amount_; } + + bool IsEquivalentToLSL(unsigned amount) const { + if (shift_amount_ != amount) return false; + if (amount == 0) { + // No-shift is equivalent to "LSL #0". + return ((mod_ == SVE_LSL) || (mod_ == NO_SVE_OFFSET_MODIFIER)); + } + return mod_ == SVE_LSL; + } + + bool IsMulVl() const { return mod_ == SVE_MUL_VL; } + + bool IsValid() const; + + private: + // Allow standard `Shift` and `Extend` arguments to be used. + SVEOffsetModifier GetSVEOffsetModifierFor(Shift shift) { + if (shift == LSL) return SVE_LSL; + if (shift == NO_SHIFT) return NO_SVE_OFFSET_MODIFIER; + // SVE does not accept any other shift. + VIXL_UNIMPLEMENTED(); + return NO_SVE_OFFSET_MODIFIER; + } + + SVEOffsetModifier GetSVEOffsetModifierFor(Extend extend = NO_EXTEND) { + if (extend == UXTW) return SVE_UXTW; + if (extend == SXTW) return SVE_SXTW; + if (extend == NO_EXTEND) return NO_SVE_OFFSET_MODIFIER; + // SVE does not accept any other extend mode. + VIXL_UNIMPLEMENTED(); + return NO_SVE_OFFSET_MODIFIER; + } + + SVEOffsetModifier GetSVEOffsetModifierFor(SVEOffsetModifier mod) { + return mod; + } + + CPURegister base_; + CPURegister regoffset_; + int64_t offset_; + SVEOffsetModifier mod_; + unsigned shift_amount_; +}; + +// Represent a signed or unsigned integer operand. +// +// This is designed to make instructions which naturally accept a _signed_ +// immediate easier to implement and use, when we also want users to be able to +// specify raw-bits values (such as with hexadecimal constants). The advantage +// of this class over a simple uint64_t (with implicit C++ sign-extension) is +// that this class can strictly check the range of allowed values. With a simple +// uint64_t, it is impossible to distinguish -1 from UINT64_MAX. +// +// For example, these instructions are equivalent: +// +// __ Insr(z0.VnB(), -1); +// __ Insr(z0.VnB(), 0xff); +// +// ... as are these: +// +// __ Insr(z0.VnD(), -1); +// __ Insr(z0.VnD(), 0xffffffffffffffff); +// +// ... but this is invalid: +// +// __ Insr(z0.VnB(), 0xffffffffffffffff); // Too big for B-sized lanes. +class IntegerOperand { + public: +#define VIXL_INT_TYPES(V) \ + V(char) V(short) V(int) V(long) V(long long) // NOLINT(runtime/int) +#define VIXL_DECL_INT_OVERLOADS(T) \ + /* These are allowed to be implicit constructors because this is a */ \ + /* wrapper class that doesn't normally perform any type conversion. */ \ + IntegerOperand(signed T immediate) /* NOLINT(runtime/explicit) */ \ + : raw_bits_(immediate), /* Allow implicit sign-extension. 
*/ \ + is_negative_(immediate < 0) {} \ + IntegerOperand(unsigned T immediate) /* NOLINT(runtime/explicit) */ \ + : raw_bits_(immediate), is_negative_(false) {} + VIXL_INT_TYPES(VIXL_DECL_INT_OVERLOADS) +#undef VIXL_DECL_INT_OVERLOADS +#undef VIXL_INT_TYPES + + // TODO: `Operand` can currently only hold an int64_t, so some large, unsigned + // values will be misrepresented here. + explicit IntegerOperand(const Operand& operand) + : raw_bits_(operand.GetEquivalentImmediate()), + is_negative_(operand.GetEquivalentImmediate() < 0) {} + + bool IsIntN(unsigned n) const { + return is_negative_ ? vixl::IsIntN(n, RawbitsToInt64(raw_bits_)) + : vixl::IsIntN(n, raw_bits_); + } + bool IsUintN(unsigned n) const { + return !is_negative_ && vixl::IsUintN(n, raw_bits_); + } + + bool IsUint8() const { return IsUintN(8); } + bool IsUint16() const { return IsUintN(16); } + bool IsUint32() const { return IsUintN(32); } + bool IsUint64() const { return IsUintN(64); } + + bool IsInt8() const { return IsIntN(8); } + bool IsInt16() const { return IsIntN(16); } + bool IsInt32() const { return IsIntN(32); } + bool IsInt64() const { return IsIntN(64); } + + bool FitsInBits(unsigned n) const { + return is_negative_ ? IsIntN(n) : IsUintN(n); + } + bool FitsInLane(const CPURegister& zd) const { + return FitsInBits(zd.GetLaneSizeInBits()); + } + bool FitsInSignedLane(const CPURegister& zd) const { + return IsIntN(zd.GetLaneSizeInBits()); + } + bool FitsInUnsignedLane(const CPURegister& zd) const { + return IsUintN(zd.GetLaneSizeInBits()); + } + + // Cast a value in the range [INT_MIN, UINT_MAX] to an unsigned integer + // in the range [0, UINT_MAX] (using two's complement mapping). + uint64_t AsUintN(unsigned n) const { + VIXL_ASSERT(FitsInBits(n)); + return raw_bits_ & GetUintMask(n); + } + + uint8_t AsUint8() const { return static_cast(AsUintN(8)); } + uint16_t AsUint16() const { return static_cast(AsUintN(16)); } + uint32_t AsUint32() const { return static_cast(AsUintN(32)); } + uint64_t AsUint64() const { return AsUintN(64); } + + // Cast a value in the range [INT_MIN, UINT_MAX] to a signed integer in + // the range [INT_MIN, INT_MAX] (using two's complement mapping). + int64_t AsIntN(unsigned n) const { + VIXL_ASSERT(FitsInBits(n)); + return ExtractSignedBitfield64(n - 1, 0, raw_bits_); + } + + int8_t AsInt8() const { return static_cast(AsIntN(8)); } + int16_t AsInt16() const { return static_cast(AsIntN(16)); } + int32_t AsInt32() const { return static_cast(AsIntN(32)); } + int64_t AsInt64() const { return AsIntN(64); } + + // Several instructions encode a signed int_t, which is then (optionally) + // left-shifted and sign-extended to a Z register lane with a size which may + // be larger than N. This helper tries to find an int_t such that the + // IntegerOperand's arithmetic value is reproduced in each lane. + // + // This is the mechanism that allows `Insr(z0.VnB(), 0xff)` to be treated as + // `Insr(z0.VnB(), -1)`. + template + bool TryEncodeAsShiftedIntNForLane(const CPURegister& zd, T* imm) const { + VIXL_STATIC_ASSERT(std::numeric_limits::digits > N); + VIXL_ASSERT(FitsInLane(zd)); + if ((raw_bits_ & GetUintMask(kShift)) != 0) return false; + + // Reverse the specified left-shift. + IntegerOperand unshifted(*this); + unshifted.ArithmeticShiftRight(kShift); + + if (unshifted.IsIntN(N)) { + // This is trivial, since sign-extension produces the same arithmetic + // value irrespective of the destination size. 
+ *imm = static_cast(unshifted.AsIntN(N)); + return true; + } + + // Otherwise, we might be able to use the sign-extension to produce the + // desired bit pattern. We can only do this for values in the range + // [INT_MAX + 1, UINT_MAX], where the highest set bit is the sign bit. + // + // The lane size has to be adjusted to compensate for `kShift`, since the + // high bits will be dropped when the encoded value is left-shifted. + if (unshifted.IsUintN(zd.GetLaneSizeInBits() - kShift)) { + int64_t encoded = unshifted.AsIntN(zd.GetLaneSizeInBits() - kShift); + if (vixl::IsIntN(N, encoded)) { + *imm = static_cast(encoded); + return true; + } + } + return false; + } + + // As above, but `kShift` is written to the `*shift` parameter on success, so + // that it is easy to chain calls like this: + // + // if (imm.TryEncodeAsShiftedIntNForLane<8, 0>(zd, &imm8, &shift) || + // imm.TryEncodeAsShiftedIntNForLane<8, 8>(zd, &imm8, &shift)) { + // insn(zd, imm8, shift) + // } + template + bool TryEncodeAsShiftedIntNForLane(const CPURegister& zd, + T* imm, + S* shift) const { + if (TryEncodeAsShiftedIntNForLane(zd, imm)) { + *shift = kShift; + return true; + } + return false; + } + + // As above, but assume that `kShift` is 0. + template + bool TryEncodeAsIntNForLane(const CPURegister& zd, T* imm) const { + return TryEncodeAsShiftedIntNForLane(zd, imm); + } + + // As above, but for unsigned fields. This is usually a simple operation, but + // is provided for symmetry. + template + bool TryEncodeAsShiftedUintNForLane(const CPURegister& zd, T* imm) const { + VIXL_STATIC_ASSERT(std::numeric_limits::digits > N); + VIXL_ASSERT(FitsInLane(zd)); + + // TODO: Should we convert -1 to 0xff here? + if (is_negative_) return false; + USE(zd); + + if ((raw_bits_ & GetUintMask(kShift)) != 0) return false; + + if (vixl::IsUintN(N, raw_bits_ >> kShift)) { + *imm = static_cast(raw_bits_ >> kShift); + return true; + } + return false; + } + + template + bool TryEncodeAsShiftedUintNForLane(const CPURegister& zd, + T* imm, + S* shift) const { + if (TryEncodeAsShiftedUintNForLane(zd, imm)) { + *shift = kShift; + return true; + } + return false; + } + + bool IsZero() const { return raw_bits_ == 0; } + bool IsNegative() const { return is_negative_; } + bool IsPositiveOrZero() const { return !is_negative_; } + + uint64_t GetMagnitude() const { + return is_negative_ ? UnsignedNegate(raw_bits_) : raw_bits_; + } + + private: + // Shift the arithmetic value right, with sign extension if is_negative_. + void ArithmeticShiftRight(int shift) { + VIXL_ASSERT((shift >= 0) && (shift < 64)); + if (shift == 0) return; + if (is_negative_) { + raw_bits_ = ExtractSignedBitfield64(63, shift, raw_bits_); + } else { + raw_bits_ >>= shift; + } + } + + uint64_t raw_bits_; + bool is_negative_; +}; + +// This an abstraction that can represent a register or memory location. The +// `MacroAssembler` provides helpers to move data between generic operands. 
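+//
+// For example (a sketch; the Mov(GenericOperand, GenericOperand) overload is
+// assumed to be provided by the MacroAssembler):
+//
+//   GenericOperand reg(x0);
+//   GenericOperand slot(MemOperand(sp, 16), kXRegSizeInBytes);
+//   __ Mov(slot, reg);  // Store x0 to [sp, #16].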
+class GenericOperand { + public: + GenericOperand() { VIXL_ASSERT(!IsValid()); } + GenericOperand(const CPURegister& reg); // NOLINT(runtime/explicit) + GenericOperand(const MemOperand& mem_op, + size_t mem_op_size = 0); // NOLINT(runtime/explicit) + + bool IsValid() const { return cpu_register_.IsValid() != mem_op_.IsValid(); } + + bool Equals(const GenericOperand& other) const; + + bool IsCPURegister() const { + VIXL_ASSERT(IsValid()); + return cpu_register_.IsValid(); + } + + bool IsRegister() const { + return IsCPURegister() && cpu_register_.IsRegister(); + } + + bool IsVRegister() const { + return IsCPURegister() && cpu_register_.IsVRegister(); + } + + bool IsSameCPURegisterType(const GenericOperand& other) { + return IsCPURegister() && other.IsCPURegister() && + GetCPURegister().IsSameType(other.GetCPURegister()); + } + + bool IsMemOperand() const { + VIXL_ASSERT(IsValid()); + return mem_op_.IsValid(); + } + + CPURegister GetCPURegister() const { + VIXL_ASSERT(IsCPURegister()); + return cpu_register_; + } + + MemOperand GetMemOperand() const { + VIXL_ASSERT(IsMemOperand()); + return mem_op_; + } + + size_t GetMemOperandSizeInBytes() const { + VIXL_ASSERT(IsMemOperand()); + return mem_op_size_; + } + + size_t GetSizeInBytes() const { + return IsCPURegister() ? cpu_register_.GetSizeInBytes() + : GetMemOperandSizeInBytes(); + } + + size_t GetSizeInBits() const { return GetSizeInBytes() * kBitsPerByte; } + + private: + CPURegister cpu_register_; + MemOperand mem_op_; + // The size of the memory region pointed to, in bytes. + // We only support sizes up to X/D register sizes. + size_t mem_op_size_; +}; +} +} // namespace vixl::aarch64 + +#endif // VIXL_AARCH64_OPERANDS_AARCH64_H_ diff --git a/3rdparty/vixl/include/vixl/aarch64/registers-aarch64.h b/3rdparty/vixl/include/vixl/aarch64/registers-aarch64.h new file mode 100644 index 0000000000..f9a6d897f5 --- /dev/null +++ b/3rdparty/vixl/include/vixl/aarch64/registers-aarch64.h @@ -0,0 +1,901 @@ +// Copyright 2019, VIXL authors +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// * Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// * Neither the name of ARM Limited nor the names of its contributors may be +// used to endorse or promote products derived from this software without +// specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS CONTRIBUTORS "AS IS" AND +// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE +// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +#ifndef VIXL_AARCH64_REGISTERS_AARCH64_H_ +#define VIXL_AARCH64_REGISTERS_AARCH64_H_ + +#include + +#include "instructions-aarch64.h" + +namespace vixl { +namespace aarch64 { + +// An integer type capable of representing a homogeneous, non-overlapping set of +// registers as a bitmask of their codes. +typedef uint64_t RegList; +static const int kRegListSizeInBits = sizeof(RegList) * 8; + +class Register; +class WRegister; +class XRegister; + +class VRegister; +class BRegister; +class HRegister; +class SRegister; +class DRegister; +class QRegister; + +class ZRegister; + +class PRegister; +class PRegisterWithLaneSize; +class PRegisterM; +class PRegisterZ; + +// A container for any single register supported by the processor. Selected +// qualifications are also supported. Basic registers can be constructed +// directly as CPURegister objects. Other variants should be constructed as one +// of the derived classes. +// +// CPURegister aims to support any getter that would also be available to more +// specialised register types. However, using the equivalent functions on the +// specialised register types can avoid run-time checks, and should therefore be +// preferred where run-time polymorphism isn't required. +// +// Type-specific modifiers are typically implemented only on the derived +// classes. +// +// The encoding is such that CPURegister objects are cheap to pass by value. +class CPURegister { + public: + enum RegisterBank : uint8_t { + kNoRegisterBank = 0, + kRRegisterBank, + kVRegisterBank, + kPRegisterBank + }; + enum RegisterType { + kNoRegister, + kRegister, + kVRegister, + kZRegister, + kPRegister + }; + + static const unsigned kUnknownSize = 0; + + VIXL_CONSTEXPR CPURegister() + : code_(0), + bank_(kNoRegisterBank), + size_(kEncodedUnknownSize), + qualifiers_(kNoQualifiers), + lane_size_(kEncodedUnknownSize) {} + + CPURegister(int code, int size_in_bits, RegisterType type) + : code_(code), + bank_(GetBankFor(type)), + size_(EncodeSizeInBits(size_in_bits)), + qualifiers_(kNoQualifiers), + lane_size_(EncodeSizeInBits(size_in_bits)) { + VIXL_ASSERT(IsValid()); + } + + // Basic accessors. + + // TODO: Make this return 'int'. + unsigned GetCode() const { return code_; } + + RegisterBank GetBank() const { return bank_; } + + // For scalar registers, the lane size matches the register size, and is + // always known. + bool HasSize() const { return size_ != kEncodedUnknownSize; } + bool HasLaneSize() const { return lane_size_ != kEncodedUnknownSize; } + + RegList GetBit() const { + if (IsNone()) return 0; + VIXL_ASSERT(code_ < kRegListSizeInBits); + return static_cast(1) << code_; + } + + // Return the architectural name for this register. + // TODO: This is temporary. Ultimately, we should move the + // Simulator::*RegNameForCode helpers out of the simulator, and provide an + // independent way to obtain the name of a register. + std::string GetArchitecturalName() const; + + // Return the highest valid register code for this type, to allow generic + // loops to be written. This excludes kSPRegInternalCode, since it is not + // contiguous, and sp usually requires special handling anyway. + unsigned GetMaxCode() const { return GetMaxCodeFor(GetBank()); } + + // Registers without a known size report kUnknownSize. + int GetSizeInBits() const { return DecodeSizeInBits(size_); } + int GetSizeInBytes() const { return DecodeSizeInBytes(size_); } + // TODO: Make these return 'int'. 
+ unsigned GetLaneSizeInBits() const { return DecodeSizeInBits(lane_size_); } + unsigned GetLaneSizeInBytes() const { return DecodeSizeInBytes(lane_size_); } + unsigned GetLaneSizeInBytesLog2() const { + VIXL_ASSERT(HasLaneSize()); + return DecodeSizeInBytesLog2(lane_size_); + } + + int GetLanes() const { + if (HasSize() && HasLaneSize()) { + // Take advantage of the size encoding to calculate this efficiently. + VIXL_STATIC_ASSERT(kEncodedHRegSize == (kEncodedBRegSize + 1)); + VIXL_STATIC_ASSERT(kEncodedSRegSize == (kEncodedHRegSize + 1)); + VIXL_STATIC_ASSERT(kEncodedDRegSize == (kEncodedSRegSize + 1)); + VIXL_STATIC_ASSERT(kEncodedQRegSize == (kEncodedDRegSize + 1)); + int log2_delta = static_cast(size_) - static_cast(lane_size_); + VIXL_ASSERT(log2_delta >= 0); + return 1 << log2_delta; + } + return kUnknownSize; + } + + bool Is8Bits() const { return size_ == kEncodedBRegSize; } + bool Is16Bits() const { return size_ == kEncodedHRegSize; } + bool Is32Bits() const { return size_ == kEncodedSRegSize; } + bool Is64Bits() const { return size_ == kEncodedDRegSize; } + bool Is128Bits() const { return size_ == kEncodedQRegSize; } + + bool IsLaneSizeB() const { return lane_size_ == kEncodedBRegSize; } + bool IsLaneSizeH() const { return lane_size_ == kEncodedHRegSize; } + bool IsLaneSizeS() const { return lane_size_ == kEncodedSRegSize; } + bool IsLaneSizeD() const { return lane_size_ == kEncodedDRegSize; } + bool IsLaneSizeQ() const { return lane_size_ == kEncodedQRegSize; } + + // If IsRegister(), then it is valid to convert the CPURegister to some + // Register type. + // + // If... ... then it is safe to construct ... + // r.IsRegister() -> Register(r) + // r.IsVRegister() -> VRegister(r) + // r.IsZRegister() -> ZRegister(r) + // r.IsPRegister() -> PRegister(r) + // + // r.IsPRegister() && HasLaneSize() -> PRegisterWithLaneSize(r) + // r.IsPRegister() && IsMerging() -> PRegisterM(r) + // r.IsPRegister() && IsZeroing() -> PRegisterZ(r) + bool IsRegister() const { return GetType() == kRegister; } + bool IsVRegister() const { return GetType() == kVRegister; } + bool IsZRegister() const { return GetType() == kZRegister; } + bool IsPRegister() const { return GetType() == kPRegister; } + + bool IsNone() const { return GetType() == kNoRegister; } + + // `GetType() == kNoRegister` implies IsNone(), and vice-versa. + // `GetType() == kRegister` implies IsRegister(), and vice-versa. + RegisterType GetType() const { + switch (bank_) { + case kNoRegisterBank: + return kNoRegister; + case kRRegisterBank: + return kRegister; + case kVRegisterBank: + return HasSize() ? kVRegister : kZRegister; + case kPRegisterBank: + return kPRegister; + } + VIXL_UNREACHABLE(); + return kNoRegister; + } + + // IsFPRegister() is true for scalar FP types (and therefore implies + // IsVRegister()). There is no corresponding FPRegister type. + bool IsFPRegister() const { return Is1H() || Is1S() || Is1D(); } + + // TODO: These are stricter forms of the helpers above. We should make the + // basic helpers strict, and remove these. 
+ bool IsValidRegister() const; + bool IsValidVRegister() const; + bool IsValidFPRegister() const; + bool IsValidZRegister() const; + bool IsValidPRegister() const; + + bool IsValid() const; + bool IsValidOrNone() const { return IsNone() || IsValid(); } + + bool IsVector() const { return HasLaneSize() && (size_ != lane_size_); } + bool IsScalar() const { return HasLaneSize() && (size_ == lane_size_); } + + bool IsSameType(const CPURegister& other) const { + return GetType() == other.GetType(); + } + + bool IsSameBank(const CPURegister& other) const { + return GetBank() == other.GetBank(); + } + + // Two registers with unknown size are considered to have the same size if + // they also have the same type. For example, all Z registers have the same + // size, even though we don't know what that is. + bool IsSameSizeAndType(const CPURegister& other) const { + return IsSameType(other) && (size_ == other.size_); + } + + bool IsSameFormat(const CPURegister& other) const { + return IsSameSizeAndType(other) && (lane_size_ == other.lane_size_); + } + + // Note that NoReg aliases itself, so that 'Is' implies 'Aliases'. + bool Aliases(const CPURegister& other) const { + return IsSameBank(other) && (code_ == other.code_); + } + + bool Is(const CPURegister& other) const { + if (IsRegister() || IsVRegister()) { + // For core (W, X) and FP/NEON registers, we only consider the code, size + // and type. This is legacy behaviour. + // TODO: We should probably check every field for all registers. + return Aliases(other) && (size_ == other.size_); + } else { + // For Z and P registers, we require all fields to match exactly. + VIXL_ASSERT(IsNone() || IsZRegister() || IsPRegister()); + return (code_ == other.code_) && (bank_ == other.bank_) && + (size_ == other.size_) && (qualifiers_ == other.qualifiers_) && + (lane_size_ == other.lane_size_); + } + } + + // Conversions to specific register types. The result is a register that + // aliases the original CPURegister. That is, the original register bank + // (`GetBank()`) is checked and the code (`GetCode()`) preserved, but all + // other properties are ignored. + // + // Typical usage: + // + // if (reg.GetBank() == kVRegisterBank) { + // DRegister d = reg.D(); + // ... + // } + // + // These could all return types with compile-time guarantees (like XRegister), + // but this breaks backwards-compatibility quite severely, particularly with + // code like `cond ? reg.W() : reg.X()`, which would have indeterminate type. + + // Core registers, like "w0". + Register W() const; + Register X() const; + // FP/NEON registers, like "b0". + VRegister B() const; + VRegister H() const; + VRegister S() const; + VRegister D() const; + VRegister Q() const; + VRegister V() const; + // SVE registers, like "z0". + ZRegister Z() const; + PRegister P() const; + + // Utilities for kRegister types. + + bool IsZero() const { return IsRegister() && (code_ == kZeroRegCode); } + bool IsSP() const { return IsRegister() && (code_ == kSPRegInternalCode); } + bool IsW() const { return IsRegister() && Is32Bits(); } + bool IsX() const { return IsRegister() && Is64Bits(); } + + // Utilities for FP/NEON kVRegister types. + + // These helpers ensure that the size and type of the register are as + // described. They do not consider the number of lanes that make up a vector. + // So, for example, Is8B() implies IsD(), and Is1D() implies IsD, but IsD() + // does not imply Is1D() or Is8B(). + // Check the number of lanes, ie. the format of the vector, using methods such + // as Is8B(), Is1D(), etc. 
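+  //
+  // For example (a sketch):
+  //
+  //   VRegister(0, kFormat8B).IsD();   // true
+  //   VRegister(0, kFormat8B).Is8B();  // true
+  //   VRegister(0, kFormat8B).Is1D();  // false
+  //   VRegister(0, kFormatD).Is1D();   // true (the scalar d0)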
+ bool IsB() const { return IsVRegister() && Is8Bits(); } + bool IsH() const { return IsVRegister() && Is16Bits(); } + bool IsS() const { return IsVRegister() && Is32Bits(); } + bool IsD() const { return IsVRegister() && Is64Bits(); } + bool IsQ() const { return IsVRegister() && Is128Bits(); } + + // As above, but also check that the register has exactly one lane. For + // example, reg.Is1D() implies DRegister(reg).IsValid(), but reg.IsD() does + // not. + bool Is1B() const { return IsB() && IsScalar(); } + bool Is1H() const { return IsH() && IsScalar(); } + bool Is1S() const { return IsS() && IsScalar(); } + bool Is1D() const { return IsD() && IsScalar(); } + bool Is1Q() const { return IsQ() && IsScalar(); } + + // Check the specific NEON format. + bool Is8B() const { return IsD() && IsLaneSizeB(); } + bool Is16B() const { return IsQ() && IsLaneSizeB(); } + bool Is2H() const { return IsS() && IsLaneSizeH(); } + bool Is4H() const { return IsD() && IsLaneSizeH(); } + bool Is8H() const { return IsQ() && IsLaneSizeH(); } + bool Is2S() const { return IsD() && IsLaneSizeS(); } + bool Is4S() const { return IsQ() && IsLaneSizeS(); } + bool Is2D() const { return IsQ() && IsLaneSizeD(); } + + // A semantic alias for sdot and udot (indexed and by element) instructions. + // The current CPURegister implementation cannot not tell this from Is1S(), + // but it might do later. + // TODO: Do this with the qualifiers_ field. + bool Is1S4B() const { return Is1S(); } + + // Utilities for SVE registers. + + bool IsUnqualified() const { return qualifiers_ == kNoQualifiers; } + bool IsMerging() const { return IsPRegister() && (qualifiers_ == kMerging); } + bool IsZeroing() const { return IsPRegister() && (qualifiers_ == kZeroing); } + + // SVE types have unknown sizes, but within known bounds. + + int GetMaxSizeInBytes() const { + switch (GetType()) { + case kZRegister: + return kZRegMaxSizeInBytes; + case kPRegister: + return kPRegMaxSizeInBytes; + default: + VIXL_ASSERT(HasSize()); + return GetSizeInBits(); + } + } + + int GetMinSizeInBytes() const { + switch (GetType()) { + case kZRegister: + return kZRegMinSizeInBytes; + case kPRegister: + return kPRegMinSizeInBytes; + default: + VIXL_ASSERT(HasSize()); + return GetSizeInBits(); + } + } + + int GetMaxSizeInBits() const { return GetMaxSizeInBytes() * kBitsPerByte; } + int GetMinSizeInBits() const { return GetMinSizeInBytes() * kBitsPerByte; } + + static RegisterBank GetBankFor(RegisterType type) { + switch (type) { + case kNoRegister: + return kNoRegisterBank; + case kRegister: + return kRRegisterBank; + case kVRegister: + case kZRegister: + return kVRegisterBank; + case kPRegister: + return kPRegisterBank; + } + VIXL_UNREACHABLE(); + return kNoRegisterBank; + } + + static unsigned GetMaxCodeFor(CPURegister::RegisterType type) { + return GetMaxCodeFor(GetBankFor(type)); + } + + protected: + enum EncodedSize : uint8_t { + // Ensure that kUnknownSize (and therefore kNoRegister) is encoded as zero. + kEncodedUnknownSize = 0, + + // The implementation assumes that the remaining sizes are encoded as + // `log2(size) + c`, so the following names must remain in sequence. 
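+    //
+    // For example, a B register (1 byte) is encoded as log2(1) + 1 = 1 and a
+    // Q register (16 bytes) as log2(16) + 1 = 5, so GetLanes() for a Q
+    // register with D-sized lanes is 1 << (5 - 4) = 2.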
+ kEncodedBRegSize, + kEncodedHRegSize, + kEncodedSRegSize, + kEncodedDRegSize, + kEncodedQRegSize, + + kEncodedWRegSize = kEncodedSRegSize, + kEncodedXRegSize = kEncodedDRegSize + }; + VIXL_STATIC_ASSERT(kSRegSize == kWRegSize); + VIXL_STATIC_ASSERT(kDRegSize == kXRegSize); + + char GetLaneSizeSymbol() const { + switch (lane_size_) { + case kEncodedBRegSize: + return 'B'; + case kEncodedHRegSize: + return 'H'; + case kEncodedSRegSize: + return 'S'; + case kEncodedDRegSize: + return 'D'; + case kEncodedQRegSize: + return 'Q'; + case kEncodedUnknownSize: + break; + } + VIXL_UNREACHABLE(); + return '?'; + } + + static EncodedSize EncodeSizeInBits(int size_in_bits) { + switch (size_in_bits) { + case kUnknownSize: + return kEncodedUnknownSize; + case kBRegSize: + return kEncodedBRegSize; + case kHRegSize: + return kEncodedHRegSize; + case kSRegSize: + return kEncodedSRegSize; + case kDRegSize: + return kEncodedDRegSize; + case kQRegSize: + return kEncodedQRegSize; + } + VIXL_UNREACHABLE(); + return kEncodedUnknownSize; + } + + static int DecodeSizeInBytesLog2(EncodedSize encoded_size) { + switch (encoded_size) { + case kEncodedUnknownSize: + // Log2 of B-sized lane in bytes is 0, so we can't just return 0 here. + VIXL_UNREACHABLE(); + return -1; + case kEncodedBRegSize: + return kBRegSizeInBytesLog2; + case kEncodedHRegSize: + return kHRegSizeInBytesLog2; + case kEncodedSRegSize: + return kSRegSizeInBytesLog2; + case kEncodedDRegSize: + return kDRegSizeInBytesLog2; + case kEncodedQRegSize: + return kQRegSizeInBytesLog2; + } + VIXL_UNREACHABLE(); + return kUnknownSize; + } + + static int DecodeSizeInBytes(EncodedSize encoded_size) { + if (encoded_size == kEncodedUnknownSize) { + return kUnknownSize; + } + return 1 << DecodeSizeInBytesLog2(encoded_size); + } + + static int DecodeSizeInBits(EncodedSize encoded_size) { + VIXL_STATIC_ASSERT(kUnknownSize == 0); + return DecodeSizeInBytes(encoded_size) * kBitsPerByte; + } + + static unsigned GetMaxCodeFor(CPURegister::RegisterBank bank); + + enum Qualifiers : uint8_t { + kNoQualifiers = 0, + // Used by P registers. + kMerging, + kZeroing + }; + + // An unchecked constructor, for use by derived classes. + CPURegister(int code, + EncodedSize size, + RegisterBank bank, + EncodedSize lane_size, + Qualifiers qualifiers = kNoQualifiers) + : code_(code), + bank_(bank), + size_(size), + qualifiers_(qualifiers), + lane_size_(lane_size) {} + + // TODO: Check that access to these fields is reasonably efficient. + uint8_t code_; + RegisterBank bank_; + EncodedSize size_; + Qualifiers qualifiers_; + EncodedSize lane_size_; +}; +// Ensure that CPURegisters can fit in a single (64-bit) register. This is a +// proxy for being "cheap to pass by value", which is hard to check directly. +VIXL_STATIC_ASSERT(sizeof(CPURegister) <= sizeof(uint64_t)); + +// TODO: Add constexpr constructors. +#define VIXL_DECLARE_REGISTER_COMMON(NAME, REGISTER_TYPE, PARENT_TYPE) \ + VIXL_CONSTEXPR NAME() : PARENT_TYPE() {} \ + \ + explicit NAME(CPURegister other) : PARENT_TYPE(other) { \ + VIXL_ASSERT(IsValid()); \ + } \ + \ + VIXL_CONSTEXPR static unsigned GetMaxCode() { \ + return kNumberOf##REGISTER_TYPE##s - 1; \ + } + +// Any W or X register, including the zero register and the stack pointer. 
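+// For example (illustrative), Register(0, kWRegSize) represents w0 and
+// Register(16, kXRegSize) represents x16; the named constants defined further
+// down (w0, x16, sp, ...) are built this way via WRegister and XRegister.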
+class Register : public CPURegister { + public: + VIXL_DECLARE_REGISTER_COMMON(Register, Register, CPURegister) + + Register(int code, int size_in_bits) + : CPURegister(code, size_in_bits, kRegister) { + VIXL_ASSERT(IsValidRegister()); + } + + bool IsValid() const { return IsValidRegister(); } +}; + +// Any FP or NEON V register, including vector (V.) and scalar forms +// (B, H, S, D, Q). +class VRegister : public CPURegister { + public: + VIXL_DECLARE_REGISTER_COMMON(VRegister, VRegister, CPURegister) + + // For historical reasons, VRegister(0) returns v0.1Q (or equivalently, q0). + explicit VRegister(int code, int size_in_bits = kQRegSize, int lanes = 1) + : CPURegister(code, + EncodeSizeInBits(size_in_bits), + kVRegisterBank, + EncodeLaneSizeInBits(size_in_bits, lanes)) { + VIXL_ASSERT(IsValidVRegister()); + } + + VRegister(int code, VectorFormat format) + : CPURegister(code, + EncodeSizeInBits(RegisterSizeInBitsFromFormat(format)), + kVRegisterBank, + EncodeSizeInBits(LaneSizeInBitsFromFormat(format)), + kNoQualifiers) { + VIXL_ASSERT(IsValid()); + } + + VRegister V8B() const; + VRegister V16B() const; + VRegister V2H() const; + VRegister V4H() const; + VRegister V8H() const; + VRegister V2S() const; + VRegister V4S() const; + VRegister V1D() const; + VRegister V2D() const; + VRegister S4B() const; + + bool IsValid() const { return IsValidVRegister(); } + + protected: + static EncodedSize EncodeLaneSizeInBits(int size_in_bits, int lanes) { + VIXL_ASSERT(lanes >= 1); + VIXL_ASSERT((size_in_bits % lanes) == 0); + return EncodeSizeInBits(size_in_bits / lanes); + } +}; + +// Any SVE Z register, with or without a lane size specifier. +class ZRegister : public CPURegister { + public: + VIXL_DECLARE_REGISTER_COMMON(ZRegister, ZRegister, CPURegister) + + explicit ZRegister(int code, int lane_size_in_bits = kUnknownSize) + : CPURegister(code, + kEncodedUnknownSize, + kVRegisterBank, + EncodeSizeInBits(lane_size_in_bits)) { + VIXL_ASSERT(IsValid()); + } + + ZRegister(int code, VectorFormat format) + : CPURegister(code, + kEncodedUnknownSize, + kVRegisterBank, + EncodeSizeInBits(LaneSizeInBitsFromFormat(format)), + kNoQualifiers) { + VIXL_ASSERT(IsValid()); + } + + // Return a Z register with a known lane size (like "z0.B"). + ZRegister VnB() const { return ZRegister(GetCode(), kBRegSize); } + ZRegister VnH() const { return ZRegister(GetCode(), kHRegSize); } + ZRegister VnS() const { return ZRegister(GetCode(), kSRegSize); } + ZRegister VnD() const { return ZRegister(GetCode(), kDRegSize); } + ZRegister VnQ() const { return ZRegister(GetCode(), kQRegSize); } + + template + ZRegister WithLaneSize(T format) const { + return ZRegister(GetCode(), format); + } + + ZRegister WithSameLaneSizeAs(const CPURegister& other) const { + VIXL_ASSERT(other.HasLaneSize()); + return this->WithLaneSize(other.GetLaneSizeInBits()); + } + + bool IsValid() const { return IsValidZRegister(); } +}; + +// Any SVE P register, with or without a qualifier or lane size specifier. +class PRegister : public CPURegister { + public: + VIXL_DECLARE_REGISTER_COMMON(PRegister, PRegister, CPURegister) + + explicit PRegister(int code) : CPURegister(code, kUnknownSize, kPRegister) { + VIXL_ASSERT(IsValid()); + } + + bool IsValid() const { + return IsValidPRegister() && !HasLaneSize() && IsUnqualified(); + } + + // Return a P register with a known lane size (like "p0.B"). 
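+  // Illustrative: p0.VnB() corresponds to the assembly form "p0.b". The
+  // result is a distinct PRegisterWithLaneSize type, so an unqualified "p0"
+  // cannot be passed where a lane-size-qualified predicate is expected.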
+ PRegisterWithLaneSize VnB() const; + PRegisterWithLaneSize VnH() const; + PRegisterWithLaneSize VnS() const; + PRegisterWithLaneSize VnD() const; + + template + PRegisterWithLaneSize WithLaneSize(T format) const; + + PRegisterWithLaneSize WithSameLaneSizeAs(const CPURegister& other) const; + + // SVE predicates are specified (in normal assembly) with a "/z" (zeroing) or + // "/m" (merging) suffix. These methods are VIXL's equivalents. + PRegisterZ Zeroing() const; + PRegisterM Merging() const; + + protected: + // Unchecked constructors, for use by derived classes. + PRegister(int code, EncodedSize encoded_lane_size) + : CPURegister(code, + kEncodedUnknownSize, + kPRegisterBank, + encoded_lane_size, + kNoQualifiers) {} + + PRegister(int code, Qualifiers qualifiers) + : CPURegister(code, + kEncodedUnknownSize, + kPRegisterBank, + kEncodedUnknownSize, + qualifiers) {} +}; + +// Any SVE P register with a known lane size (like "p0.B"). +class PRegisterWithLaneSize : public PRegister { + public: + VIXL_DECLARE_REGISTER_COMMON(PRegisterWithLaneSize, PRegister, PRegister) + + PRegisterWithLaneSize(int code, int lane_size_in_bits) + : PRegister(code, EncodeSizeInBits(lane_size_in_bits)) { + VIXL_ASSERT(IsValid()); + } + + PRegisterWithLaneSize(int code, VectorFormat format) + : PRegister(code, EncodeSizeInBits(LaneSizeInBitsFromFormat(format))) { + VIXL_ASSERT(IsValid()); + } + + bool IsValid() const { + return IsValidPRegister() && HasLaneSize() && IsUnqualified(); + } + + // Overload lane size accessors so we can assert `HasLaneSize()`. This allows + // tools such as clang-tidy to prove that the result of GetLaneSize* is + // non-zero. + + // TODO: Make these return 'int'. + unsigned GetLaneSizeInBits() const { + VIXL_ASSERT(HasLaneSize()); + return PRegister::GetLaneSizeInBits(); + } + + unsigned GetLaneSizeInBytes() const { + VIXL_ASSERT(HasLaneSize()); + return PRegister::GetLaneSizeInBytes(); + } +}; + +// Any SVE P register with the zeroing qualifier (like "p0/z"). +class PRegisterZ : public PRegister { + public: + VIXL_DECLARE_REGISTER_COMMON(PRegisterZ, PRegister, PRegister) + + explicit PRegisterZ(int code) : PRegister(code, kZeroing) { + VIXL_ASSERT(IsValid()); + } + + bool IsValid() const { + return IsValidPRegister() && !HasLaneSize() && IsZeroing(); + } +}; + +// Any SVE P register with the merging qualifier (like "p0/m"). 
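+// For example (illustrative), a predicated SVE instruction written in
+// assembly as "add z0.d, p0/m, z0.d, z1.d" uses the merging form modelled
+// below, obtained in VIXL as p0.Merging().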
+class PRegisterM : public PRegister { + public: + VIXL_DECLARE_REGISTER_COMMON(PRegisterM, PRegister, PRegister) + + explicit PRegisterM(int code) : PRegister(code, kMerging) { + VIXL_ASSERT(IsValid()); + } + + bool IsValid() const { + return IsValidPRegister() && !HasLaneSize() && IsMerging(); + } +}; + +inline PRegisterWithLaneSize PRegister::VnB() const { + return PRegisterWithLaneSize(GetCode(), kBRegSize); +} +inline PRegisterWithLaneSize PRegister::VnH() const { + return PRegisterWithLaneSize(GetCode(), kHRegSize); +} +inline PRegisterWithLaneSize PRegister::VnS() const { + return PRegisterWithLaneSize(GetCode(), kSRegSize); +} +inline PRegisterWithLaneSize PRegister::VnD() const { + return PRegisterWithLaneSize(GetCode(), kDRegSize); +} + +template +inline PRegisterWithLaneSize PRegister::WithLaneSize(T format) const { + return PRegisterWithLaneSize(GetCode(), format); +} + +inline PRegisterWithLaneSize PRegister::WithSameLaneSizeAs( + const CPURegister& other) const { + VIXL_ASSERT(other.HasLaneSize()); + return this->WithLaneSize(other.GetLaneSizeInBits()); +} + +inline PRegisterZ PRegister::Zeroing() const { return PRegisterZ(GetCode()); } +inline PRegisterM PRegister::Merging() const { return PRegisterM(GetCode()); } + +#define VIXL_REGISTER_WITH_SIZE_LIST(V) \ + V(WRegister, kWRegSize, Register) \ + V(XRegister, kXRegSize, Register) \ + V(QRegister, kQRegSize, VRegister) \ + V(DRegister, kDRegSize, VRegister) \ + V(SRegister, kSRegSize, VRegister) \ + V(HRegister, kHRegSize, VRegister) \ + V(BRegister, kBRegSize, VRegister) + +#define VIXL_DEFINE_REGISTER_WITH_SIZE(NAME, SIZE, PARENT) \ + class NAME : public PARENT { \ + public: \ + VIXL_CONSTEXPR NAME() : PARENT() {} \ + explicit NAME(int code) : PARENT(code, SIZE) {} \ + \ + explicit NAME(PARENT other) : PARENT(other) { \ + VIXL_ASSERT(GetSizeInBits() == SIZE); \ + } \ + \ + PARENT As##PARENT() const { return *this; } \ + \ + VIXL_CONSTEXPR int GetSizeInBits() const { return SIZE; } \ + \ + bool IsValid() const { \ + return PARENT::IsValid() && (PARENT::GetSizeInBits() == SIZE); \ + } \ + }; + +VIXL_REGISTER_WITH_SIZE_LIST(VIXL_DEFINE_REGISTER_WITH_SIZE) + +// No*Reg is used to provide default values for unused arguments, error cases +// and so on. Note that these (and the default constructors) all compare equal +// (using the Is() method). +const Register NoReg; +const VRegister NoVReg; +const CPURegister NoCPUReg; +const ZRegister NoZReg; + +// TODO: Ideally, these would use specialised register types (like XRegister and +// so on). However, doing so throws up template overloading problems elsewhere. +#define VIXL_DEFINE_REGISTERS(N) \ + const Register w##N = WRegister(N); \ + const Register x##N = XRegister(N); \ + const VRegister b##N = BRegister(N); \ + const VRegister h##N = HRegister(N); \ + const VRegister s##N = SRegister(N); \ + const VRegister d##N = DRegister(N); \ + const VRegister q##N = QRegister(N); \ + const VRegister v##N(N); \ + const ZRegister z##N(N); +AARCH64_REGISTER_CODE_LIST(VIXL_DEFINE_REGISTERS) +#undef VIXL_DEFINE_REGISTERS + +#define VIXL_DEFINE_P_REGISTERS(N) const PRegister p##N(N); +AARCH64_P_REGISTER_CODE_LIST(VIXL_DEFINE_P_REGISTERS) +#undef VIXL_DEFINE_P_REGISTERS + +// VIXL represents 'sp' with a unique code, to tell it apart from 'xzr'. +const Register wsp = WRegister(kSPRegInternalCode); +const Register sp = XRegister(kSPRegInternalCode); + +// Standard aliases. 
+const Register ip0 = x16; +const Register ip1 = x17; +const Register lr = x30; +const Register xzr = x31; +const Register wzr = w31; + +// AreAliased returns true if any of the named registers overlap. Arguments +// set to NoReg are ignored. The system stack pointer may be specified. +bool AreAliased(const CPURegister& reg1, + const CPURegister& reg2, + const CPURegister& reg3 = NoReg, + const CPURegister& reg4 = NoReg, + const CPURegister& reg5 = NoReg, + const CPURegister& reg6 = NoReg, + const CPURegister& reg7 = NoReg, + const CPURegister& reg8 = NoReg); + +// AreSameSizeAndType returns true if all of the specified registers have the +// same size, and are of the same type. The system stack pointer may be +// specified. Arguments set to NoReg are ignored, as are any subsequent +// arguments. At least one argument (reg1) must be valid (not NoCPUReg). +bool AreSameSizeAndType(const CPURegister& reg1, + const CPURegister& reg2, + const CPURegister& reg3 = NoCPUReg, + const CPURegister& reg4 = NoCPUReg, + const CPURegister& reg5 = NoCPUReg, + const CPURegister& reg6 = NoCPUReg, + const CPURegister& reg7 = NoCPUReg, + const CPURegister& reg8 = NoCPUReg); + +// AreEven returns true if all of the specified registers have even register +// indices. Arguments set to NoReg are ignored, as are any subsequent +// arguments. At least one argument (reg1) must be valid (not NoCPUReg). +bool AreEven(const CPURegister& reg1, + const CPURegister& reg2, + const CPURegister& reg3 = NoReg, + const CPURegister& reg4 = NoReg, + const CPURegister& reg5 = NoReg, + const CPURegister& reg6 = NoReg, + const CPURegister& reg7 = NoReg, + const CPURegister& reg8 = NoReg); + +// AreConsecutive returns true if all of the specified registers are +// consecutive in the register file. Arguments set to NoReg are ignored, as are +// any subsequent arguments. At least one argument (reg1) must be valid +// (not NoCPUReg). +bool AreConsecutive(const CPURegister& reg1, + const CPURegister& reg2, + const CPURegister& reg3 = NoCPUReg, + const CPURegister& reg4 = NoCPUReg); + +// AreSameFormat returns true if all of the specified registers have the same +// vector format. Arguments set to NoReg are ignored, as are any subsequent +// arguments. At least one argument (reg1) must be valid (not NoVReg). +bool AreSameFormat(const CPURegister& reg1, + const CPURegister& reg2, + const CPURegister& reg3 = NoCPUReg, + const CPURegister& reg4 = NoCPUReg); + +// AreSameLaneSize returns true if all of the specified registers have the same +// element lane size, B, H, S or D. It doesn't compare the type of registers. +// Arguments set to NoReg are ignored, as are any subsequent arguments. +// At least one argument (reg1) must be valid (not NoVReg). +// TODO: Remove this, and replace its uses with AreSameFormat. +bool AreSameLaneSize(const CPURegister& reg1, + const CPURegister& reg2, + const CPURegister& reg3 = NoCPUReg, + const CPURegister& reg4 = NoCPUReg); +} +} // namespace vixl::aarch64 + +#endif // VIXL_AARCH64_REGISTERS_AARCH64_H_ diff --git a/3rdparty/vixl/include/vixl/aarch64/simulator-aarch64.h b/3rdparty/vixl/include/vixl/aarch64/simulator-aarch64.h new file mode 100644 index 0000000000..fc2eedd54f --- /dev/null +++ b/3rdparty/vixl/include/vixl/aarch64/simulator-aarch64.h @@ -0,0 +1,5261 @@ +// Copyright 2015, VIXL authors +// All rights reserved. 
+// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// * Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// * Neither the name of ARM Limited nor the names of its contributors may be +// used to endorse or promote products derived from this software without +// specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS CONTRIBUTORS "AS IS" AND +// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE +// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +#ifndef VIXL_AARCH64_SIMULATOR_AARCH64_H_ +#define VIXL_AARCH64_SIMULATOR_AARCH64_H_ + +#include +#include +#include + +#include "../globals-vixl.h" +#include "../utils-vixl.h" + +#include "../cpu-features.h" +#include "abi-aarch64.h" +#include "cpu-features-auditor-aarch64.h" +#include "disasm-aarch64.h" +#include "instructions-aarch64.h" +#include "simulator-constants-aarch64.h" + +#ifdef VIXL_INCLUDE_SIMULATOR_AARCH64 + +// These are only used for the ABI feature, and depend on checks performed for +// it. +#ifdef VIXL_HAS_ABI_SUPPORT +#include +#if __cplusplus >= 201402L +// Required for `std::index_sequence` +#include +#endif +#endif + +// The hosts that Simulator running on may not have these flags defined. +#ifndef PROT_BTI +#define PROT_BTI 0x10 +#endif +#ifndef PROT_MTE +#define PROT_MTE 0x20 +#endif + +namespace vixl { +namespace aarch64 { + +class Simulator; +struct RuntimeCallStructHelper; + +class SimStack { + public: + SimStack() {} + explicit SimStack(size_t size) : usable_size_(size) {} + + // Guard against accesses above the stack base. This could occur, for example, + // if the first simulated function tries to read stack arguments that haven't + // been properly initialised in the Simulator's stack. + void SetBaseGuardSize(size_t size) { base_guard_size_ = size; } + + // Guard against stack overflows. The size should be large enough to detect + // the largest stride made (by `MacroAssembler::Claim()` or equivalent) whilst + // initialising stack objects. + void SetLimitGuardSize(size_t size) { limit_guard_size_ = size; } + + // The minimum usable size of the stack. + // Equal to "stack base" - "stack limit", in AAPCS64 terminology. + void SetUsableSize(size_t size) { usable_size_ = size; } + + // Set the minimum alignment for the stack parameters. 
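+  // For example, the defaults below give a 16-byte alignment and an 8KB
+  // usable region; a custom stack can be configured and handed to the
+  // Simulator like this (illustrative sketch, assuming a Decoder `decoder`):
+  //
+  //   SimStack stack;
+  //   stack.SetUsableSize(64 * 1024);
+  //   Simulator simulator(&decoder, stdout, stack.Allocate());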
+ void AlignToBytesLog2(int align_log2) { align_log2_ = align_log2; } + + class Allocated { + public: + // Using AAPCS64 terminology, highest addresses at the top: + // + // data_.get() + alloc_size -> + // | + // | Base guard + // GetBase() -> | | + // | | + // | | AAPCS64-legal + // | Usable stack | values of 'sp'. + // | | + // | | + // GetLimit() -> | + // | Limit guard + // data_.get() -> | + // + // The Simulator detects (and forbids) accesses to either guard region. + + char* GetBase() const { return base_; } + char* GetLimit() const { return limit_; } + + template + bool IsAccessInGuardRegion(const T* base, size_t size) const { + VIXL_ASSERT(size > 0); + // Inclusive bounds. + const char* start = reinterpret_cast(base); + const char* end = start + size - 1; + const char* data_start = data_.get(); + const char* data_end = data_start + alloc_size_ - 1; + bool in_base_guard = (start <= data_end) && (end >= base_); + bool in_limit_guard = (start <= limit_) && (end >= data_start); + return in_base_guard || in_limit_guard; + } + + private: + std::unique_ptr data_; + char* limit_; + char* base_; + size_t alloc_size_; + + friend class SimStack; + }; + + // Allocate the stack, locking the parameters. + Allocated Allocate() { + size_t align_to = 1 << align_log2_; + size_t l = AlignUp(limit_guard_size_, align_to); + size_t u = AlignUp(usable_size_, align_to); + size_t b = AlignUp(base_guard_size_, align_to); + size_t size = l + u + b; + + Allocated a; + size_t alloc_size = (align_to - 1) + size; + a.data_ = std::make_unique(alloc_size); + void* data = a.data_.get(); + auto data_aligned = + reinterpret_cast(std::align(align_to, size, data, alloc_size)); + a.limit_ = data_aligned + l - 1; + a.base_ = data_aligned + l + u; + a.alloc_size_ = alloc_size; + return a; + } + + private: + size_t base_guard_size_ = 256; + size_t limit_guard_size_ = 4 * 1024; + size_t usable_size_ = 8 * 1024; + size_t align_log2_ = 4; + + static const size_t kDefaultBaseGuardSize = 256; + static const size_t kDefaultLimitGuardSize = 4 * 1024; + static const size_t kDefaultUsableSize = 8 * 1024; +}; + +// Armv8.5 MTE helpers. +inline int GetAllocationTagFromAddress(uint64_t address) { + return static_cast(ExtractUnsignedBitfield64(59, 56, address)); +} + +template +T AddressUntag(T address) { + // Cast the address using a C-style cast. A reinterpret_cast would be + // appropriate, but it can't cast one integral type to another. + uint64_t bits = (uint64_t)address; + return (T)(bits & ~kAddressTagMask); +} + +// A callback function, called when a function has been intercepted if a +// BranchInterception entry exists in branch_interceptions. The address of +// the intercepted function is passed to the callback. For usage see +// BranchInterception. +using InterceptionCallback = std::function; + +class MetaDataDepot { + public: + class MetaDataMTE { + public: + explicit MetaDataMTE(int tag) : tag_(tag) {} + + int GetTag() const { return tag_; } + void SetTag(int tag) { + VIXL_ASSERT(IsUint4(tag)); + tag_ = tag; + } + + static bool IsActive() { return is_active; } + static void SetActive(bool value) { is_active = value; } + + private: + static bool is_active; + int16_t tag_; + + friend class MetaDataDepot; + }; + + // Generate a key for metadata recording from a untagged address. + template + uint64_t GenerateMTEkey(T address) const { + // Cast the address using a C-style cast. A reinterpret_cast would be + // appropriate, but it can't cast one integral type to another. 
+ return (uint64_t)(AddressUntag(address)) >> kMTETagGranuleInBytesLog2; + } + + template + R GetAttribute(T map, uint64_t key) { + auto pair = map->find(key); + R value = (pair == map->end()) ? nullptr : &pair->second; + return value; + } + + template + int GetMTETag(T address, Instruction const* pc = nullptr) { + uint64_t key = GenerateMTEkey(address); + MetaDataMTE* m = GetAttribute(&metadata_mte_, key); + + if (!m) { + std::stringstream sstream; + sstream << std::hex << "MTE ERROR : instruction at 0x" + << reinterpret_cast(pc) + << " touched a unallocated memory location 0x" + << (uint64_t)(address) << ".\n"; + VIXL_ABORT_WITH_MSG(sstream.str().c_str()); + } + + return m->GetTag(); + } + + template + void SetMTETag(T address, int tag, Instruction const* pc = nullptr) { + VIXL_ASSERT(IsAligned((uintptr_t)address, kMTETagGranuleInBytes)); + uint64_t key = GenerateMTEkey(address); + MetaDataMTE* m = GetAttribute(&metadata_mte_, key); + + if (!m) { + metadata_mte_.insert({key, MetaDataMTE(tag)}); + } else { + // Overwrite + if (m->GetTag() == tag) { + std::stringstream sstream; + sstream << std::hex << "MTE WARNING : instruction at 0x" + << reinterpret_cast(pc) + << ", the same tag is assigned to the address 0x" + << (uint64_t)(address) << ".\n"; + VIXL_WARNING(sstream.str().c_str()); + } + m->SetTag(tag); + } + } + + template + size_t CleanMTETag(T address) { + VIXL_ASSERT( + IsAligned(reinterpret_cast(address), kMTETagGranuleInBytes)); + uint64_t key = GenerateMTEkey(address); + return metadata_mte_.erase(key); + } + + size_t GetTotalCountMTE() { return metadata_mte_.size(); } + + // A pure virtual struct that allows the templated BranchInterception struct + // to be stored. For more information see BranchInterception. + struct BranchInterceptionAbstract { + virtual ~BranchInterceptionAbstract() {} + // Call the callback_ if one exists, otherwise do a RuntimeCall. + virtual void operator()(Simulator* simulator) const = 0; + }; + + // An entry denoting a function to intercept when branched to during + // simulator execution. When a function is intercepted the callback will be + // called if one exists otherwise the function will be passed to + // RuntimeCall. + template + struct BranchInterception : public BranchInterceptionAbstract { + BranchInterception(R (*function)(P...), + InterceptionCallback callback = nullptr) + : function_(function), callback_(callback) {} + + void operator()(Simulator* simulator) const VIXL_OVERRIDE; + + private: + // Pointer to the function that will be intercepted. + R (*function_)(P...); + + // Function to be called instead of function_ + InterceptionCallback callback_; + }; + + // Register a new BranchInterception object. If 'function' is branched to + // (e.g: "blr function") in the future; instead, if provided, 'callback' will + // be called otherwise a runtime call will be performed on 'function'. + // + // For example: this can be used to always perform runtime calls on + // non-AArch64 functions without using the macroassembler. + // + // Note: only unconditional branches to registers are currently supported to + // be intercepted, e.g: "br"/"blr". + // + // TODO: support intercepting other branch types. 
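+  // Illustrative sketch (the `HostLog` and `depot` names are examples, not
+  // part of VIXL):
+  //
+  //   int HostLog(const char* msg);              // native host function
+  //   depot.RegisterBranchInterception(HostLog);
+  //
+  // A simulated "blr" to HostLog's address then results in a runtime call,
+  // or in the callback if one was supplied.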
+ template + void RegisterBranchInterception(R (*function)(P...), + InterceptionCallback callback = nullptr) { + uintptr_t addr = reinterpret_cast(function); + std::unique_ptr intercept = + std::make_unique>(function, callback); + branch_interceptions_.insert(std::make_pair(addr, std::move(intercept))); + } + + // Search for branch interceptions to the branch_target address; If one is + // found return it otherwise return nullptr. + BranchInterceptionAbstract* FindBranchInterception(uint64_t branch_target) { + // Check for interceptions to the target address, if one is found, call it. + auto search = branch_interceptions_.find(branch_target); + if (search != branch_interceptions_.end()) { + return search->second.get(); + } else { + return nullptr; + } + } + + void ResetState() { branch_interceptions_.clear(); } + + private: + // Tag recording of each allocated memory in the tag-granule. + std::unordered_map metadata_mte_; + + // Store a map of addresses to be intercepted and their corresponding branch + // interception object, see 'BranchInterception'. + std::unordered_map> + branch_interceptions_; +}; + + +// Representation of memory, with typed getters and setters for access. +class Memory { + public: + explicit Memory(SimStack::Allocated stack) : stack_(std::move(stack)) { + metadata_depot_ = nullptr; + } + + const SimStack::Allocated& GetStack() { return stack_; } + + template + bool IsMTETagsMatched(A address, Instruction const* pc = nullptr) const { + if (MetaDataDepot::MetaDataMTE::IsActive()) { + // Cast the address using a C-style cast. A reinterpret_cast would be + // appropriate, but it can't cast one integral type to another. + uint64_t addr = (uint64_t)address; + int pointer_tag = GetAllocationTagFromAddress(addr); + int memory_tag = metadata_depot_->GetMTETag(AddressUntag(addr), pc); + return pointer_tag == memory_tag; + } + return true; + } + + template + T Read(A address, Instruction const* pc = nullptr) const { + T value; + VIXL_STATIC_ASSERT((sizeof(value) == 1) || (sizeof(value) == 2) || + (sizeof(value) == 4) || (sizeof(value) == 8) || + (sizeof(value) == 16)); + auto base = reinterpret_cast(AddressUntag(address)); + if (stack_.IsAccessInGuardRegion(base, sizeof(value))) { + VIXL_ABORT_WITH_MSG("Attempt to read from stack guard region"); + } + if (!IsMTETagsMatched(address, pc)) { + VIXL_ABORT_WITH_MSG("Tag mismatch."); + } + memcpy(&value, base, sizeof(value)); + return value; + } + + template + void Write(A address, T value, Instruction const* pc = nullptr) const { + VIXL_STATIC_ASSERT((sizeof(value) == 1) || (sizeof(value) == 2) || + (sizeof(value) == 4) || (sizeof(value) == 8) || + (sizeof(value) == 16)); + auto base = reinterpret_cast(AddressUntag(address)); + if (stack_.IsAccessInGuardRegion(base, sizeof(value))) { + VIXL_ABORT_WITH_MSG("Attempt to write to stack guard region"); + } + if (!IsMTETagsMatched(address, pc)) { + VIXL_ABORT_WITH_MSG("Tag mismatch."); + } + memcpy(base, &value, sizeof(value)); + } + + template + uint64_t ReadUint(int size_in_bytes, A address) const { + switch (size_in_bytes) { + case 1: + return Read(address); + case 2: + return Read(address); + case 4: + return Read(address); + case 8: + return Read(address); + } + VIXL_UNREACHABLE(); + return 0; + } + + template + int64_t ReadInt(int size_in_bytes, A address) const { + switch (size_in_bytes) { + case 1: + return Read(address); + case 2: + return Read(address); + case 4: + return Read(address); + case 8: + return Read(address); + } + VIXL_UNREACHABLE(); + return 0; + } + + template + 
void Write(int size_in_bytes, A address, uint64_t value) const { + switch (size_in_bytes) { + case 1: + return Write(address, static_cast(value)); + case 2: + return Write(address, static_cast(value)); + case 4: + return Write(address, static_cast(value)); + case 8: + return Write(address, value); + } + VIXL_UNREACHABLE(); + } + + void AppendMetaData(MetaDataDepot* metadata_depot) { + VIXL_ASSERT(metadata_depot != nullptr); + VIXL_ASSERT(metadata_depot_ == nullptr); + metadata_depot_ = metadata_depot; + } + + private: + SimStack::Allocated stack_; + MetaDataDepot* metadata_depot_; +}; + +// Represent a register (r0-r31, v0-v31, z0-z31, p0-p15). +template +class SimRegisterBase { + public: + static const unsigned kMaxSizeInBytes = kMaxSizeInBits / kBitsPerByte; + VIXL_STATIC_ASSERT((kMaxSizeInBytes * kBitsPerByte) == kMaxSizeInBits); + + SimRegisterBase() : size_in_bytes_(kMaxSizeInBytes) { Clear(); } + + unsigned GetSizeInBits() const { return size_in_bytes_ * kBitsPerByte; } + unsigned GetSizeInBytes() const { return size_in_bytes_; } + + void SetSizeInBytes(unsigned size_in_bytes) { + VIXL_ASSERT(size_in_bytes <= kMaxSizeInBytes); + size_in_bytes_ = size_in_bytes; + } + void SetSizeInBits(unsigned size_in_bits) { + VIXL_ASSERT(size_in_bits <= kMaxSizeInBits); + VIXL_ASSERT((size_in_bits % kBitsPerByte) == 0); + SetSizeInBytes(size_in_bits / kBitsPerByte); + } + + // Write the specified value. The value is zero-extended if necessary. + template + void Write(T new_value) { + // All AArch64 registers are zero-extending. + if (sizeof(new_value) < GetSizeInBytes()) Clear(); + WriteLane(new_value, 0); + NotifyRegisterWrite(); + } + template + VIXL_DEPRECATED("Write", void Set(T new_value)) { + Write(new_value); + } + + void Clear() { + memset(value_, 0, kMaxSizeInBytes); + NotifyRegisterWrite(); + } + + // Insert a typed value into a register, leaving the rest of the register + // unchanged. The lane parameter indicates where in the register the value + // should be inserted, in the range [ 0, sizeof(value_) / sizeof(T) ), where + // 0 represents the least significant bits. + template + void Insert(int lane, T new_value) { + WriteLane(new_value, lane); + NotifyRegisterWrite(); + } + + // Get the value as the specified type. The value is truncated if necessary. + template + T Get() const { + return GetLane(0); + } + + // Get the lane value as the specified type. The value is truncated if + // necessary. + template + T GetLane(int lane) const { + T result; + ReadLane(&result, lane); + return result; + } + template + VIXL_DEPRECATED("GetLane", T Get(int lane) const) { + return GetLane(lane); + } + + // Get the value of a specific bit, indexed from the least-significant bit of + // lane 0. + bool GetBit(int bit) const { + int bit_in_byte = bit % (sizeof(value_[0]) * kBitsPerByte); + int byte = bit / (sizeof(value_[0]) * kBitsPerByte); + return ((value_[byte] >> bit_in_byte) & 1) != 0; + } + + // Return a pointer to the raw, underlying byte array. + const uint8_t* GetBytes() const { return value_; } + + // TODO: Make this return a map of updated bytes, so that we can highlight + // updated lanes for load-and-insert. (That never happens for scalar code, but + // NEON has some instructions that can update individual lanes.) + bool WrittenSinceLastLog() const { return written_since_last_log_; } + + void NotifyRegisterLogged() { written_since_last_log_ = false; } + + protected: + uint8_t value_[kMaxSizeInBytes]; + + unsigned size_in_bytes_; + + // Helpers to aid with register tracing. 
+ bool written_since_last_log_; + + void NotifyRegisterWrite() { written_since_last_log_ = true; } + + private: + template + void ReadLane(T* dst, int lane) const { + VIXL_ASSERT(lane >= 0); + VIXL_ASSERT((sizeof(*dst) + (lane * sizeof(*dst))) <= GetSizeInBytes()); + memcpy(dst, &value_[lane * sizeof(*dst)], sizeof(*dst)); + } + + template + void WriteLane(T src, int lane) { + VIXL_ASSERT(lane >= 0); + VIXL_ASSERT((sizeof(src) + (lane * sizeof(src))) <= GetSizeInBytes()); + memcpy(&value_[lane * sizeof(src)], &src, sizeof(src)); + } + + // The default ReadLane and WriteLane methods assume what we are copying is + // "trivially copyable" by using memcpy. We have to provide alternative + // implementations for SimFloat16 which cannot be copied this way. + + void ReadLane(vixl::internal::SimFloat16* dst, int lane) const { + uint16_t rawbits; + ReadLane(&rawbits, lane); + *dst = RawbitsToFloat16(rawbits); + } + + void WriteLane(vixl::internal::SimFloat16 src, int lane) { + WriteLane(Float16ToRawbits(src), lane); + } +}; + +typedef SimRegisterBase SimRegister; // r0-r31 +typedef SimRegisterBase SimPRegister; // p0-p15 +// FFR has the same format as a predicate register. +typedef SimPRegister SimFFRRegister; + +// v0-v31 and z0-z31 +class SimVRegister : public SimRegisterBase { + public: + SimVRegister() : SimRegisterBase(), accessed_as_z_(false) {} + + void NotifyAccessAsZ() { accessed_as_z_ = true; } + + void NotifyRegisterLogged() { + SimRegisterBase::NotifyRegisterLogged(); + accessed_as_z_ = false; + } + + bool AccessedAsZSinceLastLog() const { return accessed_as_z_; } + + private: + bool accessed_as_z_; +}; + +// Representation of a SVE predicate register. +class LogicPRegister { + public: + inline LogicPRegister( + SimPRegister& other) // NOLINT(runtime/references)(runtime/explicit) + : register_(other) {} + + // Set a conveniently-sized block to 16 bits as the minimum predicate length + // is 16 bits and allow to be increased to multiples of 16 bits. + typedef uint16_t ChunkType; + + // Assign a bit into the end positon of the specified lane. + // The bit is zero-extended if necessary. + void SetActive(VectorFormat vform, int lane_index, bool value) { + int psize = LaneSizeInBytesFromFormat(vform); + int bit_index = lane_index * psize; + int byte_index = bit_index / kBitsPerByte; + int bit_offset = bit_index % kBitsPerByte; + uint8_t byte = register_.GetLane(byte_index); + register_.Insert(byte_index, ZeroExtend(byte, bit_offset, psize, value)); + } + + bool IsActive(VectorFormat vform, int lane_index) const { + int psize = LaneSizeInBytesFromFormat(vform); + int bit_index = lane_index * psize; + int byte_index = bit_index / kBitsPerByte; + int bit_offset = bit_index % kBitsPerByte; + uint8_t byte = register_.GetLane(byte_index); + return ExtractBit(byte, bit_offset); + } + + // The accessors for bulk processing. 
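+  // Illustrative: a typical bulk walk over the predicate, one 16-bit chunk
+  // at a time:
+  //
+  //   for (int i = 0; i < pred.GetChunkCount(); i++) {
+  //     LogicPRegister::ChunkType bits = pred.GetChunk(i);
+  //     // ... process 16 predicate bits ...
+  //   }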
+ int GetChunkCount() const { + VIXL_ASSERT((register_.GetSizeInBytes() % sizeof(ChunkType)) == 0); + return register_.GetSizeInBytes() / sizeof(ChunkType); + } + + ChunkType GetChunk(int lane) const { return GetActiveMask(lane); } + + void SetChunk(int lane, ChunkType new_value) { + SetActiveMask(lane, new_value); + } + + void SetAllBits() { + int chunk_size = sizeof(ChunkType) * kBitsPerByte; + ChunkType bits = GetUintMask(chunk_size); + for (int lane = 0; + lane < (static_cast(register_.GetSizeInBits() / chunk_size)); + lane++) { + SetChunk(lane, bits); + } + } + + template + T GetActiveMask(int lane) const { + return register_.GetLane(lane); + } + + template + void SetActiveMask(int lane, T new_value) { + register_.Insert(lane, new_value); + } + + void Clear() { register_.Clear(); } + + bool Aliases(const LogicPRegister& other) const { + return ®ister_ == &other.register_; + } + + private: + // The bit assignment is zero-extended to fill the size of predicate element. + uint8_t ZeroExtend(uint8_t byte, int index, int psize, bool value) { + VIXL_ASSERT(index >= 0); + VIXL_ASSERT(index + psize <= kBitsPerByte); + int bits = value ? 1 : 0; + switch (psize) { + case 1: + AssignBit(byte, index, bits); + break; + case 2: + AssignBits(byte, index, 0x03, bits); + break; + case 4: + AssignBits(byte, index, 0x0f, bits); + break; + case 8: + AssignBits(byte, index, 0xff, bits); + break; + default: + VIXL_UNREACHABLE(); + return 0; + } + return byte; + } + + SimPRegister& register_; +}; + +// Representation of a vector register, with typed getters and setters for lanes +// and additional information to represent lane state. +class LogicVRegister { + public: + inline LogicVRegister( + SimVRegister& other) // NOLINT(runtime/references)(runtime/explicit) + : register_(other) { + for (size_t i = 0; i < ArrayLength(saturated_); i++) { + saturated_[i] = kNotSaturated; + } + for (size_t i = 0; i < ArrayLength(round_); i++) { + round_[i] = 0; + } + } + + int64_t Int(VectorFormat vform, int index) const { + if (IsSVEFormat(vform)) register_.NotifyAccessAsZ(); + int64_t element; + switch (LaneSizeInBitsFromFormat(vform)) { + case 8: + element = register_.GetLane(index); + break; + case 16: + element = register_.GetLane(index); + break; + case 32: + element = register_.GetLane(index); + break; + case 64: + element = register_.GetLane(index); + break; + default: + VIXL_UNREACHABLE(); + return 0; + } + return element; + } + + uint64_t Uint(VectorFormat vform, int index) const { + if (IsSVEFormat(vform)) register_.NotifyAccessAsZ(); + uint64_t element; + switch (LaneSizeInBitsFromFormat(vform)) { + case 8: + element = register_.GetLane(index); + break; + case 16: + element = register_.GetLane(index); + break; + case 32: + element = register_.GetLane(index); + break; + case 64: + element = register_.GetLane(index); + break; + default: + VIXL_UNREACHABLE(); + return 0; + } + return element; + } + + int UintArray(VectorFormat vform, uint64_t* dst) const { + for (int i = 0; i < LaneCountFromFormat(vform); i++) { + dst[i] = Uint(vform, i); + } + return LaneCountFromFormat(vform); + } + + uint64_t UintLeftJustified(VectorFormat vform, int index) const { + return Uint(vform, index) << (64 - LaneSizeInBitsFromFormat(vform)); + } + + int64_t IntLeftJustified(VectorFormat vform, int index) const { + uint64_t value = UintLeftJustified(vform, index); + int64_t result; + memcpy(&result, &value, sizeof(result)); + return result; + } + + void SetInt(VectorFormat vform, int index, int64_t value) const { + if 
(IsSVEFormat(vform)) register_.NotifyAccessAsZ(); + switch (LaneSizeInBitsFromFormat(vform)) { + case 8: + register_.Insert(index, static_cast(value)); + break; + case 16: + register_.Insert(index, static_cast(value)); + break; + case 32: + register_.Insert(index, static_cast(value)); + break; + case 64: + register_.Insert(index, static_cast(value)); + break; + default: + VIXL_UNREACHABLE(); + return; + } + } + + void SetIntArray(VectorFormat vform, const int64_t* src) const { + ClearForWrite(vform); + for (int i = 0; i < LaneCountFromFormat(vform); i++) { + SetInt(vform, i, src[i]); + } + } + + void SetUint(VectorFormat vform, int index, uint64_t value) const { + if (IsSVEFormat(vform)) register_.NotifyAccessAsZ(); + switch (LaneSizeInBitsFromFormat(vform)) { + case 8: + register_.Insert(index, static_cast(value)); + break; + case 16: + register_.Insert(index, static_cast(value)); + break; + case 32: + register_.Insert(index, static_cast(value)); + break; + case 64: + register_.Insert(index, static_cast(value)); + break; + default: + VIXL_UNREACHABLE(); + return; + } + } + + void SetUintArray(VectorFormat vform, const uint64_t* src) const { + ClearForWrite(vform); + for (int i = 0; i < LaneCountFromFormat(vform); i++) { + SetUint(vform, i, src[i]); + } + } + + template + T Float(int index) const { + return register_.GetLane(index); + } + + template + void SetFloat(int index, T value) const { + register_.Insert(index, value); + } + + template + void SetFloat(VectorFormat vform, int index, T value) const { + if (IsSVEFormat(vform)) register_.NotifyAccessAsZ(); + register_.Insert(index, value); + } + + void Clear() { register_.Clear(); } + + // When setting a result in a register larger than the result itself, the top + // bits of the register must be cleared. + void ClearForWrite(VectorFormat vform) const { + // SVE destinations write whole registers, so we have nothing to clear. + if (IsSVEFormat(vform)) return; + + unsigned size = RegisterSizeInBytesFromFormat(vform); + for (unsigned i = size; i < register_.GetSizeInBytes(); i++) { + SetUint(kFormat16B, i, 0); + } + } + + // Saturation state for each lane of a vector. + enum Saturation { + kNotSaturated = 0, + kSignedSatPositive = 1 << 0, + kSignedSatNegative = 1 << 1, + kSignedSatMask = kSignedSatPositive | kSignedSatNegative, + kSignedSatUndefined = kSignedSatMask, + kUnsignedSatPositive = 1 << 2, + kUnsignedSatNegative = 1 << 3, + kUnsignedSatMask = kUnsignedSatPositive | kUnsignedSatNegative, + kUnsignedSatUndefined = kUnsignedSatMask + }; + + // Getters for saturation state. + Saturation GetSignedSaturation(int index) { + return static_cast(saturated_[index] & kSignedSatMask); + } + + Saturation GetUnsignedSaturation(int index) { + return static_cast(saturated_[index] & kUnsignedSatMask); + } + + // Setters for saturation state. + void ClearSat(int index) { saturated_[index] = kNotSaturated; } + + void SetSignedSat(int index, bool positive) { + SetSatFlag(index, positive ? kSignedSatPositive : kSignedSatNegative); + } + + void SetUnsignedSat(int index, bool positive) { + SetSatFlag(index, positive ? kUnsignedSatPositive : kUnsignedSatNegative); + } + + void SetSatFlag(int index, Saturation sat) { + saturated_[index] = static_cast(saturated_[index] | sat); + VIXL_ASSERT((sat & kUnsignedSatMask) != kUnsignedSatUndefined); + VIXL_ASSERT((sat & kSignedSatMask) != kSignedSatUndefined); + } + + // Saturate lanes of a vector based on saturation state. 
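+  // Illustrative: an operation first records per-lane state, for example
+  // with SetSignedSat(lane, positive); a later call such as
+  // dst.SignedSaturate(vform) then clamps the flagged lanes to
+  // MaxIntFromFormat(vform) or MinIntFromFormat(vform).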
+ LogicVRegister& SignedSaturate(VectorFormat vform) { + for (int i = 0; i < LaneCountFromFormat(vform); i++) { + Saturation sat = GetSignedSaturation(i); + if (sat == kSignedSatPositive) { + SetInt(vform, i, MaxIntFromFormat(vform)); + } else if (sat == kSignedSatNegative) { + SetInt(vform, i, MinIntFromFormat(vform)); + } + } + return *this; + } + + LogicVRegister& UnsignedSaturate(VectorFormat vform) { + for (int i = 0; i < LaneCountFromFormat(vform); i++) { + Saturation sat = GetUnsignedSaturation(i); + if (sat == kUnsignedSatPositive) { + SetUint(vform, i, MaxUintFromFormat(vform)); + } else if (sat == kUnsignedSatNegative) { + SetUint(vform, i, 0); + } + } + return *this; + } + + // Getter for rounding state. + bool GetRounding(int index) { return round_[index]; } + + // Setter for rounding state. + void SetRounding(int index, bool round) { round_[index] = round; } + + // Round lanes of a vector based on rounding state. + LogicVRegister& Round(VectorFormat vform) { + for (int i = 0; i < LaneCountFromFormat(vform); i++) { + SetUint(vform, i, Uint(vform, i) + (GetRounding(i) ? 1 : 0)); + } + return *this; + } + + // Unsigned halve lanes of a vector, and use the saturation state to set the + // top bit. + LogicVRegister& Uhalve(VectorFormat vform) { + for (int i = 0; i < LaneCountFromFormat(vform); i++) { + uint64_t val = Uint(vform, i); + SetRounding(i, (val & 1) == 1); + val >>= 1; + if (GetUnsignedSaturation(i) != kNotSaturated) { + // If the operation causes unsigned saturation, the bit shifted into the + // most significant bit must be set. + val |= (MaxUintFromFormat(vform) >> 1) + 1; + } + SetInt(vform, i, val); + } + return *this; + } + + // Signed halve lanes of a vector, and use the carry state to set the top bit. + LogicVRegister& Halve(VectorFormat vform) { + for (int i = 0; i < LaneCountFromFormat(vform); i++) { + int64_t val = Int(vform, i); + SetRounding(i, (val & 1) == 1); + val = ExtractSignedBitfield64(63, 1, val); // >>= 1 + if (GetSignedSaturation(i) == kNotSaturated) { + SetInt(vform, i, val); + } else { + // If the operation causes signed saturation, the sign bit must be + // inverted. + uint64_t uval = static_cast(val); + SetUint(vform, i, uval ^ ((MaxUintFromFormat(vform) >> 1) + 1)); + } + } + return *this; + } + + int LaneCountFromFormat(VectorFormat vform) const { + if (IsSVEFormat(vform)) { + return register_.GetSizeInBits() / LaneSizeInBitsFromFormat(vform); + } else { + return vixl::aarch64::LaneCountFromFormat(vform); + } + } + + private: + SimVRegister& register_; + + // Allocate one saturation state entry per lane; largest register is type Q, + // and lanes can be a minimum of one byte wide. + Saturation saturated_[kZRegMaxSizeInBytes]; + + // Allocate one rounding state entry per lane. + bool round_[kZRegMaxSizeInBytes]; +}; + +// Represent an SVE addressing mode and abstract per-lane address generation to +// make iteration easy. +// +// Contiguous accesses are described with a simple base address, the memory +// occupied by each lane (`SetMsizeInBytesLog2()`) and the number of elements in +// each struct (`SetRegCount()`). +// +// Scatter-gather accesses also require a SimVRegister and information about how +// to extract lanes from it. 
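+// For example (illustrative), a contiguous two-register structure load such
+// as "ld2b { z0.b, z1.b }, p0/z, [x0]" could be described as:
+//
+//   LogicSVEAddressVector addr(base);  // base address, e.g. the value of x0.
+//   addr.SetMsizeInBytesLog2(0);       // B-sized elements in memory.
+//   addr.SetRegCount(2);               // Two registers per structure (ld2).
+//   uint64_t a = addr.GetElementAddress(lane, reg);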
+class LogicSVEAddressVector { + public: + // scalar-plus-scalar + // scalar-plus-immediate + explicit LogicSVEAddressVector(uint64_t base) + : base_(base), + msize_in_bytes_log2_(kUnknownMsizeInBytesLog2), + reg_count_(1), + vector_(NULL), + vector_form_(kFormatUndefined), + vector_mod_(NO_SVE_OFFSET_MODIFIER), + vector_shift_(0) {} + + // scalar-plus-vector + // vector-plus-immediate + // `base` should be the constant used for each element. That is, the value + // of `xn`, or `#`. + // `vector` should be the SimVRegister with offsets for each element. The + // vector format must be specified; SVE scatter/gather accesses typically + // support both 32-bit and 64-bit addressing. + // + // `mod` and `shift` correspond to the modifiers applied to each element in + // scalar-plus-vector forms, such as those used for unpacking and + // sign-extension. They are not used for vector-plus-immediate. + LogicSVEAddressVector(uint64_t base, + const SimVRegister* vector, + VectorFormat vform, + SVEOffsetModifier mod = NO_SVE_OFFSET_MODIFIER, + int shift = 0) + : base_(base), + msize_in_bytes_log2_(kUnknownMsizeInBytesLog2), + reg_count_(1), + vector_(vector), + vector_form_(vform), + vector_mod_(mod), + vector_shift_(shift) {} + + // Set `msize` -- the memory occupied by each lane -- for address + // calculations. + void SetMsizeInBytesLog2(int msize_in_bytes_log2) { + VIXL_ASSERT(msize_in_bytes_log2 >= static_cast(kBRegSizeInBytesLog2)); + VIXL_ASSERT(msize_in_bytes_log2 <= static_cast(kDRegSizeInBytesLog2)); + msize_in_bytes_log2_ = msize_in_bytes_log2; + } + + bool HasMsize() const { + return msize_in_bytes_log2_ != kUnknownMsizeInBytesLog2; + } + + int GetMsizeInBytesLog2() const { + VIXL_ASSERT(HasMsize()); + return msize_in_bytes_log2_; + } + int GetMsizeInBitsLog2() const { + return GetMsizeInBytesLog2() + kBitsPerByteLog2; + } + + int GetMsizeInBytes() const { return 1 << GetMsizeInBytesLog2(); } + int GetMsizeInBits() const { return 1 << GetMsizeInBitsLog2(); } + + void SetRegCount(int reg_count) { + VIXL_ASSERT(reg_count >= 1); // E.g. ld1/st1 + VIXL_ASSERT(reg_count <= 4); // E.g. ld4/st4 + reg_count_ = reg_count; + } + + int GetRegCount() const { return reg_count_; } + + // Full per-element address calculation for structured accesses. + // + // Note that the register number argument (`reg`) is zero-based. + uint64_t GetElementAddress(int lane, int reg) const { + VIXL_ASSERT(reg < GetRegCount()); + // Individual structures are always contiguous in memory, so this + // implementation works for both contiguous and scatter-gather addressing. + return GetStructAddress(lane) + (reg * GetMsizeInBytes()); + } + + // Full per-struct address calculation for structured accesses. + uint64_t GetStructAddress(int lane) const; + + bool IsContiguous() const { return vector_ == NULL; } + bool IsScatterGather() const { return !IsContiguous(); } + + private: + uint64_t base_; + int msize_in_bytes_log2_; + int reg_count_; + + const SimVRegister* vector_; + VectorFormat vector_form_; + SVEOffsetModifier vector_mod_; + int vector_shift_; + + static const int kUnknownMsizeInBytesLog2 = -1; +}; + +// The proper way to initialize a simulated system register (such as NZCV) is as +// follows: +// SimSystemRegister nzcv = SimSystemRegister::DefaultValueFor(NZCV); +class SimSystemRegister { + public: + // The default constructor represents a register which has no writable bits. + // It is not possible to set its value to anything other than 0. 
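+  // Illustrative: for such a default-constructed register,
+  // SetRawValue(0xffffffff) has no effect and GetRawValue() remains 0,
+  // because every bit is covered by write_ignore_mask_.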
+ SimSystemRegister() : value_(0), write_ignore_mask_(0xffffffff) {} + + uint32_t GetRawValue() const { return value_; } + VIXL_DEPRECATED("GetRawValue", uint32_t RawValue() const) { + return GetRawValue(); + } + + void SetRawValue(uint32_t new_value) { + value_ = (value_ & write_ignore_mask_) | (new_value & ~write_ignore_mask_); + } + + uint32_t ExtractBits(int msb, int lsb) const { + return ExtractUnsignedBitfield32(msb, lsb, value_); + } + VIXL_DEPRECATED("ExtractBits", uint32_t Bits(int msb, int lsb) const) { + return ExtractBits(msb, lsb); + } + + int32_t ExtractSignedBits(int msb, int lsb) const { + return ExtractSignedBitfield32(msb, lsb, value_); + } + VIXL_DEPRECATED("ExtractSignedBits", + int32_t SignedBits(int msb, int lsb) const) { + return ExtractSignedBits(msb, lsb); + } + + void SetBits(int msb, int lsb, uint32_t bits); + + // Default system register values. + static SimSystemRegister DefaultValueFor(SystemRegister id); + +#define DEFINE_GETTER(Name, HighBit, LowBit, Func) \ + uint32_t Get##Name() const { return this->Func(HighBit, LowBit); } \ + VIXL_DEPRECATED("Get" #Name, uint32_t Name() const) { return Get##Name(); } \ + void Set##Name(uint32_t bits) { SetBits(HighBit, LowBit, bits); } +#define DEFINE_WRITE_IGNORE_MASK(Name, Mask) \ + static const uint32_t Name##WriteIgnoreMask = ~static_cast(Mask); + + SYSTEM_REGISTER_FIELDS_LIST(DEFINE_GETTER, DEFINE_WRITE_IGNORE_MASK) + +#undef DEFINE_ZERO_BITS +#undef DEFINE_GETTER + + protected: + // Most system registers only implement a few of the bits in the word. Other + // bits are "read-as-zero, write-ignored". The write_ignore_mask argument + // describes the bits which are not modifiable. + SimSystemRegister(uint32_t value, uint32_t write_ignore_mask) + : value_(value), write_ignore_mask_(write_ignore_mask) {} + + uint32_t value_; + uint32_t write_ignore_mask_; +}; + + +class SimExclusiveLocalMonitor { + public: + SimExclusiveLocalMonitor() : kSkipClearProbability(8), seed_(0x87654321) { + Clear(); + } + + // Clear the exclusive monitor (like clrex). + void Clear() { + address_ = 0; + size_ = 0; + } + + // Clear the exclusive monitor most of the time. + void MaybeClear() { + if ((seed_ % kSkipClearProbability) != 0) { + Clear(); + } + + // Advance seed_ using a simple linear congruential generator. + seed_ = (seed_ * 48271) % 2147483647; + } + + // Mark the address range for exclusive access (like load-exclusive). + void MarkExclusive(uint64_t address, size_t size) { + address_ = address; + size_ = size; + } + + // Return true if the address range is marked (like store-exclusive). + // This helper doesn't implicitly clear the monitor. + bool IsExclusive(uint64_t address, size_t size) { + VIXL_ASSERT(size > 0); + // Be pedantic: Require both the address and the size to match. + return (size == size_) && (address == address_); + } + + private: + uint64_t address_; + size_t size_; + + const int kSkipClearProbability; + uint32_t seed_; +}; + + +// We can't accurate simulate the global monitor since it depends on external +// influences. Instead, this implementation occasionally causes accesses to +// fail, according to kPassProbability. +class SimExclusiveGlobalMonitor { + public: + SimExclusiveGlobalMonitor() : kPassProbability(8), seed_(0x87654321) {} + + bool IsExclusive(uint64_t address, size_t size) { + USE(address, size); + + bool pass = (seed_ % kPassProbability) != 0; + // Advance seed_ using a simple linear congruential generator. 
+ seed_ = (seed_ * 48271) % 2147483647; + return pass; + } + + private: + const int kPassProbability; + uint32_t seed_; +}; + +class Simulator : public DecoderVisitor { + public: + explicit Simulator(Decoder* decoder, + FILE* stream = stdout, + SimStack::Allocated stack = SimStack().Allocate()); + ~Simulator(); + + void ResetState(); + + // Run the simulator. + virtual void Run(); + void RunFrom(const Instruction* first); + + +#if defined(VIXL_HAS_ABI_SUPPORT) && __cplusplus >= 201103L && \ + (defined(__clang__) || GCC_VERSION_OR_NEWER(4, 9, 1)) + // Templated `RunFrom` version taking care of passing arguments and returning + // the result value. + // This allows code like: + // int32_t res = simulator.RunFrom(GenerateCode(), + // 0x123); + // It requires VIXL's ABI features, and C++11 or greater. + // Also, the initialisation of tuples is incorrect in GCC before 4.9.1: + // https://gcc.gnu.org/bugzilla/show_bug.cgi?id=51253 + template + R RunFrom(const Instruction* code, P... arguments) { + return RunFromStructHelper::Wrapper(this, code, arguments...); + } + + template + struct RunFromStructHelper { + static R Wrapper(Simulator* simulator, + const Instruction* code, + P... arguments) { + ABI abi; + std::tuple unused_tuple{ + // TODO: We currently do not support arguments passed on the stack. We + // could do so by using `WriteGenericOperand()` here, but may need to + // add features to handle situations where the stack is or is not set + // up. + (simulator->WriteCPURegister(abi.GetNextParameterGenericOperand
<P>
() + .GetCPURegister(), + arguments), + arguments)...}; + simulator->RunFrom(code); + return simulator->ReadGenericOperand(abi.GetReturnGenericOperand()); + } + }; + + // Partial specialization when the return type is `void`. + template + struct RunFromStructHelper { + static void Wrapper(Simulator* simulator, + const Instruction* code, + P... arguments) { + ABI abi; + std::tuple unused_tuple{ + // TODO: We currently do not support arguments passed on the stack. We + // could do so by using `WriteGenericOperand()` here, but may need to + // add features to handle situations where the stack is or is not set + // up. + (simulator->WriteCPURegister(abi.GetNextParameterGenericOperand
<P>
() + .GetCPURegister(), + arguments), + arguments)...}; + simulator->RunFrom(code); + } + }; +#endif + + // Execution ends when the PC hits this address. + static const Instruction* kEndOfSimAddress; + + // Simulation helpers. + const Instruction* ReadPc() const { return pc_; } + VIXL_DEPRECATED("ReadPc", const Instruction* pc() const) { return ReadPc(); } + + enum BranchLogMode { LogBranches, NoBranchLog }; + + void WritePc(const Instruction* new_pc, + BranchLogMode log_mode = LogBranches) { + if (log_mode == LogBranches) LogTakenBranch(new_pc); + pc_ = AddressUntag(new_pc); + pc_modified_ = true; + } + VIXL_DEPRECATED("WritePc", void set_pc(const Instruction* new_pc)) { + return WritePc(new_pc); + } + + void IncrementPc() { + if (!pc_modified_) { + pc_ = pc_->GetNextInstruction(); + } + } + VIXL_DEPRECATED("IncrementPc", void increment_pc()) { IncrementPc(); } + + BType ReadBType() const { return btype_; } + void WriteNextBType(BType btype) { next_btype_ = btype; } + void UpdateBType() { + btype_ = next_btype_; + next_btype_ = DefaultBType; + } + + // Helper function to determine BType for branches. + BType GetBTypeFromInstruction(const Instruction* instr) const; + + bool PcIsInGuardedPage() const { return guard_pages_; } + void SetGuardedPages(bool guard_pages) { guard_pages_ = guard_pages; } + + const Instruction* GetLastExecutedInstruction() const { return last_instr_; } + + void ExecuteInstruction() { + // The program counter should always be aligned. + VIXL_ASSERT(IsWordAligned(pc_)); + pc_modified_ = false; + + // On guarded pages, if BType is not zero, take an exception on any + // instruction other than BTI, PACI[AB]SP, HLT or BRK. + if (PcIsInGuardedPage() && (ReadBType() != DefaultBType)) { + if (pc_->IsPAuth()) { + Instr i = pc_->Mask(SystemPAuthMask); + if ((i != PACIASP) && (i != PACIBSP)) { + VIXL_ABORT_WITH_MSG( + "Executing non-BTI instruction with wrong BType."); + } + } else if (!pc_->IsBti() && !pc_->IsException()) { + VIXL_ABORT_WITH_MSG("Executing non-BTI instruction with wrong BType."); + } + } + + bool last_instr_was_movprfx = + (form_hash_ == "movprfx_z_z"_h) || (form_hash_ == "movprfx_z_p_z"_h); + + // decoder_->Decode(...) triggers at least the following visitors: + // 1. The CPUFeaturesAuditor (`cpu_features_auditor_`). + // 2. The PrintDisassembler (`print_disasm_`), if enabled. + // 3. The Simulator (`this`). + // User can add additional visitors at any point, but the Simulator requires + // that the ordering above is preserved. 
+ decoder_->Decode(pc_); + + if (last_instr_was_movprfx) { + VIXL_ASSERT(last_instr_ != NULL); + VIXL_CHECK(pc_->CanTakeSVEMovprfx(form_hash_, last_instr_)); + } + + last_instr_ = ReadPc(); + IncrementPc(); + LogAllWrittenRegisters(); + UpdateBType(); + + VIXL_CHECK(cpu_features_auditor_.InstructionIsAvailable()); + } + + virtual void Visit(Metadata* metadata, + const Instruction* instr) VIXL_OVERRIDE; + +#define DECLARE(A) virtual void Visit##A(const Instruction* instr); + VISITOR_LIST_THAT_RETURN(DECLARE) +#undef DECLARE +#define DECLARE(A) \ + VIXL_NO_RETURN virtual void Visit##A(const Instruction* instr); + VISITOR_LIST_THAT_DONT_RETURN(DECLARE) +#undef DECLARE + + void Simulate_PdT_PgZ_ZnT_ZmT(const Instruction* instr); + void Simulate_PdT_Xn_Xm(const Instruction* instr); + void Simulate_ZdB_Zn1B_Zn2B_imm(const Instruction* instr); + void Simulate_ZdB_ZnB_ZmB(const Instruction* instr); + void Simulate_ZdD_ZnD_ZmD_imm(const Instruction* instr); + void Simulate_ZdH_PgM_ZnS(const Instruction* instr); + void Simulate_ZdH_ZnH_ZmH_imm(const Instruction* instr); + void Simulate_ZdS_PgM_ZnD(const Instruction* instr); + void Simulate_ZdS_PgM_ZnS(const Instruction* instr); + void Simulate_ZdS_ZnS_ZmS_imm(const Instruction* instr); + void Simulate_ZdT_PgM_ZnT(const Instruction* instr); + void Simulate_ZdT_PgZ_ZnT_ZmT(const Instruction* instr); + void Simulate_ZdT_ZnT_ZmT(const Instruction* instr); + void Simulate_ZdT_ZnT_ZmTb(const Instruction* instr); + void Simulate_ZdT_ZnT_const(const Instruction* instr); + void Simulate_ZdaD_ZnS_ZmS_imm(const Instruction* instr); + void Simulate_ZdaH_ZnH_ZmH_imm_const(const Instruction* instr); + void Simulate_ZdaS_ZnH_ZmH(const Instruction* instr); + void Simulate_ZdaS_ZnH_ZmH_imm(const Instruction* instr); + void Simulate_ZdaS_ZnS_ZmS_imm_const(const Instruction* instr); + void Simulate_ZdaT_PgM_ZnTb(const Instruction* instr); + void Simulate_ZdaT_ZnT_ZmT(const Instruction* instr); + void Simulate_ZdaT_ZnT_const(const Instruction* instr); + void Simulate_ZdaT_ZnTb_ZmTb(const Instruction* instr); + void Simulate_ZdnT_PgM_ZdnT_ZmT(const Instruction* instr); + void Simulate_ZdnT_PgM_ZdnT_const(const Instruction* instr); + void Simulate_ZdnT_ZdnT_ZmT_const(const Instruction* instr); + void Simulate_ZtD_PgZ_ZnD_Xm(const Instruction* instr); + void Simulate_ZtD_Pg_ZnD_Xm(const Instruction* instr); + void Simulate_ZtS_PgZ_ZnS_Xm(const Instruction* instr); + void Simulate_ZtS_Pg_ZnS_Xm(const Instruction* instr); + + void SimulateSVEHalvingAddSub(const Instruction* instr); + void SimulateSVESaturatingArithmetic(const Instruction* instr); + void SimulateSVEIntArithPair(const Instruction* instr); + void SimulateSVENarrow(const Instruction* instr); + void SimulateSVEInterleavedArithLong(const Instruction* instr); + void SimulateSVEShiftLeftImm(const Instruction* instr); + void SimulateSVEAddSubCarry(const Instruction* instr); + void SimulateSVEAddSubHigh(const Instruction* instr); + void SimulateSVEIntMulLongVec(const Instruction* instr); + void SimulateSVESaturatingIntMulLongIdx(const Instruction* instr); + void SimulateSVEExclusiveOrRotate(const Instruction* instr); + void SimulateSVEBitwiseTernary(const Instruction* instr); + void SimulateSVEComplexDotProduct(const Instruction* instr); + void SimulateSVEMulIndex(const Instruction* instr); + void SimulateSVEMlaMlsIndex(const Instruction* instr); + void SimulateSVEComplexIntMulAdd(const Instruction* instr); + void SimulateSVESaturatingMulAddHigh(const Instruction* instr); + void 
SimulateSVESaturatingMulHighIndex(const Instruction* instr); + void SimulateSVEFPConvertLong(const Instruction* instr); + void SimulateMatrixMul(const Instruction* instr); + void SimulateSVEFPMatrixMul(const Instruction* instr); + void SimulateNEONMulByElementLong(const Instruction* instr); + void SimulateNEONFPMulByElement(const Instruction* instr); + void SimulateNEONFPMulByElementLong(const Instruction* instr); + void SimulateNEONComplexMulByElement(const Instruction* instr); + void SimulateNEONDotProdByElement(const Instruction* instr); + void SimulateMTEAddSubTag(const Instruction* instr); + void SimulateMTETagMaskInsert(const Instruction* instr); + void SimulateMTESubPointer(const Instruction* instr); + void SimulateMTELoadTag(const Instruction* instr); + void SimulateMTEStoreTag(const Instruction* instr); + void SimulateMTEStoreTagPair(const Instruction* instr); + void Simulate_XdSP_XnSP_Xm(const Instruction* instr); + void SimulateCpy(const Instruction* instr); + void SimulateCpyFP(const Instruction* instr); + void SimulateCpyP(const Instruction* instr); + void SimulateCpyM(const Instruction* instr); + void SimulateCpyE(const Instruction* instr); + void SimulateSetP(const Instruction* instr); + void SimulateSetM(const Instruction* instr); + void SimulateSetE(const Instruction* instr); + void SimulateSetGP(const Instruction* instr); + void SimulateSetGM(const Instruction* instr); + void SimulateSignedMinMax(const Instruction* instr); + void SimulateUnsignedMinMax(const Instruction* instr); + + + // Integer register accessors. + + // Basic accessor: Read the register as the specified type. + template + T ReadRegister(unsigned code, Reg31Mode r31mode = Reg31IsZeroRegister) const { + VIXL_ASSERT( + code < kNumberOfRegisters || + ((r31mode == Reg31IsZeroRegister) && (code == kSPRegInternalCode))); + if ((code == 31) && (r31mode == Reg31IsZeroRegister)) { + T result; + memset(&result, 0, sizeof(result)); + return result; + } + if ((r31mode == Reg31IsZeroRegister) && (code == kSPRegInternalCode)) { + code = 31; + } + return registers_[code].Get(); + } + template + VIXL_DEPRECATED("ReadRegister", + T reg(unsigned code, Reg31Mode r31mode = Reg31IsZeroRegister) + const) { + return ReadRegister(code, r31mode); + } + + // Common specialized accessors for the ReadRegister() template. + int32_t ReadWRegister(unsigned code, + Reg31Mode r31mode = Reg31IsZeroRegister) const { + return ReadRegister(code, r31mode); + } + VIXL_DEPRECATED("ReadWRegister", + int32_t wreg(unsigned code, + Reg31Mode r31mode = Reg31IsZeroRegister) const) { + return ReadWRegister(code, r31mode); + } + + int64_t ReadXRegister(unsigned code, + Reg31Mode r31mode = Reg31IsZeroRegister) const { + return ReadRegister(code, r31mode); + } + VIXL_DEPRECATED("ReadXRegister", + int64_t xreg(unsigned code, + Reg31Mode r31mode = Reg31IsZeroRegister) const) { + return ReadXRegister(code, r31mode); + } + + SimPRegister& ReadPRegister(unsigned code) { + VIXL_ASSERT(code < kNumberOfPRegisters); + return pregisters_[code]; + } + + SimFFRRegister& ReadFFR() { return ffr_register_; } + + // As above, with parameterized size and return type. The value is + // either zero-extended or truncated to fit, as required. 
+ template + T ReadRegister(unsigned size, + unsigned code, + Reg31Mode r31mode = Reg31IsZeroRegister) const { + uint64_t raw; + switch (size) { + case kWRegSize: + raw = ReadRegister(code, r31mode); + break; + case kXRegSize: + raw = ReadRegister(code, r31mode); + break; + default: + VIXL_UNREACHABLE(); + return 0; + } + + T result; + VIXL_STATIC_ASSERT(sizeof(result) <= sizeof(raw)); + // Copy the result and truncate to fit. This assumes a little-endian host. + memcpy(&result, &raw, sizeof(result)); + return result; + } + template + VIXL_DEPRECATED("ReadRegister", + T reg(unsigned size, + unsigned code, + Reg31Mode r31mode = Reg31IsZeroRegister) const) { + return ReadRegister(size, code, r31mode); + } + + // Use int64_t by default if T is not specified. + int64_t ReadRegister(unsigned size, + unsigned code, + Reg31Mode r31mode = Reg31IsZeroRegister) const { + return ReadRegister(size, code, r31mode); + } + VIXL_DEPRECATED("ReadRegister", + int64_t reg(unsigned size, + unsigned code, + Reg31Mode r31mode = Reg31IsZeroRegister) const) { + return ReadRegister(size, code, r31mode); + } + + enum RegLogMode { LogRegWrites, NoRegLog }; + + // Write 'value' into an integer register. The value is zero-extended. This + // behaviour matches AArch64 register writes. + // + // SP may be specified in one of two ways: + // - (code == kSPRegInternalCode) && (r31mode == Reg31IsZeroRegister) + // - (code == 31) && (r31mode == Reg31IsStackPointer) + template + void WriteRegister(unsigned code, + T value, + RegLogMode log_mode = LogRegWrites, + Reg31Mode r31mode = Reg31IsZeroRegister) { + if (sizeof(T) < kWRegSizeInBytes) { + // We use a C-style cast on purpose here. + // Since we do not have access to 'constepxr if', the casts in this `if` + // must be valid even if we know the code will never be executed, in + // particular when `T` is a pointer type. + int64_t tmp_64bit = (int64_t)value; + int32_t tmp_32bit = static_cast(tmp_64bit); + WriteRegister(code, tmp_32bit, log_mode, r31mode); + return; + } + + VIXL_ASSERT((sizeof(T) == kWRegSizeInBytes) || + (sizeof(T) == kXRegSizeInBytes)); + VIXL_ASSERT( + (code < kNumberOfRegisters) || + ((r31mode == Reg31IsZeroRegister) && (code == kSPRegInternalCode))); + + if (code == 31) { + if (r31mode == Reg31IsZeroRegister) { + // Discard writes to the zero register. + return; + } else { + code = kSPRegInternalCode; + } + } + + // registers_[31] is the stack pointer. + VIXL_STATIC_ASSERT((kSPRegInternalCode % kNumberOfRegisters) == 31); + registers_[code % kNumberOfRegisters].Write(value); + + if (log_mode == LogRegWrites) { + LogRegister(code, GetPrintRegisterFormatForSize(sizeof(T))); + } + } + template + VIXL_DEPRECATED("WriteRegister", + void set_reg(unsigned code, + T value, + RegLogMode log_mode = LogRegWrites, + Reg31Mode r31mode = Reg31IsZeroRegister)) { + WriteRegister(code, value, log_mode, r31mode); + } + + // Common specialized accessors for the set_reg() template. 
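+ // For example, the stack pointer can be written in either of the two forms
+ // described above (an illustrative sketch; `sim` is a Simulator instance):
+ //
+ //   sim.WriteRegister(kSPRegInternalCode, new_sp);  // Reg31IsZeroRegister.
+ //   sim.WriteRegister(31, new_sp, Simulator::LogRegWrites,
+ //                     Reg31IsStackPointer);
+ //
+ // The WriteSp() helper declared further down wraps the second form.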
+ void WriteWRegister(unsigned code, + int32_t value, + RegLogMode log_mode = LogRegWrites, + Reg31Mode r31mode = Reg31IsZeroRegister) { + WriteRegister(code, value, log_mode, r31mode); + } + VIXL_DEPRECATED("WriteWRegister", + void set_wreg(unsigned code, + int32_t value, + RegLogMode log_mode = LogRegWrites, + Reg31Mode r31mode = Reg31IsZeroRegister)) { + WriteWRegister(code, value, log_mode, r31mode); + } + + void WriteXRegister(unsigned code, + int64_t value, + RegLogMode log_mode = LogRegWrites, + Reg31Mode r31mode = Reg31IsZeroRegister) { + WriteRegister(code, value, log_mode, r31mode); + } + VIXL_DEPRECATED("WriteXRegister", + void set_xreg(unsigned code, + int64_t value, + RegLogMode log_mode = LogRegWrites, + Reg31Mode r31mode = Reg31IsZeroRegister)) { + WriteXRegister(code, value, log_mode, r31mode); + } + + // As above, with parameterized size and type. The value is either + // zero-extended or truncated to fit, as required. + template + void WriteRegister(unsigned size, + unsigned code, + T value, + RegLogMode log_mode = LogRegWrites, + Reg31Mode r31mode = Reg31IsZeroRegister) { + // Zero-extend the input. + uint64_t raw = 0; + VIXL_STATIC_ASSERT(sizeof(value) <= sizeof(raw)); + memcpy(&raw, &value, sizeof(value)); + + // Write (and possibly truncate) the value. + switch (size) { + case kWRegSize: + WriteRegister(code, static_cast(raw), log_mode, r31mode); + break; + case kXRegSize: + WriteRegister(code, raw, log_mode, r31mode); + break; + default: + VIXL_UNREACHABLE(); + return; + } + } + template + VIXL_DEPRECATED("WriteRegister", + void set_reg(unsigned size, + unsigned code, + T value, + RegLogMode log_mode = LogRegWrites, + Reg31Mode r31mode = Reg31IsZeroRegister)) { + WriteRegister(size, code, value, log_mode, r31mode); + } + + // Common specialized accessors for the set_reg() template. + + // Commonly-used special cases. + template + void WriteLr(T value) { + WriteRegister(kLinkRegCode, value); + } + template + VIXL_DEPRECATED("WriteLr", void set_lr(T value)) { + WriteLr(value); + } + + template + void WriteSp(T value) { + WriteRegister(31, value, LogRegWrites, Reg31IsStackPointer); + } + template + VIXL_DEPRECATED("WriteSp", void set_sp(T value)) { + WriteSp(value); + } + + // Vector register accessors. + // These are equivalent to the integer register accessors, but for vector + // registers. + + // A structure for representing a 128-bit Q register. + struct qreg_t { + uint8_t val[kQRegSizeInBytes]; + }; + + // A structure for representing a SVE Z register. + struct zreg_t { + uint8_t val[kZRegMaxSizeInBytes]; + }; + + // Basic accessor: read the register as the specified type. + template + T ReadVRegister(unsigned code) const { + VIXL_STATIC_ASSERT( + (sizeof(T) == kBRegSizeInBytes) || (sizeof(T) == kHRegSizeInBytes) || + (sizeof(T) == kSRegSizeInBytes) || (sizeof(T) == kDRegSizeInBytes) || + (sizeof(T) == kQRegSizeInBytes)); + VIXL_ASSERT(code < kNumberOfVRegisters); + + return vregisters_[code].Get(); + } + template + VIXL_DEPRECATED("ReadVRegister", T vreg(unsigned code) const) { + return ReadVRegister(code); + } + + // Common specialized accessors for the vreg() template. 
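+ // For instance, v7 can be read either as a double (its low 64 bits) or as
+ // the full 128-bit value (a non-authoritative sketch using the accessor
+ // above):
+ //
+ //   double d = sim.ReadVRegister<double>(7);
+ //   Simulator::qreg_t q = sim.ReadVRegister<Simulator::qreg_t>(7);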
+ int8_t ReadBRegister(unsigned code) const { + return ReadVRegister(code); + } + VIXL_DEPRECATED("ReadBRegister", int8_t breg(unsigned code) const) { + return ReadBRegister(code); + } + + vixl::internal::SimFloat16 ReadHRegister(unsigned code) const { + return RawbitsToFloat16(ReadHRegisterBits(code)); + } + VIXL_DEPRECATED("ReadHRegister", int16_t hreg(unsigned code) const) { + return Float16ToRawbits(ReadHRegister(code)); + } + + uint16_t ReadHRegisterBits(unsigned code) const { + return ReadVRegister(code); + } + + float ReadSRegister(unsigned code) const { + return ReadVRegister(code); + } + VIXL_DEPRECATED("ReadSRegister", float sreg(unsigned code) const) { + return ReadSRegister(code); + } + + uint32_t ReadSRegisterBits(unsigned code) const { + return ReadVRegister(code); + } + VIXL_DEPRECATED("ReadSRegisterBits", + uint32_t sreg_bits(unsigned code) const) { + return ReadSRegisterBits(code); + } + + double ReadDRegister(unsigned code) const { + return ReadVRegister(code); + } + VIXL_DEPRECATED("ReadDRegister", double dreg(unsigned code) const) { + return ReadDRegister(code); + } + + uint64_t ReadDRegisterBits(unsigned code) const { + return ReadVRegister(code); + } + VIXL_DEPRECATED("ReadDRegisterBits", + uint64_t dreg_bits(unsigned code) const) { + return ReadDRegisterBits(code); + } + + qreg_t ReadQRegister(unsigned code) const { + return ReadVRegister(code); + } + VIXL_DEPRECATED("ReadQRegister", qreg_t qreg(unsigned code) const) { + return ReadQRegister(code); + } + + // As above, with parameterized size and return type. The value is + // either zero-extended or truncated to fit, as required. + template + T ReadVRegister(unsigned size, unsigned code) const { + uint64_t raw = 0; + T result; + + switch (size) { + case kSRegSize: + raw = ReadVRegister(code); + break; + case kDRegSize: + raw = ReadVRegister(code); + break; + default: + VIXL_UNREACHABLE(); + break; + } + + VIXL_STATIC_ASSERT(sizeof(result) <= sizeof(raw)); + // Copy the result and truncate to fit. This assumes a little-endian host. + memcpy(&result, &raw, sizeof(result)); + return result; + } + template + VIXL_DEPRECATED("ReadVRegister", T vreg(unsigned size, unsigned code) const) { + return ReadVRegister(size, code); + } + + SimVRegister& ReadVRegister(unsigned code) { return vregisters_[code]; } + VIXL_DEPRECATED("ReadVRegister", SimVRegister& vreg(unsigned code)) { + return ReadVRegister(code); + } + + // Basic accessor: Write the specified value. + template + void WriteVRegister(unsigned code, + T value, + RegLogMode log_mode = LogRegWrites) { + VIXL_STATIC_ASSERT((sizeof(value) == kBRegSizeInBytes) || + (sizeof(value) == kHRegSizeInBytes) || + (sizeof(value) == kSRegSizeInBytes) || + (sizeof(value) == kDRegSizeInBytes) || + (sizeof(value) == kQRegSizeInBytes) || + (sizeof(value) == kZRegMaxSizeInBytes)); + VIXL_ASSERT(code < kNumberOfVRegisters); + vregisters_[code].Write(value); + + if (log_mode == LogRegWrites) { + LogVRegister(code, GetPrintRegisterFormat(value)); + } + } + template + VIXL_DEPRECATED("WriteVRegister", + void set_vreg(unsigned code, + T value, + RegLogMode log_mode = LogRegWrites)) { + WriteVRegister(code, value, log_mode); + } + + // Common specialized accessors for the WriteVRegister() template. 
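+ // For example, the two calls below set s3 to 1.5f in equivalent ways, one
+ // taking the float value and the other its raw IEEE-754 bit pattern
+ // (0x3fc00000 encodes 1.5f; shown for illustration using the accessors
+ // declared just below):
+ //
+ //   sim.WriteSRegister(3, 1.5f);
+ //   sim.WriteSRegisterBits(3, 0x3fc00000);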
+ void WriteBRegister(unsigned code, + int8_t value, + RegLogMode log_mode = LogRegWrites) { + WriteVRegister(code, value, log_mode); + } + VIXL_DEPRECATED("WriteBRegister", + void set_breg(unsigned code, + int8_t value, + RegLogMode log_mode = LogRegWrites)) { + return WriteBRegister(code, value, log_mode); + } + + void WriteHRegister(unsigned code, + vixl::internal::SimFloat16 value, + RegLogMode log_mode = LogRegWrites) { + WriteVRegister(code, Float16ToRawbits(value), log_mode); + } + + void WriteHRegister(unsigned code, + int16_t value, + RegLogMode log_mode = LogRegWrites) { + WriteVRegister(code, value, log_mode); + } + VIXL_DEPRECATED("WriteHRegister", + void set_hreg(unsigned code, + int16_t value, + RegLogMode log_mode = LogRegWrites)) { + return WriteHRegister(code, value, log_mode); + } + + void WriteSRegister(unsigned code, + float value, + RegLogMode log_mode = LogRegWrites) { + WriteVRegister(code, value, log_mode); + } + VIXL_DEPRECATED("WriteSRegister", + void set_sreg(unsigned code, + float value, + RegLogMode log_mode = LogRegWrites)) { + WriteSRegister(code, value, log_mode); + } + + void WriteSRegisterBits(unsigned code, + uint32_t value, + RegLogMode log_mode = LogRegWrites) { + WriteVRegister(code, value, log_mode); + } + VIXL_DEPRECATED("WriteSRegisterBits", + void set_sreg_bits(unsigned code, + uint32_t value, + RegLogMode log_mode = LogRegWrites)) { + WriteSRegisterBits(code, value, log_mode); + } + + void WriteDRegister(unsigned code, + double value, + RegLogMode log_mode = LogRegWrites) { + WriteVRegister(code, value, log_mode); + } + VIXL_DEPRECATED("WriteDRegister", + void set_dreg(unsigned code, + double value, + RegLogMode log_mode = LogRegWrites)) { + WriteDRegister(code, value, log_mode); + } + + void WriteDRegisterBits(unsigned code, + uint64_t value, + RegLogMode log_mode = LogRegWrites) { + WriteVRegister(code, value, log_mode); + } + VIXL_DEPRECATED("WriteDRegisterBits", + void set_dreg_bits(unsigned code, + uint64_t value, + RegLogMode log_mode = LogRegWrites)) { + WriteDRegisterBits(code, value, log_mode); + } + + void WriteQRegister(unsigned code, + qreg_t value, + RegLogMode log_mode = LogRegWrites) { + WriteVRegister(code, value, log_mode); + } + VIXL_DEPRECATED("WriteQRegister", + void set_qreg(unsigned code, + qreg_t value, + RegLogMode log_mode = LogRegWrites)) { + WriteQRegister(code, value, log_mode); + } + + void WriteZRegister(unsigned code, + zreg_t value, + RegLogMode log_mode = LogRegWrites) { + WriteVRegister(code, value, log_mode); + } + + template + T ReadRegister(Register reg) const { + return ReadRegister(reg.GetCode(), Reg31IsZeroRegister); + } + + template + void WriteRegister(Register reg, + T value, + RegLogMode log_mode = LogRegWrites) { + WriteRegister(reg.GetCode(), value, log_mode, Reg31IsZeroRegister); + } + + template + T ReadVRegister(VRegister vreg) const { + return ReadVRegister(vreg.GetCode()); + } + + template + void WriteVRegister(VRegister vreg, + T value, + RegLogMode log_mode = LogRegWrites) { + WriteVRegister(vreg.GetCode(), value, log_mode); + } + + template + T ReadCPURegister(CPURegister reg) const { + if (reg.IsVRegister()) { + return ReadVRegister(VRegister(reg)); + } else { + return ReadRegister(Register(reg)); + } + } + + template + void WriteCPURegister(CPURegister reg, + T value, + RegLogMode log_mode = LogRegWrites) { + if (reg.IsVRegister()) { + WriteVRegister(VRegister(reg), value, log_mode); + } else { + WriteRegister(Register(reg), value, log_mode); + } + } + + template + T MemRead(A address) const 
{ + Instruction const* pc = ReadPc(); + return memory_.Read(address, pc); + } + + template + void MemWrite(A address, T value) const { + Instruction const* pc = ReadPc(); + return memory_.Write(address, value, pc); + } + + template + uint64_t MemReadUint(int size_in_bytes, A address) const { + return memory_.ReadUint(size_in_bytes, address); + } + + template + int64_t MemReadInt(int size_in_bytes, A address) const { + return memory_.ReadInt(size_in_bytes, address); + } + + template + void MemWrite(int size_in_bytes, A address, uint64_t value) const { + return memory_.Write(size_in_bytes, address, value); + } + + void LoadLane(LogicVRegister dst, + VectorFormat vform, + int index, + uint64_t addr) const { + unsigned msize_in_bytes = LaneSizeInBytesFromFormat(vform); + LoadUintToLane(dst, vform, msize_in_bytes, index, addr); + } + + void LoadUintToLane(LogicVRegister dst, + VectorFormat vform, + unsigned msize_in_bytes, + int index, + uint64_t addr) const { + dst.SetUint(vform, index, MemReadUint(msize_in_bytes, addr)); + } + + void LoadIntToLane(LogicVRegister dst, + VectorFormat vform, + unsigned msize_in_bytes, + int index, + uint64_t addr) const { + dst.SetInt(vform, index, MemReadInt(msize_in_bytes, addr)); + } + + void StoreLane(const LogicVRegister& src, + VectorFormat vform, + int index, + uint64_t addr) const { + unsigned msize_in_bytes = LaneSizeInBytesFromFormat(vform); + MemWrite(msize_in_bytes, addr, src.Uint(vform, index)); + } + + uint64_t ComputeMemOperandAddress(const MemOperand& mem_op) const; + + template + T ReadGenericOperand(GenericOperand operand) const { + if (operand.IsCPURegister()) { + return ReadCPURegister(operand.GetCPURegister()); + } else { + VIXL_ASSERT(operand.IsMemOperand()); + return MemRead(ComputeMemOperandAddress(operand.GetMemOperand())); + } + } + + template + void WriteGenericOperand(GenericOperand operand, + T value, + RegLogMode log_mode = LogRegWrites) { + if (operand.IsCPURegister()) { + // Outside SIMD, registers are 64-bit or a subset of a 64-bit register. If + // the width of the value to write is smaller than 64 bits, the unused + // bits may contain unrelated values that the code following this write + // needs to handle gracefully. + // Here we fill the unused bits with a predefined pattern to catch issues + // early. + VIXL_ASSERT(operand.GetCPURegister().GetSizeInBits() <= 64); + uint64_t raw = 0xdeadda1adeadda1a; + memcpy(&raw, &value, sizeof(value)); + WriteCPURegister(operand.GetCPURegister(), raw, log_mode); + } else { + VIXL_ASSERT(operand.IsMemOperand()); + MemWrite(ComputeMemOperandAddress(operand.GetMemOperand()), value); + } + } + + bool ReadN() const { return nzcv_.GetN() != 0; } + VIXL_DEPRECATED("ReadN", bool N() const) { return ReadN(); } + + bool ReadZ() const { return nzcv_.GetZ() != 0; } + VIXL_DEPRECATED("ReadZ", bool Z() const) { return ReadZ(); } + + bool ReadC() const { return nzcv_.GetC() != 0; } + VIXL_DEPRECATED("ReadC", bool C() const) { return ReadC(); } + + bool ReadV() const { return nzcv_.GetV() != 0; } + VIXL_DEPRECATED("ReadV", bool V() const) { return ReadV(); } + + SimSystemRegister& ReadNzcv() { return nzcv_; } + VIXL_DEPRECATED("ReadNzcv", SimSystemRegister& nzcv()) { return ReadNzcv(); } + + // TODO: Find a way to make the fpcr_ members return the proper types, so + // these accessors are not necessary. 
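+ // Note on WriteGenericOperand() above: because the unused high-order bits
+ // are filled with the 0xdeadda1a pattern, writing a 32-bit value such as
+ // 0x12345678 through a 64-bit register operand leaves that register holding
+ // 0xdeadda1a12345678 on a little-endian host (a worked example of the
+ // padding described above).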
+ FPRounding ReadRMode() const { + return static_cast(fpcr_.GetRMode()); + } + VIXL_DEPRECATED("ReadRMode", FPRounding RMode()) { return ReadRMode(); } + + UseDefaultNaN ReadDN() const { + return fpcr_.GetDN() != 0 ? kUseDefaultNaN : kIgnoreDefaultNaN; + } + + VIXL_DEPRECATED("ReadDN", bool DN()) { + return ReadDN() == kUseDefaultNaN ? true : false; + } + + SimSystemRegister& ReadFpcr() { return fpcr_; } + VIXL_DEPRECATED("ReadFpcr", SimSystemRegister& fpcr()) { return ReadFpcr(); } + + // Specify relevant register formats for Print(V)Register and related helpers. + enum PrintRegisterFormat { + // The lane size. + kPrintRegLaneSizeB = 0 << 0, + kPrintRegLaneSizeH = 1 << 0, + kPrintRegLaneSizeS = 2 << 0, + kPrintRegLaneSizeW = kPrintRegLaneSizeS, + kPrintRegLaneSizeD = 3 << 0, + kPrintRegLaneSizeX = kPrintRegLaneSizeD, + kPrintRegLaneSizeQ = 4 << 0, + kPrintRegLaneSizeUnknown = 5 << 0, + + kPrintRegLaneSizeOffset = 0, + kPrintRegLaneSizeMask = 7 << 0, + + // The overall register size. + kPrintRegAsScalar = 0, + kPrintRegAsDVector = 1 << 3, + kPrintRegAsQVector = 2 << 3, + kPrintRegAsSVEVector = 3 << 3, + + kPrintRegAsVectorMask = 3 << 3, + + // Indicate floating-point format lanes. (This flag is only supported for + // S-, H-, and D-sized lanes.) + kPrintRegAsFP = 1 << 5, + + // With this flag, print helpers won't check that the upper bits are zero. + // This also forces the register name to be printed with the `reg` + // format. + // + // The flag is supported with any PrintRegisterFormat other than those with + // kPrintRegAsSVEVector. + kPrintRegPartial = 1 << 6, + +// Supported combinations. +// These exist so that they can be referred to by name, but also because C++ +// does not allow enum types to hold values that aren't explicitly +// enumerated, and we want to be able to combine the above flags. + +// Scalar formats. 
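+// As an illustration, the flag bits above combine into the named formats
+// declared below as follows (derived from the bit assignments in this enum):
+//
+//   kPrintSRegFP  == kPrintRegLaneSizeS | kPrintRegAsScalar  | kPrintRegAsFP
+//   kPrintReg4SFP == kPrintRegLaneSizeS | kPrintRegAsQVector | kPrintRegAsFP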
+#define VIXL_DECL_PRINT_REG_SCALAR(size) \ + kPrint##size##Reg = kPrintRegLaneSize##size | kPrintRegAsScalar, \ + kPrint##size##RegPartial = kPrintRegLaneSize##size | kPrintRegPartial +#define VIXL_DECL_PRINT_REG_SCALAR_FP(size) \ + VIXL_DECL_PRINT_REG_SCALAR(size) \ + , kPrint##size##RegFP = kPrint##size##Reg | kPrintRegAsFP, \ + kPrint##size##RegPartialFP = kPrint##size##RegPartial | kPrintRegAsFP + VIXL_DECL_PRINT_REG_SCALAR(W), + VIXL_DECL_PRINT_REG_SCALAR(X), + VIXL_DECL_PRINT_REG_SCALAR_FP(H), + VIXL_DECL_PRINT_REG_SCALAR_FP(S), + VIXL_DECL_PRINT_REG_SCALAR_FP(D), + VIXL_DECL_PRINT_REG_SCALAR(Q), +#undef VIXL_DECL_PRINT_REG_SCALAR +#undef VIXL_DECL_PRINT_REG_SCALAR_FP + +#define VIXL_DECL_PRINT_REG_NEON(count, type, size) \ + kPrintReg##count##type = kPrintRegLaneSize##type | kPrintRegAs##size, \ + kPrintReg##count##type##Partial = kPrintReg##count##type | kPrintRegPartial +#define VIXL_DECL_PRINT_REG_NEON_FP(count, type, size) \ + VIXL_DECL_PRINT_REG_NEON(count, type, size) \ + , kPrintReg##count##type##FP = kPrintReg##count##type | kPrintRegAsFP, \ + kPrintReg##count##type##PartialFP = \ + kPrintReg##count##type##Partial | kPrintRegAsFP + VIXL_DECL_PRINT_REG_NEON(1, B, Scalar), + VIXL_DECL_PRINT_REG_NEON(8, B, DVector), + VIXL_DECL_PRINT_REG_NEON(16, B, QVector), + VIXL_DECL_PRINT_REG_NEON_FP(1, H, Scalar), + VIXL_DECL_PRINT_REG_NEON_FP(4, H, DVector), + VIXL_DECL_PRINT_REG_NEON_FP(8, H, QVector), + VIXL_DECL_PRINT_REG_NEON_FP(1, S, Scalar), + VIXL_DECL_PRINT_REG_NEON_FP(2, S, DVector), + VIXL_DECL_PRINT_REG_NEON_FP(4, S, QVector), + VIXL_DECL_PRINT_REG_NEON_FP(1, D, Scalar), + VIXL_DECL_PRINT_REG_NEON_FP(2, D, QVector), + VIXL_DECL_PRINT_REG_NEON(1, Q, Scalar), +#undef VIXL_DECL_PRINT_REG_NEON +#undef VIXL_DECL_PRINT_REG_NEON_FP + +#define VIXL_DECL_PRINT_REG_SVE(type) \ + kPrintRegVn##type = kPrintRegLaneSize##type | kPrintRegAsSVEVector, \ + kPrintRegVn##type##Partial = kPrintRegVn##type | kPrintRegPartial +#define VIXL_DECL_PRINT_REG_SVE_FP(type) \ + VIXL_DECL_PRINT_REG_SVE(type) \ + , kPrintRegVn##type##FP = kPrintRegVn##type | kPrintRegAsFP, \ + kPrintRegVn##type##PartialFP = kPrintRegVn##type##Partial | kPrintRegAsFP + VIXL_DECL_PRINT_REG_SVE(B), + VIXL_DECL_PRINT_REG_SVE_FP(H), + VIXL_DECL_PRINT_REG_SVE_FP(S), + VIXL_DECL_PRINT_REG_SVE_FP(D), + VIXL_DECL_PRINT_REG_SVE(Q) +#undef VIXL_DECL_PRINT_REG_SVE +#undef VIXL_DECL_PRINT_REG_SVE_FP + }; + + // Return `format` with the kPrintRegPartial flag set. + PrintRegisterFormat GetPrintRegPartial(PrintRegisterFormat format) { + // Every PrintRegisterFormat has a kPrintRegPartial counterpart, so the + // result of this cast will always be well-defined. + return static_cast(format | kPrintRegPartial); + } + + // For SVE formats, return the format of a Q register part of it. + PrintRegisterFormat GetPrintRegAsQChunkOfSVE(PrintRegisterFormat format) { + VIXL_ASSERT((format & kPrintRegAsVectorMask) == kPrintRegAsSVEVector); + // Keep the FP and lane size fields. + int q_format = format & (kPrintRegLaneSizeMask | kPrintRegAsFP); + // The resulting format must always be partial, because we're not formatting + // the whole Z register. + q_format |= (kPrintRegAsQVector | kPrintRegPartial); + + // This cast is always safe because NEON QVector formats support every + // combination of FP and lane size that SVE formats do. 
+ return static_cast(q_format); + } + + unsigned GetPrintRegLaneSizeInBytesLog2(PrintRegisterFormat format) { + VIXL_ASSERT((format & kPrintRegLaneSizeMask) != kPrintRegLaneSizeUnknown); + return (format & kPrintRegLaneSizeMask) >> kPrintRegLaneSizeOffset; + } + + unsigned GetPrintRegLaneSizeInBytes(PrintRegisterFormat format) { + return 1 << GetPrintRegLaneSizeInBytesLog2(format); + } + + unsigned GetPrintRegSizeInBytesLog2(PrintRegisterFormat format) { + switch (format & kPrintRegAsVectorMask) { + case kPrintRegAsScalar: + return GetPrintRegLaneSizeInBytesLog2(format); + case kPrintRegAsDVector: + return kDRegSizeInBytesLog2; + case kPrintRegAsQVector: + return kQRegSizeInBytesLog2; + default: + case kPrintRegAsSVEVector: + // We print SVE vectors in Q-sized chunks. These need special handling, + // and it's probably an error to call this function in that case. + VIXL_UNREACHABLE(); + return kQRegSizeInBytesLog2; + } + } + + unsigned GetPrintRegSizeInBytes(PrintRegisterFormat format) { + return 1 << GetPrintRegSizeInBytesLog2(format); + } + + unsigned GetPrintRegSizeInBitsLog2(PrintRegisterFormat format) { + return GetPrintRegSizeInBytesLog2(format) + kBitsPerByteLog2; + } + + unsigned GetPrintRegSizeInBits(PrintRegisterFormat format) { + return 1 << GetPrintRegSizeInBitsLog2(format); + } + + const char* GetPartialRegSuffix(PrintRegisterFormat format) { + switch (GetPrintRegSizeInBitsLog2(format)) { + case kBRegSizeLog2: + return "<7:0>"; + case kHRegSizeLog2: + return "<15:0>"; + case kSRegSizeLog2: + return "<31:0>"; + case kDRegSizeLog2: + return "<63:0>"; + case kQRegSizeLog2: + return "<127:0>"; + } + VIXL_UNREACHABLE(); + return ""; + } + + unsigned GetPrintRegLaneCount(PrintRegisterFormat format) { + unsigned reg_size_log2 = GetPrintRegSizeInBytesLog2(format); + unsigned lane_size_log2 = GetPrintRegLaneSizeInBytesLog2(format); + VIXL_ASSERT(reg_size_log2 >= lane_size_log2); + return 1 << (reg_size_log2 - lane_size_log2); + } + + uint16_t GetPrintRegLaneMask(PrintRegisterFormat format) { + int print_as = format & kPrintRegAsVectorMask; + if (print_as == kPrintRegAsScalar) return 1; + + // Vector formats, including SVE formats printed in Q-sized chunks. + static const uint16_t masks[] = {0xffff, 0x5555, 0x1111, 0x0101, 0x0001}; + unsigned size_in_bytes_log2 = GetPrintRegLaneSizeInBytesLog2(format); + VIXL_ASSERT(size_in_bytes_log2 < ArrayLength(masks)); + uint16_t mask = masks[size_in_bytes_log2]; + + // Exclude lanes that aren't visible in D vectors. 
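+ // For example, for kPrintReg2S the S-lane mask 0x1111 selected above is
+ // reduced to 0x0011 by the masking below, since only two S lanes are
+ // visible in a D vector (a worked example of this step).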
+ if (print_as == kPrintRegAsDVector) mask &= 0x00ff; + return mask; + } + + PrintRegisterFormat GetPrintRegisterFormatForSize(unsigned reg_size, + unsigned lane_size); + + PrintRegisterFormat GetPrintRegisterFormatForSize(unsigned size) { + return GetPrintRegisterFormatForSize(size, size); + } + + PrintRegisterFormat GetPrintRegisterFormatForSizeFP(unsigned size) { + switch (size) { + default: + VIXL_UNREACHABLE(); + return kPrintDReg; + case kDRegSizeInBytes: + return kPrintDReg; + case kSRegSizeInBytes: + return kPrintSReg; + case kHRegSizeInBytes: + return kPrintHReg; + } + } + + PrintRegisterFormat GetPrintRegisterFormatTryFP(PrintRegisterFormat format) { + if ((GetPrintRegLaneSizeInBytes(format) == kHRegSizeInBytes) || + (GetPrintRegLaneSizeInBytes(format) == kSRegSizeInBytes) || + (GetPrintRegLaneSizeInBytes(format) == kDRegSizeInBytes)) { + return static_cast(format | kPrintRegAsFP); + } + return format; + } + + PrintRegisterFormat GetPrintRegisterFormatForSizeTryFP(unsigned size) { + return GetPrintRegisterFormatTryFP(GetPrintRegisterFormatForSize(size)); + } + + template + PrintRegisterFormat GetPrintRegisterFormat(T value) { + return GetPrintRegisterFormatForSize(sizeof(value)); + } + + PrintRegisterFormat GetPrintRegisterFormat(double value) { + VIXL_STATIC_ASSERT(sizeof(value) == kDRegSizeInBytes); + return GetPrintRegisterFormatForSizeFP(sizeof(value)); + } + + PrintRegisterFormat GetPrintRegisterFormat(float value) { + VIXL_STATIC_ASSERT(sizeof(value) == kSRegSizeInBytes); + return GetPrintRegisterFormatForSizeFP(sizeof(value)); + } + + PrintRegisterFormat GetPrintRegisterFormat(Float16 value) { + VIXL_STATIC_ASSERT(sizeof(Float16ToRawbits(value)) == kHRegSizeInBytes); + return GetPrintRegisterFormatForSizeFP(sizeof(Float16ToRawbits(value))); + } + + PrintRegisterFormat GetPrintRegisterFormat(VectorFormat vform); + PrintRegisterFormat GetPrintRegisterFormatFP(VectorFormat vform); + + // Print all registers of the specified types. + void PrintRegisters(); + void PrintVRegisters(); + void PrintZRegisters(); + void PrintSystemRegisters(); + + // As above, but only print the registers that have been updated. + void PrintWrittenRegisters(); + void PrintWrittenVRegisters(); + void PrintWrittenPRegisters(); + + // As above, but respect LOG_REG and LOG_VREG. + void LogWrittenRegisters() { + if (ShouldTraceRegs()) PrintWrittenRegisters(); + } + void LogWrittenVRegisters() { + if (ShouldTraceVRegs()) PrintWrittenVRegisters(); + } + void LogWrittenPRegisters() { + if (ShouldTraceVRegs()) PrintWrittenPRegisters(); + } + void LogAllWrittenRegisters() { + LogWrittenRegisters(); + LogWrittenVRegisters(); + LogWrittenPRegisters(); + } + + // The amount of space to leave for a register name. This is used to keep the + // values vertically aligned. The longest register name has the form + // "z31<2047:1920>". The total overall value indentation must also take into + // account the fixed formatting: "# {name}: 0x{value}". + static const int kPrintRegisterNameFieldWidth = 14; + + // Print whole, individual register values. + // - The format can be used to restrict how much of the register is printed, + // but such formats indicate that the unprinted high-order bits are zero and + // these helpers will assert that. + // - If the format includes the kPrintRegAsFP flag then human-friendly FP + // value annotations will be printed. + // - The suffix can be used to add annotations (such as memory access + // details), or to suppress the newline. 
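+ // For example (an illustrative sketch; output shown approximately):
+ //
+ //   PrintRegister(0);                      // "# x0: 0x{value}", newline.
+ //   PrintVRegister(1, kPrintReg2DFP, "");  // FP annotations, no newline.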
+ void PrintRegister(int code, + PrintRegisterFormat format = kPrintXReg, + const char* suffix = "\n"); + void PrintVRegister(int code, + PrintRegisterFormat format = kPrintReg1Q, + const char* suffix = "\n"); + // PrintZRegister and PrintPRegister print over several lines, so they cannot + // allow the suffix to be overridden. + void PrintZRegister(int code, PrintRegisterFormat format = kPrintRegVnQ); + void PrintPRegister(int code, PrintRegisterFormat format = kPrintRegVnQ); + void PrintFFR(PrintRegisterFormat format = kPrintRegVnQ); + // Print a single Q-sized part of a Z register, or the corresponding two-byte + // part of a P register. These print single lines, and therefore allow the + // suffix to be overridden. The format must include the kPrintRegPartial flag. + void PrintPartialZRegister(int code, + int q_index, + PrintRegisterFormat format = kPrintRegVnQ, + const char* suffix = "\n"); + void PrintPartialPRegister(int code, + int q_index, + PrintRegisterFormat format = kPrintRegVnQ, + const char* suffix = "\n"); + void PrintPartialPRegister(const char* name, + const SimPRegister& reg, + int q_index, + PrintRegisterFormat format = kPrintRegVnQ, + const char* suffix = "\n"); + + // Like Print*Register (above), but respect trace parameters. + void LogRegister(unsigned code, PrintRegisterFormat format) { + if (ShouldTraceRegs()) PrintRegister(code, format); + } + void LogVRegister(unsigned code, PrintRegisterFormat format) { + if (ShouldTraceVRegs()) PrintVRegister(code, format); + } + void LogZRegister(unsigned code, PrintRegisterFormat format) { + if (ShouldTraceVRegs()) PrintZRegister(code, format); + } + void LogPRegister(unsigned code, PrintRegisterFormat format) { + if (ShouldTraceVRegs()) PrintPRegister(code, format); + } + void LogFFR(PrintRegisterFormat format) { + if (ShouldTraceVRegs()) PrintFFR(format); + } + + // Other state updates, including system registers. + void PrintSystemRegister(SystemRegister id); + void PrintTakenBranch(const Instruction* target); + void LogSystemRegister(SystemRegister id) { + if (ShouldTraceSysRegs()) PrintSystemRegister(id); + } + void LogTakenBranch(const Instruction* target) { + if (ShouldTraceBranches()) PrintTakenBranch(target); + } + + // Trace memory accesses. + + // Common, contiguous register accesses (such as for scalars). + // The *Write variants automatically set kPrintRegPartial on the format. + void PrintRead(int rt_code, PrintRegisterFormat format, uintptr_t address); + void PrintExtendingRead(int rt_code, + PrintRegisterFormat format, + int access_size_in_bytes, + uintptr_t address); + void PrintWrite(int rt_code, PrintRegisterFormat format, uintptr_t address); + void PrintVRead(int rt_code, PrintRegisterFormat format, uintptr_t address); + void PrintVWrite(int rt_code, PrintRegisterFormat format, uintptr_t address); + // Simple, unpredicated SVE accesses always access the whole vector, and never + // know the lane type, so there's no need to accept a `format`. + void PrintZRead(int rt_code, uintptr_t address) { + vregisters_[rt_code].NotifyRegisterLogged(); + PrintZAccess(rt_code, "<-", address); + } + void PrintZWrite(int rt_code, uintptr_t address) { + PrintZAccess(rt_code, "->", address); + } + void PrintPRead(int rt_code, uintptr_t address) { + pregisters_[rt_code].NotifyRegisterLogged(); + PrintPAccess(rt_code, "<-", address); + } + void PrintPWrite(int rt_code, uintptr_t address) { + PrintPAccess(rt_code, "->", address); + } + + // Like Print* (above), but respect GetTraceParameters(). 
+ void LogRead(int rt_code, PrintRegisterFormat format, uintptr_t address) { + if (ShouldTraceRegs()) PrintRead(rt_code, format, address); + } + void LogExtendingRead(int rt_code, + PrintRegisterFormat format, + int access_size_in_bytes, + uintptr_t address) { + if (ShouldTraceRegs()) { + PrintExtendingRead(rt_code, format, access_size_in_bytes, address); + } + } + void LogWrite(int rt_code, PrintRegisterFormat format, uintptr_t address) { + if (ShouldTraceWrites()) PrintWrite(rt_code, format, address); + } + void LogVRead(int rt_code, PrintRegisterFormat format, uintptr_t address) { + if (ShouldTraceVRegs()) PrintVRead(rt_code, format, address); + } + void LogVWrite(int rt_code, PrintRegisterFormat format, uintptr_t address) { + if (ShouldTraceWrites()) PrintVWrite(rt_code, format, address); + } + void LogZRead(int rt_code, uintptr_t address) { + if (ShouldTraceVRegs()) PrintZRead(rt_code, address); + } + void LogZWrite(int rt_code, uintptr_t address) { + if (ShouldTraceWrites()) PrintZWrite(rt_code, address); + } + void LogPRead(int rt_code, uintptr_t address) { + if (ShouldTraceVRegs()) PrintPRead(rt_code, address); + } + void LogPWrite(int rt_code, uintptr_t address) { + if (ShouldTraceWrites()) PrintPWrite(rt_code, address); + } + void LogMemTransfer(uintptr_t dst, uintptr_t src, uint8_t value) { + if (ShouldTraceWrites()) PrintMemTransfer(dst, src, value); + } + // Helpers for the above, where the access operation is parameterised. + // - For loads, set op = "<-". + // - For stores, set op = "->". + void PrintAccess(int rt_code, + PrintRegisterFormat format, + const char* op, + uintptr_t address); + void PrintVAccess(int rt_code, + PrintRegisterFormat format, + const char* op, + uintptr_t address); + void PrintMemTransfer(uintptr_t dst, uintptr_t src, uint8_t value); + // Simple, unpredicated SVE accesses always access the whole vector, and never + // know the lane type, so these don't accept a `format`. + void PrintZAccess(int rt_code, const char* op, uintptr_t address); + void PrintPAccess(int rt_code, const char* op, uintptr_t address); + + // Multiple-structure accesses. + void PrintVStructAccess(int rt_code, + int reg_count, + PrintRegisterFormat format, + const char* op, + uintptr_t address); + // Single-structure (single-lane) accesses. + void PrintVSingleStructAccess(int rt_code, + int reg_count, + int lane, + PrintRegisterFormat format, + const char* op, + uintptr_t address); + // Replicating accesses. + void PrintVReplicatingStructAccess(int rt_code, + int reg_count, + PrintRegisterFormat format, + const char* op, + uintptr_t address); + + // Multiple-structure accesses. + void PrintZStructAccess(int rt_code, + int reg_count, + const LogicPRegister& pg, + PrintRegisterFormat format, + int msize_in_bytes, + const char* op, + const LogicSVEAddressVector& addr); + + // Register-printing helper for all structured accessors. + // + // All lanes (according to `format`) are printed, but lanes indicated by + // `focus_mask` are of particular interest. Each bit corresponds to a byte in + // the printed register, in a manner similar to SVE's predicates. Currently, + // this is used to determine when to print human-readable FP annotations. + void PrintVRegistersForStructuredAccess(int rt_code, + int reg_count, + uint16_t focus_mask, + PrintRegisterFormat format); + + // As for the VRegister variant, but print partial Z register names. 
+ void PrintZRegistersForStructuredAccess(int rt_code, + int q_index, + int reg_count, + uint16_t focus_mask, + PrintRegisterFormat format); + + // Print part of a memory access. This should be used for annotating + // non-trivial accesses, such as structured or sign-extending loads. Call + // Print*Register (or Print*RegistersForStructuredAccess), then + // PrintPartialAccess for each contiguous access that makes up the + // instruction. + // + // access_mask: + // The lanes to be printed. Each bit corresponds to a byte in the printed + // register, in a manner similar to SVE's predicates, except that the + // lane size is not respected when interpreting lane_mask: unaligned bits + // must be zeroed. + // + // This function asserts that this mask is non-zero. + // + // future_access_mask: + // The lanes to be printed by a future invocation. This must be specified + // because vertical lines are drawn for partial accesses that haven't yet + // been printed. The format is the same as for accessed_mask. + // + // If a lane is active in both `access_mask` and `future_access_mask`, + // `access_mask` takes precedence. + // + // struct_element_count: + // The number of elements in each structure. For non-structured accesses, + // set this to one. Along with lane_size_in_bytes, this is used determine + // the size of each access, and to format the accessed value. + // + // op: + // For stores, use "->". For loads, use "<-". + // + // address: + // The address of this partial access. (Not the base address of the whole + // instruction.) The traced value is read from this address (according to + // part_count and lane_size_in_bytes) so it must be accessible, and when + // tracing stores, the store must have been executed before this function + // is called. + // + // reg_size_in_bytes: + // The size of the register being accessed. This helper is usually used + // for V registers or Q-sized chunks of Z registers, so that is the + // default, but it is possible to use this to annotate X register + // accesses by specifying kXRegSizeInBytes. + // + // The return value is a future_access_mask suitable for the next iteration, + // so that it is possible to execute this in a loop, until the mask is zero. + // Note that accessed_mask must still be updated by the caller for each call. + uint16_t PrintPartialAccess(uint16_t access_mask, + uint16_t future_access_mask, + int struct_element_count, + int lane_size_in_bytes, + const char* op, + uintptr_t address, + int reg_size_in_bytes = kQRegSizeInBytes); + + // Print an abstract register value. This works for all register types, and + // can print parts of registers. This exists to ensure consistent formatting + // of values. + void PrintRegisterValue(const uint8_t* value, + int value_size, + PrintRegisterFormat format); + template + void PrintRegisterValue(const T& sim_register, PrintRegisterFormat format) { + PrintRegisterValue(sim_register.GetBytes(), + std::min(sim_register.GetSizeInBytes(), + kQRegSizeInBytes), + format); + } + + // As above, but format as an SVE predicate value, using binary notation with + // spaces between each bit so that they align with the Z register bytes that + // they predicate. 
+ void PrintPRegisterValue(uint16_t value); + + void PrintRegisterValueFPAnnotations(const uint8_t* value, + uint16_t lane_mask, + PrintRegisterFormat format); + template + void PrintRegisterValueFPAnnotations(const T& sim_register, + uint16_t lane_mask, + PrintRegisterFormat format) { + PrintRegisterValueFPAnnotations(sim_register.GetBytes(), lane_mask, format); + } + template + void PrintRegisterValueFPAnnotations(const T& sim_register, + PrintRegisterFormat format) { + PrintRegisterValueFPAnnotations(sim_register.GetBytes(), + GetPrintRegLaneMask(format), + format); + } + + VIXL_NO_RETURN void DoUnreachable(const Instruction* instr); + void DoTrace(const Instruction* instr); + void DoLog(const Instruction* instr); + + static const char* WRegNameForCode(unsigned code, + Reg31Mode mode = Reg31IsZeroRegister); + static const char* XRegNameForCode(unsigned code, + Reg31Mode mode = Reg31IsZeroRegister); + static const char* BRegNameForCode(unsigned code); + static const char* HRegNameForCode(unsigned code); + static const char* SRegNameForCode(unsigned code); + static const char* DRegNameForCode(unsigned code); + static const char* VRegNameForCode(unsigned code); + static const char* ZRegNameForCode(unsigned code); + static const char* PRegNameForCode(unsigned code); + + bool IsColouredTrace() const { return coloured_trace_; } + VIXL_DEPRECATED("IsColouredTrace", bool coloured_trace() const) { + return IsColouredTrace(); + } + + void SetColouredTrace(bool value); + VIXL_DEPRECATED("SetColouredTrace", void set_coloured_trace(bool value)) { + SetColouredTrace(value); + } + + // Values for traces parameters defined in simulator-constants-aarch64.h in + // enum TraceParameters. + int GetTraceParameters() const { return trace_parameters_; } + VIXL_DEPRECATED("GetTraceParameters", int trace_parameters() const) { + return GetTraceParameters(); + } + + bool ShouldTraceWrites() const { + return (GetTraceParameters() & LOG_WRITE) != 0; + } + bool ShouldTraceRegs() const { + return (GetTraceParameters() & LOG_REGS) != 0; + } + bool ShouldTraceVRegs() const { + return (GetTraceParameters() & LOG_VREGS) != 0; + } + bool ShouldTraceSysRegs() const { + return (GetTraceParameters() & LOG_SYSREGS) != 0; + } + bool ShouldTraceBranches() const { + return (GetTraceParameters() & LOG_BRANCH) != 0; + } + + void SetTraceParameters(int parameters); + VIXL_DEPRECATED("SetTraceParameters", + void set_trace_parameters(int parameters)) { + SetTraceParameters(parameters); + } + + // Clear the simulated local monitor to force the next store-exclusive + // instruction to fail. + void ClearLocalMonitor() { local_monitor_.Clear(); } + + void SilenceExclusiveAccessWarning() { + print_exclusive_access_warning_ = false; + } + + void CheckIsValidUnalignedAtomicAccess(int rn, + uint64_t address, + unsigned access_size) { + // Verify that the address is available to the host. + VIXL_ASSERT(address == static_cast(address)); + + if (GetCPUFeatures()->Has(CPUFeatures::kUSCAT)) { + // Check that the access falls entirely within one atomic access granule. + if (AlignDown(address, kAtomicAccessGranule) != + AlignDown(address + access_size - 1, kAtomicAccessGranule)) { + VIXL_ALIGNMENT_EXCEPTION(); + } + } else { + // Check that the access is aligned. + if (AlignDown(address, access_size) != address) { + VIXL_ALIGNMENT_EXCEPTION(); + } + } + + // The sp must be aligned to 16 bytes when it is accessed. 
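+ // For example, an access through sp at address 0x1008 is rejected by the
+ // check below, because 0x1008 is not 16-byte aligned (a worked example of
+ // the rule stated above).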
+ if ((rn == kSpRegCode) && (AlignDown(address, 16) != address)) { + VIXL_ALIGNMENT_EXCEPTION(); + } + } + + enum PointerType { kDataPointer, kInstructionPointer }; + + struct PACKey { + uint64_t high; + uint64_t low; + int number; + }; + + // Current implementation is that all pointers are tagged. + bool HasTBI(uint64_t ptr, PointerType type) { + USE(ptr, type); + return true; + } + + // Current implementation uses 48-bit virtual addresses. + int GetBottomPACBit(uint64_t ptr, int ttbr) { + USE(ptr, ttbr); + VIXL_ASSERT((ttbr == 0) || (ttbr == 1)); + return 48; + } + + // The top PAC bit is 55 for the purposes of relative bit fields with TBI, + // however bit 55 is the TTBR bit regardless of TBI so isn't part of the PAC + // codes in pointers. + int GetTopPACBit(uint64_t ptr, PointerType type) { + return HasTBI(ptr, type) ? 55 : 63; + } + + // Armv8.3 Pointer authentication helpers. + uint64_t CalculatePACMask(uint64_t ptr, PointerType type, int ext_bit); + uint64_t ComputePAC(uint64_t data, uint64_t context, PACKey key); + uint64_t AuthPAC(uint64_t ptr, + uint64_t context, + PACKey key, + PointerType type); + uint64_t AddPAC(uint64_t ptr, uint64_t context, PACKey key, PointerType type); + uint64_t StripPAC(uint64_t ptr, PointerType type); + void PACHelper(int dst, + int src, + PACKey key, + decltype(&Simulator::AddPAC) pac_fn); + + // Armv8.5 MTE helpers. + uint64_t ChooseNonExcludedTag(uint64_t tag, + uint64_t offset, + uint64_t exclude = 0) { + VIXL_ASSERT(IsUint4(tag) && IsUint4(offset) && IsUint16(exclude)); + + if (exclude == 0xffff) { + return 0; + } + + if (offset == 0) { + while ((exclude & (1 << tag)) != 0) { + tag = (tag + 1) % 16; + } + } + + while (offset > 0) { + offset--; + tag = (tag + 1) % 16; + while ((exclude & (1 << tag)) != 0) { + tag = (tag + 1) % 16; + } + } + return tag; + } + + uint64_t GetAddressWithAllocationTag(uint64_t addr, uint64_t tag) { + VIXL_ASSERT(IsUint4(tag)); + return (addr & ~(UINT64_C(0xf) << 56)) | (tag << 56); + } + + // Create or remove a mapping with memory protection. Memory attributes such + // as MTE and BTI are represented by metadata in Simulator. + void* Mmap( + void* address, size_t length, int prot, int flags, int fd, off_t offset); + + int Munmap(void* address, size_t length, int prot); + + // The common CPUFeatures interface with the set of available features. + + CPUFeatures* GetCPUFeatures() { + return cpu_features_auditor_.GetCPUFeatures(); + } + + void SetCPUFeatures(const CPUFeatures& cpu_features) { + cpu_features_auditor_.SetCPUFeatures(cpu_features); + } + + // The set of features that the simulator has encountered. + const CPUFeatures& GetSeenFeatures() { + return cpu_features_auditor_.GetSeenFeatures(); + } + void ResetSeenFeatures() { cpu_features_auditor_.ResetSeenFeatures(); } + +// Runtime call emulation support. +// It requires VIXL's ABI features, and C++11 or greater. +// Also, the initialisation of the tuples in RuntimeCall(Non)Void is incorrect +// in GCC before 4.9.1: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=51253 +#if defined(VIXL_HAS_ABI_SUPPORT) && __cplusplus >= 201103L && \ + (defined(__clang__) || GCC_VERSION_OR_NEWER(4, 9, 1)) + +#define VIXL_HAS_SIMULATED_RUNTIME_CALL_SUPPORT + +// The implementation of the runtime call helpers require the functionality +// provided by `std::index_sequence`. It is only available from C++14, but +// we want runtime call simulation to work from C++11, so we emulate if +// necessary. 
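+// For example, with the C++11 emulation below, emulated_make_index_sequence<3>
+// derives (via the recursive helper) from emulated_index_sequence<0, 1, 2>,
+// which is what the DoRuntimeCall() helper needs in order to expand the
+// argument tuple with std::get<I>(arguments)... (a worked expansion of the
+// scheme described above).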
+#if __cplusplus >= 201402L + template + using local_index_sequence = std::index_sequence; + template + using __local_index_sequence_for = std::index_sequence_for; +#else + // Emulate the behaviour of `std::index_sequence` and + // `std::index_sequence_for`. + // Naming follow the `std` names, prefixed with `emulated_`. + template + struct emulated_index_sequence {}; + + // A recursive template to create a sequence of indexes. + // The base case (for `N == 0`) is declared outside of the class scope, as + // required by C++. + template + struct emulated_make_index_sequence_helper + : emulated_make_index_sequence_helper {}; + + template + struct emulated_make_index_sequence : emulated_make_index_sequence_helper { + }; + + template + struct emulated_index_sequence_for + : emulated_make_index_sequence {}; + + template + using local_index_sequence = emulated_index_sequence; + template + using __local_index_sequence_for = emulated_index_sequence_for; +#endif + + // Expand the argument tuple and perform the call. + template + R DoRuntimeCall(R (*function)(P...), + std::tuple arguments, + local_index_sequence) { + USE(arguments); + return function(std::get(arguments)...); + } + + template + void RuntimeCallNonVoid(R (*function)(P...)) { + ABI abi; + std::tuple argument_operands{ + ReadGenericOperand

<P>(abi.GetNextParameterGenericOperand<P>
())...}; + R return_value = DoRuntimeCall(function, + argument_operands, + __local_index_sequence_for{}); + WriteGenericOperand(abi.GetReturnGenericOperand(), return_value); + } + + template + void RuntimeCallVoid(R (*function)(P...)) { + ABI abi; + std::tuple argument_operands{ + ReadGenericOperand

<P>(abi.GetNextParameterGenericOperand<P>
())...}; + DoRuntimeCall(function, + argument_operands, + __local_index_sequence_for{}); + } + + // We use `struct` for `void` return type specialisation. + template + struct RuntimeCallStructHelper { + static void Wrapper(Simulator* simulator, uintptr_t function_pointer) { + R (*function)(P...) = reinterpret_cast(function_pointer); + simulator->RuntimeCallNonVoid(function); + } + }; + + // Partial specialization when the return type is `void`. + template + struct RuntimeCallStructHelper { + static void Wrapper(Simulator* simulator, uintptr_t function_pointer) { + void (*function)(P...) = + reinterpret_cast(function_pointer); + simulator->RuntimeCallVoid(function); + } + }; +#endif + + // Configure the simulated value of 'VL', which is the size of a Z register. + // Because this cannot occur during a program's lifetime, this function also + // resets the SVE registers. + void SetVectorLengthInBits(unsigned vector_length); + + unsigned GetVectorLengthInBits() const { return vector_length_; } + unsigned GetVectorLengthInBytes() const { + VIXL_ASSERT((vector_length_ % kBitsPerByte) == 0); + return vector_length_ / kBitsPerByte; + } + unsigned GetPredicateLengthInBits() const { + VIXL_ASSERT((GetVectorLengthInBits() % kZRegBitsPerPRegBit) == 0); + return GetVectorLengthInBits() / kZRegBitsPerPRegBit; + } + unsigned GetPredicateLengthInBytes() const { + VIXL_ASSERT((GetVectorLengthInBytes() % kZRegBitsPerPRegBit) == 0); + return GetVectorLengthInBytes() / kZRegBitsPerPRegBit; + } + + unsigned RegisterSizeInBitsFromFormat(VectorFormat vform) const { + if (IsSVEFormat(vform)) { + return GetVectorLengthInBits(); + } else { + return vixl::aarch64::RegisterSizeInBitsFromFormat(vform); + } + } + + unsigned RegisterSizeInBytesFromFormat(VectorFormat vform) const { + unsigned size_in_bits = RegisterSizeInBitsFromFormat(vform); + VIXL_ASSERT((size_in_bits % kBitsPerByte) == 0); + return size_in_bits / kBitsPerByte; + } + + int LaneCountFromFormat(VectorFormat vform) const { + if (IsSVEFormat(vform)) { + return GetVectorLengthInBits() / LaneSizeInBitsFromFormat(vform); + } else { + return vixl::aarch64::LaneCountFromFormat(vform); + } + } + + bool IsFirstActive(VectorFormat vform, + const LogicPRegister& mask, + const LogicPRegister& bits) { + for (int i = 0; i < LaneCountFromFormat(vform); i++) { + if (mask.IsActive(vform, i)) { + return bits.IsActive(vform, i); + } + } + return false; + } + + bool AreNoneActive(VectorFormat vform, + const LogicPRegister& mask, + const LogicPRegister& bits) { + for (int i = 0; i < LaneCountFromFormat(vform); i++) { + if (mask.IsActive(vform, i) && bits.IsActive(vform, i)) { + return false; + } + } + return true; + } + + bool IsLastActive(VectorFormat vform, + const LogicPRegister& mask, + const LogicPRegister& bits) { + for (int i = LaneCountFromFormat(vform) - 1; i >= 0; i--) { + if (mask.IsActive(vform, i)) { + return bits.IsActive(vform, i); + } + } + return false; + } + + void PredTest(VectorFormat vform, + const LogicPRegister& mask, + const LogicPRegister& bits) { + ReadNzcv().SetN(IsFirstActive(vform, mask, bits)); + ReadNzcv().SetZ(AreNoneActive(vform, mask, bits)); + ReadNzcv().SetC(!IsLastActive(vform, mask, bits)); + ReadNzcv().SetV(0); + LogSystemRegister(NZCV); + } + + SimPRegister& GetPTrue() { return pregister_all_true_; } + + template + size_t CleanGranuleTag(T address, size_t length = kMTETagGranuleInBytes) { + size_t count = 0; + for (size_t offset = 0; offset < length; offset += kMTETagGranuleInBytes) { + count += + 
meta_data_.CleanMTETag(reinterpret_cast(address) + offset); + } + size_t expected = + length / kMTETagGranuleInBytes + (length % kMTETagGranuleInBytes != 0); + + // Give a warning when the memory region that is being unmapped isn't all + // either MTE protected or not. + if (count != expected) { + std::stringstream sstream; + sstream << std::hex << "MTE WARNING : the memory region being unmapped " + "starting at address 0x" + << reinterpret_cast(address) + << "is not fully MTE protected.\n"; + VIXL_WARNING(sstream.str().c_str()); + } + return count; + } + + template + void SetGranuleTag(T address, + int tag, + size_t length = kMTETagGranuleInBytes) { + for (size_t offset = 0; offset < length; offset += kMTETagGranuleInBytes) { + meta_data_.SetMTETag((uintptr_t)(address) + offset, tag); + } + } + + template + int GetGranuleTag(T address) { + return meta_data_.GetMTETag(address); + } + + // Generate a random address tag, and any tags specified in the input are + // excluded from the selection. + uint64_t GenerateRandomTag(uint16_t exclude = 0); + + // Register a new BranchInterception object. If 'function' is branched to + // (e.g: "bl function") in the future; instead, if provided, 'callback' will + // be called otherwise a runtime call will be performed on 'function'. + // + // For example: this can be used to always perform runtime calls on + // non-AArch64 functions without using the macroassembler. + template + void RegisterBranchInterception(R (*function)(P...), + InterceptionCallback callback = nullptr) { + meta_data_.RegisterBranchInterception(*function, callback); + } + + protected: + const char* clr_normal; + const char* clr_flag_name; + const char* clr_flag_value; + const char* clr_reg_name; + const char* clr_reg_value; + const char* clr_vreg_name; + const char* clr_vreg_value; + const char* clr_preg_name; + const char* clr_preg_value; + const char* clr_memory_address; + const char* clr_warning; + const char* clr_warning_message; + const char* clr_printf; + const char* clr_branch_marker; + + // Simulation helpers ------------------------------------ + + void ResetSystemRegisters(); + void ResetRegisters(); + void ResetVRegisters(); + void ResetPRegisters(); + void ResetFFR(); + + bool ConditionPassed(Condition cond) { + switch (cond) { + case eq: + return ReadZ(); + case ne: + return !ReadZ(); + case hs: + return ReadC(); + case lo: + return !ReadC(); + case mi: + return ReadN(); + case pl: + return !ReadN(); + case vs: + return ReadV(); + case vc: + return !ReadV(); + case hi: + return ReadC() && !ReadZ(); + case ls: + return !(ReadC() && !ReadZ()); + case ge: + return ReadN() == ReadV(); + case lt: + return ReadN() != ReadV(); + case gt: + return !ReadZ() && (ReadN() == ReadV()); + case le: + return !(!ReadZ() && (ReadN() == ReadV())); + case nv: + VIXL_FALLTHROUGH(); + case al: + return true; + default: + VIXL_UNREACHABLE(); + return false; + } + } + + bool ConditionPassed(Instr cond) { + return ConditionPassed(static_cast(cond)); + } + + bool ConditionFailed(Condition cond) { return !ConditionPassed(cond); } + + void AddSubHelper(const Instruction* instr, int64_t op2); + uint64_t AddWithCarry(unsigned reg_size, + bool set_flags, + uint64_t left, + uint64_t right, + int carry_in = 0); + std::pair AddWithCarry(unsigned reg_size, + uint64_t left, + uint64_t right, + int carry_in); + using vixl_uint128_t = std::pair; + vixl_uint128_t Add128(vixl_uint128_t x, vixl_uint128_t y); + vixl_uint128_t Mul64(uint64_t x, uint64_t y); + vixl_uint128_t Neg128(vixl_uint128_t x); + void 
LogicalHelper(const Instruction* instr, int64_t op2); + void ConditionalCompareHelper(const Instruction* instr, int64_t op2); + void LoadStoreHelper(const Instruction* instr, + int64_t offset, + AddrMode addrmode); + void LoadStorePairHelper(const Instruction* instr, AddrMode addrmode); + template + void CompareAndSwapHelper(const Instruction* instr); + template + void CompareAndSwapPairHelper(const Instruction* instr); + template + void AtomicMemorySimpleHelper(const Instruction* instr); + template + void AtomicMemorySwapHelper(const Instruction* instr); + template + void LoadAcquireRCpcHelper(const Instruction* instr); + template + void LoadAcquireRCpcUnscaledOffsetHelper(const Instruction* instr); + template + void StoreReleaseUnscaledOffsetHelper(const Instruction* instr); + uintptr_t AddressModeHelper(unsigned addr_reg, + int64_t offset, + AddrMode addrmode); + void NEONLoadStoreMultiStructHelper(const Instruction* instr, + AddrMode addr_mode); + void NEONLoadStoreSingleStructHelper(const Instruction* instr, + AddrMode addr_mode); + template + void MOPSPHelper(const Instruction* instr) { + VIXL_ASSERT(instr->IsConsistentMOPSTriplet()); + + int d = instr->GetRd(); + int n = instr->GetRn(); + int s = instr->GetRs(); + + // Aliased registers and xzr are disallowed for Xd and Xn. + if ((d == n) || (d == s) || (n == s) || (d == 31) || (n == 31)) { + VisitUnallocated(instr); + } + + // Additionally, Xs may not be xzr for cpy. + if ((mops_type == "cpy"_h) && (s == 31)) { + VisitUnallocated(instr); + } + + // Bits 31 and 30 must be zero. + if (instr->ExtractBits(31, 30) != 0) { + VisitUnallocated(instr); + } + + // Saturate copy count. + uint64_t xn = ReadXRegister(n); + int saturation_bits = (mops_type == "cpy"_h) ? 55 : 63; + if ((xn >> saturation_bits) != 0) { + xn = (UINT64_C(1) << saturation_bits) - 1; + if (mops_type == "setg"_h) { + // Align saturated value to granule. + xn &= ~UINT64_C(kMTETagGranuleInBytes - 1); + } + WriteXRegister(n, xn); + } + + ReadNzcv().SetN(0); + ReadNzcv().SetZ(0); + ReadNzcv().SetC(1); // Indicates "option B" implementation. 
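+    // Descriptive note (not from the VIXL sources): together with the other
+    // three writes here, this leaves NZCV = 0b0010 (only C set), which is the
+    // prologue's way of indicating the "option B" algorithm to the main and
+    // epilogue instructions of the triplet. The saturation step above also
+    // means that, for example, a cpy prologue that reads Xn = (1 << 60)
+    // rewrites it as (1 << 55) - 1 before any copying is simulated.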
+ ReadNzcv().SetV(0); + } + + int64_t ShiftOperand(unsigned reg_size, + uint64_t value, + Shift shift_type, + unsigned amount) const; + int64_t ExtendValue(unsigned reg_width, + int64_t value, + Extend extend_type, + unsigned left_shift = 0) const; + uint64_t PolynomialMult(uint64_t op1, + uint64_t op2, + int lane_size_in_bits) const; + + void ld1(VectorFormat vform, LogicVRegister dst, uint64_t addr); + void ld1(VectorFormat vform, LogicVRegister dst, int index, uint64_t addr); + void ld1r(VectorFormat vform, LogicVRegister dst, uint64_t addr); + void ld1r(VectorFormat vform, + VectorFormat unpack_vform, + LogicVRegister dst, + uint64_t addr, + bool is_signed = false); + void ld2(VectorFormat vform, + LogicVRegister dst1, + LogicVRegister dst2, + uint64_t addr); + void ld2(VectorFormat vform, + LogicVRegister dst1, + LogicVRegister dst2, + int index, + uint64_t addr); + void ld2r(VectorFormat vform, + LogicVRegister dst1, + LogicVRegister dst2, + uint64_t addr); + void ld3(VectorFormat vform, + LogicVRegister dst1, + LogicVRegister dst2, + LogicVRegister dst3, + uint64_t addr); + void ld3(VectorFormat vform, + LogicVRegister dst1, + LogicVRegister dst2, + LogicVRegister dst3, + int index, + uint64_t addr); + void ld3r(VectorFormat vform, + LogicVRegister dst1, + LogicVRegister dst2, + LogicVRegister dst3, + uint64_t addr); + void ld4(VectorFormat vform, + LogicVRegister dst1, + LogicVRegister dst2, + LogicVRegister dst3, + LogicVRegister dst4, + uint64_t addr); + void ld4(VectorFormat vform, + LogicVRegister dst1, + LogicVRegister dst2, + LogicVRegister dst3, + LogicVRegister dst4, + int index, + uint64_t addr); + void ld4r(VectorFormat vform, + LogicVRegister dst1, + LogicVRegister dst2, + LogicVRegister dst3, + LogicVRegister dst4, + uint64_t addr); + void st1(VectorFormat vform, LogicVRegister src, uint64_t addr); + void st1(VectorFormat vform, LogicVRegister src, int index, uint64_t addr); + void st2(VectorFormat vform, + LogicVRegister src, + LogicVRegister src2, + uint64_t addr); + void st2(VectorFormat vform, + LogicVRegister src, + LogicVRegister src2, + int index, + uint64_t addr); + void st3(VectorFormat vform, + LogicVRegister src, + LogicVRegister src2, + LogicVRegister src3, + uint64_t addr); + void st3(VectorFormat vform, + LogicVRegister src, + LogicVRegister src2, + LogicVRegister src3, + int index, + uint64_t addr); + void st4(VectorFormat vform, + LogicVRegister src, + LogicVRegister src2, + LogicVRegister src3, + LogicVRegister src4, + uint64_t addr); + void st4(VectorFormat vform, + LogicVRegister src, + LogicVRegister src2, + LogicVRegister src3, + LogicVRegister src4, + int index, + uint64_t addr); + LogicVRegister cmp(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2, + Condition cond); + LogicVRegister cmp(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + int imm, + Condition cond); + LogicVRegister cmptst(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2); + LogicVRegister add(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2); + // Add `value` to each lane of `src1`, treating `value` as unsigned for the + // purposes of setting the saturation flags. 
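+  // Illustrative sketch (not part of the VIXL sources): `add_uint` is the
+  // broadcast-immediate form of `add`, so simulating something like
+  // "add z0.s, z0.s, #3" boils down to a call of the shape
+  //
+  //   add_uint(kFormatVnS, zd, zn, 3);
+  //
+  // where `zd` and `zn` are LogicVRegister views of the Z registers involved.
+  // Treating `value` as unsigned only affects the saturation bookkeeping; the
+  // modular, lane-wise addition itself is unchanged.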
+ LogicVRegister add_uint(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + uint64_t value); + LogicVRegister addp(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2); + LogicPRegister brka(LogicPRegister pd, + const LogicPRegister& pg, + const LogicPRegister& pn); + LogicPRegister brkb(LogicPRegister pd, + const LogicPRegister& pg, + const LogicPRegister& pn); + LogicPRegister brkn(LogicPRegister pdm, + const LogicPRegister& pg, + const LogicPRegister& pn); + LogicPRegister brkpa(LogicPRegister pd, + const LogicPRegister& pg, + const LogicPRegister& pn, + const LogicPRegister& pm); + LogicPRegister brkpb(LogicPRegister pd, + const LogicPRegister& pg, + const LogicPRegister& pn, + const LogicPRegister& pm); + // dst = srca + src1 * src2 + LogicVRegister mla(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& srca, + const LogicVRegister& src1, + const LogicVRegister& src2); + // dst = srca - src1 * src2 + LogicVRegister mls(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& srca, + const LogicVRegister& src1, + const LogicVRegister& src2); + LogicVRegister mul(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2); + LogicVRegister mul(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2, + int index); + LogicVRegister mla(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2, + int index); + LogicVRegister mls(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2, + int index); + LogicVRegister pmul(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2); + LogicVRegister sdiv(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2); + LogicVRegister udiv(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2); + + typedef LogicVRegister (Simulator::*ByElementOp)(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2, + int index); + LogicVRegister fmul(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2, + int index); + LogicVRegister fmla(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2, + int index); + LogicVRegister fmlal(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2, + int index); + LogicVRegister fmlal2(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2, + int index); + LogicVRegister fmls(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2, + int index); + LogicVRegister fmlsl(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2, + int index); + LogicVRegister fmlsl2(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2, + int index); + LogicVRegister fmulx(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2, + int index); + LogicVRegister smulh(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2); + LogicVRegister umulh(VectorFormat vform, + LogicVRegister dst, + const 
LogicVRegister& src1, + const LogicVRegister& src2); + LogicVRegister sqdmull(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2, + int index); + LogicVRegister sqdmlal(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2, + int index); + LogicVRegister sqdmlsl(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2, + int index); + LogicVRegister sqdmulh(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2, + int index); + LogicVRegister sqrdmulh(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2, + int index); + LogicVRegister sqrdmlah(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2, + int index); + LogicVRegister sqrdmlsh(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2, + int index); + LogicVRegister sub(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2); + // Subtract `value` from each lane of `src1`, treating `value` as unsigned for + // the purposes of setting the saturation flags. + LogicVRegister sub_uint(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + uint64_t value); + LogicVRegister and_(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2); + LogicVRegister orr(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2); + LogicVRegister orn(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2); + LogicVRegister eor(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2); + LogicVRegister bic(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2); + LogicVRegister bic(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src, + uint64_t imm); + LogicVRegister bif(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2); + LogicVRegister bit(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2); + LogicVRegister bsl(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src_mask, + const LogicVRegister& src1, + const LogicVRegister& src2); + LogicVRegister cls(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src); + LogicVRegister clz(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src); + LogicVRegister cnot(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src); + LogicVRegister cnt(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src); + LogicVRegister not_(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src); + LogicVRegister rbit(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src); + LogicVRegister rev(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src); + LogicVRegister rev_byte(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src, + int rev_size); + LogicVRegister rev16(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src); + LogicVRegister rev32(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src); + LogicVRegister rev64(VectorFormat vform, + 
LogicVRegister dst, + const LogicVRegister& src); + LogicVRegister addlp(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src, + bool is_signed, + bool do_accumulate); + LogicVRegister saddlp(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src); + LogicVRegister uaddlp(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src); + LogicVRegister sadalp(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src); + LogicVRegister uadalp(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src); + LogicVRegister ror(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src, + int rotation); + LogicVRegister ext(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2, + int index); + LogicVRegister rotate_elements_right(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src, + int index); + template + LogicVRegister fcadd(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2, + int rot); + LogicVRegister fcadd(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2, + int rot); + template + LogicVRegister fcmla(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2, + const LogicVRegister& acc, + int index, + int rot); + LogicVRegister fcmla(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2, + int index, + int rot); + LogicVRegister fcmla(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2, + const LogicVRegister& acc, + int rot); + template + LogicVRegister fadda(VectorFormat vform, + LogicVRegister acc, + const LogicPRegister& pg, + const LogicVRegister& src); + LogicVRegister fadda(VectorFormat vform, + LogicVRegister acc, + const LogicPRegister& pg, + const LogicVRegister& src); + LogicVRegister cadd(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2, + int rot, + bool saturate = false); + LogicVRegister cmla(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& srca, + const LogicVRegister& src1, + const LogicVRegister& src2, + int rot); + LogicVRegister cmla(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& srca, + const LogicVRegister& src1, + const LogicVRegister& src2, + int index, + int rot); + LogicVRegister bgrp(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2, + bool do_bext = false); + LogicVRegister bdep(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2); + LogicVRegister histogram(VectorFormat vform, + LogicVRegister dst, + const LogicPRegister& pg, + const LogicVRegister& src1, + const LogicVRegister& src2, + bool do_segmented = false); + LogicVRegister index(VectorFormat vform, + LogicVRegister dst, + uint64_t start, + uint64_t step); + LogicVRegister ins_element(VectorFormat vform, + LogicVRegister dst, + int dst_index, + const LogicVRegister& src, + int src_index); + LogicVRegister ins_immediate(VectorFormat vform, + LogicVRegister dst, + int dst_index, + uint64_t imm); + LogicVRegister insr(VectorFormat vform, LogicVRegister dst, uint64_t imm); + LogicVRegister dup_element(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src, + int src_index); + LogicVRegister dup_elements_to_segments(VectorFormat vform, 
+ LogicVRegister dst, + const LogicVRegister& src, + int src_index); + LogicVRegister dup_elements_to_segments( + VectorFormat vform, + LogicVRegister dst, + const std::pair& src_and_index); + LogicVRegister dup_immediate(VectorFormat vform, + LogicVRegister dst, + uint64_t imm); + LogicVRegister mov(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src); + LogicPRegister mov(LogicPRegister dst, const LogicPRegister& src); + LogicVRegister mov_merging(VectorFormat vform, + LogicVRegister dst, + const SimPRegister& pg, + const LogicVRegister& src); + LogicVRegister mov_zeroing(VectorFormat vform, + LogicVRegister dst, + const SimPRegister& pg, + const LogicVRegister& src); + LogicVRegister mov_alternating(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src, + int start_at); + LogicPRegister mov_merging(LogicPRegister dst, + const LogicPRegister& pg, + const LogicPRegister& src); + LogicPRegister mov_zeroing(LogicPRegister dst, + const LogicPRegister& pg, + const LogicPRegister& src); + LogicVRegister movi(VectorFormat vform, LogicVRegister dst, uint64_t imm); + LogicVRegister mvni(VectorFormat vform, LogicVRegister dst, uint64_t imm); + LogicVRegister orr(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src, + uint64_t imm); + LogicVRegister sshl(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2, + bool shift_is_8bit = true); + LogicVRegister ushl(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2, + bool shift_is_8bit = true); + LogicVRegister sshr(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2); + LogicVRegister ushr(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2); + // Perform a "conditional last" operation. The first part of the pair is true + // if any predicate lane is active, false otherwise. The second part takes the + // value of the last active (plus offset) lane, or last (plus offset) lane if + // none active. 
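+  // Worked example (illustrative, derived from the description above): with
+  // four lanes holding {1, 2, 3, 4} (lane 0 first) and only lanes 0 and 2
+  // active in `pg`, an offset of 0 yields {true, 3} (the last active lane)
+  // and an offset of 1 yields {true, 4} (the lane after it). If no lane is
+  // active, the first element of the returned pair is false.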
+ std::pair clast(VectorFormat vform, + const LogicPRegister& pg, + const LogicVRegister& src2, + int offset_from_last_active); + LogicPRegister match(VectorFormat vform, + LogicPRegister dst, + const LogicVRegister& haystack, + const LogicVRegister& needles, + bool negate_match); + LogicVRegister compact(VectorFormat vform, + LogicVRegister dst, + const LogicPRegister& pg, + const LogicVRegister& src); + LogicVRegister splice(VectorFormat vform, + LogicVRegister dst, + const LogicPRegister& pg, + const LogicVRegister& src1, + const LogicVRegister& src2); + LogicVRegister sel(VectorFormat vform, + LogicVRegister dst, + const SimPRegister& pg, + const LogicVRegister& src1, + const LogicVRegister& src2); + LogicPRegister sel(LogicPRegister dst, + const LogicPRegister& pg, + const LogicPRegister& src1, + const LogicPRegister& src2); + LogicVRegister sminmax(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2, + bool max); + LogicVRegister smax(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2); + LogicVRegister smin(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2); + LogicVRegister sminmaxp(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2, + bool max); + LogicVRegister smaxp(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2); + LogicVRegister sminp(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2); + LogicVRegister addp(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src); + LogicVRegister addv(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src); + LogicVRegister uaddlv(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src); + LogicVRegister saddlv(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src); + LogicVRegister sminmaxv(VectorFormat vform, + LogicVRegister dst, + const LogicPRegister& pg, + const LogicVRegister& src, + bool max); + LogicVRegister smaxv(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src); + LogicVRegister sminv(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src); + LogicVRegister uxtl(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src, + bool is_2 = false); + LogicVRegister uxtl2(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src); + LogicVRegister sxtl(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src, + bool is_2 = false); + LogicVRegister sxtl2(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src); + LogicVRegister uxt(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src, + unsigned from_size_in_bits); + LogicVRegister sxt(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src, + unsigned from_size_in_bits); + LogicVRegister tbl(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& tab, + const LogicVRegister& ind); + LogicVRegister tbl(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& tab, + const LogicVRegister& tab2, + const LogicVRegister& ind); + LogicVRegister tbl(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& tab, + const LogicVRegister& tab2, + const LogicVRegister& tab3, + const LogicVRegister& ind); + LogicVRegister tbl(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& tab, + const 
LogicVRegister& tab2, + const LogicVRegister& tab3, + const LogicVRegister& tab4, + const LogicVRegister& ind); + LogicVRegister Table(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& ind, + bool zero_out_of_bounds, + const LogicVRegister* tab1, + const LogicVRegister* tab2 = NULL, + const LogicVRegister* tab3 = NULL, + const LogicVRegister* tab4 = NULL); + LogicVRegister tbx(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& tab, + const LogicVRegister& ind); + LogicVRegister tbx(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& tab, + const LogicVRegister& tab2, + const LogicVRegister& ind); + LogicVRegister tbx(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& tab, + const LogicVRegister& tab2, + const LogicVRegister& tab3, + const LogicVRegister& ind); + LogicVRegister tbx(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& tab, + const LogicVRegister& tab2, + const LogicVRegister& tab3, + const LogicVRegister& tab4, + const LogicVRegister& ind); + LogicVRegister uaddl(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2); + LogicVRegister uaddl2(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2); + LogicVRegister uaddw(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2); + LogicVRegister uaddw2(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2); + LogicVRegister saddl(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2); + LogicVRegister saddl2(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2); + LogicVRegister saddw(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2); + LogicVRegister saddw2(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2); + LogicVRegister usubl(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2); + LogicVRegister usubl2(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2); + LogicVRegister usubw(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2); + LogicVRegister usubw2(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2); + LogicVRegister ssubl(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2); + LogicVRegister ssubl2(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2); + LogicVRegister ssubw(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2); + LogicVRegister ssubw2(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2); + LogicVRegister uminmax(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2, + bool max); + LogicVRegister umax(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2); + LogicVRegister umin(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2); + LogicVRegister uminmaxp(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, 
+ const LogicVRegister& src2, + bool max); + LogicVRegister umaxp(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2); + LogicVRegister uminp(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2); + LogicVRegister uminmaxv(VectorFormat vform, + LogicVRegister dst, + const LogicPRegister& pg, + const LogicVRegister& src, + bool max); + LogicVRegister umaxv(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src); + LogicVRegister uminv(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src); + LogicVRegister trn1(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2); + LogicVRegister trn2(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2); + LogicVRegister zip1(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2); + LogicVRegister zip2(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2); + LogicVRegister uzp1(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2); + LogicVRegister uzp2(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2); + LogicVRegister shl(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src, + int shift); + LogicVRegister scvtf(VectorFormat vform, + unsigned dst_data_size_in_bits, + unsigned src_data_size_in_bits, + LogicVRegister dst, + const LogicPRegister& pg, + const LogicVRegister& src, + FPRounding round, + int fbits = 0); + LogicVRegister scvtf(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src, + int fbits, + FPRounding rounding_mode); + LogicVRegister ucvtf(VectorFormat vform, + unsigned dst_data_size, + unsigned src_data_size, + LogicVRegister dst, + const LogicPRegister& pg, + const LogicVRegister& src, + FPRounding round, + int fbits = 0); + LogicVRegister ucvtf(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src, + int fbits, + FPRounding rounding_mode); + LogicVRegister sshll(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src, + int shift); + LogicVRegister sshll2(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src, + int shift); + LogicVRegister shll(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src); + LogicVRegister shll2(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src); + LogicVRegister ushll(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src, + int shift); + LogicVRegister ushll2(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src, + int shift); + LogicVRegister sli(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src, + int shift); + LogicVRegister sri(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src, + int shift); + LogicVRegister sshr(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src, + int shift); + LogicVRegister ushr(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src, + int shift); + LogicVRegister ssra(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src, + int shift); + LogicVRegister usra(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src, + int shift); + LogicVRegister srsra(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src, + int 
shift); + LogicVRegister ursra(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src, + int shift); + LogicVRegister suqadd(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2); + LogicVRegister usqadd(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2); + LogicVRegister sqshl(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src, + int shift); + LogicVRegister uqshl(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src, + int shift); + LogicVRegister sqshlu(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src, + int shift); + LogicVRegister abs(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src); + LogicVRegister neg(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src); + LogicVRegister extractnarrow(VectorFormat vform, + LogicVRegister dst, + bool dst_is_signed, + const LogicVRegister& src, + bool src_is_signed); + LogicVRegister xtn(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src); + LogicVRegister sqxtn(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src); + LogicVRegister uqxtn(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src); + LogicVRegister sqxtun(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src); + LogicVRegister absdiff(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2, + bool is_signed); + LogicVRegister saba(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2); + LogicVRegister uaba(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2); + LogicVRegister shrn(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src, + int shift); + LogicVRegister shrn2(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src, + int shift); + LogicVRegister rshrn(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src, + int shift); + LogicVRegister rshrn2(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src, + int shift); + LogicVRegister uqshrn(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src, + int shift); + LogicVRegister uqshrn2(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src, + int shift); + LogicVRegister uqrshrn(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src, + int shift); + LogicVRegister uqrshrn2(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src, + int shift); + LogicVRegister sqshrn(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src, + int shift); + LogicVRegister sqshrn2(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src, + int shift); + LogicVRegister sqrshrn(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src, + int shift); + LogicVRegister sqrshrn2(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src, + int shift); + LogicVRegister sqshrun(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src, + int shift); + LogicVRegister sqshrun2(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src, + int shift); + LogicVRegister sqrshrun(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src, + int shift); + LogicVRegister sqrshrun2(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src, + int 
shift); + LogicVRegister sqrdmulh(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2, + bool round = true); + LogicVRegister dot(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2, + bool is_src1_signed, + bool is_src2_signed); + LogicVRegister sdot(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2); + LogicVRegister udot(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2); + LogicVRegister usdot(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2); + LogicVRegister cdot(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& acc, + const LogicVRegister& src1, + const LogicVRegister& src2, + int rot); + LogicVRegister sqrdcmlah(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& srca, + const LogicVRegister& src1, + const LogicVRegister& src2, + int rot); + LogicVRegister sqrdcmlah(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& srca, + const LogicVRegister& src1, + const LogicVRegister& src2, + int index, + int rot); + LogicVRegister sqrdmlash(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2, + bool round = true, + bool sub_op = false); + LogicVRegister sqrdmlash_d(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2, + bool round = true, + bool sub_op = false); + LogicVRegister sqrdmlah(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2, + bool round = true); + LogicVRegister sqrdmlsh(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2, + bool round = true); + LogicVRegister sqdmulh(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2); + LogicVRegister matmul(VectorFormat vform_dst, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2, + bool src1_signed, + bool src2_signed); + template + LogicVRegister fmatmul(VectorFormat vform, + LogicVRegister srcdst, + const LogicVRegister& src1, + const LogicVRegister& src2); + LogicVRegister fmatmul(VectorFormat vform, + LogicVRegister srcdst, + const LogicVRegister& src1, + const LogicVRegister& src2); +#define NEON_3VREG_LOGIC_LIST(V) \ + V(addhn) \ + V(addhn2) \ + V(raddhn) \ + V(raddhn2) \ + V(subhn) \ + V(subhn2) \ + V(rsubhn) \ + V(rsubhn2) \ + V(pmull) \ + V(pmull2) \ + V(sabal) \ + V(sabal2) \ + V(uabal) \ + V(uabal2) \ + V(sabdl) \ + V(sabdl2) \ + V(uabdl) \ + V(uabdl2) \ + V(smull2) \ + V(umull2) \ + V(smlal2) \ + V(umlal2) \ + V(smlsl2) \ + V(umlsl2) \ + V(sqdmlal2) \ + V(sqdmlsl2) \ + V(sqdmull2) + +#define DEFINE_LOGIC_FUNC(FXN) \ + LogicVRegister FXN(VectorFormat vform, \ + LogicVRegister dst, \ + const LogicVRegister& src1, \ + const LogicVRegister& src2); + NEON_3VREG_LOGIC_LIST(DEFINE_LOGIC_FUNC) +#undef DEFINE_LOGIC_FUNC + +#define NEON_MULL_LIST(V) \ + V(smull) \ + V(umull) \ + V(smlal) \ + V(umlal) \ + V(smlsl) \ + V(umlsl) \ + V(sqdmlal) \ + V(sqdmlsl) \ + V(sqdmull) + +#define DECLARE_NEON_MULL_OP(FN) \ + LogicVRegister FN(VectorFormat vform, \ + LogicVRegister dst, \ + const LogicVRegister& src1, \ + const LogicVRegister& src2, \ + bool is_2 = false); + NEON_MULL_LIST(DECLARE_NEON_MULL_OP) +#undef DECLARE_NEON_MULL_OP + +#define NEON_FP3SAME_LIST(V) \ + V(fadd, FPAdd, 
false) \ + V(fsub, FPSub, true) \ + V(fmul, FPMul, true) \ + V(fmulx, FPMulx, true) \ + V(fdiv, FPDiv, true) \ + V(fmax, FPMax, false) \ + V(fmin, FPMin, false) \ + V(fmaxnm, FPMaxNM, false) \ + V(fminnm, FPMinNM, false) + +#define DECLARE_NEON_FP_VECTOR_OP(FN, OP, PROCNAN) \ + template \ + LogicVRegister FN(VectorFormat vform, \ + LogicVRegister dst, \ + const LogicVRegister& src1, \ + const LogicVRegister& src2); \ + LogicVRegister FN(VectorFormat vform, \ + LogicVRegister dst, \ + const LogicVRegister& src1, \ + const LogicVRegister& src2); + NEON_FP3SAME_LIST(DECLARE_NEON_FP_VECTOR_OP) +#undef DECLARE_NEON_FP_VECTOR_OP + +#define NEON_FPPAIRWISE_LIST(V) \ + V(faddp, fadd, FPAdd) \ + V(fmaxp, fmax, FPMax) \ + V(fmaxnmp, fmaxnm, FPMaxNM) \ + V(fminp, fmin, FPMin) \ + V(fminnmp, fminnm, FPMinNM) + +#define DECLARE_NEON_FP_PAIR_OP(FNP, FN, OP) \ + LogicVRegister FNP(VectorFormat vform, \ + LogicVRegister dst, \ + const LogicVRegister& src1, \ + const LogicVRegister& src2); \ + LogicVRegister FNP(VectorFormat vform, \ + LogicVRegister dst, \ + const LogicVRegister& src); + NEON_FPPAIRWISE_LIST(DECLARE_NEON_FP_PAIR_OP) +#undef DECLARE_NEON_FP_PAIR_OP + + enum FrintMode { + kFrintToInteger = 0, + kFrintToInt32 = 32, + kFrintToInt64 = 64 + }; + + template + LogicVRegister frecps(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2); + LogicVRegister frecps(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2); + template + LogicVRegister frsqrts(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2); + LogicVRegister frsqrts(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2); + template + LogicVRegister fmla(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& srca, + const LogicVRegister& src1, + const LogicVRegister& src2); + LogicVRegister fmla(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& srca, + const LogicVRegister& src1, + const LogicVRegister& src2); + template + LogicVRegister fmls(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& srca, + const LogicVRegister& src1, + const LogicVRegister& src2); + LogicVRegister fmls(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& srca, + const LogicVRegister& src1, + const LogicVRegister& src2); + LogicVRegister fnmul(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2); + + LogicVRegister fmlal(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2); + LogicVRegister fmlal2(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2); + LogicVRegister fmlsl(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2); + LogicVRegister fmlsl2(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2); + + template + LogicVRegister fcmp(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2, + Condition cond); + LogicVRegister fcmp(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2, + Condition cond); + LogicVRegister fabscmp(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2, + Condition cond); + LogicVRegister fcmp_zero(VectorFormat 
vform, + LogicVRegister dst, + const LogicVRegister& src, + Condition cond); + + template + LogicVRegister fneg(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src); + LogicVRegister fneg(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src); + template + LogicVRegister frecpx(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src); + LogicVRegister frecpx(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src); + LogicVRegister ftsmul(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2); + LogicVRegister ftssel(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2); + LogicVRegister ftmad(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2, + unsigned index); + LogicVRegister fexpa(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src); + LogicVRegister flogb(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src); + template + LogicVRegister fscale(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2); + LogicVRegister fscale(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2); + template + LogicVRegister fabs_(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src); + LogicVRegister fabs_(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src); + LogicVRegister fabd(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2); + LogicVRegister frint(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src, + FPRounding rounding_mode, + bool inexact_exception = false, + FrintMode frint_mode = kFrintToInteger); + LogicVRegister fcvt(VectorFormat dst_vform, + VectorFormat src_vform, + LogicVRegister dst, + const LogicPRegister& pg, + const LogicVRegister& src); + LogicVRegister fcvts(VectorFormat vform, + unsigned dst_data_size_in_bits, + unsigned src_data_size_in_bits, + LogicVRegister dst, + const LogicPRegister& pg, + const LogicVRegister& src, + FPRounding round, + int fbits = 0); + LogicVRegister fcvts(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src, + FPRounding rounding_mode, + int fbits = 0); + LogicVRegister fcvtu(VectorFormat vform, + unsigned dst_data_size_in_bits, + unsigned src_data_size_in_bits, + LogicVRegister dst, + const LogicPRegister& pg, + const LogicVRegister& src, + FPRounding round, + int fbits = 0); + LogicVRegister fcvtu(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src, + FPRounding rounding_mode, + int fbits = 0); + LogicVRegister fcvtl(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src); + LogicVRegister fcvtl2(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src); + LogicVRegister fcvtn(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src); + LogicVRegister fcvtn2(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src); + LogicVRegister fcvtxn(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src); + LogicVRegister fcvtxn2(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src); + LogicVRegister fsqrt(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src); + LogicVRegister frsqrte(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src); + LogicVRegister frecpe(VectorFormat 
vform, + LogicVRegister dst, + const LogicVRegister& src, + FPRounding rounding); + LogicVRegister ursqrte(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src); + LogicVRegister urecpe(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src); + + LogicPRegister pfalse(LogicPRegister dst); + LogicPRegister pfirst(LogicPRegister dst, + const LogicPRegister& pg, + const LogicPRegister& src); + LogicPRegister ptrue(VectorFormat vform, LogicPRegister dst, int pattern); + LogicPRegister pnext(VectorFormat vform, + LogicPRegister dst, + const LogicPRegister& pg, + const LogicPRegister& src); + + LogicVRegister asrd(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + int shift); + + LogicVRegister andv(VectorFormat vform, + LogicVRegister dst, + const LogicPRegister& pg, + const LogicVRegister& src); + LogicVRegister eorv(VectorFormat vform, + LogicVRegister dst, + const LogicPRegister& pg, + const LogicVRegister& src); + LogicVRegister orv(VectorFormat vform, + LogicVRegister dst, + const LogicPRegister& pg, + const LogicVRegister& src); + LogicVRegister saddv(VectorFormat vform, + LogicVRegister dst, + const LogicPRegister& pg, + const LogicVRegister& src); + LogicVRegister sminv(VectorFormat vform, + LogicVRegister dst, + const LogicPRegister& pg, + const LogicVRegister& src); + LogicVRegister smaxv(VectorFormat vform, + LogicVRegister dst, + const LogicPRegister& pg, + const LogicVRegister& src); + LogicVRegister uaddv(VectorFormat vform, + LogicVRegister dst, + const LogicPRegister& pg, + const LogicVRegister& src); + LogicVRegister uminv(VectorFormat vform, + LogicVRegister dst, + const LogicPRegister& pg, + const LogicVRegister& src); + LogicVRegister umaxv(VectorFormat vform, + LogicVRegister dst, + const LogicPRegister& pg, + const LogicVRegister& src); + + LogicVRegister interleave_top_bottom(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src); + + template + struct TFPPairOp { + typedef T (Simulator::*type)(T a, T b); + }; + + template + LogicVRegister FPPairedAcrossHelper(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src, + typename TFPPairOp::type fn, + uint64_t inactive_value); + + LogicVRegister FPPairedAcrossHelper( + VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src, + typename TFPPairOp::type fn16, + typename TFPPairOp::type fn32, + typename TFPPairOp::type fn64, + uint64_t inactive_value); + + LogicVRegister fminv(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src); + LogicVRegister fmaxv(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src); + LogicVRegister fminnmv(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src); + LogicVRegister fmaxnmv(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src); + LogicVRegister faddv(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src); + + static const uint32_t CRC32_POLY = 0x04C11DB7; + static const uint32_t CRC32C_POLY = 0x1EDC6F41; + uint32_t Poly32Mod2(unsigned n, uint64_t data, uint32_t poly); + template + uint32_t Crc32Checksum(uint32_t acc, T val, uint32_t poly); + uint32_t Crc32Checksum(uint32_t acc, uint64_t val, uint32_t poly); + + void SysOp_W(int op, int64_t val); + + template + T FPRecipSqrtEstimate(T op); + template + T FPRecipEstimate(T op, FPRounding rounding); + template + R FPToFixed(T op, int fbits, bool is_signed, FPRounding rounding); + + void FPCompare(double val0, double val1, FPTrapFlags trap); + double 
FPRoundInt(double value, FPRounding round_mode); + double FPRoundInt(double value, FPRounding round_mode, FrintMode frint_mode); + double FPRoundIntCommon(double value, FPRounding round_mode); + double recip_sqrt_estimate(double a); + double recip_estimate(double a); + double FPRecipSqrtEstimate(double a); + double FPRecipEstimate(double a); + double FixedToDouble(int64_t src, int fbits, FPRounding round_mode); + double UFixedToDouble(uint64_t src, int fbits, FPRounding round_mode); + float FixedToFloat(int64_t src, int fbits, FPRounding round_mode); + float UFixedToFloat(uint64_t src, int fbits, FPRounding round_mode); + ::vixl::internal::SimFloat16 FixedToFloat16(int64_t src, + int fbits, + FPRounding round_mode); + ::vixl::internal::SimFloat16 UFixedToFloat16(uint64_t src, + int fbits, + FPRounding round_mode); + int16_t FPToInt16(double value, FPRounding rmode); + int32_t FPToInt32(double value, FPRounding rmode); + int64_t FPToInt64(double value, FPRounding rmode); + uint16_t FPToUInt16(double value, FPRounding rmode); + uint32_t FPToUInt32(double value, FPRounding rmode); + uint64_t FPToUInt64(double value, FPRounding rmode); + int32_t FPToFixedJS(double value); + + template + T FPAdd(T op1, T op2); + + template + T FPNeg(T op); + + template + T FPDiv(T op1, T op2); + + template + T FPMax(T a, T b); + + template + T FPMaxNM(T a, T b); + + template + T FPMin(T a, T b); + + template + T FPMinNM(T a, T b); + + template + T FPMulNaNs(T op1, T op2); + + template + T FPMul(T op1, T op2); + + template + T FPMulx(T op1, T op2); + + template + T FPMulAdd(T a, T op1, T op2); + + template + T FPSqrt(T op); + + template + T FPSub(T op1, T op2); + + template + T FPRecipStepFused(T op1, T op2); + + template + T FPRSqrtStepFused(T op1, T op2); + + // This doesn't do anything at the moment. We'll need it if we want support + // for cumulative exception bits or floating-point exceptions. + void FPProcessException() {} + + bool FPProcessNaNs(const Instruction* instr); + + // Pseudo Printf instruction + void DoPrintf(const Instruction* instr); + + // Pseudo-instructions to configure CPU features dynamically. + void DoConfigureCPUFeatures(const Instruction* instr); + + void DoSaveCPUFeatures(const Instruction* instr); + void DoRestoreCPUFeatures(const Instruction* instr); + + // General arithmetic helpers ---------------------------- + + // Add `delta` to the accumulator (`acc`), optionally saturate, then zero- or + // sign-extend. Initial `acc` bits outside `n` are ignored, but the delta must + // be a valid int_t. 
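+  // (That is, `delta` must fit in an n-bit two's-complement value.)
+  // Illustrative example, based on the description above rather than on the
+  // VIXL sources: with n == 8, acc == 0xff and delta == +1, the saturating
+  // unsigned form returns 0xff, while the non-saturating form wraps to 0
+  // before being zero-extended.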
+ uint64_t IncDecN(uint64_t acc, + int64_t delta, + unsigned n, + bool is_saturating = false, + bool is_signed = false); + + // SVE helpers ------------------------------------------- + LogicVRegister SVEBitwiseLogicalUnpredicatedHelper(LogicalOp op, + VectorFormat vform, + LogicVRegister zd, + const LogicVRegister& zn, + const LogicVRegister& zm); + + LogicPRegister SVEPredicateLogicalHelper(SVEPredicateLogicalOp op, + LogicPRegister Pd, + const LogicPRegister& pn, + const LogicPRegister& pm); + + LogicVRegister SVEBitwiseImmHelper(SVEBitwiseLogicalWithImm_UnpredicatedOp op, + VectorFormat vform, + LogicVRegister zd, + uint64_t imm); + enum UnpackType { kHiHalf, kLoHalf }; + enum ExtendType { kSignedExtend, kUnsignedExtend }; + LogicVRegister unpk(VectorFormat vform, + LogicVRegister zd, + const LogicVRegister& zn, + UnpackType unpack_type, + ExtendType extend_type); + + LogicPRegister SVEIntCompareVectorsHelper(Condition cc, + VectorFormat vform, + LogicPRegister dst, + const LogicPRegister& mask, + const LogicVRegister& src1, + const LogicVRegister& src2, + bool is_wide_elements = false, + FlagsUpdate flags = SetFlags); + + void SVEGatherLoadScalarPlusVectorHelper(const Instruction* instr, + VectorFormat vform, + SVEOffsetModifier mod); + + // Store each active zt[lane] to `addr.GetElementAddress(lane, ...)`. + // + // `zt_code` specifies the code of the first register (zt). Each additional + // register (up to `reg_count`) is `(zt_code + i) % 32`. + // + // This helper calls LogZWrite in the proper way, according to `addr`. + void SVEStructuredStoreHelper(VectorFormat vform, + const LogicPRegister& pg, + unsigned zt_code, + const LogicSVEAddressVector& addr); + // Load each active zt[lane] from `addr.GetElementAddress(lane, ...)`. + void SVEStructuredLoadHelper(VectorFormat vform, + const LogicPRegister& pg, + unsigned zt_code, + const LogicSVEAddressVector& addr, + bool is_signed = false); + + enum SVEFaultTolerantLoadType { + // - Elements active in both FFR and pg are accessed as usual. If the access + // fails, the corresponding lane and all subsequent lanes are filled with + // an unpredictable value, and made inactive in FFR. + // + // - Elements active in FFR but not pg are set to zero. + // + // - Elements that are not active in FFR are filled with an unpredictable + // value, regardless of pg. + kSVENonFaultLoad, + + // If type == kSVEFirstFaultLoad, the behaviour is the same, except that the + // first active element is always accessed, regardless of FFR, and will + // generate a real fault if it is inaccessible. If the lane is not active in + // FFR, the actual value loaded into the result is still unpredictable. + kSVEFirstFaultLoad + }; + + // Load with first-faulting or non-faulting load semantics, respecting and + // updating FFR. + void SVEFaultTolerantLoadHelper(VectorFormat vform, + const LogicPRegister& pg, + unsigned zt_code, + const LogicSVEAddressVector& addr, + SVEFaultTolerantLoadType type, + bool is_signed); + + LogicVRegister SVEBitwiseShiftHelper(Shift shift_op, + VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2, + bool is_wide_elements); + + // Pack all even- or odd-numbered elements of source vector side by side and + // place in elements of lower half the destination vector, and leave the upper + // half all zero. 
+ // [...| H | G | F | E | D | C | B | A ] + // => [...................| G | E | C | A ] + LogicVRegister pack_even_elements(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src); + + // [...| H | G | F | E | D | C | B | A ] + // => [...................| H | F | D | B ] + LogicVRegister pack_odd_elements(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src); + + LogicVRegister adcl(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2, + bool top); + + template + LogicVRegister FTMaddHelper(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2, + uint64_t coeff_pos, + uint64_t coeff_neg); + + // Return the first or last active lane, or -1 if none are active. + int GetFirstActive(VectorFormat vform, const LogicPRegister& pg) const; + int GetLastActive(VectorFormat vform, const LogicPRegister& pg) const; + + int CountActiveLanes(VectorFormat vform, const LogicPRegister& pg) const; + + // Count active and true lanes in `pn`. + int CountActiveAndTrueLanes(VectorFormat vform, + const LogicPRegister& pg, + const LogicPRegister& pn) const; + + // Count the number of lanes referred to by `pattern`, given the vector + // length. If `pattern` is not a recognised SVEPredicateConstraint, this + // returns zero. + int GetPredicateConstraintLaneCount(VectorFormat vform, int pattern) const; + + // Simulate a runtime call. + void DoRuntimeCall(const Instruction* instr); + + // Processor state --------------------------------------- + + // Simulated monitors for exclusive access instructions. + SimExclusiveLocalMonitor local_monitor_; + SimExclusiveGlobalMonitor global_monitor_; + + // Output stream. + FILE* stream_; + PrintDisassembler* print_disasm_; + + // General purpose registers. Register 31 is the stack pointer. + SimRegister registers_[kNumberOfRegisters]; + + // Vector registers + SimVRegister vregisters_[kNumberOfVRegisters]; + + // SVE predicate registers. + SimPRegister pregisters_[kNumberOfPRegisters]; + + // SVE first-fault register. + SimFFRRegister ffr_register_; + + // A pseudo SVE predicate register with all bits set to true. + SimPRegister pregister_all_true_; + + // Program Status Register. + // bits[31, 27]: Condition flags N, Z, C, and V. + // (Negative, Zero, Carry, Overflow) + SimSystemRegister nzcv_; + + // Floating-Point Control Register + SimSystemRegister fpcr_; + + // Only a subset of FPCR features are supported by the simulator. This helper + // checks that the FPCR settings are supported. + // + // This is checked when floating-point instructions are executed, not when + // FPCR is set. This allows generated code to modify FPCR for external + // functions, or to save and restore it when entering and leaving generated + // code. + void AssertSupportedFPCR() { + // No flush-to-zero support. + VIXL_ASSERT(ReadFpcr().GetFZ() == 0); + // Ties-to-even rounding only. + VIXL_ASSERT(ReadFpcr().GetRMode() == FPTieEven); + // No alternative half-precision support. + VIXL_ASSERT(ReadFpcr().GetAHP() == 0); + } + + static int CalcNFlag(uint64_t result, unsigned reg_size) { + return (result >> (reg_size - 1)) & 1; + } + + static int CalcZFlag(uint64_t result) { return (result == 0) ? 
1 : 0; } + + static const uint32_t kConditionFlagsMask = 0xf0000000; + + Memory memory_; + + static const size_t kDefaultStackGuardStartSize = 0; + static const size_t kDefaultStackGuardEndSize = 4 * 1024; + static const size_t kDefaultStackUsableSize = 8 * 1024; + + Decoder* decoder_; + // Indicates if the pc has been modified by the instruction and should not be + // automatically incremented. + bool pc_modified_; + const Instruction* pc_; + + // Pointer to the last simulated instruction, used for checking the validity + // of the current instruction with the previous instruction, such as movprfx. + Instruction const* last_instr_; + + // Branch type register, used for branch target identification. + BType btype_; + + // Next value of branch type register after the current instruction has been + // decoded. + BType next_btype_; + + // Global flag for enabling guarded pages. + // TODO: implement guarding at page granularity, rather than globally. + bool guard_pages_; + + static const char* xreg_names[]; + static const char* wreg_names[]; + static const char* breg_names[]; + static const char* hreg_names[]; + static const char* sreg_names[]; + static const char* dreg_names[]; + static const char* vreg_names[]; + static const char* zreg_names[]; + static const char* preg_names[]; + + private: + using FormToVisitorFnMap = + std::unordered_map>; + static const FormToVisitorFnMap* GetFormToVisitorFnMap(); + + uint32_t form_hash_; + + static const PACKey kPACKeyIA; + static const PACKey kPACKeyIB; + static const PACKey kPACKeyDA; + static const PACKey kPACKeyDB; + static const PACKey kPACKeyGA; + + bool CanReadMemory(uintptr_t address, size_t size); + + // CanReadMemory needs placeholder file descriptors, so we use a pipe. We can + // save some system call overhead by opening them on construction, rather than + // on every call to CanReadMemory. + int placeholder_pipe_fd_[2]; + + template + static T FPDefaultNaN(); + + // Standard NaN processing. + template + T FPProcessNaN(T op) { + VIXL_ASSERT(IsNaN(op)); + if (IsSignallingNaN(op)) { + FPProcessException(); + } + return (ReadDN() == kUseDefaultNaN) ? FPDefaultNaN() : ToQuietNaN(op); + } + + template + T FPProcessNaNs(T op1, T op2) { + if (IsSignallingNaN(op1)) { + return FPProcessNaN(op1); + } else if (IsSignallingNaN(op2)) { + return FPProcessNaN(op2); + } else if (IsNaN(op1)) { + VIXL_ASSERT(IsQuietNaN(op1)); + return FPProcessNaN(op1); + } else if (IsNaN(op2)) { + VIXL_ASSERT(IsQuietNaN(op2)); + return FPProcessNaN(op2); + } else { + return 0.0; + } + } + + template + T FPProcessNaNs3(T op1, T op2, T op3) { + if (IsSignallingNaN(op1)) { + return FPProcessNaN(op1); + } else if (IsSignallingNaN(op2)) { + return FPProcessNaN(op2); + } else if (IsSignallingNaN(op3)) { + return FPProcessNaN(op3); + } else if (IsNaN(op1)) { + VIXL_ASSERT(IsQuietNaN(op1)); + return FPProcessNaN(op1); + } else if (IsNaN(op2)) { + VIXL_ASSERT(IsQuietNaN(op2)); + return FPProcessNaN(op2); + } else if (IsNaN(op3)) { + VIXL_ASSERT(IsQuietNaN(op3)); + return FPProcessNaN(op3); + } else { + return 0.0; + } + } + + // Construct a SimVRegister from a SimPRegister, where each byte-sized lane of + // the destination is set to all true (0xff) when the corresponding + // predicate flag is set, and false (0x00) otherwise. + SimVRegister ExpandToSimVRegister(const SimPRegister& preg); + + // Set each predicate flag in pd where the corresponding assigned-sized lane + // in vreg is non-zero. Clear the flag, otherwise. 
This is almost the opposite + // operation to ExpandToSimVRegister(), except that any non-zero lane is + // interpreted as true. + void ExtractFromSimVRegister(VectorFormat vform, + SimPRegister& pd, // NOLINT(runtime/references) + SimVRegister vreg); + + bool coloured_trace_; + + // A set of TraceParameters flags. + int trace_parameters_; + + // Indicates whether the exclusive-access warning has been printed. + bool print_exclusive_access_warning_; + void PrintExclusiveAccessWarning(); + + CPUFeaturesAuditor cpu_features_auditor_; + std::vector saved_cpu_features_; + + // State for *rand48 functions, used to simulate randomness with repeatable + // behaviour (so that tests are deterministic). This is used to simulate RNDR + // and RNDRRS, as well as to simulate a source of entropy for architecturally + // undefined behaviour. + uint16_t rand_state_[3]; + + // A configurable size of SVE vector registers. + unsigned vector_length_; + + // Representation of memory attributes such as MTE tagging and BTI page + // protection in addition to branch interceptions. + MetaDataDepot meta_data_; +}; + +#if defined(VIXL_HAS_SIMULATED_RUNTIME_CALL_SUPPORT) && __cplusplus < 201402L +// Base case of the recursive template used to emulate C++14 +// `std::index_sequence`. +template +struct Simulator::emulated_make_index_sequence_helper<0, I...> + : Simulator::emulated_index_sequence {}; +#endif + +template +void MetaDataDepot::BranchInterception::operator()( + Simulator* simulator) const { + if (callback_ == nullptr) { + Simulator::RuntimeCallStructHelper:: + Wrapper(simulator, reinterpret_cast(function_)); + } else { + callback_(reinterpret_cast(function_)); + } +} + +} // namespace aarch64 +} // namespace vixl + +#endif // VIXL_INCLUDE_SIMULATOR_AARCH64 + +#endif // VIXL_AARCH64_SIMULATOR_AARCH64_H_ diff --git a/3rdparty/vixl/include/vixl/aarch64/simulator-constants-aarch64.h b/3rdparty/vixl/include/vixl/aarch64/simulator-constants-aarch64.h new file mode 100644 index 0000000000..1aa4f851f3 --- /dev/null +++ b/3rdparty/vixl/include/vixl/aarch64/simulator-constants-aarch64.h @@ -0,0 +1,194 @@ +// Copyright 2015, VIXL authors +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// * Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// * Neither the name of ARM Limited nor the names of its contributors may be +// used to endorse or promote products derived from this software without +// specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS CONTRIBUTORS "AS IS" AND +// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +// DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE +// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +#ifndef VIXL_AARCH64_SIMULATOR_CONSTANTS_AARCH64_H_ +#define VIXL_AARCH64_SIMULATOR_CONSTANTS_AARCH64_H_ + +#include "instructions-aarch64.h" + +namespace vixl { +namespace aarch64 { + +// Debug instructions. +// +// VIXL's macro-assembler and simulator support a few pseudo instructions to +// make debugging easier. These pseudo instructions do not exist on real +// hardware. +// +// TODO: Also consider allowing these pseudo-instructions to be disabled in the +// simulator, so that users can check that the input is a valid native code. +// (This isn't possible in all cases. Printf won't work, for example.) +// +// Each debug pseudo instruction is represented by a HLT instruction. The HLT +// immediate field is used to identify the type of debug pseudo instruction. + +enum DebugHltOpcode { + kUnreachableOpcode = 0xdeb0, + kPrintfOpcode, + kTraceOpcode, + kLogOpcode, + kRuntimeCallOpcode, + kSetCPUFeaturesOpcode, + kEnableCPUFeaturesOpcode, + kDisableCPUFeaturesOpcode, + kSaveCPUFeaturesOpcode, + kRestoreCPUFeaturesOpcode, + kMTEActive, + kMTEInactive, + // Aliases. + kDebugHltFirstOpcode = kUnreachableOpcode, + kDebugHltLastOpcode = kLogOpcode +}; +VIXL_DEPRECATED("DebugHltOpcode", typedef DebugHltOpcode DebugHltOpcodes); + +// Each pseudo instruction uses a custom encoding for additional arguments, as +// described below. + +// Unreachable - kUnreachableOpcode +// +// Instruction which should never be executed. This is used as a guard in parts +// of the code that should not be reachable, such as in data encoded inline in +// the instructions. + +// Printf - kPrintfOpcode +// - arg_count: The number of arguments. +// - arg_pattern: A set of PrintfArgPattern values, packed into two-bit fields. +// +// Simulate a call to printf. +// +// Floating-point and integer arguments are passed in separate sets of registers +// in AAPCS64 (even for varargs functions), so it is not possible to determine +// the type of each argument without some information about the values that were +// passed in. This information could be retrieved from the printf format string, +// but the format string is not trivial to parse so we encode the relevant +// information with the HLT instruction. +// +// Also, the following registers are populated (as if for a native Aarch64 +// call): +// x0: The format string +// x1-x7: Optional arguments, if type == CPURegister::kRegister +// d0-d7: Optional arguments, if type == CPURegister::kVRegister +const unsigned kPrintfArgCountOffset = 1 * kInstructionSize; +const unsigned kPrintfArgPatternListOffset = 2 * kInstructionSize; +const unsigned kPrintfLength = 3 * kInstructionSize; + +const unsigned kPrintfMaxArgCount = 4; + +// The argument pattern is a set of two-bit-fields, each with one of the +// following values: +enum PrintfArgPattern { + kPrintfArgW = 1, + kPrintfArgX = 2, + // There is no kPrintfArgS because floats are always converted to doubles in C + // varargs calls. 
+ kPrintfArgD = 3 +}; +static const unsigned kPrintfArgPatternBits = 2; + +// Trace - kTraceOpcode +// - parameter: TraceParameter stored as a uint32_t +// - command: TraceCommand stored as a uint32_t +// +// Allow for trace management in the generated code. This enables or disables +// automatic tracing of the specified information for every simulated +// instruction. +const unsigned kTraceParamsOffset = 1 * kInstructionSize; +const unsigned kTraceCommandOffset = 2 * kInstructionSize; +const unsigned kTraceLength = 3 * kInstructionSize; + +// Trace parameters. +enum TraceParameters { + LOG_DISASM = 1 << 0, // Log disassembly. + LOG_REGS = 1 << 1, // Log general purpose registers. + LOG_VREGS = 1 << 2, // Log SVE, NEON and floating-point registers. + LOG_SYSREGS = 1 << 3, // Log the flags and system registers. + LOG_WRITE = 1 << 4, // Log writes to memory. + LOG_BRANCH = 1 << 5, // Log taken branches. + + LOG_NONE = 0, + LOG_STATE = LOG_REGS | LOG_VREGS | LOG_SYSREGS, + LOG_ALL = LOG_DISASM | LOG_STATE | LOG_WRITE | LOG_BRANCH +}; + +// Trace commands. +enum TraceCommand { TRACE_ENABLE = 1, TRACE_DISABLE = 2 }; + +// Log - kLogOpcode +// - parameter: TraceParameter stored as a uint32_t +// +// Print the specified information once. This mechanism is separate from Trace. +// In particular, _all_ of the specified registers are printed, rather than just +// the registers that the instruction writes. +// +// Any combination of the TraceParameters values can be used, except that +// LOG_DISASM is not supported for Log. +const unsigned kLogParamsOffset = 1 * kInstructionSize; +const unsigned kLogLength = 2 * kInstructionSize; + +// Runtime call simulation - kRuntimeCallOpcode +enum RuntimeCallType { kCallRuntime, kTailCallRuntime }; + +const unsigned kRuntimeCallWrapperOffset = 1 * kInstructionSize; +// The size of a pointer on host. +const unsigned kRuntimeCallAddressSize = sizeof(uintptr_t); +const unsigned kRuntimeCallFunctionOffset = + kRuntimeCallWrapperOffset + kRuntimeCallAddressSize; +const unsigned kRuntimeCallTypeOffset = + kRuntimeCallFunctionOffset + kRuntimeCallAddressSize; +const unsigned kRuntimeCallLength = kRuntimeCallTypeOffset + sizeof(uint32_t); + +// Enable or disable CPU features - kSetCPUFeaturesOpcode +// - kEnableCPUFeaturesOpcode +// - kDisableCPUFeaturesOpcode +// - parameter[...]: A list of `CPUFeatures::Feature`s, encoded as +// ConfigureCPUFeaturesElementType and terminated with CPUFeatures::kNone. +// - [Padding to align to kInstructionSize.] +// +// 'Set' completely overwrites the existing CPU features. +// 'Enable' and 'Disable' update the existing CPU features. +// +// These mechanisms allows users to strictly check the use of CPU features in +// different regions of code. +// +// These have no effect on the set of 'seen' features (as reported by +// CPUFeaturesAuditor::HasSeen(...)). +typedef uint8_t ConfigureCPUFeaturesElementType; +const unsigned kConfigureCPUFeaturesListOffset = 1 * kInstructionSize; + +// Save or restore CPU features - kSaveCPUFeaturesOpcode +// - kRestoreCPUFeaturesOpcode +// +// These mechanisms provide a stack-like mechanism for preserving the CPU +// features, or restoring the last-preserved features. These pseudo-instructions +// take no arguments. +// +// These have no effect on the set of 'seen' features (as reported by +// CPUFeaturesAuditor::HasSeen(...)). 
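+// [Illustrative sketch added by the editor; not part of upstream VIXL.]
+// The helper below is hypothetical. It only shows how a list of the two-bit
+// PrintfArgPattern fields described above could be packed into the word
+// stored at kPrintfArgPatternListOffset: argument i occupies bits
+// [2i + 1 : 2i], and a zero field means "no argument" (every valid pattern
+// value is non-zero).
+inline uint32_t PackPrintfArgPatternList(const PrintfArgPattern* patterns,
+                                         unsigned arg_count) {
+  VIXL_ASSERT(arg_count <= kPrintfMaxArgCount);
+  uint32_t packed = 0;
+  for (unsigned i = 0; i < arg_count; i++) {
+    // Shift each two-bit pattern into its field; the first argument uses the
+    // least-significant field.
+    packed |= static_cast<uint32_t>(patterns[i])
+              << (i * kPrintfArgPatternBits);
+  }
+  return packed;
+}
+// For example, a call equivalent to printf("%d %f", w, d) would use
+// arg_count == 2 and a pattern list of
+// kPrintfArgW | (kPrintfArgD << kPrintfArgPatternBits). The exact packing
+// performed by the macro-assembler may differ; this sketch only illustrates
+// the two-bit field scheme described in the Printf section above.
+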
+ +} // namespace aarch64 +} // namespace vixl + +#endif // VIXL_AARCH64_SIMULATOR_CONSTANTS_AARCH64_H_ diff --git a/3rdparty/vixl/include/vixl/assembler-base-vixl.h b/3rdparty/vixl/include/vixl/assembler-base-vixl.h new file mode 100644 index 0000000000..ff866c4fbe --- /dev/null +++ b/3rdparty/vixl/include/vixl/assembler-base-vixl.h @@ -0,0 +1,104 @@ +// Copyright 2016, VIXL authors +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// * Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// * Neither the name of ARM Limited nor the names of its contributors may be +// used to endorse or promote products derived from this software without +// specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS CONTRIBUTORS "AS IS" AND +// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE +// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +#ifndef VIXL_ASSEMBLER_BASE_H +#define VIXL_ASSEMBLER_BASE_H + +#include "code-buffer-vixl.h" + +// Microsoft Visual C++ defines a `mvn` macro that conflicts with our own +// definition. +#if defined(_MSC_VER) && defined(mvn) +#undef mvn +#endif + +namespace vixl { + +class CodeBufferCheckScope; + +namespace internal { + +class AssemblerBase { + public: + AssemblerBase(byte* buffer, size_t capacity) + : buffer_(buffer, capacity), allow_assembler_(false) {} + + virtual ~AssemblerBase() {} + + // Finalize a code buffer of generated instructions. This function must be + // called before executing or copying code from the buffer. + void FinalizeCode() { GetBuffer()->SetClean(); } + + ptrdiff_t GetCursorOffset() const { return GetBuffer().GetCursorOffset(); } + + // Return the address of the cursor. + template + T GetCursorAddress() const { + VIXL_STATIC_ASSERT(sizeof(T) >= sizeof(uintptr_t)); + return GetBuffer().GetOffsetAddress(GetCursorOffset()); + } + + size_t GetSizeOfCodeGenerated() const { return GetCursorOffset(); } + + // Accessors. + CodeBuffer* GetBuffer() { return &buffer_; } + const CodeBuffer& GetBuffer() const { return buffer_; } + bool AllowAssembler() const { return allow_assembler_; } + + protected: + void SetAllowAssembler(bool allow) { allow_assembler_ = allow; } + + // CodeBufferCheckScope must be able to temporarily allow the assembler. + friend class vixl::CodeBufferCheckScope; + + // Buffer where the code is emitted. + CodeBuffer buffer_; + + private: + bool allow_assembler_; + + public: + // Deprecated public interface. + + // Return the address of an offset in the buffer. 
+ template + VIXL_DEPRECATED("GetBuffer().GetOffsetAddress(offset)", + T GetOffsetAddress(ptrdiff_t offset) const) { + return GetBuffer().GetOffsetAddress(offset); + } + + // Return the address of the start of the buffer. + template + VIXL_DEPRECATED("GetBuffer().GetStartAddress()", + T GetStartAddress() const) { + return GetBuffer().GetOffsetAddress(0); + } +}; + +} // namespace internal +} // namespace vixl + +#endif // VIXL_ASSEMBLER_BASE_H diff --git a/3rdparty/vixl/include/vixl/code-buffer-vixl.h b/3rdparty/vixl/include/vixl/code-buffer-vixl.h new file mode 100644 index 0000000000..a59026894f --- /dev/null +++ b/3rdparty/vixl/include/vixl/code-buffer-vixl.h @@ -0,0 +1,160 @@ +// Copyright 2017, VIXL authors +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// * Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// * Neither the name of ARM Limited nor the names of its contributors may be +// used to endorse or promote products derived from this software without +// specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS CONTRIBUTORS "AS IS" AND +// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE +// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +#ifndef VIXL_CODE_BUFFER_H +#define VIXL_CODE_BUFFER_H + +#include + +#include "globals-vixl.h" +#include "utils-vixl.h" + +namespace vixl { + +class CodeBuffer { + public: + CodeBuffer(byte* buffer, size_t capacity); + ~CodeBuffer() VIXL_NEGATIVE_TESTING_ALLOW_EXCEPTION; + + void Reset(); + + ptrdiff_t GetOffsetFrom(ptrdiff_t offset) const { + ptrdiff_t cursor_offset = cursor_ - buffer_; + VIXL_ASSERT((offset >= 0) && (offset <= cursor_offset)); + return cursor_offset - offset; + } + VIXL_DEPRECATED("GetOffsetFrom", + ptrdiff_t OffsetFrom(ptrdiff_t offset) const) { + return GetOffsetFrom(offset); + } + + ptrdiff_t GetCursorOffset() const { return GetOffsetFrom(0); } + VIXL_DEPRECATED("GetCursorOffset", ptrdiff_t CursorOffset() const) { + return GetCursorOffset(); + } + + void Rewind(ptrdiff_t offset) { + byte* rewound_cursor = buffer_ + offset; + VIXL_ASSERT((buffer_ <= rewound_cursor) && (rewound_cursor <= cursor_)); + cursor_ = rewound_cursor; + } + + template + T GetOffsetAddress(ptrdiff_t offset) const { + VIXL_STATIC_ASSERT(sizeof(T) >= sizeof(uintptr_t)); + VIXL_ASSERT((offset >= 0) && (offset <= (cursor_ - buffer_))); + return reinterpret_cast(buffer_ + offset); + } + + // Return the address of the start or end of the emitted code. 
+ template + T GetStartAddress() const { + VIXL_STATIC_ASSERT(sizeof(T) >= sizeof(uintptr_t)); + return GetOffsetAddress(0); + } + template + T GetEndAddress() const { + VIXL_STATIC_ASSERT(sizeof(T) >= sizeof(uintptr_t)); + return GetOffsetAddress(GetSizeInBytes()); + } + + size_t GetRemainingBytes() const { + VIXL_ASSERT((cursor_ >= buffer_) && (cursor_ <= (buffer_ + capacity_))); + return (buffer_ + capacity_) - cursor_; + } + VIXL_DEPRECATED("GetRemainingBytes", size_t RemainingBytes() const) { + return GetRemainingBytes(); + } + + size_t GetSizeInBytes() const { + VIXL_ASSERT((cursor_ >= buffer_) && (cursor_ <= (buffer_ + capacity_))); + return cursor_ - buffer_; + } + + // A code buffer can emit: + // * 8, 16, 32 or 64-bit data: constant. + // * 16 or 32-bit data: instruction. + // * string: debug info. + void Emit8(uint8_t data) { Emit(data); } + + void Emit16(uint16_t data) { Emit(data); } + + void Emit32(uint32_t data) { Emit(data); } + + void Emit64(uint64_t data) { Emit(data); } + + void EmitString(const char* string); + + void EmitData(const void* data, size_t size); + + template + void Emit(T value) { + VIXL_ASSERT(HasSpaceFor(sizeof(value))); + dirty_ = true; + byte* c = cursor_; + memcpy(c, &value, sizeof(value)); + cursor_ = c + sizeof(value); + } + + void UpdateData(size_t offset, const void* data, size_t size); + + // Align to 32bit. + void Align(); + + // Ensure there is enough space for and emit 'n' zero bytes. + void EmitZeroedBytes(int n); + + bool Is16bitAligned() const { return IsAligned<2>(cursor_); } + + bool Is32bitAligned() const { return IsAligned<4>(cursor_); } + + size_t GetCapacity() const { return capacity_; } + VIXL_DEPRECATED("GetCapacity", size_t capacity() const) { + return GetCapacity(); + } + + bool IsDirty() const { return dirty_; } + + void SetClean() { dirty_ = false; } + + bool HasSpaceFor(size_t amount) const { + return GetRemainingBytes() >= amount; + } + + private: + // Backing store of the buffer. + byte* buffer_; + // Pointer to the next location to be written. + byte* cursor_; + // True if there has been any write since the buffer was created or cleaned. + bool dirty_; + // Capacity in bytes of the backing store. + size_t capacity_; +}; + +} // namespace vixl + +#endif // VIXL_CODE_BUFFER_H diff --git a/3rdparty/vixl/include/vixl/code-generation-scopes-vixl.h b/3rdparty/vixl/include/vixl/code-generation-scopes-vixl.h new file mode 100644 index 0000000000..2818ceda0d --- /dev/null +++ b/3rdparty/vixl/include/vixl/code-generation-scopes-vixl.h @@ -0,0 +1,322 @@ +// Copyright 2016, VIXL authors +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// * Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// * Neither the name of ARM Limited nor the names of its contributors may be +// used to endorse or promote products derived from this software without +// specific prior written permission. 
+// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS CONTRIBUTORS "AS IS" AND +// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE +// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + +#ifndef VIXL_CODE_GENERATION_SCOPES_H_ +#define VIXL_CODE_GENERATION_SCOPES_H_ + + +#include "assembler-base-vixl.h" +#include "macro-assembler-interface.h" + + +namespace vixl { + +// This scope will: +// - Allow code emission from the specified `Assembler`. +// - Optionally reserve space in the `CodeBuffer` (if it is managed by VIXL). +// - Optionally, on destruction, check the size of the generated code. +// (The size can be either exact or a maximum size.) +class CodeBufferCheckScope { + public: + // Tell whether or not the scope needs to ensure the associated CodeBuffer + // has enough space for the requested size. + enum BufferSpacePolicy { + kReserveBufferSpace, + kDontReserveBufferSpace, + + // Deprecated, but kept for backward compatibility. + kCheck = kReserveBufferSpace, + kNoCheck = kDontReserveBufferSpace + }; + + // Tell whether or not the scope should assert the amount of code emitted + // within the scope is consistent with the requested amount. + enum SizePolicy { + kNoAssert, // Do not check the size of the code emitted. + kExactSize, // The code emitted must be exactly size bytes. + kMaximumSize // The code emitted must be at most size bytes. + }; + + // This constructor implicitly calls `Open` to initialise the scope + // (`assembler` must not be `NULL`), so it is ready to use immediately after + // it has been constructed. + CodeBufferCheckScope(internal::AssemblerBase* assembler, + size_t size, + BufferSpacePolicy check_policy = kReserveBufferSpace, + SizePolicy size_policy = kMaximumSize) + : assembler_(NULL), initialised_(false) { + Open(assembler, size, check_policy, size_policy); + } + + // This constructor does not implicitly initialise the scope. Instead, the + // user is required to explicitly call the `Open` function before using the + // scope. + CodeBufferCheckScope() : assembler_(NULL), initialised_(false) { + // Nothing to do. + } + + virtual ~CodeBufferCheckScope() { Close(); } + + // This function performs the actual initialisation work. + void Open(internal::AssemblerBase* assembler, + size_t size, + BufferSpacePolicy check_policy = kReserveBufferSpace, + SizePolicy size_policy = kMaximumSize) { + VIXL_ASSERT(!initialised_); + VIXL_ASSERT(assembler != NULL); + assembler_ = assembler; + if (check_policy == kReserveBufferSpace) { + VIXL_ASSERT(assembler->GetBuffer()->HasSpaceFor(size)); + } +#ifdef VIXL_DEBUG + limit_ = assembler_->GetSizeOfCodeGenerated() + size; + assert_policy_ = size_policy; + previous_allow_assembler_ = assembler_->AllowAssembler(); + assembler_->SetAllowAssembler(true); +#else + USE(size_policy); +#endif + initialised_ = true; + } + + // This function performs the cleaning-up work. It must succeed even if the + // scope has not been opened. 
It is safe to call multiple times. + void Close() { +#ifdef VIXL_DEBUG + if (!initialised_) { + return; + } + assembler_->SetAllowAssembler(previous_allow_assembler_); + switch (assert_policy_) { + case kNoAssert: + break; + case kExactSize: + VIXL_ASSERT(assembler_->GetSizeOfCodeGenerated() == limit_); + break; + case kMaximumSize: + VIXL_ASSERT(assembler_->GetSizeOfCodeGenerated() <= limit_); + break; + default: + VIXL_UNREACHABLE(); + } +#endif + initialised_ = false; + } + + protected: + internal::AssemblerBase* assembler_; + SizePolicy assert_policy_; + size_t limit_; + bool previous_allow_assembler_; + bool initialised_; +}; + + +// This scope will: +// - Do the same as `CodeBufferCheckSCope`, but: +// - If managed by VIXL, always reserve space in the `CodeBuffer`. +// - Always check the size (exact or maximum) of the generated code on +// destruction. +// - Emit pools if the specified size would push them out of range. +// - Block pools emission for the duration of the scope. +// This scope allows the `Assembler` and `MacroAssembler` to be freely and +// safely mixed for its duration. +class EmissionCheckScope : public CodeBufferCheckScope { + public: + // This constructor implicitly calls `Open` (when `masm` is not `NULL`) to + // initialise the scope, so it is ready to use immediately after it has been + // constructed. + EmissionCheckScope(MacroAssemblerInterface* masm, + size_t size, + SizePolicy size_policy = kMaximumSize) { + Open(masm, size, size_policy); + } + + // This constructor does not implicitly initialise the scope. Instead, the + // user is required to explicitly call the `Open` function before using the + // scope. + EmissionCheckScope() {} + + virtual ~EmissionCheckScope() { Close(); } + + enum PoolPolicy { + // Do not forbid pool emission inside the scope. Pools will not be emitted + // on `Open` either. + kIgnorePools, + // Force pools to be generated on `Open` if necessary and block their + // emission inside the scope. + kBlockPools, + // Deprecated, but kept for backward compatibility. + kCheckPools = kBlockPools + }; + + void Open(MacroAssemblerInterface* masm, + size_t size, + SizePolicy size_policy = kMaximumSize) { + Open(masm, size, size_policy, kBlockPools); + } + + void Close() { + if (!initialised_) { + return; + } + if (masm_ == NULL) { + // Nothing to do. + return; + } + // Perform the opposite of `Open`, which is: + // - Check the code generation limit was not exceeded. + // - Release the pools. + CodeBufferCheckScope::Close(); + if (pool_policy_ == kBlockPools) { + masm_->ReleasePools(); + } + VIXL_ASSERT(!initialised_); + } + + protected: + void Open(MacroAssemblerInterface* masm, + size_t size, + SizePolicy size_policy, + PoolPolicy pool_policy) { + if (masm == NULL) { + // Nothing to do. + // We may reach this point in a context of conditional code generation. + // See `aarch64::MacroAssembler::MoveImmediateHelper()` for an example. + return; + } + masm_ = masm; + pool_policy_ = pool_policy; + if (pool_policy_ == kBlockPools) { + // To avoid duplicating the work to check that enough space is available + // in the buffer, do not use the more generic `EnsureEmitFor()`. It is + // done below when opening `CodeBufferCheckScope`. + masm->EnsureEmitPoolsFor(size); + masm->BlockPools(); + } + // The buffer should be checked *after* we emit the pools. 
+ CodeBufferCheckScope::Open(masm->AsAssemblerBase(), + size, + kReserveBufferSpace, + size_policy); + VIXL_ASSERT(initialised_); + } + + // This constructor should only be used from code that is *currently + // generating* the pools, to avoid an infinite loop. + EmissionCheckScope(MacroAssemblerInterface* masm, + size_t size, + SizePolicy size_policy, + PoolPolicy pool_policy) { + Open(masm, size, size_policy, pool_policy); + } + + MacroAssemblerInterface* masm_; + PoolPolicy pool_policy_; +}; + +// Use this scope when you need a one-to-one mapping between methods and +// instructions. This scope will: +// - Do the same as `EmissionCheckScope`. +// - Block access to the MacroAssemblerInterface (using run-time assertions). +class ExactAssemblyScope : public EmissionCheckScope { + public: + // This constructor implicitly calls `Open` (when `masm` is not `NULL`) to + // initialise the scope, so it is ready to use immediately after it has been + // constructed. + ExactAssemblyScope(MacroAssemblerInterface* masm, + size_t size, + SizePolicy size_policy = kExactSize) { + Open(masm, size, size_policy); + } + + // This constructor does not implicitly initialise the scope. Instead, the + // user is required to explicitly call the `Open` function before using the + // scope. + ExactAssemblyScope() {} + + virtual ~ExactAssemblyScope() { Close(); } + + void Open(MacroAssemblerInterface* masm, + size_t size, + SizePolicy size_policy = kExactSize) { + Open(masm, size, size_policy, kBlockPools); + } + + void Close() { + if (!initialised_) { + return; + } + if (masm_ == NULL) { + // Nothing to do. + return; + } +#ifdef VIXL_DEBUG + masm_->SetAllowMacroInstructions(previous_allow_macro_assembler_); +#else + USE(previous_allow_macro_assembler_); +#endif + EmissionCheckScope::Close(); + } + + protected: + // This protected constructor allows overriding the pool policy. It is + // available to allow this scope to be used in code that handles generation + // of pools. + ExactAssemblyScope(MacroAssemblerInterface* masm, + size_t size, + SizePolicy assert_policy, + PoolPolicy pool_policy) { + Open(masm, size, assert_policy, pool_policy); + } + + void Open(MacroAssemblerInterface* masm, + size_t size, + SizePolicy size_policy, + PoolPolicy pool_policy) { + VIXL_ASSERT(size_policy != kNoAssert); + if (masm == NULL) { + // Nothing to do. + return; + } + // Rely on EmissionCheckScope::Open to initialise `masm_` and + // `pool_policy_`. + EmissionCheckScope::Open(masm, size, size_policy, pool_policy); +#ifdef VIXL_DEBUG + previous_allow_macro_assembler_ = masm->AllowMacroInstructions(); + masm->SetAllowMacroInstructions(false); +#endif + } + + private: + bool previous_allow_macro_assembler_; +}; + + +} // namespace vixl + +#endif // VIXL_CODE_GENERATION_SCOPES_H_ diff --git a/3rdparty/vixl/include/vixl/compiler-intrinsics-vixl.h b/3rdparty/vixl/include/vixl/compiler-intrinsics-vixl.h new file mode 100644 index 0000000000..50ed3579b7 --- /dev/null +++ b/3rdparty/vixl/include/vixl/compiler-intrinsics-vixl.h @@ -0,0 +1,167 @@ +// Copyright 2015, VIXL authors +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// * Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. 
+// * Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// * Neither the name of ARM Limited nor the names of its contributors may be +// used to endorse or promote products derived from this software without +// specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS CONTRIBUTORS "AS IS" AND +// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE +// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + +#ifndef VIXL_COMPILER_INTRINSICS_H +#define VIXL_COMPILER_INTRINSICS_H + +#include +#include "globals-vixl.h" + +namespace vixl { + +// Helper to check whether the version of GCC used is greater than the specified +// requirement. +#define MAJOR 1000000 +#define MINOR 1000 +#if defined(__GNUC__) && defined(__GNUC_MINOR__) && defined(__GNUC_PATCHLEVEL__) +#define GCC_VERSION_OR_NEWER(major, minor, patchlevel) \ + ((__GNUC__ * (MAJOR) + __GNUC_MINOR__ * (MINOR) + __GNUC_PATCHLEVEL__) >= \ + ((major) * (MAJOR) + ((minor)) * (MINOR) + (patchlevel))) +#elif defined(__GNUC__) && defined(__GNUC_MINOR__) +#define GCC_VERSION_OR_NEWER(major, minor, patchlevel) \ + ((__GNUC__ * (MAJOR) + __GNUC_MINOR__ * (MINOR)) >= \ + ((major) * (MAJOR) + ((minor)) * (MINOR) + (patchlevel))) +#else +#define GCC_VERSION_OR_NEWER(major, minor, patchlevel) 0 +#endif + + +#if defined(__clang__) && !defined(VIXL_NO_COMPILER_BUILTINS) + +// clang-format off +#define COMPILER_HAS_BUILTIN_CLRSB (__has_builtin(__builtin_clrsb)) +#define COMPILER_HAS_BUILTIN_CLZ (__has_builtin(__builtin_clz)) +#define COMPILER_HAS_BUILTIN_CTZ (__has_builtin(__builtin_ctz)) +#define COMPILER_HAS_BUILTIN_FFS (__has_builtin(__builtin_ffs)) +#define COMPILER_HAS_BUILTIN_POPCOUNT (__has_builtin(__builtin_popcount)) +// clang-format on + +#elif defined(__GNUC__) && !defined(VIXL_NO_COMPILER_BUILTINS) +// The documentation for these builtins is available at: +// https://gcc.gnu.org/onlinedocs/gcc-$MAJOR.$MINOR.$PATCHLEVEL/gcc//Other-Builtins.html + +// clang-format off +# define COMPILER_HAS_BUILTIN_CLRSB (GCC_VERSION_OR_NEWER(4, 7, 0)) +# define COMPILER_HAS_BUILTIN_CLZ (GCC_VERSION_OR_NEWER(3, 4, 0)) +# define COMPILER_HAS_BUILTIN_CTZ (GCC_VERSION_OR_NEWER(3, 4, 0)) +# define COMPILER_HAS_BUILTIN_FFS (GCC_VERSION_OR_NEWER(3, 4, 0)) +# define COMPILER_HAS_BUILTIN_POPCOUNT (GCC_VERSION_OR_NEWER(3, 4, 0)) +// clang-format on + +#else +// One can define VIXL_NO_COMPILER_BUILTINS to force using the manually +// implemented C++ methods. 
+ +// clang-format off +#define COMPILER_HAS_BUILTIN_BSWAP false +#define COMPILER_HAS_BUILTIN_CLRSB false +#define COMPILER_HAS_BUILTIN_CLZ false +#define COMPILER_HAS_BUILTIN_CTZ false +#define COMPILER_HAS_BUILTIN_FFS false +#define COMPILER_HAS_BUILTIN_POPCOUNT false +// clang-format on + +#endif + + +template +inline bool IsPowerOf2(V value) { + return (value != 0) && ((value & (value - 1)) == 0); +} + + +// Declaration of fallback functions. +int CountLeadingSignBitsFallBack(int64_t value, int width); +int CountLeadingZerosFallBack(uint64_t value, int width); +int CountSetBitsFallBack(uint64_t value, int width); +int CountTrailingZerosFallBack(uint64_t value, int width); + + +// Implementation of intrinsics functions. +// TODO: The implementations could be improved for sizes different from 32bit +// and 64bit: we could mask the values and call the appropriate builtin. + +// Return the number of leading bits that match the topmost (sign) bit, +// excluding the topmost bit itself. +template +inline int CountLeadingSignBits(V value, int width = (sizeof(V) * 8)) { + VIXL_ASSERT(IsPowerOf2(width) && (width <= 64)); +#if COMPILER_HAS_BUILTIN_CLRSB + VIXL_ASSERT((LLONG_MIN <= value) && (value <= LLONG_MAX)); + int ll_width = sizeof(long long) * kBitsPerByte; // NOLINT(runtime/int) + int result = __builtin_clrsbll(value) - (ll_width - width); + // Check that the value fits in the specified width. + VIXL_ASSERT(result >= 0); + return result; +#else + VIXL_ASSERT((INT64_MIN <= value) && (value <= INT64_MAX)); + return CountLeadingSignBitsFallBack(value, width); +#endif +} + + +template +inline int CountLeadingZeros(V value, int width = (sizeof(V) * 8)) { +#if COMPILER_HAS_BUILTIN_CLZ + if (width == 32) { + return (value == 0) ? 32 : __builtin_clz(static_cast(value)); + } else if (width == 64) { + return (value == 0) ? 64 : __builtin_clzll(value); + } +#endif + return CountLeadingZerosFallBack(value, width); +} + + +template +inline int CountSetBits(V value, int width = (sizeof(V) * 8)) { +#if COMPILER_HAS_BUILTIN_POPCOUNT + if (width == 32) { + return __builtin_popcount(static_cast(value)); + } else if (width == 64) { + return __builtin_popcountll(value); + } +#endif + return CountSetBitsFallBack(value, width); +} + + +template +inline int CountTrailingZeros(V value, int width = (sizeof(V) * 8)) { +#if COMPILER_HAS_BUILTIN_CTZ + if (width == 32) { + return (value == 0) ? 32 : __builtin_ctz(static_cast(value)); + } else if (width == 64) { + return (value == 0) ? 64 : __builtin_ctzll(value); + } +#endif + return CountTrailingZerosFallBack(value, width); +} + +} // namespace vixl + +#endif // VIXL_COMPILER_INTRINSICS_H diff --git a/3rdparty/vixl/include/vixl/cpu-features.h b/3rdparty/vixl/include/vixl/cpu-features.h new file mode 100644 index 0000000000..97eb661a23 --- /dev/null +++ b/3rdparty/vixl/include/vixl/cpu-features.h @@ -0,0 +1,508 @@ +// Copyright 2018, VIXL authors +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// * Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. 
+// * Neither the name of ARM Limited nor the names of its contributors may be +// used to endorse or promote products derived from this software without +// specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS CONTRIBUTORS "AS IS" AND +// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE +// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +#ifndef VIXL_CPU_FEATURES_H +#define VIXL_CPU_FEATURES_H + +#include +#include + +#include "globals-vixl.h" + +namespace vixl { + + +// VIXL aims to handle and detect all architectural features that are likely to +// influence code-generation decisions at EL0 (user-space). +// +// - There may be multiple VIXL feature flags for a given architectural +// extension. This occurs where the extension allow components to be +// implemented independently, or where kernel support is needed, and is likely +// to be fragmented. +// +// For example, Pointer Authentication (kPAuth*) has a separate feature flag +// for access to PACGA, and to indicate that the QARMA algorithm is +// implemented. +// +// - Conversely, some extensions have configuration options that do not affect +// EL0, so these are presented as a single VIXL feature. +// +// For example, the RAS extension (kRAS) has several variants, but the only +// feature relevant to VIXL is the addition of the ESB instruction so we only +// need a single flag. +// +// - VIXL offers separate flags for separate features even if they're +// architecturally linked. +// +// For example, the architecture requires kFPHalf and kNEONHalf to be equal, +// but they have separate hardware ID register fields so VIXL presents them as +// separate features. +// +// - VIXL can detect every feature for which it can generate code. +// +// - VIXL can detect some features for which it cannot generate code. +// +// The CPUFeatures::Feature enum — derived from the macro list below — is +// frequently extended. New features may be added to the list at any point, and +// no assumptions should be made about the numerical values assigned to each +// enum constant. The symbolic names can be considered to be stable. +// +// The debug descriptions are used only for debug output. The 'cpuinfo' strings +// are informative; VIXL does not use /proc/cpuinfo for feature detection. + +// clang-format off +#define VIXL_CPU_FEATURE_LIST(V) \ + /* If set, the OS traps and emulates MRS accesses to relevant (EL1) ID_* */ \ + /* registers, so that the detailed feature registers can be read */ \ + /* directly. */ \ + \ + /* Constant name Debug description Linux 'cpuinfo' string. */ \ + V(kIDRegisterEmulation, "ID register emulation", "cpuid") \ + \ + V(kFP, "FP", "fp") \ + V(kNEON, "NEON", "asimd") \ + V(kCRC32, "CRC32", "crc32") \ + V(kDGH, "DGH", "dgh") \ + /* Speculation control features. 
*/ \ + V(kCSV2, "CSV2", NULL) \ + V(kSCXTNUM, "SCXTNUM", NULL) \ + V(kCSV3, "CSV3", NULL) \ + V(kSB, "SB", "sb") \ + V(kSPECRES, "SPECRES", NULL) \ + V(kSSBS, "SSBS", NULL) \ + V(kSSBSControl, "SSBS (PSTATE control)", "ssbs") \ + /* Cryptographic support instructions. */ \ + V(kAES, "AES", "aes") \ + V(kSHA1, "SHA1", "sha1") \ + V(kSHA2, "SHA2", "sha2") \ + /* A form of PMULL{2} with a 128-bit (1Q) result. */ \ + V(kPmull1Q, "Pmull1Q", "pmull") \ + /* Atomic operations on memory: CAS, LDADD, STADD, SWP, etc. */ \ + V(kAtomics, "Atomics", "atomics") \ + /* Limited ordering regions: LDLAR, STLLR and their variants. */ \ + V(kLORegions, "LORegions", NULL) \ + /* Rounding doubling multiply add/subtract: SQRDMLAH and SQRDMLSH. */ \ + V(kRDM, "RDM", "asimdrdm") \ + /* Scalable Vector Extension. */ \ + V(kSVE, "SVE", "sve") \ + V(kSVEF64MM, "SVE F64MM", "svef64mm") \ + V(kSVEF32MM, "SVE F32MM", "svef32mm") \ + V(kSVEI8MM, "SVE I8MM", "svei8imm") \ + V(kSVEBF16, "SVE BFloat16", "svebf16") \ + /* SDOT and UDOT support (in NEON). */ \ + V(kDotProduct, "DotProduct", "asimddp") \ + /* Int8 matrix multiplication (in NEON). */ \ + V(kI8MM, "NEON I8MM", "i8mm") \ + /* Half-precision (FP16) support for FP and NEON, respectively. */ \ + V(kFPHalf, "FPHalf", "fphp") \ + V(kNEONHalf, "NEONHalf", "asimdhp") \ + /* BFloat16 support (in both FP and NEON.) */ \ + V(kBF16, "FP/NEON BFloat 16", "bf16") \ + /* The RAS extension, including the ESB instruction. */ \ + V(kRAS, "RAS", NULL) \ + /* Data cache clean to the point of persistence: DC CVAP. */ \ + V(kDCPoP, "DCPoP", "dcpop") \ + /* Data cache clean to the point of deep persistence: DC CVADP. */ \ + V(kDCCVADP, "DCCVADP", "dcpodp") \ + /* Cryptographic support instructions. */ \ + V(kSHA3, "SHA3", "sha3") \ + V(kSHA512, "SHA512", "sha512") \ + V(kSM3, "SM3", "sm3") \ + V(kSM4, "SM4", "sm4") \ + /* Pointer authentication for addresses. */ \ + V(kPAuth, "PAuth", "paca") \ + /* Pointer authentication for addresses uses QARMA. */ \ + V(kPAuthQARMA, "PAuthQARMA", NULL) \ + /* Generic authentication (using the PACGA instruction). */ \ + V(kPAuthGeneric, "PAuthGeneric", "pacg") \ + /* Generic authentication uses QARMA. */ \ + V(kPAuthGenericQARMA, "PAuthGenericQARMA", NULL) \ + /* JavaScript-style FP -> integer conversion instruction: FJCVTZS. */ \ + V(kJSCVT, "JSCVT", "jscvt") \ + /* Complex number support for NEON: FCMLA and FCADD. */ \ + V(kFcma, "Fcma", "fcma") \ + /* RCpc-based model (for weaker release consistency): LDAPR and variants. */ \ + V(kRCpc, "RCpc", "lrcpc") \ + V(kRCpcImm, "RCpc (imm)", "ilrcpc") \ + /* Flag manipulation instructions: SETF{8,16}, CFINV, RMIF. */ \ + V(kFlagM, "FlagM", "flagm") \ + /* Unaligned single-copy atomicity. */ \ + V(kUSCAT, "USCAT", "uscat") \ + /* FP16 fused multiply-add or -subtract long: FMLAL{2}, FMLSL{2}. */ \ + V(kFHM, "FHM", "asimdfhm") \ + /* Data-independent timing (for selected instructions). */ \ + V(kDIT, "DIT", "dit") \ + /* Branch target identification. */ \ + V(kBTI, "BTI", "bti") \ + /* Flag manipulation instructions: {AX,XA}FLAG */ \ + V(kAXFlag, "AXFlag", "flagm2") \ + /* Random number generation extension, */ \ + V(kRNG, "RNG", "rng") \ + /* Floating-point round to {32,64}-bit integer. */ \ + V(kFrintToFixedSizedInt,"Frint (bounded)", "frint") \ + /* Memory Tagging Extension. */ \ + V(kMTEInstructions, "MTE (EL0 instructions)", NULL) \ + V(kMTE, "MTE", NULL) \ + V(kMTE3, "MTE (asymmetric)", "mte3") \ + /* PAuth extensions. 
*/ \ + V(kPAuthEnhancedPAC, "PAuth EnhancedPAC", NULL) \ + V(kPAuthEnhancedPAC2, "PAuth EnhancedPAC2", NULL) \ + V(kPAuthFPAC, "PAuth FPAC", NULL) \ + V(kPAuthFPACCombined, "PAuth FPACCombined", NULL) \ + /* Scalable Vector Extension 2. */ \ + V(kSVE2, "SVE2", "sve2") \ + V(kSVESM4, "SVE SM4", "svesm4") \ + V(kSVESHA3, "SVE SHA3", "svesha3") \ + V(kSVEBitPerm, "SVE BitPerm", "svebitperm") \ + V(kSVEAES, "SVE AES", "sveaes") \ + V(kSVEPmull128, "SVE Pmull128", "svepmull") \ + /* Alternate floating-point behavior */ \ + V(kAFP, "AFP", "afp") \ + /* Enhanced Counter Virtualization */ \ + V(kECV, "ECV", "ecv") \ + /* Increased precision of Reciprocal Estimate and Square Root Estimate */ \ + V(kRPRES, "RPRES", "rpres") \ + /* Memory operation instructions, for memcpy, memset */ \ + V(kMOPS, "Memory ops", NULL) \ + /* Scalable Matrix Extension (SME) */ \ + V(kSME, "SME", "sme") \ + V(kSMEi16i64, "SME (i16i64)", "smei16i64") \ + V(kSMEf64f64, "SME (f64f64)", "smef64f64") \ + V(kSMEi8i32, "SME (i8i32)", "smei8i32") \ + V(kSMEf16f32, "SME (f16f32)", "smef16f32") \ + V(kSMEb16f32, "SME (b16f32)", "smeb16f32") \ + V(kSMEf32f32, "SME (f32f32)", "smef32f32") \ + V(kSMEfa64, "SME (fa64)", "smefa64") \ + /* WFET and WFIT instruction support */ \ + V(kWFXT, "WFXT", "wfxt") \ + /* Extended BFloat16 instructions */ \ + V(kEBF16, "EBF16", "ebf16") \ + V(kSVE_EBF16, "EBF16 (SVE)", "sveebf16") \ + V(kCSSC, "CSSC", "cssc") +// clang-format on + + +class CPUFeaturesConstIterator; + +// A representation of the set of features known to be supported by the target +// device. Each feature is represented by a simple boolean flag. +// +// - When the Assembler is asked to assemble an instruction, it asserts (in +// debug mode) that the necessary features are available. +// +// - TODO: The MacroAssembler relies on the Assembler's assertions, but in +// some cases it may be useful for macros to generate a fall-back sequence +// in case features are not available. +// +// - The Simulator assumes by default that all features are available, but it +// is possible to configure it to fail if the simulated code uses features +// that are not enabled. +// +// The Simulator also offers pseudo-instructions to allow features to be +// enabled and disabled dynamically. This is useful when you want to ensure +// that some features are constrained to certain areas of code. +// +// - The base Disassembler knows nothing about CPU features, but the +// PrintDisassembler can be configured to annotate its output with warnings +// about unavailable features. The Simulator uses this feature when +// instruction trace is enabled. +// +// - The Decoder-based components -- the Simulator and PrintDisassembler -- +// rely on a CPUFeaturesAuditor visitor. This visitor keeps a list of +// features actually encountered so that a large block of code can be +// examined (either directly or through simulation), and the required +// features analysed later. +// +// Expected usage: +// +// // By default, VIXL uses CPUFeatures::AArch64LegacyBaseline(), for +// // compatibility with older version of VIXL. +// MacroAssembler masm; +// +// // Generate code only for the current CPU. +// masm.SetCPUFeatures(CPUFeatures::InferFromOS()); +// +// // Turn off feature checking entirely. +// masm.SetCPUFeatures(CPUFeatures::All()); +// +// Feature set manipulation: +// +// CPUFeatures f; // The default constructor gives an empty set. +// // Individual features can be added (or removed). 
+// f.Combine(CPUFeatures::kFP, CPUFeatures::kNEON, CPUFeatures::AES); +// f.Remove(CPUFeatures::kNEON); +// +// // Some helpers exist for extensions that provide several features. +// f.Remove(CPUFeatures::All()); +// f.Combine(CPUFeatures::AArch64LegacyBaseline()); +// +// // Chained construction is also possible. +// CPUFeatures g = +// f.With(CPUFeatures::kPmull1Q).Without(CPUFeatures::kCRC32); +// +// // Features can be queried. Where multiple features are given, they are +// // combined with logical AND. +// if (h.Has(CPUFeatures::kNEON)) { ... } +// if (h.Has(CPUFeatures::kFP, CPUFeatures::kNEON)) { ... } +// if (h.Has(g)) { ... } +// // If the empty set is requested, the result is always 'true'. +// VIXL_ASSERT(h.Has(CPUFeatures())); +// +// // For debug and reporting purposes, features can be enumerated (or +// // printed directly): +// std::cout << CPUFeatures::kNEON; // Prints something like "NEON". +// std::cout << f; // Prints something like "FP, NEON, CRC32". +class CPUFeatures { + public: + // clang-format off + // Individual features. + // These should be treated as opaque tokens. User code should not rely on + // specific numeric values or ordering. + enum Feature { + // Refer to VIXL_CPU_FEATURE_LIST (above) for the list of feature names that + // this class supports. + + kNone = -1, +#define VIXL_DECLARE_FEATURE(SYMBOL, NAME, CPUINFO) SYMBOL, + VIXL_CPU_FEATURE_LIST(VIXL_DECLARE_FEATURE) +#undef VIXL_DECLARE_FEATURE + kNumberOfFeatures + }; + // clang-format on + + // By default, construct with no features enabled. + CPUFeatures() : features_{} {} + + // Construct with some features already enabled. + template + CPUFeatures(T first, U... others) : features_{} { + Combine(first, others...); + } + + // Construct with all features enabled. This can be used to disable feature + // checking: `Has(...)` returns true regardless of the argument. + static CPUFeatures All(); + + // Construct an empty CPUFeatures. This is equivalent to the default + // constructor, but is provided for symmetry and convenience. + static CPUFeatures None() { return CPUFeatures(); } + + // The presence of these features was assumed by version of VIXL before this + // API was added, so using this set by default ensures API compatibility. + static CPUFeatures AArch64LegacyBaseline() { + return CPUFeatures(kFP, kNEON, kCRC32); + } + + // Construct a new CPUFeatures object using ID registers. This assumes that + // kIDRegisterEmulation is present. + static CPUFeatures InferFromIDRegisters(); + + enum QueryIDRegistersOption { + kDontQueryIDRegisters, + kQueryIDRegistersIfAvailable + }; + + // Construct a new CPUFeatures object based on what the OS reports. + static CPUFeatures InferFromOS( + QueryIDRegistersOption option = kQueryIDRegistersIfAvailable); + + // Combine another CPUFeatures object into this one. Features that already + // exist in this set are left unchanged. + void Combine(const CPUFeatures& other); + + // Combine a specific feature into this set. If it already exists in the set, + // the set is left unchanged. + void Combine(Feature feature); + + // Combine multiple features (or feature sets) into this set. + template + void Combine(T first, U... others) { + Combine(first); + Combine(others...); + } + + // Remove features in another CPUFeatures object from this one. + void Remove(const CPUFeatures& other); + + // Remove a specific feature from this set. This has no effect if the feature + // doesn't exist in the set. 
+ void Remove(Feature feature0); + + // Remove multiple features (or feature sets) from this set. + template + void Remove(T first, U... others) { + Remove(first); + Remove(others...); + } + + // Chaining helpers for convenient construction by combining other CPUFeatures + // or individual Features. + template + CPUFeatures With(T... others) const { + CPUFeatures f(*this); + f.Combine(others...); + return f; + } + + template + CPUFeatures Without(T... others) const { + CPUFeatures f(*this); + f.Remove(others...); + return f; + } + + // Test whether the `other` feature set is equal to or a subset of this one. + bool Has(const CPUFeatures& other) const; + + // Test whether a single feature exists in this set. + // Note that `Has(kNone)` always returns true. + bool Has(Feature feature) const; + + // Test whether all of the specified features exist in this set. + template + bool Has(T first, U... others) const { + return Has(first) && Has(others...); + } + + // Return the number of enabled features. + size_t Count() const; + bool HasNoFeatures() const { return Count() == 0; } + + // Check for equivalence. + bool operator==(const CPUFeatures& other) const { + return Has(other) && other.Has(*this); + } + bool operator!=(const CPUFeatures& other) const { return !(*this == other); } + + typedef CPUFeaturesConstIterator const_iterator; + + const_iterator begin() const; + const_iterator end() const; + + private: + // Each bit represents a feature. This set will be extended as needed. + std::bitset features_; + + friend std::ostream& operator<<(std::ostream& os, + const vixl::CPUFeatures& features); +}; + +std::ostream& operator<<(std::ostream& os, vixl::CPUFeatures::Feature feature); +std::ostream& operator<<(std::ostream& os, const vixl::CPUFeatures& features); + +// This is not a proper C++ iterator type, but it simulates enough of +// ForwardIterator that simple loops can be written. +class CPUFeaturesConstIterator { + public: + CPUFeaturesConstIterator(const CPUFeatures* cpu_features = NULL, + CPUFeatures::Feature start = CPUFeatures::kNone) + : cpu_features_(cpu_features), feature_(start) { + VIXL_ASSERT(IsValid()); + } + + bool operator==(const CPUFeaturesConstIterator& other) const; + bool operator!=(const CPUFeaturesConstIterator& other) const { + return !(*this == other); + } + CPUFeaturesConstIterator& operator++(); + CPUFeaturesConstIterator operator++(int); + + CPUFeatures::Feature operator*() const { + VIXL_ASSERT(IsValid()); + return feature_; + } + + // For proper support of C++'s simplest "Iterator" concept, this class would + // have to define member types (such as CPUFeaturesIterator::pointer) to make + // it appear as if it iterates over Feature objects in memory. That is, we'd + // need CPUFeatures::iterator to behave like std::vector::iterator. + // This is at least partially possible -- the std::vector specialisation + // does something similar -- but it doesn't seem worthwhile for a + // special-purpose debug helper, so they are omitted here. + private: + const CPUFeatures* cpu_features_; + CPUFeatures::Feature feature_; + + bool IsValid() const { + if (cpu_features_ == NULL) { + return feature_ == CPUFeatures::kNone; + } + return cpu_features_->Has(feature_); + } +}; + +// A convenience scope for temporarily modifying a CPU features object. This +// allows features to be enabled for short sequences. 
+// +// Expected usage: +// +// { +// CPUFeaturesScope cpu(&masm, CPUFeatures::kCRC32); +// // This scope can now use CRC32, as well as anything else that was enabled +// // before the scope. +// +// ... +// +// // At the end of the scope, the original CPU features are restored. +// } +class CPUFeaturesScope { + public: + // Start a CPUFeaturesScope on any object that implements + // `CPUFeatures* GetCPUFeatures()`. + template + explicit CPUFeaturesScope(T* cpu_features_wrapper) + : cpu_features_(cpu_features_wrapper->GetCPUFeatures()), + old_features_(*cpu_features_) {} + + // Start a CPUFeaturesScope on any object that implements + // `CPUFeatures* GetCPUFeatures()`, with the specified features enabled. + template + CPUFeaturesScope(T* cpu_features_wrapper, U first, V... features) + : cpu_features_(cpu_features_wrapper->GetCPUFeatures()), + old_features_(*cpu_features_) { + cpu_features_->Combine(first, features...); + } + + ~CPUFeaturesScope() { *cpu_features_ = old_features_; } + + // For advanced usage, the CPUFeatures object can be accessed directly. + // The scope will restore the original state when it ends. + + CPUFeatures* GetCPUFeatures() const { return cpu_features_; } + + void SetCPUFeatures(const CPUFeatures& cpu_features) { + *cpu_features_ = cpu_features; + } + + private: + CPUFeatures* const cpu_features_; + const CPUFeatures old_features_; +}; + + +} // namespace vixl + +#endif // VIXL_CPU_FEATURES_H diff --git a/3rdparty/vixl/include/vixl/globals-vixl.h b/3rdparty/vixl/include/vixl/globals-vixl.h new file mode 100644 index 0000000000..4548ba897f --- /dev/null +++ b/3rdparty/vixl/include/vixl/globals-vixl.h @@ -0,0 +1,298 @@ +// Copyright 2015, VIXL authors +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// * Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// * Neither the name of ARM Limited nor the names of its contributors may be +// used to endorse or promote products derived from this software without +// specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS CONTRIBUTORS "AS IS" AND +// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE +// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +#ifndef VIXL_GLOBALS_H +#define VIXL_GLOBALS_H + +#if __cplusplus < 201402L +#error VIXL requires C++14 +#endif + +// Get standard C99 macros for integer types. 
+#ifndef __STDC_CONSTANT_MACROS +#define __STDC_CONSTANT_MACROS +#endif + +#ifndef __STDC_LIMIT_MACROS +#define __STDC_LIMIT_MACROS +#endif + +#ifndef __STDC_FORMAT_MACROS +#define __STDC_FORMAT_MACROS +#endif + +extern "C" { +#include +#include +} + +#include +#include +#include +#include +#include + +#include "platform-vixl.h" + +#ifdef VIXL_NEGATIVE_TESTING +#include +#include +#include +#endif + +namespace vixl { + +typedef uint8_t byte; + +const int KBytes = 1024; +const int MBytes = 1024 * KBytes; + +const int kBitsPerByteLog2 = 3; +const int kBitsPerByte = 1 << kBitsPerByteLog2; + +template +struct Unsigned; + +template <> +struct Unsigned<32> { + typedef uint32_t type; +}; + +template <> +struct Unsigned<64> { + typedef uint64_t type; +}; + +} // namespace vixl + +// Detect the host's pointer size. +#if (UINTPTR_MAX == UINT32_MAX) +#define VIXL_HOST_POINTER_32 +#elif (UINTPTR_MAX == UINT64_MAX) +#define VIXL_HOST_POINTER_64 +#else +#error "Unsupported host pointer size." +#endif + +#ifdef VIXL_NEGATIVE_TESTING +#define VIXL_ABORT() \ + do { \ + std::ostringstream oss; \ + oss << "Aborting in " << __FILE__ << ", line " << __LINE__ << std::endl; \ + throw std::runtime_error(oss.str()); \ + } while (false) +#define VIXL_ABORT_WITH_MSG(msg) \ + do { \ + std::ostringstream oss; \ + oss << (msg) << "in " << __FILE__ << ", line " << __LINE__ << std::endl; \ + throw std::runtime_error(oss.str()); \ + } while (false) +#define VIXL_CHECK(condition) \ + do { \ + if (!(condition)) { \ + std::ostringstream oss; \ + oss << "Assertion failed (" #condition ")\nin "; \ + oss << __FILE__ << ", line " << __LINE__ << std::endl; \ + throw std::runtime_error(oss.str()); \ + } \ + } while (false) +#else +#define VIXL_ABORT() \ + do { \ + printf("Aborting in %s, line %i\n", __FILE__, __LINE__); \ + abort(); \ + } while (false) +#define VIXL_ABORT_WITH_MSG(msg) \ + do { \ + printf("%sin %s, line %i\n", (msg), __FILE__, __LINE__); \ + abort(); \ + } while (false) +#define VIXL_CHECK(condition) \ + do { \ + if (!(condition)) { \ + printf("Assertion failed (%s)\nin %s, line %i\n", \ + #condition, \ + __FILE__, \ + __LINE__); \ + abort(); \ + } \ + } while (false) +#endif +#ifdef VIXL_DEBUG +#define VIXL_ASSERT(condition) VIXL_CHECK(condition) +#define VIXL_UNIMPLEMENTED() \ + do { \ + VIXL_ABORT_WITH_MSG("UNIMPLEMENTED "); \ + } while (false) +#define VIXL_UNREACHABLE() \ + do { \ + VIXL_ABORT_WITH_MSG("UNREACHABLE "); \ + } while (false) +#else +#define VIXL_ASSERT(condition) ((void)0) +#define VIXL_UNIMPLEMENTED() ((void)0) +#define VIXL_UNREACHABLE() ((void)0) +#endif +// This is not as powerful as template based assertions, but it is simple. +// It assumes that the descriptions are unique. If this starts being a problem, +// we can switch to a different implementation. +#define VIXL_CONCAT(a, b) a##b +#if __cplusplus >= 201103L +#define VIXL_STATIC_ASSERT_LINE(line_unused, condition, message) \ + static_assert(condition, message) +#else +#define VIXL_STATIC_ASSERT_LINE(line, condition, message_unused) \ + typedef char VIXL_CONCAT(STATIC_ASSERT_LINE_, line)[(condition) ? 
1 : -1] \ + __attribute__((unused)) +#endif +#define VIXL_STATIC_ASSERT(condition) \ + VIXL_STATIC_ASSERT_LINE(__LINE__, condition, "") +#define VIXL_STATIC_ASSERT_MESSAGE(condition, message) \ + VIXL_STATIC_ASSERT_LINE(__LINE__, condition, message) + +#define VIXL_WARNING(message) \ + do { \ + printf("WARNING in %s, line %i: %s", __FILE__, __LINE__, message); \ + } while (false) + +template +inline void USE(const T1&) {} + +template +inline void USE(const T1&, const T2&) {} + +template +inline void USE(const T1&, const T2&, const T3&) {} + +template +inline void USE(const T1&, const T2&, const T3&, const T4&) {} + +#define VIXL_ALIGNMENT_EXCEPTION() \ + do { \ + VIXL_ABORT_WITH_MSG("ALIGNMENT EXCEPTION\t"); \ + } while (0) + +// The clang::fallthrough attribute is used along with the Wimplicit-fallthrough +// argument to annotate intentional fall-through between switch labels. +// For more information please refer to: +// http://clang.llvm.org/docs/AttributeReference.html#fallthrough-clang-fallthrough +#ifndef __has_warning +#define __has_warning(x) 0 +#endif + +// Fallthrough annotation for Clang and C++11(201103L). +#if __has_warning("-Wimplicit-fallthrough") && __cplusplus >= 201103L +#define VIXL_FALLTHROUGH() [[clang::fallthrough]] +// Fallthrough annotation for GCC >= 7. +#elif defined(__GNUC__) && __GNUC__ >= 7 +#define VIXL_FALLTHROUGH() __attribute__((fallthrough)) +#else +#define VIXL_FALLTHROUGH() \ + do { \ + } while (0) +#endif + +#if __cplusplus >= 201103L +#define VIXL_NO_RETURN [[noreturn]] +#else +#define VIXL_NO_RETURN __attribute__((noreturn)) +#endif +#ifdef VIXL_DEBUG +#define VIXL_NO_RETURN_IN_DEBUG_MODE VIXL_NO_RETURN +#else +#define VIXL_NO_RETURN_IN_DEBUG_MODE +#endif + +#if __cplusplus >= 201103L +#define VIXL_OVERRIDE override +#define VIXL_CONSTEXPR constexpr +#define VIXL_HAS_CONSTEXPR 1 +#else +#define VIXL_OVERRIDE +#define VIXL_CONSTEXPR +#endif + +// With VIXL_NEGATIVE_TESTING on, VIXL_ASSERT and VIXL_CHECK will throw +// exceptions but C++11 marks destructors as noexcept(true) by default. +#if defined(VIXL_NEGATIVE_TESTING) && __cplusplus >= 201103L +#define VIXL_NEGATIVE_TESTING_ALLOW_EXCEPTION noexcept(false) +#else +#define VIXL_NEGATIVE_TESTING_ALLOW_EXCEPTION +#endif + +#ifdef VIXL_INCLUDE_SIMULATOR_AARCH64 +#ifndef VIXL_AARCH64_GENERATE_SIMULATOR_CODE +#define VIXL_AARCH64_GENERATE_SIMULATOR_CODE 1 +#endif +#else +#ifndef VIXL_AARCH64_GENERATE_SIMULATOR_CODE +#define VIXL_AARCH64_GENERATE_SIMULATOR_CODE 0 +#endif +#if VIXL_AARCH64_GENERATE_SIMULATOR_CODE +#warning "Generating Simulator instructions without Simulator support." +#endif +#endif + +// We do not have a simulator for AArch32, although we can pretend we do so that +// tests that require running natively can be skipped. +#ifndef __arm__ +#define VIXL_INCLUDE_SIMULATOR_AARCH32 +#ifndef VIXL_AARCH32_GENERATE_SIMULATOR_CODE +#define VIXL_AARCH32_GENERATE_SIMULATOR_CODE 1 +#endif +#else +#ifndef VIXL_AARCH32_GENERATE_SIMULATOR_CODE +#define VIXL_AARCH32_GENERATE_SIMULATOR_CODE 0 +#endif +#endif + +#ifdef USE_SIMULATOR +#error "Please see the release notes for USE_SIMULATOR." 
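// A minimal sketch of how the assertion macros defined in this header are
// typically used; the helper function itself is hypothetical:
//
//   int BytesToBits(int bytes) {
//     VIXL_STATIC_ASSERT(vixl::kBitsPerByte == 8);
//     VIXL_ASSERT(bytes >= 0);  // Checked in debug builds only.
//     VIXL_CHECK(bytes <= (0x7fffffff / vixl::kBitsPerByte));  // Always checked.
//     return bytes * vixl::kBitsPerByte;
//   }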
+#endif + +// Target Architecture/ISA +#ifdef VIXL_INCLUDE_TARGET_A64 +#ifndef VIXL_INCLUDE_TARGET_AARCH64 +#define VIXL_INCLUDE_TARGET_AARCH64 +#endif +#endif + +#if defined(VIXL_INCLUDE_TARGET_A32) && defined(VIXL_INCLUDE_TARGET_T32) +#ifndef VIXL_INCLUDE_TARGET_AARCH32 +#define VIXL_INCLUDE_TARGET_AARCH32 +#endif +#elif defined(VIXL_INCLUDE_TARGET_A32) +#ifndef VIXL_INCLUDE_TARGET_A32_ONLY +#define VIXL_INCLUDE_TARGET_A32_ONLY +#endif +#else +#ifndef VIXL_INCLUDE_TARGET_T32_ONLY +#define VIXL_INCLUDE_TARGET_T32_ONLY +#endif +#endif + + +#endif // VIXL_GLOBALS_H diff --git a/3rdparty/vixl/include/vixl/invalset-vixl.h b/3rdparty/vixl/include/vixl/invalset-vixl.h new file mode 100644 index 0000000000..dc15149599 --- /dev/null +++ b/3rdparty/vixl/include/vixl/invalset-vixl.h @@ -0,0 +1,920 @@ +// Copyright 2015, VIXL authors +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// * Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// * Neither the name of ARM Limited nor the names of its contributors may be +// used to endorse or promote products derived from this software without +// specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS CONTRIBUTORS "AS IS" AND +// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE +// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +#ifndef VIXL_INVALSET_H_ +#define VIXL_INVALSET_H_ + +#include + +#include +#include + +#include "globals-vixl.h" + +namespace vixl { + +// We define a custom data structure template and its iterator as `std` +// containers do not fit the performance requirements for some of our use cases. +// +// The structure behaves like an iterable unordered set with special properties +// and restrictions. "InvalSet" stands for "Invalidatable Set". +// +// Restrictions and requirements: +// - Adding an element already present in the set is illegal. In debug mode, +// this is checked at insertion time. +// - The templated class `ElementType` must provide comparison operators so that +// `std::sort()` can be used. +// - A key must be available to represent invalid elements. +// - Elements with an invalid key must compare higher or equal to any other +// element. +// +// Use cases and performance considerations: +// Our use cases present two specificities that allow us to design this +// structure to provide fast insertion *and* fast search and deletion +// operations: +// - Elements are (generally) inserted in order (sorted according to their key). 
+// - A key is available to mark elements as invalid (deleted). +// The backing `std::vector` allows for fast insertions. When +// searching for an element we ensure the elements are sorted (this is generally +// the case) and perform a binary search. When deleting an element we do not +// free the associated memory immediately. Instead, an element to be deleted is +// marked with the 'invalid' key. Other methods of the container take care of +// ignoring entries marked as invalid. +// To avoid the overhead of the `std::vector` container when only few entries +// are used, a number of elements are preallocated. + +// 'ElementType' and 'KeyType' are respectively the types of the elements and +// their key. The structure only reclaims memory when safe to do so, if the +// number of elements that can be reclaimed is greater than `RECLAIM_FROM` and +// greater than ` / RECLAIM_FACTOR. +// clang-format off +#define TEMPLATE_INVALSET_P_DECL \ + class ElementType, \ + unsigned N_PREALLOCATED_ELEMENTS, \ + class KeyType, \ + KeyType INVALID_KEY, \ + size_t RECLAIM_FROM, \ + unsigned RECLAIM_FACTOR +// clang-format on + +#define TEMPLATE_INVALSET_P_DEF \ + ElementType, N_PREALLOCATED_ELEMENTS, KeyType, INVALID_KEY, RECLAIM_FROM, \ + RECLAIM_FACTOR + +template +class InvalSetIterator; // Forward declaration. + +template +class InvalSet { + public: + InvalSet(); + ~InvalSet() VIXL_NEGATIVE_TESTING_ALLOW_EXCEPTION; + + static const size_t kNPreallocatedElements = N_PREALLOCATED_ELEMENTS; + static const KeyType kInvalidKey = INVALID_KEY; + + // C++ STL iterator interface. + typedef InvalSetIterator > iterator; + iterator begin(); + iterator end(); + + // It is illegal to insert an element already present in the set. + void insert(const ElementType& element); + + // Looks for the specified element in the set and - if found - deletes it. + // The return value is the number of elements erased: either 0 or 1. + size_t erase(const ElementType& element); + + // This indicates the number of (valid) elements stored in this set. + size_t size() const; + + // Returns true if no elements are stored in the set. + // Note that this does not mean the backing storage is empty: it can still + // contain invalid elements. + bool empty() const; + + void clear(); + + const ElementType GetMinElement(); + + // This returns the key of the minimum element in the set. + KeyType GetMinElementKey(); + + static bool IsValid(const ElementType& element); + static KeyType GetKey(const ElementType& element); + static void SetKey(ElementType* element, KeyType key); + + typedef ElementType _ElementType; + typedef KeyType _KeyType; + + protected: + // Returns a pointer to the element in vector_ if it was found, or NULL + // otherwise. + ElementType* Search(const ElementType& element); + + // The argument *must* point to an element stored in *this* set. + // This function is not allowed to move elements in the backing vector + // storage. + void EraseInternal(ElementType* element); + + // The elements in the range searched must be sorted. + ElementType* BinarySearch(const ElementType& element, + ElementType* start, + ElementType* end) const; + + // Sort the elements. + enum SortType { + // The 'hard' version guarantees that invalid elements are moved to the end + // of the container. + kHardSort, + // The 'soft' version only guarantees that the elements will be sorted. + // Invalid elements may still be present anywhere in the set. + kSoftSort + }; + void Sort(SortType sort_type); + + // Delete the elements that have an invalid key. 
The complexity is linear + // with the size of the vector. + void Clean(); + + const ElementType Front() const; + const ElementType Back() const; + + // Delete invalid trailing elements and return the last valid element in the + // set. + const ElementType CleanBack(); + + // Returns a pointer to the start or end of the backing storage. + const ElementType* StorageBegin() const; + const ElementType* StorageEnd() const; + ElementType* StorageBegin(); + ElementType* StorageEnd(); + + // Returns the index of the element within the backing storage. The element + // must belong to the backing storage. + size_t GetElementIndex(const ElementType* element) const; + + // Returns the element at the specified index in the backing storage. + const ElementType* GetElementAt(size_t index) const; + ElementType* GetElementAt(size_t index); + + static const ElementType* GetFirstValidElement(const ElementType* from, + const ElementType* end); + + void CacheMinElement(); + const ElementType GetCachedMinElement() const; + + bool ShouldReclaimMemory() const; + void ReclaimMemory(); + + bool IsUsingVector() const { return vector_ != NULL; } + void SetSorted(bool sorted) { sorted_ = sorted; } + + // We cache some data commonly required by users to improve performance. + // We cannot cache pointers to elements as we do not control the backing + // storage. + bool valid_cached_min_; + size_t cached_min_index_; // Valid iff `valid_cached_min_` is true. + KeyType cached_min_key_; // Valid iff `valid_cached_min_` is true. + + // Indicates whether the elements are sorted. + bool sorted_; + + // This represents the number of (valid) elements in this set. + size_t size_; + + // The backing storage is either the array of preallocated elements or the + // vector. The structure starts by using the preallocated elements, and + // transitions (permanently) to using the vector once more than + // kNPreallocatedElements are used. + // Elements are only invalidated when using the vector. The preallocated + // storage always only contains valid elements. + ElementType preallocated_[kNPreallocatedElements]; + std::vector* vector_; + + // Iterators acquire and release this monitor. While a set is acquired, + // certain operations are illegal to ensure that the iterator will + // correctly iterate over the elements in the set. + int monitor_; +#ifdef VIXL_DEBUG + int monitor() const { return monitor_; } + void Acquire() { monitor_++; } + void Release() { + monitor_--; + VIXL_ASSERT(monitor_ >= 0); + } +#endif + + private: +// The copy constructor and assignment operator are not used and the defaults +// are unsafe, so disable them (without an implementation). +#if __cplusplus >= 201103L + InvalSet(const InvalSet& other) = delete; + InvalSet operator=(const InvalSet& other) = delete; +#else + InvalSet(const InvalSet& other); + InvalSet operator=(const InvalSet& other); +#endif + + friend class InvalSetIterator >; +}; + + +template +class InvalSetIterator { + public: + using iterator_category = std::forward_iterator_tag; + using value_type = typename S::_ElementType; + using difference_type = std::ptrdiff_t; + using pointer = typename S::_ElementType*; + using reference = typename S::_ElementType&; + + private: + // Redefine types to mirror the associated set types. + typedef typename S::_ElementType ElementType; + typedef typename S::_KeyType KeyType; + + public: + explicit InvalSetIterator(S* inval_set = NULL); + + // This class implements the standard copy-swap idiom. 
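// A condensed sketch of how the InvalSet declared above is typically
// instantiated. The OffsetSet name and the parameter values are illustrative
// only; elements and keys are both ptrdiff_t, -1 marks invalid entries, and
// GetKey()/SetKey() are specialised for the instantiation:
//
//   typedef InvalSet<ptrdiff_t, 4, ptrdiff_t, -1, 8, 2> OffsetSet;
//
//   template <>
//   inline ptrdiff_t OffsetSet::GetKey(const ptrdiff_t& element) {
//     return element;
//   }
//   template <>
//   inline void OffsetSet::SetKey(ptrdiff_t* element, ptrdiff_t key) {
//     *element = key;
//   }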
+ ~InvalSetIterator(); + InvalSetIterator(const InvalSetIterator& other); + InvalSetIterator& operator=(InvalSetIterator other); +#if __cplusplus >= 201103L + InvalSetIterator(InvalSetIterator&& other) noexcept; +#endif + + friend void swap(InvalSetIterator& a, InvalSetIterator& b) { + using std::swap; + swap(a.using_vector_, b.using_vector_); + swap(a.index_, b.index_); + swap(a.inval_set_, b.inval_set_); + } + + // Return true if the iterator is at the end of the set. + bool Done() const; + + // Move this iterator to the end of the set. + void Finish(); + + // Delete the current element and advance the iterator to point to the next + // element. + void DeleteCurrentAndAdvance(); + + static bool IsValid(const ElementType& element); + static KeyType GetKey(const ElementType& element); + + // Extra helpers to support the forward-iterator interface. + InvalSetIterator& operator++(); // Pre-increment. + InvalSetIterator operator++(int); // Post-increment. + bool operator==(const InvalSetIterator& rhs) const; + bool operator!=(const InvalSetIterator& rhs) const { + return !(*this == rhs); + } + ElementType& operator*() { return *Current(); } + const ElementType& operator*() const { return *Current(); } + ElementType* operator->() { return Current(); } + const ElementType* operator->() const { return Current(); } + + protected: + void MoveToValidElement(); + + // Indicates if the iterator is looking at the vector or at the preallocated + // elements. + bool using_vector_; + // Used when looking at the preallocated elements, or in debug mode when using + // the vector to track how many times the iterator has advanced. + size_t index_; + typename std::vector::iterator iterator_; + S* inval_set_; + + // TODO: These helpers are deprecated and will be removed in future versions + // of VIXL. + ElementType* Current() const; + void Advance(); +}; + + +template +InvalSet::InvalSet() + : valid_cached_min_(false), sorted_(true), size_(0), vector_(NULL) { +#ifdef VIXL_DEBUG + monitor_ = 0; +#endif +} + + +template +InvalSet::~InvalSet() + VIXL_NEGATIVE_TESTING_ALLOW_EXCEPTION { + VIXL_ASSERT(monitor_ == 0); + delete vector_; +} + + +template +typename InvalSet::iterator +InvalSet::begin() { + return iterator(this); +} + + +template +typename InvalSet::iterator +InvalSet::end() { + iterator end(this); + end.Finish(); + return end; +} + + +template +void InvalSet::insert(const ElementType& element) { + VIXL_ASSERT(monitor() == 0); + VIXL_ASSERT(IsValid(element)); + VIXL_ASSERT(Search(element) == NULL); + SetSorted(empty() || (sorted_ && (element > CleanBack()))); + if (IsUsingVector()) { + vector_->push_back(element); + } else { + if (size_ < kNPreallocatedElements) { + preallocated_[size_] = element; + } else { + // Transition to using the vector. + vector_ = + new std::vector(preallocated_, preallocated_ + size_); + vector_->push_back(element); + } + } + size_++; + + if (valid_cached_min_ && (element < GetMinElement())) { + cached_min_index_ = IsUsingVector() ? 
vector_->size() - 1 : size_ - 1; + cached_min_key_ = GetKey(element); + valid_cached_min_ = true; + } + + if (ShouldReclaimMemory()) { + ReclaimMemory(); + } +} + + +template +size_t InvalSet::erase(const ElementType& element) { + VIXL_ASSERT(monitor() == 0); + VIXL_ASSERT(IsValid(element)); + ElementType* local_element = Search(element); + if (local_element != NULL) { + EraseInternal(local_element); + return 1; + } + return 0; +} + + +template +ElementType* InvalSet::Search( + const ElementType& element) { + VIXL_ASSERT(monitor() == 0); + if (empty()) { + return NULL; + } + if (ShouldReclaimMemory()) { + ReclaimMemory(); + } + if (!sorted_) { + Sort(kHardSort); + } + if (!valid_cached_min_) { + CacheMinElement(); + } + return BinarySearch(element, GetElementAt(cached_min_index_), StorageEnd()); +} + + +template +size_t InvalSet::size() const { + return size_; +} + + +template +bool InvalSet::empty() const { + return size_ == 0; +} + + +template +void InvalSet::clear() { + VIXL_ASSERT(monitor() == 0); + size_ = 0; + if (IsUsingVector()) { + vector_->clear(); + } + SetSorted(true); + valid_cached_min_ = false; +} + + +template +const ElementType InvalSet::GetMinElement() { + VIXL_ASSERT(monitor() == 0); + VIXL_ASSERT(!empty()); + CacheMinElement(); + return *GetElementAt(cached_min_index_); +} + + +template +KeyType InvalSet::GetMinElementKey() { + VIXL_ASSERT(monitor() == 0); + if (valid_cached_min_) { + return cached_min_key_; + } else { + return GetKey(GetMinElement()); + } +} + + +template +bool InvalSet::IsValid(const ElementType& element) { + return GetKey(element) != kInvalidKey; +} + + +template +void InvalSet::EraseInternal(ElementType* element) { + // Note that this function must be safe even while an iterator has acquired + // this set. + VIXL_ASSERT(element != NULL); + size_t deleted_index = GetElementIndex(element); + if (IsUsingVector()) { + VIXL_ASSERT((&(vector_->front()) <= element) && + (element <= &(vector_->back()))); + SetKey(element, kInvalidKey); + } else { + VIXL_ASSERT((preallocated_ <= element) && + (element < (preallocated_ + kNPreallocatedElements))); + ElementType* end = preallocated_ + kNPreallocatedElements; + size_t copy_size = sizeof(*element) * (end - element - 1); + memmove(element, element + 1, copy_size); + } + size_--; + + if (valid_cached_min_ && (deleted_index == cached_min_index_)) { + if (sorted_ && !empty()) { + const ElementType* min = GetFirstValidElement(element, StorageEnd()); + cached_min_index_ = GetElementIndex(min); + cached_min_key_ = GetKey(*min); + valid_cached_min_ = true; + } else { + valid_cached_min_ = false; + } + } +} + + +template +ElementType* InvalSet::BinarySearch( + const ElementType& element, ElementType* start, ElementType* end) const { + if (start == end) { + return NULL; + } + VIXL_ASSERT(sorted_); + VIXL_ASSERT(start < end); + VIXL_ASSERT(!empty()); + + // Perform a binary search through the elements while ignoring invalid + // elements. + ElementType* elements = start; + size_t low = 0; + size_t high = (end - start) - 1; + while (low < high) { + // Find valid bounds. + while (!IsValid(elements[low]) && (low < high)) ++low; + while (!IsValid(elements[high]) && (low < high)) --high; + VIXL_ASSERT(low <= high); + // Avoid overflow when computing the middle index. 
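// For example, the naive midpoint `(low + high) / 2` can wrap around if
// `low + high` overflows size_t, whereas `low + (high - low) / 2` never
// exceeds `high` and therefore always stays in range.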
+ size_t middle = low + (high - low) / 2; + if ((middle == low) || (middle == high)) { + break; + } + while ((middle < high - 1) && !IsValid(elements[middle])) ++middle; + while ((low + 1 < middle) && !IsValid(elements[middle])) --middle; + if (!IsValid(elements[middle])) { + break; + } + if (elements[middle] < element) { + low = middle; + } else { + high = middle; + } + } + + if (elements[low] == element) return &elements[low]; + if (elements[high] == element) return &elements[high]; + return NULL; +} + + +template +void InvalSet::Sort(SortType sort_type) { + if (sort_type == kSoftSort) { + if (sorted_) { + return; + } + } + VIXL_ASSERT(monitor() == 0); + if (empty()) { + return; + } + + Clean(); + std::sort(StorageBegin(), StorageEnd()); + + SetSorted(true); + cached_min_index_ = 0; + cached_min_key_ = GetKey(Front()); + valid_cached_min_ = true; +} + + +template +void InvalSet::Clean() { + VIXL_ASSERT(monitor() == 0); + if (empty() || !IsUsingVector()) { + return; + } + // Manually iterate through the vector storage to discard invalid elements. + ElementType* start = &(vector_->front()); + ElementType* end = start + vector_->size(); + ElementType* c = start; + ElementType* first_invalid; + ElementType* first_valid; + ElementType* next_invalid; + + while ((c < end) && IsValid(*c)) c++; + first_invalid = c; + + while (c < end) { + while ((c < end) && !IsValid(*c)) c++; + first_valid = c; + while ((c < end) && IsValid(*c)) c++; + next_invalid = c; + + ptrdiff_t n_moved_elements = (next_invalid - first_valid); + memmove(first_invalid, first_valid, n_moved_elements * sizeof(*c)); + first_invalid = first_invalid + n_moved_elements; + c = next_invalid; + } + + // Delete the trailing invalid elements. + vector_->erase(vector_->begin() + (first_invalid - start), vector_->end()); + VIXL_ASSERT(vector_->size() == size_); + + if (sorted_) { + valid_cached_min_ = true; + cached_min_index_ = 0; + cached_min_key_ = GetKey(*GetElementAt(0)); + } else { + valid_cached_min_ = false; + } +} + + +template +const ElementType InvalSet::Front() const { + VIXL_ASSERT(!empty()); + return IsUsingVector() ? vector_->front() : preallocated_[0]; +} + + +template +const ElementType InvalSet::Back() const { + VIXL_ASSERT(!empty()); + return IsUsingVector() ? vector_->back() : preallocated_[size_ - 1]; +} + + +template +const ElementType InvalSet::CleanBack() { + VIXL_ASSERT(monitor() == 0); + if (IsUsingVector()) { + // Delete the invalid trailing elements. + typename std::vector::reverse_iterator it = vector_->rbegin(); + while (!IsValid(*it)) { + it++; + } + vector_->erase(it.base(), vector_->end()); + } + return Back(); +} + + +template +const ElementType* InvalSet::StorageBegin() const { + return IsUsingVector() ? &(vector_->front()) : preallocated_; +} + + +template +const ElementType* InvalSet::StorageEnd() const { + return IsUsingVector() ? &(vector_->back()) + 1 : preallocated_ + size_; +} + + +template +ElementType* InvalSet::StorageBegin() { + return IsUsingVector() ? &(vector_->front()) : preallocated_; +} + + +template +ElementType* InvalSet::StorageEnd() { + return IsUsingVector() ? 
&(vector_->back()) + 1 : preallocated_ + size_; +} + + +template +size_t InvalSet::GetElementIndex( + const ElementType* element) const { + VIXL_ASSERT((StorageBegin() <= element) && (element < StorageEnd())); + return element - StorageBegin(); +} + + +template +const ElementType* InvalSet::GetElementAt( + size_t index) const { + VIXL_ASSERT((IsUsingVector() && (index < vector_->size())) || + (index < size_)); + return StorageBegin() + index; +} + +template +ElementType* InvalSet::GetElementAt(size_t index) { + VIXL_ASSERT((IsUsingVector() && (index < vector_->size())) || + (index < size_)); + return StorageBegin() + index; +} + +template +const ElementType* InvalSet::GetFirstValidElement( + const ElementType* from, const ElementType* end) { + while ((from < end) && !IsValid(*from)) { + from++; + } + return from; +} + + +template +void InvalSet::CacheMinElement() { + VIXL_ASSERT(monitor() == 0); + VIXL_ASSERT(!empty()); + + if (valid_cached_min_) { + return; + } + + if (sorted_) { + const ElementType* min = GetFirstValidElement(StorageBegin(), StorageEnd()); + cached_min_index_ = GetElementIndex(min); + cached_min_key_ = GetKey(*min); + valid_cached_min_ = true; + } else { + Sort(kHardSort); + } + VIXL_ASSERT(valid_cached_min_); +} + + +template +bool InvalSet::ShouldReclaimMemory() const { + if (!IsUsingVector()) { + return false; + } + size_t n_invalid_elements = vector_->size() - size_; + return (n_invalid_elements > RECLAIM_FROM) && + (n_invalid_elements > vector_->size() / RECLAIM_FACTOR); +} + + +template +void InvalSet::ReclaimMemory() { + VIXL_ASSERT(monitor() == 0); + Clean(); +} + + +template +InvalSetIterator::InvalSetIterator(S* inval_set) + : using_vector_((inval_set != NULL) && inval_set->IsUsingVector()), + index_(0), + inval_set_(inval_set) { + if (inval_set != NULL) { + inval_set->Sort(S::kSoftSort); +#ifdef VIXL_DEBUG + inval_set->Acquire(); +#endif + if (using_vector_) { + iterator_ = typename std::vector::iterator( + inval_set_->vector_->begin()); + } + MoveToValidElement(); + } +} + + +template +InvalSetIterator::~InvalSetIterator() { +#ifdef VIXL_DEBUG + if (inval_set_ != NULL) inval_set_->Release(); +#endif +} + + +template +typename S::_ElementType* InvalSetIterator::Current() const { + VIXL_ASSERT(!Done()); + if (using_vector_) { + return &(*iterator_); + } else { + return &(inval_set_->preallocated_[index_]); + } +} + + +template +void InvalSetIterator::Advance() { + ++(*this); +} + + +template +bool InvalSetIterator::Done() const { + if (using_vector_) { + bool done = (iterator_ == inval_set_->vector_->end()); + VIXL_ASSERT(done == (index_ == inval_set_->size())); + return done; + } else { + return index_ == inval_set_->size(); + } +} + + +template +void InvalSetIterator::Finish() { + VIXL_ASSERT(inval_set_->sorted_); + if (using_vector_) { + iterator_ = inval_set_->vector_->end(); + } + index_ = inval_set_->size(); +} + + +template +void InvalSetIterator::DeleteCurrentAndAdvance() { + if (using_vector_) { + inval_set_->EraseInternal(&(*iterator_)); + MoveToValidElement(); + } else { + inval_set_->EraseInternal(inval_set_->preallocated_ + index_); + } +} + + +template +bool InvalSetIterator::IsValid(const ElementType& element) { + return S::IsValid(element); +} + + +template +typename S::_KeyType InvalSetIterator::GetKey(const ElementType& element) { + return S::GetKey(element); +} + + +template +void InvalSetIterator::MoveToValidElement() { + if (using_vector_) { + while ((iterator_ != inval_set_->vector_->end()) && !IsValid(*iterator_)) { + iterator_++; + } + } 
else { + VIXL_ASSERT(inval_set_->empty() || IsValid(inval_set_->preallocated_[0])); + // Nothing to do. + } +} + + +template +InvalSetIterator::InvalSetIterator(const InvalSetIterator& other) + : using_vector_(other.using_vector_), + index_(other.index_), + inval_set_(other.inval_set_) { +#ifdef VIXL_DEBUG + if (inval_set_ != NULL) inval_set_->Acquire(); +#endif +} + + +#if __cplusplus >= 201103L +template +InvalSetIterator::InvalSetIterator(InvalSetIterator&& other) noexcept + : using_vector_(false), index_(0), inval_set_(NULL) { + swap(*this, other); +} +#endif + + +template +InvalSetIterator& InvalSetIterator::operator=(InvalSetIterator other) { + swap(*this, other); + return *this; +} + + +template +bool InvalSetIterator::operator==(const InvalSetIterator& rhs) const { + bool equal = (inval_set_ == rhs.inval_set_); + + // If the inval_set_ matches, using_vector_ must also match. + VIXL_ASSERT(!equal || (using_vector_ == rhs.using_vector_)); + + if (using_vector_) { + equal = equal && (iterator_ == rhs.iterator_); + // In debug mode, index_ is maintained even with using_vector_. + VIXL_ASSERT(!equal || (index_ == rhs.index_)); + } else { + equal = equal && (index_ == rhs.index_); +#ifdef DEBUG + // If not using_vector_, iterator_ should be default-initialised. + typename std::vector::iterator default_iterator; + VIXL_ASSERT(iterator_ == default_iterator); + VIXL_ASSERT(rhs.iterator_ == default_iterator); +#endif + } + return equal; +} + + +template +InvalSetIterator& InvalSetIterator::operator++() { + // Pre-increment. + VIXL_ASSERT(!Done()); + if (using_vector_) { + iterator_++; +#ifdef VIXL_DEBUG + index_++; +#endif + MoveToValidElement(); + } else { + index_++; + } + return *this; +} + + +template +InvalSetIterator InvalSetIterator::operator++(int /* unused */) { + // Post-increment. + VIXL_ASSERT(!Done()); + InvalSetIterator old(*this); + ++(*this); + return old; +} + + +#undef TEMPLATE_INVALSET_P_DECL +#undef TEMPLATE_INVALSET_P_DEF + +} // namespace vixl + +#endif // VIXL_INVALSET_H_ diff --git a/3rdparty/vixl/include/vixl/macro-assembler-interface.h b/3rdparty/vixl/include/vixl/macro-assembler-interface.h new file mode 100644 index 0000000000..3c0421f2c7 --- /dev/null +++ b/3rdparty/vixl/include/vixl/macro-assembler-interface.h @@ -0,0 +1,75 @@ +// Copyright 2016, VIXL authors +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// * Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// * Neither the name of ARM Limited nor the names of its contributors may be +// used to endorse or promote products derived from this software without +// specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS CONTRIBUTORS "AS IS" AND +// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +// DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE +// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +#ifndef VIXL_MACRO_ASSEMBLER_INTERFACE_H +#define VIXL_MACRO_ASSEMBLER_INTERFACE_H + +#include "assembler-base-vixl.h" + +namespace vixl { + +class MacroAssemblerInterface { + public: + virtual internal::AssemblerBase* AsAssemblerBase() = 0; + + virtual ~MacroAssemblerInterface() VIXL_NEGATIVE_TESTING_ALLOW_EXCEPTION {} + + virtual bool AllowMacroInstructions() const = 0; + virtual bool ArePoolsBlocked() const = 0; + + protected: + virtual void SetAllowMacroInstructions(bool allow) = 0; + + virtual void BlockPools() = 0; + virtual void ReleasePools() = 0; + virtual void EnsureEmitPoolsFor(size_t size) = 0; + + // Emit the branch over a literal/veneer pool, and any necessary padding + // before it. + virtual void EmitPoolHeader() = 0; + // When this is called, the label used for branching over the pool is bound. + // This can also generate additional padding, which must correspond to the + // alignment_ value passed to the PoolManager (which needs to keep track of + // the exact size of the generated pool). + virtual void EmitPoolFooter() = 0; + + // Emit n bytes of padding that does not have to be executable. + virtual void EmitPaddingBytes(int n) = 0; + // Emit n bytes of padding that has to be executable. Implementations must + // make sure this is a multiple of the instruction size. + virtual void EmitNopBytes(int n) = 0; + + // The following scopes need access to the above method in order to implement + // pool blocking and temporarily disable the macro-assembler. + friend class ExactAssemblyScope; + friend class EmissionCheckScope; + template + friend class PoolManager; +}; + +} // namespace vixl + +#endif // VIXL_MACRO_ASSEMBLER_INTERFACE_H diff --git a/3rdparty/vixl/include/vixl/platform-vixl.h b/3rdparty/vixl/include/vixl/platform-vixl.h new file mode 100644 index 0000000000..99f54d0c46 --- /dev/null +++ b/3rdparty/vixl/include/vixl/platform-vixl.h @@ -0,0 +1,39 @@ +// Copyright 2014, VIXL authors +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// * Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// * Neither the name of ARM Limited nor the names of its contributors may be +// used to endorse or promote products derived from this software without +// specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS CONTRIBUTORS "AS IS" AND +// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +// DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE +// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +#ifndef PLATFORM_H +#define PLATFORM_H + +// Define platform specific functionalities. +extern "C" { +#include +} + +namespace vixl { +inline void HostBreakpoint() { raise(SIGINT); } +} // namespace vixl + +#endif diff --git a/3rdparty/vixl/include/vixl/pool-manager-impl.h b/3rdparty/vixl/include/vixl/pool-manager-impl.h new file mode 100644 index 0000000000..a1bcaaad83 --- /dev/null +++ b/3rdparty/vixl/include/vixl/pool-manager-impl.h @@ -0,0 +1,522 @@ +// Copyright 2017, VIXL authors +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// * Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// * Neither the name of ARM Limited nor the names of its contributors may be +// used to endorse or promote products derived from this software without +// specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS CONTRIBUTORS "AS IS" AND +// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE +// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +#ifndef VIXL_POOL_MANAGER_IMPL_H_ +#define VIXL_POOL_MANAGER_IMPL_H_ + +#include "pool-manager.h" + +#include +#include "assembler-base-vixl.h" + +namespace vixl { + + +template +T PoolManager::Emit(MacroAssemblerInterface* masm, + T pc, + int num_bytes, + ForwardReference* new_reference, + LocationBase* new_object, + EmitOption option) { + // Make sure that the buffer still has the alignment we think it does. + VIXL_ASSERT(IsAligned(masm->AsAssemblerBase() + ->GetBuffer() + ->GetStartAddress(), + buffer_alignment_)); + + // We should not call this method when the pools are blocked. + VIXL_ASSERT(!IsBlocked()); + if (objects_.empty()) return pc; + + // Emit header. + if (option == kBranchRequired) { + masm->EmitPoolHeader(); + // TODO: The pc at this point might not actually be aligned according to + // alignment_. This is to support the current AARCH32 MacroAssembler which + // does not have a fixed size instruction set. 
In practice, the pc will be + // aligned to the alignment instructions need for the current instruction + // set, so we do not need to align it here. All other calculations do take + // the alignment into account, which only makes the checkpoint calculations + // more conservative when we use T32. Uncomment the following assertion if + // the AARCH32 MacroAssembler is modified to only support one ISA at the + // time. + // VIXL_ASSERT(pc == AlignUp(pc, alignment_)); + pc += header_size_; + } else { + // If the header is optional, we might need to add some extra padding to + // meet the minimum location of the first object. + if (pc < objects_[0].min_location_) { + int32_t padding = objects_[0].min_location_ - pc; + masm->EmitNopBytes(padding); + pc += padding; + } + } + + PoolObject* existing_object = GetObjectIfTracked(new_object); + + // Go through all objects and emit one by one. + for (objects_iter iter = objects_.begin(); iter != objects_.end();) { + PoolObject& current = *iter; + if (ShouldSkipObject(¤t, + pc, + num_bytes, + new_reference, + new_object, + existing_object)) { + ++iter; + continue; + } + LocationBase* label_base = current.label_base_; + T aligned_pc = AlignUp(pc, current.alignment_); + masm->EmitPaddingBytes(aligned_pc - pc); + pc = aligned_pc; + VIXL_ASSERT(pc >= current.min_location_); + VIXL_ASSERT(pc <= current.max_location_); + // First call SetLocation, which will also resolve the references, and then + // call EmitPoolObject, which might add a new reference. + label_base->SetLocation(masm->AsAssemblerBase(), pc); + label_base->EmitPoolObject(masm); + int object_size = label_base->GetPoolObjectSizeInBytes(); + if (label_base->ShouldDeletePoolObjectOnPlacement()) { + label_base->MarkBound(); + iter = RemoveAndDelete(iter); + } else { + VIXL_ASSERT(!current.label_base_->ShouldDeletePoolObjectOnPlacement()); + current.label_base_->UpdatePoolObject(¤t); + VIXL_ASSERT(current.alignment_ >= label_base->GetPoolObjectAlignment()); + ++iter; + } + pc += object_size; + } + + // Recalculate the checkpoint before emitting the footer. The footer might + // call Bind() which will check if we need to emit. + RecalculateCheckpoint(); + + // Always emit footer - this might add some padding. + masm->EmitPoolFooter(); + pc = AlignUp(pc, alignment_); + + return pc; +} + +template +bool PoolManager::ShouldSkipObject(PoolObject* pool_object, + T pc, + int num_bytes, + ForwardReference* new_reference, + LocationBase* new_object, + PoolObject* existing_object) const { + // We assume that all objects before this have been skipped and all objects + // after this will be emitted, therefore we will emit the whole pool. Add + // the header size and alignment, as well as the number of bytes we are + // planning to emit. + T max_actual_location = pc + num_bytes + max_pool_size_; + + if (new_reference != NULL) { + // If we're adding a new object, also assume that it will have to be emitted + // before the object we are considering to skip. + VIXL_ASSERT(new_object != NULL); + T new_object_alignment = std::max(new_reference->object_alignment_, + new_object->GetPoolObjectAlignment()); + if ((existing_object != NULL) && + (existing_object->alignment_ > new_object_alignment)) { + new_object_alignment = existing_object->alignment_; + } + max_actual_location += + (new_object->GetPoolObjectSizeInBytes() + new_object_alignment - 1); + } + + // Hard limit. + if (max_actual_location >= pool_object->max_location_) return false; + + // Use heuristic. 
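// That is, while the current pc is still below the object's skip-until hint,
// prefer to leave the object for a later pool rather than emitting it now.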
+ return (pc < pool_object->skip_until_location_hint_); +} + +template +T PoolManager::UpdateCheckpointForObject(T checkpoint, + const PoolObject* object) { + checkpoint -= object->label_base_->GetPoolObjectSizeInBytes(); + if (checkpoint > object->max_location_) checkpoint = object->max_location_; + checkpoint = AlignDown(checkpoint, object->alignment_); + return checkpoint; +} + +template +static T MaxCheckpoint() { + return std::numeric_limits::max(); +} + +template +static inline bool CheckCurrentPC(T pc, T checkpoint) { + VIXL_ASSERT(pc <= checkpoint); + // We must emit the pools if we are at the checkpoint now. + return pc == checkpoint; +} + +template +static inline bool CheckFuturePC(T pc, T checkpoint) { + // We do not need to emit the pools now if the projected future PC will be + // equal to the checkpoint (we will need to emit the pools then). + return pc > checkpoint; +} + +template +bool PoolManager::MustEmit(T pc, + int num_bytes, + ForwardReference* reference, + LocationBase* label_base) const { + // Check if we are at or past the checkpoint. + if (CheckCurrentPC(pc, checkpoint_)) return true; + + // Check if the future PC will be past the checkpoint. + pc += num_bytes; + if (CheckFuturePC(pc, checkpoint_)) return true; + + // No new reference - nothing to do. + if (reference == NULL) { + VIXL_ASSERT(label_base == NULL); + return false; + } + + if (objects_.empty()) { + // Basic assertions that restrictions on the new (and only) reference are + // possible to satisfy. + VIXL_ASSERT(AlignUp(pc + header_size_, alignment_) >= + reference->min_object_location_); + VIXL_ASSERT(pc <= reference->max_object_location_); + return false; + } + + // Check if the object is already being tracked. + const PoolObject* existing_object = GetObjectIfTracked(label_base); + if (existing_object != NULL) { + // If the existing_object is already in existing_objects_ and its new + // alignment and new location restrictions are not stricter, skip the more + // expensive check. + if ((reference->min_object_location_ <= existing_object->min_location_) && + (reference->max_object_location_ >= existing_object->max_location_) && + (reference->object_alignment_ <= existing_object->alignment_)) { + return false; + } + } + + // Create a temporary object. + PoolObject temp(label_base); + temp.RestrictRange(reference->min_object_location_, + reference->max_object_location_); + temp.RestrictAlignment(reference->object_alignment_); + if (existing_object != NULL) { + temp.RestrictRange(existing_object->min_location_, + existing_object->max_location_); + temp.RestrictAlignment(existing_object->alignment_); + } + + // Check if the new reference can be added after the end of the current pool. + // If yes, we don't need to emit. + T last_reachable = AlignDown(temp.max_location_, temp.alignment_); + const PoolObject& last = objects_.back(); + T after_pool = AlignDown(last.max_location_, last.alignment_) + + last.label_base_->GetPoolObjectSizeInBytes(); + // The current object can be placed at the end of the pool, even if the last + // object is placed at the last possible location. + if (last_reachable >= after_pool) return false; + // The current object can be placed after the code we are about to emit and + // after the existing pool (with a pessimistic size estimate). + if (last_reachable >= pc + num_bytes + max_pool_size_) return false; + + // We're not in a trivial case, so we need to recalculate the checkpoint. 
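// As a purely illustrative example of the recalculation below: with two
// 4-byte, 4-byte-aligned objects whose max_location_ values are 100 and 120,
// walking backwards from the largest representable checkpoint clamps it first
// to 120 and then to 100 (each step subtracts the object size, clamps to
// max_location_ and aligns down). Subtracting a 4-byte header and aligning
// down again gives 96, i.e. the pool must be emitted no later than pc == 96.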
+ + // Check (conservatively) if we can fit it into the objects_ array, without + // breaking our assumptions. Here we want to recalculate the checkpoint as + // if the new reference was added to the PoolManager but without actually + // adding it (as removing it is non-trivial). + + T checkpoint = MaxCheckpoint(); + // Will temp be the last object in objects_? + if (PoolObjectLessThan(last, temp)) { + checkpoint = UpdateCheckpointForObject(checkpoint, &temp); + if (checkpoint < temp.min_location_) return true; + } + + bool temp_not_placed_yet = true; + for (int i = static_cast(objects_.size()) - 1; i >= 0; --i) { + const PoolObject& current = objects_[i]; + if (temp_not_placed_yet && PoolObjectLessThan(current, temp)) { + checkpoint = UpdateCheckpointForObject(checkpoint, &temp); + if (checkpoint < temp.min_location_) return true; + if (CheckFuturePC(pc, checkpoint)) return true; + temp_not_placed_yet = false; + } + if (current.label_base_ == label_base) continue; + checkpoint = UpdateCheckpointForObject(checkpoint, ¤t); + if (checkpoint < current.min_location_) return true; + if (CheckFuturePC(pc, checkpoint)) return true; + } + // temp is the object with the smallest max_location_. + if (temp_not_placed_yet) { + checkpoint = UpdateCheckpointForObject(checkpoint, &temp); + if (checkpoint < temp.min_location_) return true; + } + + // Take the header into account. + checkpoint -= header_size_; + checkpoint = AlignDown(checkpoint, alignment_); + + return CheckFuturePC(pc, checkpoint); +} + +template +void PoolManager::RecalculateCheckpoint(SortOption sort_option) { + // TODO: Improve the max_pool_size_ estimate by starting from the + // min_location_ of the first object, calculating the end of the pool as if + // all objects were placed starting from there, and in the end adding the + // maximum object alignment found minus one (which is the maximum extra + // padding we would need if we were to relocate the pool to a different + // address). + max_pool_size_ = 0; + + if (objects_.empty()) { + checkpoint_ = MaxCheckpoint(); + return; + } + + // Sort objects by their max_location_. + if (sort_option == kSortRequired) { + std::sort(objects_.begin(), objects_.end(), PoolObjectLessThan); + } + + // Add the header size and header and footer max alignment to the maximum + // pool size. + max_pool_size_ += header_size_ + 2 * (alignment_ - 1); + + T checkpoint = MaxCheckpoint(); + int last_object_index = static_cast(objects_.size()) - 1; + for (int i = last_object_index; i >= 0; --i) { + // Bring back the checkpoint by the size of the current object, unless + // we need to bring it back more, then align. + PoolObject& current = objects_[i]; + checkpoint = UpdateCheckpointForObject(checkpoint, ¤t); + VIXL_ASSERT(checkpoint >= current.min_location_); + max_pool_size_ += (current.alignment_ - 1 + + current.label_base_->GetPoolObjectSizeInBytes()); + } + // Take the header into account. + checkpoint -= header_size_; + checkpoint = AlignDown(checkpoint, alignment_); + + // Update the checkpoint of the pool manager. + checkpoint_ = checkpoint; + + // NOTE: To handle min_location_ in the generic case, we could make a second + // pass of the objects_ vector, increasing the checkpoint as needed, while + // maintaining the alignment requirements. 
+ // It should not be possible to have any issues with min_location_ with actual + // code, since there should always be some kind of branch over the pool, + // whether introduced by the pool emission or by the user, which will make + // sure the min_location_ requirement is satisfied. It's possible that the + // user could emit code in the literal pool and intentionally load the first + // value and then fall-through into the pool, but that is not a supported use + // of VIXL and we will assert in that case. +} + +template +bool PoolManager::PoolObjectLessThan(const PoolObject& a, + const PoolObject& b) { + if (a.max_location_ != b.max_location_) + return (a.max_location_ < b.max_location_); + int a_size = a.label_base_->GetPoolObjectSizeInBytes(); + int b_size = b.label_base_->GetPoolObjectSizeInBytes(); + if (a_size != b_size) return (a_size < b_size); + if (a.alignment_ != b.alignment_) return (a.alignment_ < b.alignment_); + if (a.min_location_ != b.min_location_) + return (a.min_location_ < b.min_location_); + return false; +} + +template +void PoolManager::AddObjectReference(const ForwardReference* reference, + LocationBase* label_base) { + VIXL_ASSERT(reference->object_alignment_ <= buffer_alignment_); + VIXL_ASSERT(label_base->GetPoolObjectAlignment() <= buffer_alignment_); + + PoolObject* object = GetObjectIfTracked(label_base); + + if (object == NULL) { + PoolObject new_object(label_base); + new_object.RestrictRange(reference->min_object_location_, + reference->max_object_location_); + new_object.RestrictAlignment(reference->object_alignment_); + Insert(new_object); + } else { + object->RestrictRange(reference->min_object_location_, + reference->max_object_location_); + object->RestrictAlignment(reference->object_alignment_); + + // Move the object, if needed. + if (objects_.size() != 1) { + PoolObject new_object(*object); + ptrdiff_t distance = std::distance(objects_.data(), object); + objects_.erase(objects_.begin() + distance); + Insert(new_object); + } + } + // No need to sort, we inserted the object in an already sorted array. + RecalculateCheckpoint(kNoSortRequired); +} + +template +void PoolManager::Insert(const PoolObject& new_object) { + bool inserted = false; + // Place the object in the right position. + for (objects_iter iter = objects_.begin(); iter != objects_.end(); ++iter) { + PoolObject& current = *iter; + if (!PoolObjectLessThan(current, new_object)) { + objects_.insert(iter, new_object); + inserted = true; + break; + } + } + if (!inserted) { + objects_.push_back(new_object); + } +} + +template +void PoolManager::RemoveAndDelete(PoolObject* object) { + for (objects_iter iter = objects_.begin(); iter != objects_.end(); ++iter) { + PoolObject& current = *iter; + if (current.label_base_ == object->label_base_) { + (void)RemoveAndDelete(iter); + return; + } + } + VIXL_UNREACHABLE(); +} + +template +typename PoolManager::objects_iter PoolManager::RemoveAndDelete( + objects_iter iter) { + PoolObject& object = *iter; + LocationBase* label_base = object.label_base_; + + // Check if we also need to delete the LocationBase object. 
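// Objects that asked to be deleted when the pool manager is destroyed are
// queued on delete_on_destruction_; objects that asked to be deleted on
// placement are freed immediately (the assertion below checks that an object
// does not request both).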
+ if (label_base->ShouldBeDeletedOnPoolManagerDestruction()) { + delete_on_destruction_.push_back(label_base); + } + if (label_base->ShouldBeDeletedOnPlacementByPoolManager()) { + VIXL_ASSERT(!label_base->ShouldBeDeletedOnPoolManagerDestruction()); + delete label_base; + } + + return objects_.erase(iter); +} + +template +T PoolManager::Bind(MacroAssemblerInterface* masm, + LocationBase* object, + T location) { + PoolObject* existing_object = GetObjectIfTracked(object); + int alignment; + T min_location; + if (existing_object == NULL) { + alignment = object->GetMaxAlignment(); + min_location = object->GetMinLocation(); + } else { + alignment = existing_object->alignment_; + min_location = existing_object->min_location_; + } + + // Align if needed, and add necessary padding to reach the min_location_. + T aligned_location = AlignUp(location, alignment); + masm->EmitNopBytes(aligned_location - location); + location = aligned_location; + while (location < min_location) { + masm->EmitNopBytes(alignment); + location += alignment; + } + + object->SetLocation(masm->AsAssemblerBase(), location); + object->MarkBound(); + + if (existing_object != NULL) { + RemoveAndDelete(existing_object); + // No need to sort, we removed the object from a sorted array. + RecalculateCheckpoint(kNoSortRequired); + } + + // We assume that the maximum padding we can possibly add here is less + // than the header alignment - hence that we're not going to go past our + // checkpoint. + VIXL_ASSERT(!CheckFuturePC(location, checkpoint_)); + return location; +} + +template +void PoolManager::Release(T pc) { + USE(pc); + if (--monitor_ == 0) { + // Ensure the pool has not been blocked for too long. + VIXL_ASSERT(pc <= checkpoint_); + } +} + +template +PoolManager::~PoolManager() VIXL_NEGATIVE_TESTING_ALLOW_EXCEPTION { +#ifdef VIXL_DEBUG + // Check for unbound objects. + for (objects_iter iter = objects_.begin(); iter != objects_.end(); ++iter) { + // There should not be any bound objects left in the pool. For unbound + // objects, we will check in the destructor of the object itself. + VIXL_ASSERT(!(*iter).label_base_->IsBound()); + } +#endif + // Delete objects the pool manager owns. + for (typename std::vector*>::iterator + iter = delete_on_destruction_.begin(), + end = delete_on_destruction_.end(); + iter != end; + ++iter) { + delete *iter; + } +} + +template +int PoolManager::GetPoolSizeForTest() const { + // Iterate over objects and return their cumulative size. This does not take + // any padding into account, just the size of the objects themselves. + int size = 0; + for (const_objects_iter iter = objects_.begin(); iter != objects_.end(); + ++iter) { + size += (*iter).label_base_->GetPoolObjectSizeInBytes(); + } + return size; +} +} + +#endif // VIXL_POOL_MANAGER_IMPL_H_ diff --git a/3rdparty/vixl/include/vixl/pool-manager.h b/3rdparty/vixl/include/vixl/pool-manager.h new file mode 100644 index 0000000000..27fa69ec2c --- /dev/null +++ b/3rdparty/vixl/include/vixl/pool-manager.h @@ -0,0 +1,555 @@ +// Copyright 2017, VIXL authors +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// * Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. 
+// * Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// * Neither the name of ARM Limited nor the names of its contributors may be +// used to endorse or promote products derived from this software without +// specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS CONTRIBUTORS "AS IS" AND +// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE +// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +#ifndef VIXL_POOL_MANAGER_H_ +#define VIXL_POOL_MANAGER_H_ + +#include + +#include +#include +#include +#include + +#include "globals-vixl.h" +#include "macro-assembler-interface.h" +#include "utils-vixl.h" + +namespace vixl { + +class TestPoolManager; + +// There are four classes declared in this header file: +// PoolManager, PoolObject, ForwardReference and LocationBase. + +// The PoolManager manages both literal and veneer pools, and is designed to be +// shared between AArch32 and AArch64. A pool is represented as an abstract +// collection of references to objects. The manager does not need to know +// architecture-specific details about literals and veneers; the actual +// emission of the pool objects is delegated. +// +// Literal and Label will derive from LocationBase. The MacroAssembler will +// create these objects as instructions that reference pool objects are +// encountered, and ask the PoolManager to track them. The PoolManager will +// create an internal PoolObject object for each object derived from +// LocationBase. Some of these PoolObject objects will be deleted when placed +// (e.g. the ones corresponding to Literals), whereas others will be updated +// with a new range when placed (e.g. Veneers) and deleted when Bind() is +// called on the PoolManager with their corresponding object as a parameter. +// +// A ForwardReference represents a reference to a PoolObject that will be +// placed later in the instruction stream. Each ForwardReference may only refer +// to one PoolObject, but many ForwardReferences may refer to the same +// object. +// +// A PoolObject represents an object that has not yet been placed. The final +// location of a PoolObject (and hence the LocationBase object to which it +// corresponds) is constrained mostly by the instructions that refer to it, but +// PoolObjects can also have inherent constraints, such as alignment. +// +// LocationBase objects, unlike PoolObject objects, can be used outside of the +// pool manager (e.g. as manually placed literals, which may still have +// forward references that need to be resolved). +// +// At the moment, each LocationBase will have at most one PoolObject that keeps +// the relevant information for placing this object in the pool. When that +// object is placed, all forward references of the object are resolved. 
For +// that reason, we do not need to keep track of the ForwardReference objects in +// the PoolObject. + +// T is an integral type used for representing locations. For a 32-bit +// architecture it will typically be int32_t, whereas for a 64-bit +// architecture it will be int64_t. +template +class ForwardReference; +template +class PoolObject; +template +class PoolManager; + +// Represents an object that has a size and alignment, and either has a known +// location or has not been placed yet. An object of a subclass of LocationBase +// will typically keep track of a number of ForwardReferences when it has not +// yet been placed, but LocationBase does not assume or implement that +// functionality. LocationBase provides virtual methods for emitting the +// object, updating all the forward references, and giving the PoolManager +// information on the lifetime of this object and the corresponding PoolObject. +template +class LocationBase { + public: + // The size of a LocationBase object is restricted to 4KB, in order to avoid + // situations where the size of the pool becomes larger than the range of + // an unconditional branch. This cannot happen without having large objects, + // as typically the range of an unconditional branch is the larger range + // an instruction supports. + // TODO: This would ideally be an architecture-specific value, perhaps + // another template parameter. + static const int kMaxObjectSize = 4 * KBytes; + + // By default, LocationBase objects are aligned naturally to their size. + LocationBase(uint32_t type, int size) + : pool_object_size_(size), + pool_object_alignment_(size), + pool_object_type_(type), + is_bound_(false), + location_(0) { + VIXL_ASSERT(size > 0); + VIXL_ASSERT(size <= kMaxObjectSize); + VIXL_ASSERT(IsPowerOf2(size)); + } + + // Allow alignment to be specified, as long as it is smaller than the size. + LocationBase(uint32_t type, int size, int alignment) + : pool_object_size_(size), + pool_object_alignment_(alignment), + pool_object_type_(type), + is_bound_(false), + location_(0) { + VIXL_ASSERT(size > 0); + VIXL_ASSERT(size <= kMaxObjectSize); + VIXL_ASSERT(IsPowerOf2(alignment)); + VIXL_ASSERT(alignment <= size); + } + + // Constructor for locations that are already bound. + explicit LocationBase(T location) + : pool_object_size_(-1), + pool_object_alignment_(-1), + pool_object_type_(0), + is_bound_(true), + location_(location) {} + + virtual ~LocationBase() VIXL_NEGATIVE_TESTING_ALLOW_EXCEPTION {} + + // The PoolManager should assume ownership of some objects, and delete them + // after they have been placed. This can happen for example for literals that + // are created internally to the MacroAssembler and the user doesn't get a + // handle to. By default, the PoolManager will not do this. + virtual bool ShouldBeDeletedOnPlacementByPoolManager() const { return false; } + // The PoolManager should assume ownership of some objects, and delete them + // when it is destroyed. By default, the PoolManager will not do this. + virtual bool ShouldBeDeletedOnPoolManagerDestruction() const { return false; } + + // Emit the PoolObject. Derived classes will implement this method to emit + // the necessary data and/or code (for example, to emit a literal or a + // veneer). This should not add padding, as it is added explicitly by the pool + // manager. + virtual void EmitPoolObject(MacroAssemblerInterface* masm) = 0; + + // Resolve the references to this object. 
Will encode the necessary offset + // in the instruction corresponding to each reference and then delete it. + // TODO: An alternative here would be to provide a ResolveReference() + // method that only asks the LocationBase to resolve a specific reference + // (thus allowing the pool manager to resolve some of the references only). + // This would mean we need to have some kind of API to get all the references + // to a LabelObject. + virtual void ResolveReferences(internal::AssemblerBase* assembler) = 0; + + // Returns true when the PoolObject corresponding to this LocationBase object + // needs to be removed from the pool once placed, and false if it needs to + // be updated instead (in which case UpdatePoolObject will be called). + virtual bool ShouldDeletePoolObjectOnPlacement() const { return true; } + + // Update the PoolObject after placing it, if necessary. This will happen for + // example in the case of a placed veneer, where we need to use a new updated + // range and a new reference (from the newly added branch instruction). + // By default, this does nothing, to avoid forcing objects that will not need + // this to have an empty implementation. + virtual void UpdatePoolObject(PoolObject*) {} + + // Implement heuristics for emitting this object. If a margin is to be used + // as a hint during pool emission, we will try not to emit the object if we + // are further away from the maximum reachable location by more than the + // margin. + virtual bool UsePoolObjectEmissionMargin() const { return false; } + virtual T GetPoolObjectEmissionMargin() const { + VIXL_ASSERT(UsePoolObjectEmissionMargin() == false); + return 0; + } + + int GetPoolObjectSizeInBytes() const { return pool_object_size_; } + int GetPoolObjectAlignment() const { return pool_object_alignment_; } + uint32_t GetPoolObjectType() const { return pool_object_type_; } + + bool IsBound() const { return is_bound_; } + T GetLocation() const { return location_; } + + // This function can be called multiple times before the object is marked as + // bound with MarkBound() below. This is because some objects (e.g. the ones + // used to represent labels) can have veneers; every time we place a veneer + // we need to keep track of the location in order to resolve the references + // to the object. Reusing the location_ field for this is convenient. + void SetLocation(internal::AssemblerBase* assembler, T location) { + VIXL_ASSERT(!is_bound_); + location_ = location; + ResolveReferences(assembler); + } + + void MarkBound() { + VIXL_ASSERT(!is_bound_); + is_bound_ = true; + } + + // The following two functions are used when an object is bound by a call to + // PoolManager::Bind(). + virtual int GetMaxAlignment() const { + VIXL_ASSERT(!ShouldDeletePoolObjectOnPlacement()); + return 1; + } + virtual T GetMinLocation() const { + VIXL_ASSERT(!ShouldDeletePoolObjectOnPlacement()); + return 0; + } + + private: + // The size of the corresponding PoolObject, in bytes. + int pool_object_size_; + // The alignment of the corresponding PoolObject; this must be a power of two. + int pool_object_alignment_; + + // Different derived classes should have different type values. This can be + // used internally by the PoolManager for grouping of objects. + uint32_t pool_object_type_; + // Has the object been bound to a location yet? + bool is_bound_; + + protected: + // See comment on SetLocation() for the use of this field. 
+ T location_; +}; + +template +class PoolObject { + public: + // By default, PoolObjects have no inherent position constraints. + explicit PoolObject(LocationBase* parent) + : label_base_(parent), + min_location_(0), + max_location_(std::numeric_limits::max()), + alignment_(parent->GetPoolObjectAlignment()), + skip_until_location_hint_(0), + type_(parent->GetPoolObjectType()) { + VIXL_ASSERT(IsPowerOf2(alignment_)); + UpdateLocationHint(); + } + + // Reset the minimum and maximum location and the alignment of the object. + // This function is public in order to allow the LocationBase corresponding to + // this PoolObject to update the PoolObject when placed, e.g. in the case of + // veneers. The size and type of the object cannot be modified. + void Update(T min, T max, int alignment) { + // We don't use RestrictRange here as the new range is independent of the + // old range (and the maximum location is typically larger). + min_location_ = min; + max_location_ = max; + RestrictAlignment(alignment); + UpdateLocationHint(); + } + + private: + void RestrictRange(T min, T max) { + VIXL_ASSERT(min <= max_location_); + VIXL_ASSERT(max >= min_location_); + min_location_ = std::max(min_location_, min); + max_location_ = std::min(max_location_, max); + UpdateLocationHint(); + } + + void RestrictAlignment(int alignment) { + VIXL_ASSERT(IsPowerOf2(alignment)); + VIXL_ASSERT(IsPowerOf2(alignment_)); + alignment_ = std::max(alignment_, alignment); + } + + void UpdateLocationHint() { + if (label_base_->UsePoolObjectEmissionMargin()) { + skip_until_location_hint_ = + max_location_ - label_base_->GetPoolObjectEmissionMargin(); + } + } + + // The LocationBase that this pool object represents. + LocationBase* label_base_; + + // Hard, precise location constraints for the start location of the object. + // They are both inclusive, that is the start location of the object can be + // at any location between min_location_ and max_location_, themselves + // included. + T min_location_; + T max_location_; + + // The alignment must be a power of two. + int alignment_; + + // Avoid generating this object until skip_until_location_hint_. This + // supports cases where placing the object in the pool has an inherent cost + // that could be avoided in some other way. Veneers are a typical example; we + // would prefer to branch directly (over a pool) rather than use veneers, so + // this value can be set using some heuristic to leave them in the pool. + // This value is only a hint, which will be ignored if it has to in order to + // meet the hard constraints we have. + T skip_until_location_hint_; + + // Used only to group objects of similar type together. The PoolManager does + // not know what the types represent. + uint32_t type_; + + friend class PoolManager; +}; + +// Class that represents a forward reference. It is the responsibility of +// LocationBase objects to keep track of forward references and patch them when +// an object is placed - this class is only used by the PoolManager in order to +// restrict the requirements on PoolObjects it is tracking. 
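+// Illustrative sketch (not part of the upstream sources): given the
+// constraints above, the earliest admissible start location for a pool
+// object could be computed roughly as follows, reusing the AlignUp() helper
+// from utils-vixl.h (FirstAdmissibleLocation is a hypothetical name, not a
+// VIXL API):
+//
+//   template <typename T>
+//   T FirstAdmissibleLocation(T pc, T min_location, int alignment) {
+//     T location = AlignUp(pc, alignment);
+//     if (location < min_location) location = AlignUp(min_location, alignment);
+//     return location;  // Callers must still check location <= max_location_.
+//   }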
+template +class ForwardReference { + public: + ForwardReference(T location, + int size, + T min_object_location, + T max_object_location, + int object_alignment = 1) + : location_(location), + size_(size), + object_alignment_(object_alignment), + min_object_location_(min_object_location), + max_object_location_(max_object_location) { + VIXL_ASSERT(AlignDown(max_object_location, object_alignment) >= + min_object_location); + } + + bool LocationIsEncodable(T location) const { + return location >= min_object_location_ && + location <= max_object_location_ && + IsAligned(location, object_alignment_); + } + + T GetLocation() const { return location_; } + T GetMinLocation() const { return min_object_location_; } + T GetMaxLocation() const { return max_object_location_; } + int GetAlignment() const { return object_alignment_; } + + // Needed for InvalSet. + void SetLocationToInvalidateOnly(T location) { location_ = location; } + + private: + // The location of the thing that contains the reference. For example, this + // can be the location of the branch or load instruction. + T location_; + + // The size of the instruction that makes the reference, in bytes. + int size_; + + // The alignment that the object must satisfy for this reference - must be a + // power of two. + int object_alignment_; + + // Specify the possible locations where the object could be stored. AArch32's + // PC offset, and T32's PC alignment calculations should be applied by the + // Assembler, not here. The PoolManager deals only with simple locations. + // Including min_object_address_ is necessary to handle AArch32 some + // instructions which have a minimum offset of 0, but also have the implicit + // PC offset. + // Note that this structure cannot handle sparse ranges, such as A32's ADR, + // but doing so is costly and probably not useful in practice. The min and + // and max object location both refer to the beginning of the object, are + // inclusive and are not affected by the object size. E.g. if + // max_object_location_ is equal to X, we can place the object at location X + // regardless of its size. + T min_object_location_; + T max_object_location_; + + friend class PoolManager; +}; + + +template +class PoolManager { + public: + PoolManager(int header_size, int alignment, int buffer_alignment) + : header_size_(header_size), + alignment_(alignment), + buffer_alignment_(buffer_alignment), + checkpoint_(std::numeric_limits::max()), + max_pool_size_(0), + monitor_(0) {} + + ~PoolManager() VIXL_NEGATIVE_TESTING_ALLOW_EXCEPTION; + + // Check if we will need to emit the pool at location 'pc', when planning to + // generate a certain number of bytes. This optionally takes a + // ForwardReference we are about to generate, in which case the size of the + // reference must be included in 'num_bytes'. + bool MustEmit(T pc, + int num_bytes = 0, + ForwardReference* reference = NULL, + LocationBase* object = NULL) const; + + enum EmitOption { kBranchRequired, kNoBranchRequired }; + + // Emit the pool at location 'pc', using 'masm' as the macroassembler. + // The branch over the header can be optionally omitted using 'option'. + // Returns the new PC after pool emission. + // This expects a number of bytes that are about to be emitted, to be taken + // into account in heuristics for pool object emission. + // This also optionally takes a forward reference and an object as + // parameters, to be used in the case where emission of the pool is triggered + // by adding a new reference to the pool that does not fit. 
The pool manager + // will need this information in order to apply its heuristics correctly. + T Emit(MacroAssemblerInterface* masm, + T pc, + int num_bytes = 0, + ForwardReference* new_reference = NULL, + LocationBase* new_object = NULL, + EmitOption option = kBranchRequired); + + // Add 'reference' to 'object'. Should not be preceded by a call to MustEmit() + // that returned true, unless Emit() has been successfully afterwards. + void AddObjectReference(const ForwardReference* reference, + LocationBase* object); + + // This is to notify the pool that a LocationBase has been bound to a location + // and does not need to be tracked anymore. + // This will happen, for example, for Labels, which are manually bound by the + // user. + // This can potentially add some padding bytes in order to meet the object + // requirements, and will return the new location. + T Bind(MacroAssemblerInterface* masm, LocationBase* object, T location); + + // Functions for blocking and releasing the pools. + void Block() { monitor_++; } + void Release(T pc); + bool IsBlocked() const { return monitor_ != 0; } + + private: + typedef typename std::vector >::iterator objects_iter; + typedef + typename std::vector >::const_iterator const_objects_iter; + + PoolObject* GetObjectIfTracked(LocationBase* label) { + return const_cast*>( + static_cast*>(this)->GetObjectIfTracked(label)); + } + + const PoolObject* GetObjectIfTracked(LocationBase* label) const { + for (const_objects_iter iter = objects_.begin(); iter != objects_.end(); + ++iter) { + const PoolObject& current = *iter; + if (current.label_base_ == label) return ¤t; + } + return NULL; + } + + // Helper function for calculating the checkpoint. + enum SortOption { kSortRequired, kNoSortRequired }; + void RecalculateCheckpoint(SortOption sort_option = kSortRequired); + + // Comparison function for using std::sort() on objects_. PoolObject A is + // ordered before PoolObject B when A should be emitted before B. The + // comparison depends on the max_location_, size_, alignment_ and + // min_location_. + static bool PoolObjectLessThan(const PoolObject& a, + const PoolObject& b); + + // Helper function used in the checkpoint calculation. 'checkpoint' is the + // current checkpoint, which is modified to take 'object' into account. The + // new checkpoint is returned. + static T UpdateCheckpointForObject(T checkpoint, const PoolObject* object); + + // Helper function to add a new object into a sorted objects_ array. + void Insert(const PoolObject& new_object); + + // Helper functions to remove an object from objects_ and delete the + // corresponding LocationBase object, if necessary. This will be called + // either after placing the object, or when Bind() is called. + void RemoveAndDelete(PoolObject* object); + objects_iter RemoveAndDelete(objects_iter iter); + + // Helper function to check if we should skip emitting an object. + bool ShouldSkipObject(PoolObject* pool_object, + T pc, + int num_bytes, + ForwardReference* new_reference, + LocationBase* new_object, + PoolObject* existing_object) const; + + // Used only for debugging. + void DumpCurrentState(T pc) const; + + // Methods used for testing only, via the test friend classes. + bool PoolIsEmptyForTest() const { return objects_.empty(); } + T GetCheckpointForTest() const { return checkpoint_; } + int GetPoolSizeForTest() const; + + // The objects we are tracking references to. The objects_ vector is sorted + // at all times between calls to the public members of the PoolManager. 
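+  // Illustrative note (not from the upstream sources): a macro-assembler is
+  // expected to drive this interface roughly as follows before emitting an
+  // instruction of 'size' bytes at 'pc':
+  //
+  //   if (!pool_manager.IsBlocked() && pool_manager.MustEmit(pc, size)) {
+  //     pc = pool_manager.Emit(masm, pc, size);
+  //   }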
It + // is sorted every time we add, delete or update a PoolObject. + // TODO: Consider a more efficient data structure here, to allow us to delete + // elements as we emit them. + std::vector > objects_; + + // Objects to be deleted on pool destruction. + std::vector*> delete_on_destruction_; + + // The header_size_ and alignment_ values are hardcoded for each instance of + // PoolManager. The PoolManager does not know how to emit the header, and + // relies on the EmitPoolHeader and EndPool methods of the + // MacroAssemblerInterface for that. It will also emit padding if necessary, + // both for the header and at the end of the pool, according to alignment_, + // and using the EmitNopBytes and EmitPaddingBytes method of the + // MacroAssemblerInterface. + + // The size of the header, in bytes. + int header_size_; + // The alignment of the header - must be a power of two. + int alignment_; + // The alignment of the buffer - we cannot guarantee any object alignment + // larger than this alignment. When a buffer is grown, this alignment has + // to be guaranteed. + // TODO: Consider extending this to describe the guaranteed alignment as the + // modulo of a known number. + int buffer_alignment_; + + // The current checkpoint. This is the latest location at which the pool + // *must* be emitted. This should not be visible outside the pool manager + // and should only be updated in RecalculateCheckpoint. + T checkpoint_; + + // Maximum size of the pool, assuming we need the maximum possible padding + // for each object and for the header. It is only updated in + // RecalculateCheckpoint. + T max_pool_size_; + + // Indicates whether the emission of this pool is blocked. + int monitor_; + + friend class vixl::TestPoolManager; +}; + + +} // namespace vixl + +#endif // VIXL_POOL_MANAGER_H_ diff --git a/3rdparty/vixl/include/vixl/utils-vixl.h b/3rdparty/vixl/include/vixl/utils-vixl.h new file mode 100644 index 0000000000..a6632b2fc0 --- /dev/null +++ b/3rdparty/vixl/include/vixl/utils-vixl.h @@ -0,0 +1,1442 @@ +// Copyright 2015, VIXL authors +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// * Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// * Neither the name of ARM Limited nor the names of its contributors may be +// used to endorse or promote products derived from this software without +// specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS CONTRIBUTORS "AS IS" AND +// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +// DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE +// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +#ifndef VIXL_UTILS_H +#define VIXL_UTILS_H + +#include +#include +#include +#include +#include + +#include "compiler-intrinsics-vixl.h" +#include "globals-vixl.h" + +namespace vixl { + +// Macros for compile-time format checking. +#if GCC_VERSION_OR_NEWER(4, 4, 0) +#define PRINTF_CHECK(format_index, varargs_index) \ + __attribute__((format(gnu_printf, format_index, varargs_index))) +#else +#define PRINTF_CHECK(format_index, varargs_index) +#endif + +#ifdef __GNUC__ +#define VIXL_HAS_DEPRECATED_WITH_MSG +#elif defined(__clang__) +#if __has_extension(attribute_deprecated_with_message) +#define VIXL_HAS_DEPRECATED_WITH_MSG +#endif +#endif + +#ifdef VIXL_HAS_DEPRECATED_WITH_MSG +#define VIXL_DEPRECATED(replaced_by, declarator) \ + __attribute__((deprecated("Use \"" replaced_by "\" instead"))) declarator +#else +#define VIXL_DEPRECATED(replaced_by, declarator) declarator +#endif + +#ifdef VIXL_DEBUG +#define VIXL_UNREACHABLE_OR_FALLTHROUGH() VIXL_UNREACHABLE() +#else +#define VIXL_UNREACHABLE_OR_FALLTHROUGH() VIXL_FALLTHROUGH() +#endif + +template +constexpr size_t ArrayLength(const T (&)[n]) { + return n; +} + +inline uint64_t GetUintMask(unsigned bits) { + VIXL_ASSERT(bits <= 64); + uint64_t base = (bits >= 64) ? 0 : (UINT64_C(1) << bits); + return base - 1; +} + +inline uint64_t GetSignMask(unsigned bits) { + VIXL_ASSERT(bits <= 64); + return UINT64_C(1) << (bits - 1); +} + +// Check number width. +// TODO: Refactor these using templates. +inline bool IsIntN(unsigned n, uint32_t x) { + VIXL_ASSERT((0 < n) && (n <= 32)); + return x <= static_cast(INT32_MAX >> (32 - n)); +} +inline bool IsIntN(unsigned n, int32_t x) { + VIXL_ASSERT((0 < n) && (n <= 32)); + if (n == 32) return true; + int32_t limit = INT32_C(1) << (n - 1); + return (-limit <= x) && (x < limit); +} +inline bool IsIntN(unsigned n, uint64_t x) { + VIXL_ASSERT((0 < n) && (n <= 64)); + return x <= static_cast(INT64_MAX >> (64 - n)); +} +inline bool IsIntN(unsigned n, int64_t x) { + VIXL_ASSERT((0 < n) && (n <= 64)); + if (n == 64) return true; + int64_t limit = INT64_C(1) << (n - 1); + return (-limit <= x) && (x < limit); +} +VIXL_DEPRECATED("IsIntN", inline bool is_intn(unsigned n, int64_t x)) { + return IsIntN(n, x); +} + +inline bool IsUintN(unsigned n, uint32_t x) { + VIXL_ASSERT((0 < n) && (n <= 32)); + if (n >= 32) return true; + return !(x >> n); +} +inline bool IsUintN(unsigned n, int32_t x) { + VIXL_ASSERT((0 < n) && (n < 32)); + // Convert to an unsigned integer to avoid implementation-defined behavior. + return !(static_cast(x) >> n); +} +inline bool IsUintN(unsigned n, uint64_t x) { + VIXL_ASSERT((0 < n) && (n <= 64)); + if (n >= 64) return true; + return !(x >> n); +} +inline bool IsUintN(unsigned n, int64_t x) { + VIXL_ASSERT((0 < n) && (n < 64)); + // Convert to an unsigned integer to avoid implementation-defined behavior. 
+ return !(static_cast(x) >> n); +} +VIXL_DEPRECATED("IsUintN", inline bool is_uintn(unsigned n, int64_t x)) { + return IsUintN(n, x); +} + +inline uint64_t TruncateToUintN(unsigned n, uint64_t x) { + VIXL_ASSERT((0 < n) && (n < 64)); + return static_cast(x) & ((UINT64_C(1) << n) - 1); +} +VIXL_DEPRECATED("TruncateToUintN", + inline uint64_t truncate_to_intn(unsigned n, int64_t x)) { + return TruncateToUintN(n, x); +} + +// clang-format off +#define INT_1_TO_32_LIST(V) \ +V(1) V(2) V(3) V(4) V(5) V(6) V(7) V(8) \ +V(9) V(10) V(11) V(12) V(13) V(14) V(15) V(16) \ +V(17) V(18) V(19) V(20) V(21) V(22) V(23) V(24) \ +V(25) V(26) V(27) V(28) V(29) V(30) V(31) V(32) + +#define INT_33_TO_63_LIST(V) \ +V(33) V(34) V(35) V(36) V(37) V(38) V(39) V(40) \ +V(41) V(42) V(43) V(44) V(45) V(46) V(47) V(48) \ +V(49) V(50) V(51) V(52) V(53) V(54) V(55) V(56) \ +V(57) V(58) V(59) V(60) V(61) V(62) V(63) + +#define INT_1_TO_63_LIST(V) INT_1_TO_32_LIST(V) INT_33_TO_63_LIST(V) + +// clang-format on + +#define DECLARE_IS_INT_N(N) \ + inline bool IsInt##N(int64_t x) { return IsIntN(N, x); } \ + VIXL_DEPRECATED("IsInt" #N, inline bool is_int##N(int64_t x)) { \ + return IsIntN(N, x); \ + } + +#define DECLARE_IS_UINT_N(N) \ + inline bool IsUint##N(int64_t x) { return IsUintN(N, x); } \ + VIXL_DEPRECATED("IsUint" #N, inline bool is_uint##N(int64_t x)) { \ + return IsUintN(N, x); \ + } + +#define DECLARE_TRUNCATE_TO_UINT_32(N) \ + inline uint32_t TruncateToUint##N(uint64_t x) { \ + return static_cast(TruncateToUintN(N, x)); \ + } \ + VIXL_DEPRECATED("TruncateToUint" #N, \ + inline uint32_t truncate_to_int##N(int64_t x)) { \ + return TruncateToUint##N(x); \ + } + +INT_1_TO_63_LIST(DECLARE_IS_INT_N) +INT_1_TO_63_LIST(DECLARE_IS_UINT_N) +INT_1_TO_32_LIST(DECLARE_TRUNCATE_TO_UINT_32) + +#undef DECLARE_IS_INT_N +#undef DECLARE_IS_UINT_N +#undef DECLARE_TRUNCATE_TO_INT_N + +// Bit field extraction. +inline uint64_t ExtractUnsignedBitfield64(int msb, int lsb, uint64_t x) { + VIXL_ASSERT((static_cast(msb) < sizeof(x) * 8) && (lsb >= 0) && + (msb >= lsb)); + if ((msb == 63) && (lsb == 0)) return x; + return (x >> lsb) & ((static_cast(1) << (1 + msb - lsb)) - 1); +} + + +inline uint32_t ExtractUnsignedBitfield32(int msb, int lsb, uint64_t x) { + VIXL_ASSERT((static_cast(msb) < sizeof(x) * 8) && (lsb >= 0) && + (msb >= lsb)); + return TruncateToUint32(ExtractUnsignedBitfield64(msb, lsb, x)); +} + + +inline int64_t ExtractSignedBitfield64(int msb, int lsb, uint64_t x) { + VIXL_ASSERT((static_cast(msb) < sizeof(x) * 8) && (lsb >= 0) && + (msb >= lsb)); + uint64_t temp = ExtractUnsignedBitfield64(msb, lsb, x); + // If the highest extracted bit is set, sign extend. 
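+  // Illustrative example (not from the upstream comments): extracting bits
+  // [3:0] from 0xc gives 0b1100; bit 3 is set, so the value is sign extended
+  // to 0xfffffffffffffffc, i.e. -4.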
+ if ((temp >> (msb - lsb)) == 1) { + temp |= ~UINT64_C(0) << (msb - lsb); + } + int64_t result; + memcpy(&result, &temp, sizeof(result)); + return result; +} + +inline int32_t ExtractSignedBitfield32(int msb, int lsb, uint64_t x) { + VIXL_ASSERT((static_cast(msb) < sizeof(x) * 8) && (lsb >= 0) && + (msb >= lsb)); + uint32_t temp = TruncateToUint32(ExtractSignedBitfield64(msb, lsb, x)); + int32_t result; + memcpy(&result, &temp, sizeof(result)); + return result; +} + +inline uint64_t RotateRight(uint64_t value, + unsigned int rotate, + unsigned int width) { + VIXL_ASSERT((width > 0) && (width <= 64)); + uint64_t width_mask = ~UINT64_C(0) >> (64 - width); + rotate &= 63; + if (rotate > 0) { + value &= width_mask; + value = (value << (width - rotate)) | (value >> rotate); + } + return value & width_mask; +} + + +// Wrapper class for passing FP16 values through the assembler. +// This is purely to aid with type checking/casting. +class Float16 { + public: + explicit Float16(double dvalue); + Float16() : rawbits_(0x0) {} + friend uint16_t Float16ToRawbits(Float16 value); + friend Float16 RawbitsToFloat16(uint16_t bits); + + protected: + uint16_t rawbits_; +}; + +// Floating point representation. +uint16_t Float16ToRawbits(Float16 value); + + +uint32_t FloatToRawbits(float value); +VIXL_DEPRECATED("FloatToRawbits", + inline uint32_t float_to_rawbits(float value)) { + return FloatToRawbits(value); +} + +uint64_t DoubleToRawbits(double value); +VIXL_DEPRECATED("DoubleToRawbits", + inline uint64_t double_to_rawbits(double value)) { + return DoubleToRawbits(value); +} + +Float16 RawbitsToFloat16(uint16_t bits); + +float RawbitsToFloat(uint32_t bits); +VIXL_DEPRECATED("RawbitsToFloat", + inline float rawbits_to_float(uint32_t bits)) { + return RawbitsToFloat(bits); +} + +double RawbitsToDouble(uint64_t bits); +VIXL_DEPRECATED("RawbitsToDouble", + inline double rawbits_to_double(uint64_t bits)) { + return RawbitsToDouble(bits); +} + +// Some compilers dislike negating unsigned integers, +// so we provide an equivalent. +template +T UnsignedNegate(T value) { + VIXL_STATIC_ASSERT(std::is_unsigned::value); + return ~value + 1; +} + +// An absolute operation for signed integers that is defined for results outside +// the representable range. Specifically, Abs(MIN_INT) is MIN_INT. +template +T Abs(T val) { + // TODO: this static assertion is for signed integer inputs, as that's the + // only type tested. However, the code should work for all numeric inputs. + // Remove the assertion and this comment when more tests are available. + VIXL_STATIC_ASSERT(std::is_signed::value && std::is_integral::value); + return ((val >= -std::numeric_limits::max()) && (val < 0)) ? -val : val; +} + +// Convert unsigned to signed numbers in a well-defined way (using two's +// complement representations). +inline int64_t RawbitsToInt64(uint64_t bits) { + return (bits >= UINT64_C(0x8000000000000000)) + ? (-static_cast(UnsignedNegate(bits) - 1) - 1) + : static_cast(bits); +} + +inline int32_t RawbitsToInt32(uint32_t bits) { + return (bits >= UINT64_C(0x80000000)) + ? (-static_cast(UnsignedNegate(bits) - 1) - 1) + : static_cast(bits); +} + +namespace internal { + +// Internal simulation class used solely by the simulator to +// provide an abstraction layer for any half-precision arithmetic. +class SimFloat16 : public Float16 { + public: + // TODO: We should investigate making this constructor explicit. 
+ // This is currently difficult to do due to a number of templated + // functions in the simulator which rely on returning double values. + SimFloat16(double dvalue) : Float16(dvalue) {} // NOLINT(runtime/explicit) + SimFloat16(Float16 f) { // NOLINT(runtime/explicit) + this->rawbits_ = Float16ToRawbits(f); + } + SimFloat16() : Float16() {} + SimFloat16 operator-() const; + SimFloat16 operator+(SimFloat16 rhs) const; + SimFloat16 operator-(SimFloat16 rhs) const; + SimFloat16 operator*(SimFloat16 rhs) const; + SimFloat16 operator/(SimFloat16 rhs) const; + bool operator<(SimFloat16 rhs) const; + bool operator>(SimFloat16 rhs) const; + bool operator==(SimFloat16 rhs) const; + bool operator!=(SimFloat16 rhs) const; + // This is necessary for conversions performed in (macro asm) Fmov. + bool operator==(double rhs) const; + operator double() const; +}; +} // namespace internal + +uint32_t Float16Sign(internal::SimFloat16 value); + +uint32_t Float16Exp(internal::SimFloat16 value); + +uint32_t Float16Mantissa(internal::SimFloat16 value); + +uint32_t FloatSign(float value); +VIXL_DEPRECATED("FloatSign", inline uint32_t float_sign(float value)) { + return FloatSign(value); +} + +uint32_t FloatExp(float value); +VIXL_DEPRECATED("FloatExp", inline uint32_t float_exp(float value)) { + return FloatExp(value); +} + +uint32_t FloatMantissa(float value); +VIXL_DEPRECATED("FloatMantissa", inline uint32_t float_mantissa(float value)) { + return FloatMantissa(value); +} + +uint32_t DoubleSign(double value); +VIXL_DEPRECATED("DoubleSign", inline uint32_t double_sign(double value)) { + return DoubleSign(value); +} + +uint32_t DoubleExp(double value); +VIXL_DEPRECATED("DoubleExp", inline uint32_t double_exp(double value)) { + return DoubleExp(value); +} + +uint64_t DoubleMantissa(double value); +VIXL_DEPRECATED("DoubleMantissa", + inline uint64_t double_mantissa(double value)) { + return DoubleMantissa(value); +} + +internal::SimFloat16 Float16Pack(uint16_t sign, + uint16_t exp, + uint16_t mantissa); + +float FloatPack(uint32_t sign, uint32_t exp, uint32_t mantissa); +VIXL_DEPRECATED("FloatPack", + inline float float_pack(uint32_t sign, + uint32_t exp, + uint32_t mantissa)) { + return FloatPack(sign, exp, mantissa); +} + +double DoublePack(uint64_t sign, uint64_t exp, uint64_t mantissa); +VIXL_DEPRECATED("DoublePack", + inline double double_pack(uint32_t sign, + uint32_t exp, + uint64_t mantissa)) { + return DoublePack(sign, exp, mantissa); +} + +// An fpclassify() function for 16-bit half-precision floats. +int Float16Classify(Float16 value); +VIXL_DEPRECATED("Float16Classify", inline int float16classify(uint16_t value)) { + return Float16Classify(RawbitsToFloat16(value)); +} + +bool IsZero(Float16 value); + +inline bool IsPositiveZero(double value) { + return (value == 0.0) && (copysign(1.0, value) > 0.0); +} + +inline bool IsNaN(float value) { return std::isnan(value); } + +inline bool IsNaN(double value) { return std::isnan(value); } + +inline bool IsNaN(Float16 value) { return Float16Classify(value) == FP_NAN; } + +inline bool IsInf(float value) { return std::isinf(value); } + +inline bool IsInf(double value) { return std::isinf(value); } + +inline bool IsInf(Float16 value) { + return Float16Classify(value) == FP_INFINITE; +} + + +// NaN tests. 
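+// Illustrative note (not from the upstream comments): for double precision the
+// quiet bit is the top mantissa bit (kFP64QuietNaNMask below), so the raw bit
+// pattern 0x7ff0000000000001 is a signalling NaN whereas 0x7ff8000000000000 is
+// a quiet NaN.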
+inline bool IsSignallingNaN(double num) { + const uint64_t kFP64QuietNaNMask = UINT64_C(0x0008000000000000); + uint64_t raw = DoubleToRawbits(num); + if (IsNaN(num) && ((raw & kFP64QuietNaNMask) == 0)) { + return true; + } + return false; +} + + +inline bool IsSignallingNaN(float num) { + const uint32_t kFP32QuietNaNMask = 0x00400000; + uint32_t raw = FloatToRawbits(num); + if (IsNaN(num) && ((raw & kFP32QuietNaNMask) == 0)) { + return true; + } + return false; +} + + +inline bool IsSignallingNaN(Float16 num) { + const uint16_t kFP16QuietNaNMask = 0x0200; + return IsNaN(num) && ((Float16ToRawbits(num) & kFP16QuietNaNMask) == 0); +} + + +template +inline bool IsQuietNaN(T num) { + return IsNaN(num) && !IsSignallingNaN(num); +} + + +// Convert the NaN in 'num' to a quiet NaN. +inline double ToQuietNaN(double num) { + const uint64_t kFP64QuietNaNMask = UINT64_C(0x0008000000000000); + VIXL_ASSERT(IsNaN(num)); + return RawbitsToDouble(DoubleToRawbits(num) | kFP64QuietNaNMask); +} + + +inline float ToQuietNaN(float num) { + const uint32_t kFP32QuietNaNMask = 0x00400000; + VIXL_ASSERT(IsNaN(num)); + return RawbitsToFloat(FloatToRawbits(num) | kFP32QuietNaNMask); +} + + +inline internal::SimFloat16 ToQuietNaN(internal::SimFloat16 num) { + const uint16_t kFP16QuietNaNMask = 0x0200; + VIXL_ASSERT(IsNaN(num)); + return internal::SimFloat16( + RawbitsToFloat16(Float16ToRawbits(num) | kFP16QuietNaNMask)); +} + + +// Fused multiply-add. +inline double FusedMultiplyAdd(double op1, double op2, double a) { + return fma(op1, op2, a); +} + + +inline float FusedMultiplyAdd(float op1, float op2, float a) { + return fmaf(op1, op2, a); +} + + +inline uint64_t LowestSetBit(uint64_t value) { + return value & UnsignedNegate(value); +} + + +template +inline int HighestSetBitPosition(T value) { + VIXL_ASSERT(value != 0); + return (sizeof(value) * 8 - 1) - CountLeadingZeros(value); +} + + +template +inline int WhichPowerOf2(V value) { + VIXL_ASSERT(IsPowerOf2(value)); + return CountTrailingZeros(value); +} + + +unsigned CountClearHalfWords(uint64_t imm, unsigned reg_size); + + +int BitCount(uint64_t value); + + +template +T ReverseBits(T value) { + VIXL_ASSERT((sizeof(value) == 1) || (sizeof(value) == 2) || + (sizeof(value) == 4) || (sizeof(value) == 8)); + T result = 0; + for (unsigned i = 0; i < (sizeof(value) * 8); i++) { + result = (result << 1) | (value & 1); + value >>= 1; + } + return result; +} + + +template +inline T SignExtend(T val, int size_in_bits) { + VIXL_ASSERT(size_in_bits > 0); + T mask = (T(2) << (size_in_bits - 1)) - T(1); + val &= mask; + T sign_bits = -((val >> (size_in_bits - 1)) << size_in_bits); + val |= sign_bits; + return val; +} + + +template +T ReverseBytes(T value, int block_bytes_log2) { + VIXL_ASSERT((sizeof(value) == 4) || (sizeof(value) == 8)); + VIXL_ASSERT((1U << block_bytes_log2) <= sizeof(value)); + // Split the 64-bit value into an 8-bit array, where b[0] is the least + // significant byte, and b[7] is the most significant. + uint8_t bytes[8]; + uint64_t mask = UINT64_C(0xff00000000000000); + for (int i = 7; i >= 0; i--) { + bytes[i] = (static_cast(value) & mask) >> (i * 8); + mask >>= 8; + } + + // Permutation tables for REV instructions. 
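+  // Illustrative example (not from the upstream comments): with
+  // block_bytes_log2 == 1 (REV16), 0x0123456789abcdef becomes
+  // 0x23016745ab89efcd, i.e. the bytes of each 16-bit half-word are swapped.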
+ // permute_table[0] is used by REV16_x, REV16_w + // permute_table[1] is used by REV32_x, REV_w + // permute_table[2] is used by REV_x + VIXL_ASSERT((0 < block_bytes_log2) && (block_bytes_log2 < 4)); + static const uint8_t permute_table[3][8] = {{6, 7, 4, 5, 2, 3, 0, 1}, + {4, 5, 6, 7, 0, 1, 2, 3}, + {0, 1, 2, 3, 4, 5, 6, 7}}; + uint64_t temp = 0; + for (int i = 0; i < 8; i++) { + temp <<= 8; + temp |= bytes[permute_table[block_bytes_log2 - 1][i]]; + } + + T result; + VIXL_STATIC_ASSERT(sizeof(result) <= sizeof(temp)); + memcpy(&result, &temp, sizeof(result)); + return result; +} + +template +inline bool IsMultiple(T value) { + VIXL_ASSERT(IsPowerOf2(MULTIPLE)); + return (value & (MULTIPLE - 1)) == 0; +} + +template +inline bool IsMultiple(T value, unsigned multiple) { + VIXL_ASSERT(IsPowerOf2(multiple)); + return (value & (multiple - 1)) == 0; +} + +template +inline bool IsAligned(T pointer, int alignment) { + VIXL_ASSERT(IsPowerOf2(alignment)); + return (pointer & (alignment - 1)) == 0; +} + +// Pointer alignment +// TODO: rename/refactor to make it specific to instructions. +template +inline bool IsAligned(T pointer) { + VIXL_ASSERT(sizeof(pointer) == sizeof(intptr_t)); // NOLINT(runtime/sizeof) + // Use C-style casts to get static_cast behaviour for integral types (T), and + // reinterpret_cast behaviour for other types. + return IsAligned((intptr_t)(pointer), ALIGN); +} + +template +bool IsWordAligned(T pointer) { + return IsAligned<4>(pointer); +} + +// Increment a pointer until it has the specified alignment. The alignment must +// be a power of two. +template +T AlignUp(T pointer, + typename Unsigned::type alignment) { + VIXL_ASSERT(IsPowerOf2(alignment)); + // Use C-style casts to get static_cast behaviour for integral types (T), and + // reinterpret_cast behaviour for other types. + + typename Unsigned::type pointer_raw = + (typename Unsigned::type) pointer; + VIXL_STATIC_ASSERT(sizeof(pointer) <= sizeof(pointer_raw)); + + size_t mask = alignment - 1; + T result = (T)((pointer_raw + mask) & ~mask); + VIXL_ASSERT(result >= pointer); + + return result; +} + +// Decrement a pointer until it has the specified alignment. The alignment must +// be a power of two. +template +T AlignDown(T pointer, + typename Unsigned::type alignment) { + VIXL_ASSERT(IsPowerOf2(alignment)); + // Use C-style casts to get static_cast behaviour for integral types (T), and + // reinterpret_cast behaviour for other types. 
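+  // Illustrative example (not from the upstream comments): with a 16-byte
+  // alignment, AlignDown(0x1007, 16) yields 0x1000, whereas the AlignUp()
+  // helper above yields 0x1010.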
+ + typename Unsigned::type pointer_raw = + (typename Unsigned::type) pointer; + VIXL_STATIC_ASSERT(sizeof(pointer) <= sizeof(pointer_raw)); + + size_t mask = alignment - 1; + return (T)(pointer_raw & ~mask); +} + + +template +inline T ExtractBit(T value, unsigned bit) { + return (value >> bit) & T(1); +} + +template +inline Td ExtractBits(Ts value, int least_significant_bit, Td mask) { + return Td((value >> least_significant_bit) & Ts(mask)); +} + +template +inline void AssignBit(Td& dst, // NOLINT(runtime/references) + int bit, + Ts value) { + VIXL_ASSERT((value == Ts(0)) || (value == Ts(1))); + VIXL_ASSERT(bit >= 0); + VIXL_ASSERT(bit < static_cast(sizeof(Td) * 8)); + Td mask(1); + dst &= ~(mask << bit); + dst |= Td(value) << bit; +} + +template +inline void AssignBits(Td& dst, // NOLINT(runtime/references) + int least_significant_bit, + Ts mask, + Ts value) { + VIXL_ASSERT(least_significant_bit >= 0); + VIXL_ASSERT(least_significant_bit < static_cast(sizeof(Td) * 8)); + VIXL_ASSERT(((Td(mask) << least_significant_bit) >> least_significant_bit) == + Td(mask)); + VIXL_ASSERT((value & mask) == value); + dst &= ~(Td(mask) << least_significant_bit); + dst |= Td(value) << least_significant_bit; +} + +class VFP { + public: + static uint32_t FP32ToImm8(float imm) { + // bits: aBbb.bbbc.defg.h000.0000.0000.0000.0000 + uint32_t bits = FloatToRawbits(imm); + // bit7: a000.0000 + uint32_t bit7 = ((bits >> 31) & 0x1) << 7; + // bit6: 0b00.0000 + uint32_t bit6 = ((bits >> 29) & 0x1) << 6; + // bit5_to_0: 00cd.efgh + uint32_t bit5_to_0 = (bits >> 19) & 0x3f; + return static_cast(bit7 | bit6 | bit5_to_0); + } + static uint32_t FP64ToImm8(double imm) { + // bits: aBbb.bbbb.bbcd.efgh.0000.0000.0000.0000 + // 0000.0000.0000.0000.0000.0000.0000.0000 + uint64_t bits = DoubleToRawbits(imm); + // bit7: a000.0000 + uint64_t bit7 = ((bits >> 63) & 0x1) << 7; + // bit6: 0b00.0000 + uint64_t bit6 = ((bits >> 61) & 0x1) << 6; + // bit5_to_0: 00cd.efgh + uint64_t bit5_to_0 = (bits >> 48) & 0x3f; + + return static_cast(bit7 | bit6 | bit5_to_0); + } + static float Imm8ToFP32(uint32_t imm8) { + // Imm8: abcdefgh (8 bits) + // Single: aBbb.bbbc.defg.h000.0000.0000.0000.0000 (32 bits) + // where B is b ^ 1 + uint32_t bits = imm8; + uint32_t bit7 = (bits >> 7) & 0x1; + uint32_t bit6 = (bits >> 6) & 0x1; + uint32_t bit5_to_0 = bits & 0x3f; + uint32_t result = (bit7 << 31) | ((32 - bit6) << 25) | (bit5_to_0 << 19); + + return RawbitsToFloat(result); + } + static double Imm8ToFP64(uint32_t imm8) { + // Imm8: abcdefgh (8 bits) + // Double: aBbb.bbbb.bbcd.efgh.0000.0000.0000.0000 + // 0000.0000.0000.0000.0000.0000.0000.0000 (64 bits) + // where B is b ^ 1 + uint32_t bits = imm8; + uint64_t bit7 = (bits >> 7) & 0x1; + uint64_t bit6 = (bits >> 6) & 0x1; + uint64_t bit5_to_0 = bits & 0x3f; + uint64_t result = (bit7 << 63) | ((256 - bit6) << 54) | (bit5_to_0 << 48); + return RawbitsToDouble(result); + } + static bool IsImmFP32(float imm) { + // Valid values will have the form: + // aBbb.bbbc.defg.h000.0000.0000.0000.0000 + uint32_t bits = FloatToRawbits(imm); + // bits[19..0] are cleared. + if ((bits & 0x7ffff) != 0) { + return false; + } + + + // bits[29..25] are all set or all cleared. + uint32_t b_pattern = (bits >> 16) & 0x3e00; + if (b_pattern != 0 && b_pattern != 0x3e00) { + return false; + } + // bit[30] and bit[29] are opposite. 
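+    // Illustrative example (not from the upstream comments): 1.0f has raw bits
+    // 0x3f800000, passes all three checks and encodes as imm8 0x70, whereas
+    // 0.1f (0x3dcccccd) already fails the low-bits check above.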
+ if (((bits ^ (bits << 1)) & 0x40000000) == 0) { + return false; + } + return true; + } + static bool IsImmFP64(double imm) { + // Valid values will have the form: + // aBbb.bbbb.bbcd.efgh.0000.0000.0000.0000 + // 0000.0000.0000.0000.0000.0000.0000.0000 + uint64_t bits = DoubleToRawbits(imm); + // bits[47..0] are cleared. + if ((bits & 0x0000ffffffffffff) != 0) { + return false; + } + // bits[61..54] are all set or all cleared. + uint32_t b_pattern = (bits >> 48) & 0x3fc0; + if ((b_pattern != 0) && (b_pattern != 0x3fc0)) { + return false; + } + // bit[62] and bit[61] are opposite. + if (((bits ^ (bits << 1)) & (UINT64_C(1) << 62)) == 0) { + return false; + } + return true; + } +}; + +class BitField { + // ForEachBitHelper is a functor that will call + // bool ForEachBitHelper::execute(ElementType id) const + // and expects a boolean in return whether to continue (if true) + // or stop (if false) + // check_set will check if the bits are on (true) or off(false) + template + bool ForEachBit(const ForEachBitHelper& helper) { + for (int i = 0; static_cast(i) < bitfield_.size(); i++) { + if (bitfield_[i] == check_set) + if (!helper.execute(i)) return false; + } + return true; + } + + public: + explicit BitField(unsigned size) : bitfield_(size, 0) {} + + void Set(int i) { + VIXL_ASSERT((i >= 0) && (static_cast(i) < bitfield_.size())); + bitfield_[i] = true; + } + + void Unset(int i) { + VIXL_ASSERT((i >= 0) && (static_cast(i) < bitfield_.size())); + bitfield_[i] = true; + } + + bool IsSet(int i) const { return bitfield_[i]; } + + // For each bit not set in the bitfield call the execute functor + // execute. + // ForEachBitSetHelper::execute returns true if the iteration through + // the bits can continue, otherwise it will stop. + // struct ForEachBitSetHelper { + // bool execute(int /*id*/) { return false; } + // }; + template + bool ForEachBitNotSet(const ForEachBitNotSetHelper& helper) { + return ForEachBit(helper); + } + + // For each bit set in the bitfield call the execute functor + // execute. + template + bool ForEachBitSet(const ForEachBitSetHelper& helper) { + return ForEachBit(helper); + } + + private: + std::vector bitfield_; +}; + +namespace internal { + +typedef int64_t Int64; +class Uint64; +class Uint128; + +class Uint32 { + uint32_t data_; + + public: + // Unlike uint32_t, Uint32 has a default constructor. 
+ Uint32() { data_ = 0; } + explicit Uint32(uint32_t data) : data_(data) {} + inline explicit Uint32(Uint64 data); + uint32_t Get() const { return data_; } + template + int32_t GetSigned() const { + return ExtractSignedBitfield32(N - 1, 0, data_); + } + int32_t GetSigned() const { return data_; } + Uint32 operator~() const { return Uint32(~data_); } + Uint32 operator-() const { return Uint32(UnsignedNegate(data_)); } + bool operator==(Uint32 value) const { return data_ == value.data_; } + bool operator!=(Uint32 value) const { return data_ != value.data_; } + bool operator>(Uint32 value) const { return data_ > value.data_; } + Uint32 operator+(Uint32 value) const { return Uint32(data_ + value.data_); } + Uint32 operator-(Uint32 value) const { return Uint32(data_ - value.data_); } + Uint32 operator&(Uint32 value) const { return Uint32(data_ & value.data_); } + Uint32 operator&=(Uint32 value) { + data_ &= value.data_; + return *this; + } + Uint32 operator^(Uint32 value) const { return Uint32(data_ ^ value.data_); } + Uint32 operator^=(Uint32 value) { + data_ ^= value.data_; + return *this; + } + Uint32 operator|(Uint32 value) const { return Uint32(data_ | value.data_); } + Uint32 operator|=(Uint32 value) { + data_ |= value.data_; + return *this; + } + // Unlike uint32_t, the shift functions can accept negative shift and + // return 0 when the shift is too big. + Uint32 operator>>(int shift) const { + if (shift == 0) return *this; + if (shift < 0) { + int tmp = -shift; + if (tmp >= 32) return Uint32(0); + return Uint32(data_ << tmp); + } + int tmp = shift; + if (tmp >= 32) return Uint32(0); + return Uint32(data_ >> tmp); + } + Uint32 operator<<(int shift) const { + if (shift == 0) return *this; + if (shift < 0) { + int tmp = -shift; + if (tmp >= 32) return Uint32(0); + return Uint32(data_ >> tmp); + } + int tmp = shift; + if (tmp >= 32) return Uint32(0); + return Uint32(data_ << tmp); + } +}; + +class Uint64 { + uint64_t data_; + + public: + // Unlike uint64_t, Uint64 has a default constructor. 
+ Uint64() { data_ = 0; } + explicit Uint64(uint64_t data) : data_(data) {} + explicit Uint64(Uint32 data) : data_(data.Get()) {} + inline explicit Uint64(Uint128 data); + uint64_t Get() const { return data_; } + int64_t GetSigned(int N) const { + return ExtractSignedBitfield64(N - 1, 0, data_); + } + int64_t GetSigned() const { return data_; } + Uint32 ToUint32() const { + VIXL_ASSERT((data_ >> 32) == 0); + return Uint32(static_cast(data_)); + } + Uint32 GetHigh32() const { return Uint32(data_ >> 32); } + Uint32 GetLow32() const { return Uint32(data_ & 0xffffffff); } + Uint64 operator~() const { return Uint64(~data_); } + Uint64 operator-() const { return Uint64(UnsignedNegate(data_)); } + bool operator==(Uint64 value) const { return data_ == value.data_; } + bool operator!=(Uint64 value) const { return data_ != value.data_; } + Uint64 operator+(Uint64 value) const { return Uint64(data_ + value.data_); } + Uint64 operator-(Uint64 value) const { return Uint64(data_ - value.data_); } + Uint64 operator&(Uint64 value) const { return Uint64(data_ & value.data_); } + Uint64 operator&=(Uint64 value) { + data_ &= value.data_; + return *this; + } + Uint64 operator^(Uint64 value) const { return Uint64(data_ ^ value.data_); } + Uint64 operator^=(Uint64 value) { + data_ ^= value.data_; + return *this; + } + Uint64 operator|(Uint64 value) const { return Uint64(data_ | value.data_); } + Uint64 operator|=(Uint64 value) { + data_ |= value.data_; + return *this; + } + // Unlike uint64_t, the shift functions can accept negative shift and + // return 0 when the shift is too big. + Uint64 operator>>(int shift) const { + if (shift == 0) return *this; + if (shift < 0) { + int tmp = -shift; + if (tmp >= 64) return Uint64(0); + return Uint64(data_ << tmp); + } + int tmp = shift; + if (tmp >= 64) return Uint64(0); + return Uint64(data_ >> tmp); + } + Uint64 operator<<(int shift) const { + if (shift == 0) return *this; + if (shift < 0) { + int tmp = -shift; + if (tmp >= 64) return Uint64(0); + return Uint64(data_ >> tmp); + } + int tmp = shift; + if (tmp >= 64) return Uint64(0); + return Uint64(data_ << tmp); + } +}; + +class Uint128 { + uint64_t data_high_; + uint64_t data_low_; + + public: + Uint128() : data_high_(0), data_low_(0) {} + explicit Uint128(uint64_t data_low) : data_high_(0), data_low_(data_low) {} + explicit Uint128(Uint64 data_low) + : data_high_(0), data_low_(data_low.Get()) {} + Uint128(uint64_t data_high, uint64_t data_low) + : data_high_(data_high), data_low_(data_low) {} + Uint64 ToUint64() const { + VIXL_ASSERT(data_high_ == 0); + return Uint64(data_low_); + } + Uint64 GetHigh64() const { return Uint64(data_high_); } + Uint64 GetLow64() const { return Uint64(data_low_); } + Uint128 operator~() const { return Uint128(~data_high_, ~data_low_); } + bool operator==(Uint128 value) const { + return (data_high_ == value.data_high_) && (data_low_ == value.data_low_); + } + Uint128 operator&(Uint128 value) const { + return Uint128(data_high_ & value.data_high_, data_low_ & value.data_low_); + } + Uint128 operator&=(Uint128 value) { + data_high_ &= value.data_high_; + data_low_ &= value.data_low_; + return *this; + } + Uint128 operator|=(Uint128 value) { + data_high_ |= value.data_high_; + data_low_ |= value.data_low_; + return *this; + } + Uint128 operator>>(int shift) const { + VIXL_ASSERT((shift >= 0) && (shift < 128)); + if (shift == 0) return *this; + if (shift >= 64) { + return Uint128(0, data_high_ >> (shift - 64)); + } + uint64_t tmp = (data_high_ << (64 - shift)) | (data_low_ >> shift); + 
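+    // Illustrative example (not from the upstream comments): for a shift of 1,
+    // Uint128(1, 0) becomes Uint128(0, 0x8000000000000000); the bit shifted
+    // out of the bottom of data_high_ becomes the top bit of the low half.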
return Uint128(data_high_ >> shift, tmp); + } + Uint128 operator<<(int shift) const { + VIXL_ASSERT((shift >= 0) && (shift < 128)); + if (shift == 0) return *this; + if (shift >= 64) { + return Uint128(data_low_ << (shift - 64), 0); + } + uint64_t tmp = (data_high_ << shift) | (data_low_ >> (64 - shift)); + return Uint128(tmp, data_low_ << shift); + } +}; + +Uint32::Uint32(Uint64 data) : data_(data.ToUint32().Get()) {} +Uint64::Uint64(Uint128 data) : data_(data.ToUint64().Get()) {} + +Int64 BitCount(Uint32 value); + +// The algorithm used is adapted from the one described in section 8.2 of +// Hacker's Delight, by Henry S. Warren, Jr. +template +int64_t MultiplyHigh(T u, T v) { + uint64_t u0, v0, w0, u1, v1, w1, w2, t; + VIXL_STATIC_ASSERT((N == 8) || (N == 16) || (N == 32) || (N == 64)); + uint64_t sign_mask = UINT64_C(1) << (N - 1); + uint64_t sign_ext = 0; + unsigned half_bits = N / 2; + uint64_t half_mask = GetUintMask(half_bits); + if (std::numeric_limits::is_signed) { + sign_ext = UINT64_C(0xffffffffffffffff) << half_bits; + } + + VIXL_ASSERT(sizeof(u) == sizeof(uint64_t)); + VIXL_ASSERT(sizeof(u) == sizeof(u0)); + + u0 = u & half_mask; + u1 = u >> half_bits | (((u & sign_mask) != 0) ? sign_ext : 0); + v0 = v & half_mask; + v1 = v >> half_bits | (((v & sign_mask) != 0) ? sign_ext : 0); + + w0 = u0 * v0; + t = u1 * v0 + (w0 >> half_bits); + + w1 = t & half_mask; + w2 = t >> half_bits | (((t & sign_mask) != 0) ? sign_ext : 0); + w1 = u0 * v1 + w1; + w1 = w1 >> half_bits | (((w1 & sign_mask) != 0) ? sign_ext : 0); + + uint64_t value = u1 * v1 + w2 + w1; + int64_t result; + memcpy(&result, &value, sizeof(result)); + return result; +} + +} // namespace internal + +// The default NaN values (for FPCR.DN=1). +extern const double kFP64DefaultNaN; +extern const float kFP32DefaultNaN; +extern const Float16 kFP16DefaultNaN; + +// Floating-point infinity values. +extern const Float16 kFP16PositiveInfinity; +extern const Float16 kFP16NegativeInfinity; +extern const float kFP32PositiveInfinity; +extern const float kFP32NegativeInfinity; +extern const double kFP64PositiveInfinity; +extern const double kFP64NegativeInfinity; + +// Floating-point zero values. +extern const Float16 kFP16PositiveZero; +extern const Float16 kFP16NegativeZero; + +// AArch64 floating-point specifics. These match IEEE-754. +const unsigned kDoubleMantissaBits = 52; +const unsigned kDoubleExponentBits = 11; +const unsigned kFloatMantissaBits = 23; +const unsigned kFloatExponentBits = 8; +const unsigned kFloat16MantissaBits = 10; +const unsigned kFloat16ExponentBits = 5; + +enum FPRounding { + // The first four values are encodable directly by FPCR. + FPTieEven = 0x0, + FPPositiveInfinity = 0x1, + FPNegativeInfinity = 0x2, + FPZero = 0x3, + + // The final rounding modes are only available when explicitly specified by + // the instruction (such as with fcvta). It cannot be set in FPCR. + FPTieAway, + FPRoundOdd +}; + +enum UseDefaultNaN { kUseDefaultNaN, kIgnoreDefaultNaN }; + +// Assemble the specified IEEE-754 components into the target type and apply +// appropriate rounding. +// sign: 0 = positive, 1 = negative +// exponent: Unbiased IEEE-754 exponent. +// mantissa: The mantissa of the input. The top bit (which is not encoded for +// normal IEEE-754 values) must not be omitted. This bit has the +// value 'pow(2, exponent)'. +// +// The input value is assumed to be a normalized value. That is, the input may +// not be infinity or NaN. 
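+// For example (an illustration added here, not from the upstream comment): the
+// value 1.5 would be passed as sign = 0, exponent = 0 and mantissa = 0x3, the
+// top set bit of the mantissa representing 2^0, so 1.5 = (1 + 0.5) * 2^0.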
If the source value is subnormal, it must be +// normalized before calling this function such that the highest set bit in the +// mantissa has the value 'pow(2, exponent)'. +// +// Callers should use FPRoundToFloat or FPRoundToDouble directly, rather than +// calling a templated FPRound. +template +T FPRound(int64_t sign, + int64_t exponent, + uint64_t mantissa, + FPRounding round_mode) { + VIXL_ASSERT((sign == 0) || (sign == 1)); + + // Only FPTieEven and FPRoundOdd rounding modes are implemented. + VIXL_ASSERT((round_mode == FPTieEven) || (round_mode == FPRoundOdd)); + + // Rounding can promote subnormals to normals, and normals to infinities. For + // example, a double with exponent 127 (FLT_MAX_EXP) would appear to be + // encodable as a float, but rounding based on the low-order mantissa bits + // could make it overflow. With ties-to-even rounding, this value would become + // an infinity. + + // ---- Rounding Method ---- + // + // The exponent is irrelevant in the rounding operation, so we treat the + // lowest-order bit that will fit into the result ('onebit') as having + // the value '1'. Similarly, the highest-order bit that won't fit into + // the result ('halfbit') has the value '0.5'. The 'point' sits between + // 'onebit' and 'halfbit': + // + // These bits fit into the result. + // |---------------------| + // mantissa = 0bxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx + // || + // / | + // / halfbit + // onebit + // + // For subnormal outputs, the range of representable bits is smaller and + // the position of onebit and halfbit depends on the exponent of the + // input, but the method is otherwise similar. + // + // onebit(frac) + // | + // | halfbit(frac) halfbit(adjusted) + // | / / + // | | | + // 0b00.0 (exact) -> 0b00.0 (exact) -> 0b00 + // 0b00.0... -> 0b00.0... -> 0b00 + // 0b00.1 (exact) -> 0b00.0111..111 -> 0b00 + // 0b00.1... -> 0b00.1... -> 0b01 + // 0b01.0 (exact) -> 0b01.0 (exact) -> 0b01 + // 0b01.0... -> 0b01.0... -> 0b01 + // 0b01.1 (exact) -> 0b01.1 (exact) -> 0b10 + // 0b01.1... -> 0b01.1... -> 0b10 + // 0b10.0 (exact) -> 0b10.0 (exact) -> 0b10 + // 0b10.0... -> 0b10.0... -> 0b10 + // 0b10.1 (exact) -> 0b10.0111..111 -> 0b10 + // 0b10.1... -> 0b10.1... -> 0b11 + // 0b11.0 (exact) -> 0b11.0 (exact) -> 0b11 + // ... / | / | + // / | / | + // / | + // adjusted = frac - (halfbit(mantissa) & ~onebit(frac)); / | + // + // mantissa = (mantissa >> shift) + halfbit(adjusted); + + static const int mantissa_offset = 0; + static const int exponent_offset = mantissa_offset + mbits; + static const int sign_offset = exponent_offset + ebits; + VIXL_ASSERT(sign_offset == (sizeof(T) * 8 - 1)); + + // Bail out early for zero inputs. + if (mantissa == 0) { + return static_cast(sign << sign_offset); + } + + // If all bits in the exponent are set, the value is infinite or NaN. + // This is true for all binary IEEE-754 formats. + static const int infinite_exponent = (1 << ebits) - 1; + static const int max_normal_exponent = infinite_exponent - 1; + + // Apply the exponent bias to encode it for the result. Doing this early makes + // it easy to detect values that will be infinite or subnormal. + exponent += max_normal_exponent >> 1; + + if (exponent > max_normal_exponent) { + // Overflow: the input is too large for the result type to represent. + if (round_mode == FPTieEven) { + // FPTieEven rounding mode handles overflows using infinities. 
+ exponent = infinite_exponent; + mantissa = 0; + } else { + VIXL_ASSERT(round_mode == FPRoundOdd); + // FPRoundOdd rounding mode handles overflows using the largest magnitude + // normal number. + exponent = max_normal_exponent; + mantissa = (UINT64_C(1) << exponent_offset) - 1; + } + return static_cast((sign << sign_offset) | + (exponent << exponent_offset) | + (mantissa << mantissa_offset)); + } + + // Calculate the shift required to move the top mantissa bit to the proper + // place in the destination type. + const int highest_significant_bit = 63 - CountLeadingZeros(mantissa); + int shift = highest_significant_bit - mbits; + + if (exponent <= 0) { + // The output will be subnormal (before rounding). + // For subnormal outputs, the shift must be adjusted by the exponent. The +1 + // is necessary because the exponent of a subnormal value (encoded as 0) is + // the same as the exponent of the smallest normal value (encoded as 1). + shift += static_cast(-exponent + 1); + + // Handle inputs that would produce a zero output. + // + // Shifts higher than highest_significant_bit+1 will always produce a zero + // result. A shift of exactly highest_significant_bit+1 might produce a + // non-zero result after rounding. + if (shift > (highest_significant_bit + 1)) { + if (round_mode == FPTieEven) { + // The result will always be +/-0.0. + return static_cast(sign << sign_offset); + } else { + VIXL_ASSERT(round_mode == FPRoundOdd); + VIXL_ASSERT(mantissa != 0); + // For FPRoundOdd, if the mantissa is too small to represent and + // non-zero return the next "odd" value. + return static_cast((sign << sign_offset) | 1); + } + } + + // Properly encode the exponent for a subnormal output. + exponent = 0; + } else { + // Clear the topmost mantissa bit, since this is not encoded in IEEE-754 + // normal values. + mantissa &= ~(UINT64_C(1) << highest_significant_bit); + } + + // The casts below are only well-defined for unsigned integers. + VIXL_STATIC_ASSERT(std::numeric_limits::is_integer); + VIXL_STATIC_ASSERT(!std::numeric_limits::is_signed); + + if (shift > 0) { + if (round_mode == FPTieEven) { + // We have to shift the mantissa to the right. Some precision is lost, so + // we need to apply rounding. + uint64_t onebit_mantissa = (mantissa >> (shift)) & 1; + uint64_t halfbit_mantissa = (mantissa >> (shift - 1)) & 1; + uint64_t adjustment = (halfbit_mantissa & ~onebit_mantissa); + uint64_t adjusted = mantissa - adjustment; + T halfbit_adjusted = (adjusted >> (shift - 1)) & 1; + + T result = + static_cast((sign << sign_offset) | (exponent << exponent_offset) | + ((mantissa >> shift) << mantissa_offset)); + + // A very large mantissa can overflow during rounding. If this happens, + // the exponent should be incremented and the mantissa set to 1.0 + // (encoded as 0). Applying halfbit_adjusted after assembling the float + // has the nice side-effect that this case is handled for free. + // + // This also handles cases where a very large finite value overflows to + // infinity, or where a very large subnormal value overflows to become + // normal. + return result + halfbit_adjusted; + } else { + VIXL_ASSERT(round_mode == FPRoundOdd); + // If any bits at position halfbit or below are set, onebit (ie. the + // bottom bit of the resulting mantissa) must be set. 
+ uint64_t fractional_bits = mantissa & ((UINT64_C(1) << shift) - 1); + if (fractional_bits != 0) { + mantissa |= UINT64_C(1) << shift; + } + + return static_cast((sign << sign_offset) | + (exponent << exponent_offset) | + ((mantissa >> shift) << mantissa_offset)); + } + } else { + // We have to shift the mantissa to the left (or not at all). The input + // mantissa is exactly representable in the output mantissa, so apply no + // rounding correction. + return static_cast((sign << sign_offset) | + (exponent << exponent_offset) | + ((mantissa << -shift) << mantissa_offset)); + } +} + + +// See FPRound for a description of this function. +inline double FPRoundToDouble(int64_t sign, + int64_t exponent, + uint64_t mantissa, + FPRounding round_mode) { + uint64_t bits = + FPRound(sign, + exponent, + mantissa, + round_mode); + return RawbitsToDouble(bits); +} + + +// See FPRound for a description of this function. +inline Float16 FPRoundToFloat16(int64_t sign, + int64_t exponent, + uint64_t mantissa, + FPRounding round_mode) { + return RawbitsToFloat16( + FPRound( + sign, exponent, mantissa, round_mode)); +} + + +// See FPRound for a description of this function. +static inline float FPRoundToFloat(int64_t sign, + int64_t exponent, + uint64_t mantissa, + FPRounding round_mode) { + uint32_t bits = + FPRound(sign, + exponent, + mantissa, + round_mode); + return RawbitsToFloat(bits); +} + + +float FPToFloat(Float16 value, UseDefaultNaN DN, bool* exception = NULL); +float FPToFloat(double value, + FPRounding round_mode, + UseDefaultNaN DN, + bool* exception = NULL); + +double FPToDouble(Float16 value, UseDefaultNaN DN, bool* exception = NULL); +double FPToDouble(float value, UseDefaultNaN DN, bool* exception = NULL); + +Float16 FPToFloat16(float value, + FPRounding round_mode, + UseDefaultNaN DN, + bool* exception = NULL); + +Float16 FPToFloat16(double value, + FPRounding round_mode, + UseDefaultNaN DN, + bool* exception = NULL); + +// Like static_cast(value), but with specialisations for the Float16 type. +template +T StaticCastFPTo(F value) { + return static_cast(value); +} + +template <> +inline float StaticCastFPTo(Float16 value) { + return FPToFloat(value, kIgnoreDefaultNaN); +} + +template <> +inline double StaticCastFPTo(Float16 value) { + return FPToDouble(value, kIgnoreDefaultNaN); +} + +template <> +inline Float16 StaticCastFPTo(float value) { + return FPToFloat16(value, FPTieEven, kIgnoreDefaultNaN); +} + +template <> +inline Float16 StaticCastFPTo(double value) { + return FPToFloat16(value, FPTieEven, kIgnoreDefaultNaN); +} + +template +uint64_t FPToRawbitsWithSize(unsigned size_in_bits, T value) { + switch (size_in_bits) { + case 16: + return Float16ToRawbits(StaticCastFPTo(value)); + case 32: + return FloatToRawbits(StaticCastFPTo(value)); + case 64: + return DoubleToRawbits(StaticCastFPTo(value)); + } + VIXL_UNREACHABLE(); + return 0; +} + +template +T RawbitsWithSizeToFP(unsigned size_in_bits, uint64_t value) { + VIXL_ASSERT(IsUintN(size_in_bits, value)); + switch (size_in_bits) { + case 16: + return StaticCastFPTo(RawbitsToFloat16(static_cast(value))); + case 32: + return StaticCastFPTo(RawbitsToFloat(static_cast(value))); + case 64: + return StaticCastFPTo(RawbitsToDouble(value)); + } + VIXL_UNREACHABLE(); + return 0; +} + +// Jenkins one-at-a-time hash, based on +// https://en.wikipedia.org/wiki/Jenkins_hash_function citing +// https://www.drdobbs.com/database/algorithm-alley/184410284. 
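+// Because the hash (and the "..."_h literal defined below) is constexpr, it
+// can be evaluated at compile time and used as a switch case label.
+// Illustrative sketch only:
+//
+//   switch (Hash(mnemonic)) {
+//     case "add"_h:  /* handle add */ break;
+//     case "sub"_h:  /* handle sub */ break;
+//   }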
+constexpr uint32_t Hash(const char* str, uint32_t hash = 0) { + if (*str == '\0') { + hash += hash << 3; + hash ^= hash >> 11; + hash += hash << 15; + return hash; + } else { + hash += *str; + hash += hash << 10; + hash ^= hash >> 6; + return Hash(str + 1, hash); + } +} + +constexpr uint32_t operator"" _h(const char* x, size_t) { return Hash(x); } + +} // namespace vixl + +#endif // VIXL_UTILS_H diff --git a/3rdparty/vixl/src/aarch64/assembler-aarch64.cc b/3rdparty/vixl/src/aarch64/assembler-aarch64.cc new file mode 100644 index 0000000000..993d854cd1 --- /dev/null +++ b/3rdparty/vixl/src/aarch64/assembler-aarch64.cc @@ -0,0 +1,6888 @@ +// Copyright 2015, VIXL authors +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// * Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// * Neither the name of ARM Limited nor the names of its contributors may be +// used to endorse or promote products derived from this software without +// specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS CONTRIBUTORS "AS IS" AND +// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE +// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + +#include + +#include "assembler-aarch64.h" +#include "macro-assembler-aarch64.h" + +namespace vixl { +namespace aarch64 { + +RawLiteral::RawLiteral(size_t size, + LiteralPool* literal_pool, + DeletionPolicy deletion_policy) + : size_(size), + offset_(0), + low64_(0), + high64_(0), + literal_pool_(literal_pool), + deletion_policy_(deletion_policy) { + VIXL_ASSERT((deletion_policy == kManuallyDeleted) || (literal_pool_ != NULL)); + if (deletion_policy == kDeletedOnPoolDestruction) { + literal_pool_->DeleteOnDestruction(this); + } +} + + +void Assembler::Reset() { GetBuffer()->Reset(); } + + +void Assembler::bind(Label* label) { + BindToOffset(label, GetBuffer()->GetCursorOffset()); +} + + +void Assembler::BindToOffset(Label* label, ptrdiff_t offset) { + VIXL_ASSERT((offset >= 0) && (offset <= GetBuffer()->GetCursorOffset())); + VIXL_ASSERT(offset % kInstructionSize == 0); + + label->Bind(offset); + + for (Label::LabelLinksIterator it(label); !it.Done(); it.Advance()) { + Instruction* link = + GetBuffer()->GetOffsetAddress(*it.Current()); + link->SetImmPCOffsetTarget(GetLabelAddress(label)); + } + label->ClearAllLinks(); +} + + +// A common implementation for the LinkAndGetOffsetTo helpers. 
+// +// The offset is calculated by aligning the PC and label addresses down to a +// multiple of 1 << element_shift, then calculating the (scaled) offset between +// them. This matches the semantics of adrp, for example. +template +ptrdiff_t Assembler::LinkAndGetOffsetTo(Label* label) { + VIXL_STATIC_ASSERT(element_shift < (sizeof(ptrdiff_t) * 8)); + + if (label->IsBound()) { + uintptr_t pc_offset = GetCursorAddress() >> element_shift; + uintptr_t label_offset = GetLabelAddress(label) >> element_shift; + return label_offset - pc_offset; + } else { + label->AddLink(GetBuffer()->GetCursorOffset()); + return 0; + } +} + + +ptrdiff_t Assembler::LinkAndGetByteOffsetTo(Label* label) { + return LinkAndGetOffsetTo<0>(label); +} + + +ptrdiff_t Assembler::LinkAndGetInstructionOffsetTo(Label* label) { + return LinkAndGetOffsetTo(label); +} + + +ptrdiff_t Assembler::LinkAndGetPageOffsetTo(Label* label) { + return LinkAndGetOffsetTo(label); +} + + +void Assembler::place(RawLiteral* literal) { + VIXL_ASSERT(!literal->IsPlaced()); + + // Patch instructions using this literal. + if (literal->IsUsed()) { + Instruction* target = GetCursorAddress(); + ptrdiff_t offset = literal->GetLastUse(); + bool done; + do { + Instruction* ldr = GetBuffer()->GetOffsetAddress(offset); + VIXL_ASSERT(ldr->IsLoadLiteral()); + + ptrdiff_t imm19 = ldr->GetImmLLiteral(); + VIXL_ASSERT(imm19 <= 0); + done = (imm19 == 0); + offset += imm19 * kLiteralEntrySize; + + ldr->SetImmLLiteral(target); + } while (!done); + } + + // "bind" the literal. + literal->SetOffset(GetCursorOffset()); + // Copy the data into the pool. + switch (literal->GetSize()) { + case kSRegSizeInBytes: + dc32(literal->GetRawValue32()); + break; + case kDRegSizeInBytes: + dc64(literal->GetRawValue64()); + break; + default: + VIXL_ASSERT(literal->GetSize() == kQRegSizeInBytes); + dc64(literal->GetRawValue128Low64()); + dc64(literal->GetRawValue128High64()); + } + + literal->literal_pool_ = NULL; +} + + +ptrdiff_t Assembler::LinkAndGetWordOffsetTo(RawLiteral* literal) { + VIXL_ASSERT(IsWordAligned(GetCursorOffset())); + + bool register_first_use = + (literal->GetLiteralPool() != NULL) && !literal->IsUsed(); + + if (literal->IsPlaced()) { + // The literal is "behind", the offset will be negative. + VIXL_ASSERT((literal->GetOffset() - GetCursorOffset()) <= 0); + return (literal->GetOffset() - GetCursorOffset()) >> kLiteralEntrySizeLog2; + } + + ptrdiff_t offset = 0; + // Link all uses together. + if (literal->IsUsed()) { + offset = + (literal->GetLastUse() - GetCursorOffset()) >> kLiteralEntrySizeLog2; + } + literal->SetLastUse(GetCursorOffset()); + + if (register_first_use) { + literal->GetLiteralPool()->AddEntry(literal); + } + + return offset; +} + + +// Code generation. 
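+//
+// A note on Label linking (illustrative sketch, not part of this patch):
+// branching to an unbound Label records the branch's cursor offset through
+// LinkAndGetOffsetTo() and encodes an offset of zero; BindToOffset() later
+// iterates over the label's recorded uses and patches each branch via
+// SetImmPCOffsetTarget().
+//
+//   Label done;
+//   assm.b(&done);     // Forward branch: linked now, patched at bind time.
+//   ...
+//   assm.bind(&done);  // Binds the label and patches the branch above.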
+void Assembler::br(const Register& xn) { + VIXL_ASSERT(xn.Is64Bits()); + Emit(BR | Rn(xn)); +} + + +void Assembler::blr(const Register& xn) { + VIXL_ASSERT(xn.Is64Bits()); + Emit(BLR | Rn(xn)); +} + + +void Assembler::ret(const Register& xn) { + VIXL_ASSERT(xn.Is64Bits()); + Emit(RET | Rn(xn)); +} + + +void Assembler::braaz(const Register& xn) { + VIXL_ASSERT(CPUHas(CPUFeatures::kPAuth)); + VIXL_ASSERT(xn.Is64Bits()); + Emit(BRAAZ | Rn(xn) | Rd_mask); +} + +void Assembler::brabz(const Register& xn) { + VIXL_ASSERT(CPUHas(CPUFeatures::kPAuth)); + VIXL_ASSERT(xn.Is64Bits()); + Emit(BRABZ | Rn(xn) | Rd_mask); +} + +void Assembler::blraaz(const Register& xn) { + VIXL_ASSERT(CPUHas(CPUFeatures::kPAuth)); + VIXL_ASSERT(xn.Is64Bits()); + Emit(BLRAAZ | Rn(xn) | Rd_mask); +} + +void Assembler::blrabz(const Register& xn) { + VIXL_ASSERT(CPUHas(CPUFeatures::kPAuth)); + VIXL_ASSERT(xn.Is64Bits()); + Emit(BLRABZ | Rn(xn) | Rd_mask); +} + +void Assembler::retaa() { + VIXL_ASSERT(CPUHas(CPUFeatures::kPAuth)); + Emit(RETAA | Rn_mask | Rd_mask); +} + +void Assembler::retab() { + VIXL_ASSERT(CPUHas(CPUFeatures::kPAuth)); + Emit(RETAB | Rn_mask | Rd_mask); +} + +// The Arm ARM names the register Xm but encodes it in the Xd bitfield. +void Assembler::braa(const Register& xn, const Register& xm) { + VIXL_ASSERT(CPUHas(CPUFeatures::kPAuth)); + VIXL_ASSERT(xn.Is64Bits() && xm.Is64Bits()); + Emit(BRAA | Rn(xn) | RdSP(xm)); +} + +void Assembler::brab(const Register& xn, const Register& xm) { + VIXL_ASSERT(CPUHas(CPUFeatures::kPAuth)); + VIXL_ASSERT(xn.Is64Bits() && xm.Is64Bits()); + Emit(BRAB | Rn(xn) | RdSP(xm)); +} + +void Assembler::blraa(const Register& xn, const Register& xm) { + VIXL_ASSERT(CPUHas(CPUFeatures::kPAuth)); + VIXL_ASSERT(xn.Is64Bits() && xm.Is64Bits()); + Emit(BLRAA | Rn(xn) | RdSP(xm)); +} + +void Assembler::blrab(const Register& xn, const Register& xm) { + VIXL_ASSERT(CPUHas(CPUFeatures::kPAuth)); + VIXL_ASSERT(xn.Is64Bits() && xm.Is64Bits()); + Emit(BLRAB | Rn(xn) | RdSP(xm)); +} + + +void Assembler::b(int64_t imm26) { Emit(B | ImmUncondBranch(imm26)); } + + +void Assembler::b(int64_t imm19, Condition cond) { + Emit(B_cond | ImmCondBranch(imm19) | cond); +} + + +void Assembler::b(Label* label) { + int64_t offset = LinkAndGetInstructionOffsetTo(label); + VIXL_ASSERT(Instruction::IsValidImmPCOffset(UncondBranchType, offset)); + b(static_cast(offset)); +} + + +void Assembler::b(Label* label, Condition cond) { + int64_t offset = LinkAndGetInstructionOffsetTo(label); + VIXL_ASSERT(Instruction::IsValidImmPCOffset(CondBranchType, offset)); + b(static_cast(offset), cond); +} + + +void Assembler::bl(int64_t imm26) { Emit(BL | ImmUncondBranch(imm26)); } + + +void Assembler::bl(Label* label) { + int64_t offset = LinkAndGetInstructionOffsetTo(label); + VIXL_ASSERT(Instruction::IsValidImmPCOffset(UncondBranchType, offset)); + bl(static_cast(offset)); +} + + +void Assembler::cbz(const Register& rt, int64_t imm19) { + Emit(SF(rt) | CBZ | ImmCmpBranch(imm19) | Rt(rt)); +} + + +void Assembler::cbz(const Register& rt, Label* label) { + int64_t offset = LinkAndGetInstructionOffsetTo(label); + VIXL_ASSERT(Instruction::IsValidImmPCOffset(CompareBranchType, offset)); + cbz(rt, static_cast(offset)); +} + + +void Assembler::cbnz(const Register& rt, int64_t imm19) { + Emit(SF(rt) | CBNZ | ImmCmpBranch(imm19) | Rt(rt)); +} + + +void Assembler::cbnz(const Register& rt, Label* label) { + int64_t offset = LinkAndGetInstructionOffsetTo(label); + VIXL_ASSERT(Instruction::IsValidImmPCOffset(CompareBranchType, offset)); + 
cbnz(rt, static_cast(offset)); +} + + +void Assembler::NEONTable(const VRegister& vd, + const VRegister& vn, + const VRegister& vm, + NEONTableOp op) { + VIXL_ASSERT(CPUHas(CPUFeatures::kNEON)); + VIXL_ASSERT(vd.Is16B() || vd.Is8B()); + VIXL_ASSERT(vn.Is16B()); + VIXL_ASSERT(AreSameFormat(vd, vm)); + Emit(op | (vd.IsQ() ? NEON_Q : 0) | Rm(vm) | Rn(vn) | Rd(vd)); +} + + +void Assembler::tbl(const VRegister& vd, + const VRegister& vn, + const VRegister& vm) { + VIXL_ASSERT(CPUHas(CPUFeatures::kNEON)); + NEONTable(vd, vn, vm, NEON_TBL_1v); +} + + +void Assembler::tbl(const VRegister& vd, + const VRegister& vn, + const VRegister& vn2, + const VRegister& vm) { + USE(vn2); + VIXL_ASSERT(CPUHas(CPUFeatures::kNEON)); + VIXL_ASSERT(AreSameFormat(vn, vn2)); + VIXL_ASSERT(AreConsecutive(vn, vn2)); + NEONTable(vd, vn, vm, NEON_TBL_2v); +} + + +void Assembler::tbl(const VRegister& vd, + const VRegister& vn, + const VRegister& vn2, + const VRegister& vn3, + const VRegister& vm) { + USE(vn2, vn3); + VIXL_ASSERT(CPUHas(CPUFeatures::kNEON)); + VIXL_ASSERT(AreSameFormat(vn, vn2, vn3)); + VIXL_ASSERT(AreConsecutive(vn, vn2, vn3)); + NEONTable(vd, vn, vm, NEON_TBL_3v); +} + + +void Assembler::tbl(const VRegister& vd, + const VRegister& vn, + const VRegister& vn2, + const VRegister& vn3, + const VRegister& vn4, + const VRegister& vm) { + USE(vn2, vn3, vn4); + VIXL_ASSERT(CPUHas(CPUFeatures::kNEON)); + VIXL_ASSERT(AreSameFormat(vn, vn2, vn3, vn4)); + VIXL_ASSERT(AreConsecutive(vn, vn2, vn3, vn4)); + NEONTable(vd, vn, vm, NEON_TBL_4v); +} + + +void Assembler::tbx(const VRegister& vd, + const VRegister& vn, + const VRegister& vm) { + VIXL_ASSERT(CPUHas(CPUFeatures::kNEON)); + NEONTable(vd, vn, vm, NEON_TBX_1v); +} + + +void Assembler::tbx(const VRegister& vd, + const VRegister& vn, + const VRegister& vn2, + const VRegister& vm) { + USE(vn2); + VIXL_ASSERT(CPUHas(CPUFeatures::kNEON)); + VIXL_ASSERT(AreSameFormat(vn, vn2)); + VIXL_ASSERT(AreConsecutive(vn, vn2)); + NEONTable(vd, vn, vm, NEON_TBX_2v); +} + + +void Assembler::tbx(const VRegister& vd, + const VRegister& vn, + const VRegister& vn2, + const VRegister& vn3, + const VRegister& vm) { + USE(vn2, vn3); + VIXL_ASSERT(CPUHas(CPUFeatures::kNEON)); + VIXL_ASSERT(AreSameFormat(vn, vn2, vn3)); + VIXL_ASSERT(AreConsecutive(vn, vn2, vn3)); + NEONTable(vd, vn, vm, NEON_TBX_3v); +} + + +void Assembler::tbx(const VRegister& vd, + const VRegister& vn, + const VRegister& vn2, + const VRegister& vn3, + const VRegister& vn4, + const VRegister& vm) { + USE(vn2, vn3, vn4); + VIXL_ASSERT(CPUHas(CPUFeatures::kNEON)); + VIXL_ASSERT(AreSameFormat(vn, vn2, vn3, vn4)); + VIXL_ASSERT(AreConsecutive(vn, vn2, vn3, vn4)); + NEONTable(vd, vn, vm, NEON_TBX_4v); +} + + +void Assembler::tbz(const Register& rt, unsigned bit_pos, int64_t imm14) { + VIXL_ASSERT(rt.Is64Bits() || (rt.Is32Bits() && (bit_pos < kWRegSize))); + Emit(TBZ | ImmTestBranchBit(bit_pos) | ImmTestBranch(imm14) | Rt(rt)); +} + + +void Assembler::tbz(const Register& rt, unsigned bit_pos, Label* label) { + ptrdiff_t offset = LinkAndGetInstructionOffsetTo(label); + VIXL_ASSERT(Instruction::IsValidImmPCOffset(TestBranchType, offset)); + tbz(rt, bit_pos, static_cast(offset)); +} + + +void Assembler::tbnz(const Register& rt, unsigned bit_pos, int64_t imm14) { + VIXL_ASSERT(rt.Is64Bits() || (rt.Is32Bits() && (bit_pos < kWRegSize))); + Emit(TBNZ | ImmTestBranchBit(bit_pos) | ImmTestBranch(imm14) | Rt(rt)); +} + + +void Assembler::tbnz(const Register& rt, unsigned bit_pos, Label* label) { + ptrdiff_t offset = 
LinkAndGetInstructionOffsetTo(label); + VIXL_ASSERT(Instruction::IsValidImmPCOffset(TestBranchType, offset)); + tbnz(rt, bit_pos, static_cast(offset)); +} + + +void Assembler::adr(const Register& xd, int64_t imm21) { + VIXL_ASSERT(xd.Is64Bits()); + Emit(ADR | ImmPCRelAddress(imm21) | Rd(xd)); +} + + +void Assembler::adr(const Register& xd, Label* label) { + adr(xd, static_cast(LinkAndGetByteOffsetTo(label))); +} + + +void Assembler::adrp(const Register& xd, int64_t imm21) { + VIXL_ASSERT(xd.Is64Bits()); + Emit(ADRP | ImmPCRelAddress(imm21) | Rd(xd)); +} + + +void Assembler::adrp(const Register& xd, Label* label) { + VIXL_ASSERT(AllowPageOffsetDependentCode()); + adrp(xd, static_cast(LinkAndGetPageOffsetTo(label))); +} + + +void Assembler::add(const Register& rd, + const Register& rn, + const Operand& operand) { + AddSub(rd, rn, operand, LeaveFlags, ADD); +} + + +void Assembler::adds(const Register& rd, + const Register& rn, + const Operand& operand) { + AddSub(rd, rn, operand, SetFlags, ADD); +} + + +void Assembler::cmn(const Register& rn, const Operand& operand) { + Register zr = AppropriateZeroRegFor(rn); + adds(zr, rn, operand); +} + + +void Assembler::sub(const Register& rd, + const Register& rn, + const Operand& operand) { + AddSub(rd, rn, operand, LeaveFlags, SUB); +} + + +void Assembler::subs(const Register& rd, + const Register& rn, + const Operand& operand) { + AddSub(rd, rn, operand, SetFlags, SUB); +} + + +void Assembler::cmp(const Register& rn, const Operand& operand) { + Register zr = AppropriateZeroRegFor(rn); + subs(zr, rn, operand); +} + + +void Assembler::neg(const Register& rd, const Operand& operand) { + Register zr = AppropriateZeroRegFor(rd); + sub(rd, zr, operand); +} + + +void Assembler::negs(const Register& rd, const Operand& operand) { + Register zr = AppropriateZeroRegFor(rd); + subs(rd, zr, operand); +} + + +void Assembler::adc(const Register& rd, + const Register& rn, + const Operand& operand) { + AddSubWithCarry(rd, rn, operand, LeaveFlags, ADC); +} + + +void Assembler::adcs(const Register& rd, + const Register& rn, + const Operand& operand) { + AddSubWithCarry(rd, rn, operand, SetFlags, ADC); +} + + +void Assembler::sbc(const Register& rd, + const Register& rn, + const Operand& operand) { + AddSubWithCarry(rd, rn, operand, LeaveFlags, SBC); +} + + +void Assembler::sbcs(const Register& rd, + const Register& rn, + const Operand& operand) { + AddSubWithCarry(rd, rn, operand, SetFlags, SBC); +} + + +void Assembler::rmif(const Register& xn, unsigned rotation, StatusFlags flags) { + VIXL_ASSERT(CPUHas(CPUFeatures::kFlagM)); + VIXL_ASSERT(xn.Is64Bits()); + Emit(RMIF | Rn(xn) | ImmRMIFRotation(rotation) | Nzcv(flags)); +} + + +void Assembler::setf8(const Register& rn) { + VIXL_ASSERT(CPUHas(CPUFeatures::kFlagM)); + Emit(SETF8 | Rn(rn)); +} + + +void Assembler::setf16(const Register& rn) { + VIXL_ASSERT(CPUHas(CPUFeatures::kFlagM)); + Emit(SETF16 | Rn(rn)); +} + + +void Assembler::ngc(const Register& rd, const Operand& operand) { + Register zr = AppropriateZeroRegFor(rd); + sbc(rd, zr, operand); +} + + +void Assembler::ngcs(const Register& rd, const Operand& operand) { + Register zr = AppropriateZeroRegFor(rd); + sbcs(rd, zr, operand); +} + + +// Logical instructions. 
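+//
+// The flag-only aliases below reuse the zero register as the destination. For
+// example (illustrative), tst(w0, Operand(0xff)) is simply
+// ands(wzr, w0, Operand(0xff)): the flags are updated and the result is
+// discarded.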
+void Assembler::and_(const Register& rd, + const Register& rn, + const Operand& operand) { + Logical(rd, rn, operand, AND); +} + + +void Assembler::ands(const Register& rd, + const Register& rn, + const Operand& operand) { + Logical(rd, rn, operand, ANDS); +} + + +void Assembler::tst(const Register& rn, const Operand& operand) { + ands(AppropriateZeroRegFor(rn), rn, operand); +} + + +void Assembler::bic(const Register& rd, + const Register& rn, + const Operand& operand) { + Logical(rd, rn, operand, BIC); +} + + +void Assembler::bics(const Register& rd, + const Register& rn, + const Operand& operand) { + Logical(rd, rn, operand, BICS); +} + + +void Assembler::orr(const Register& rd, + const Register& rn, + const Operand& operand) { + Logical(rd, rn, operand, ORR); +} + + +void Assembler::orn(const Register& rd, + const Register& rn, + const Operand& operand) { + Logical(rd, rn, operand, ORN); +} + + +void Assembler::eor(const Register& rd, + const Register& rn, + const Operand& operand) { + Logical(rd, rn, operand, EOR); +} + + +void Assembler::eon(const Register& rd, + const Register& rn, + const Operand& operand) { + Logical(rd, rn, operand, EON); +} + + +void Assembler::lslv(const Register& rd, + const Register& rn, + const Register& rm) { + VIXL_ASSERT(rd.GetSizeInBits() == rn.GetSizeInBits()); + VIXL_ASSERT(rd.GetSizeInBits() == rm.GetSizeInBits()); + Emit(SF(rd) | LSLV | Rm(rm) | Rn(rn) | Rd(rd)); +} + + +void Assembler::lsrv(const Register& rd, + const Register& rn, + const Register& rm) { + VIXL_ASSERT(rd.GetSizeInBits() == rn.GetSizeInBits()); + VIXL_ASSERT(rd.GetSizeInBits() == rm.GetSizeInBits()); + Emit(SF(rd) | LSRV | Rm(rm) | Rn(rn) | Rd(rd)); +} + + +void Assembler::asrv(const Register& rd, + const Register& rn, + const Register& rm) { + VIXL_ASSERT(rd.GetSizeInBits() == rn.GetSizeInBits()); + VIXL_ASSERT(rd.GetSizeInBits() == rm.GetSizeInBits()); + Emit(SF(rd) | ASRV | Rm(rm) | Rn(rn) | Rd(rd)); +} + + +void Assembler::rorv(const Register& rd, + const Register& rn, + const Register& rm) { + VIXL_ASSERT(rd.GetSizeInBits() == rn.GetSizeInBits()); + VIXL_ASSERT(rd.GetSizeInBits() == rm.GetSizeInBits()); + Emit(SF(rd) | RORV | Rm(rm) | Rn(rn) | Rd(rd)); +} + + +// Bitfield operations. 
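+//
+// The familiar shift and field mnemonics (lsl, lsr, asr, ubfx, sbfx, bfi, ...)
+// are aliases of bfm/sbfm/ubfm with derived immr/imms fields and live in the
+// assembler header. For example (illustrative), lsl(x0, x1, 4) is encoded as
+// ubfm(x0, x1, 60, 59).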
+void Assembler::bfm(const Register& rd, + const Register& rn, + unsigned immr, + unsigned imms) { + VIXL_ASSERT(rd.GetSizeInBits() == rn.GetSizeInBits()); + Instr N = SF(rd) >> (kSFOffset - kBitfieldNOffset); + Emit(SF(rd) | BFM | N | ImmR(immr, rd.GetSizeInBits()) | + ImmS(imms, rn.GetSizeInBits()) | Rn(rn) | Rd(rd)); +} + + +void Assembler::sbfm(const Register& rd, + const Register& rn, + unsigned immr, + unsigned imms) { + VIXL_ASSERT(rd.Is64Bits() || rn.Is32Bits()); + Instr N = SF(rd) >> (kSFOffset - kBitfieldNOffset); + Emit(SF(rd) | SBFM | N | ImmR(immr, rd.GetSizeInBits()) | + ImmS(imms, rn.GetSizeInBits()) | Rn(rn) | Rd(rd)); +} + + +void Assembler::ubfm(const Register& rd, + const Register& rn, + unsigned immr, + unsigned imms) { + VIXL_ASSERT(rd.GetSizeInBits() == rn.GetSizeInBits()); + Instr N = SF(rd) >> (kSFOffset - kBitfieldNOffset); + Emit(SF(rd) | UBFM | N | ImmR(immr, rd.GetSizeInBits()) | + ImmS(imms, rn.GetSizeInBits()) | Rn(rn) | Rd(rd)); +} + + +void Assembler::extr(const Register& rd, + const Register& rn, + const Register& rm, + unsigned lsb) { + VIXL_ASSERT(rd.GetSizeInBits() == rn.GetSizeInBits()); + VIXL_ASSERT(rd.GetSizeInBits() == rm.GetSizeInBits()); + Instr N = SF(rd) >> (kSFOffset - kBitfieldNOffset); + Emit(SF(rd) | EXTR | N | Rm(rm) | ImmS(lsb, rn.GetSizeInBits()) | Rn(rn) | + Rd(rd)); +} + + +void Assembler::csel(const Register& rd, + const Register& rn, + const Register& rm, + Condition cond) { + ConditionalSelect(rd, rn, rm, cond, CSEL); +} + + +void Assembler::csinc(const Register& rd, + const Register& rn, + const Register& rm, + Condition cond) { + ConditionalSelect(rd, rn, rm, cond, CSINC); +} + + +void Assembler::csinv(const Register& rd, + const Register& rn, + const Register& rm, + Condition cond) { + ConditionalSelect(rd, rn, rm, cond, CSINV); +} + + +void Assembler::csneg(const Register& rd, + const Register& rn, + const Register& rm, + Condition cond) { + ConditionalSelect(rd, rn, rm, cond, CSNEG); +} + + +void Assembler::cset(const Register& rd, Condition cond) { + VIXL_ASSERT((cond != al) && (cond != nv)); + Register zr = AppropriateZeroRegFor(rd); + csinc(rd, zr, zr, InvertCondition(cond)); +} + + +void Assembler::csetm(const Register& rd, Condition cond) { + VIXL_ASSERT((cond != al) && (cond != nv)); + Register zr = AppropriateZeroRegFor(rd); + csinv(rd, zr, zr, InvertCondition(cond)); +} + + +void Assembler::cinc(const Register& rd, const Register& rn, Condition cond) { + VIXL_ASSERT((cond != al) && (cond != nv)); + csinc(rd, rn, rn, InvertCondition(cond)); +} + + +void Assembler::cinv(const Register& rd, const Register& rn, Condition cond) { + VIXL_ASSERT((cond != al) && (cond != nv)); + csinv(rd, rn, rn, InvertCondition(cond)); +} + + +void Assembler::cneg(const Register& rd, const Register& rn, Condition cond) { + VIXL_ASSERT((cond != al) && (cond != nv)); + csneg(rd, rn, rn, InvertCondition(cond)); +} + + +void Assembler::ConditionalSelect(const Register& rd, + const Register& rn, + const Register& rm, + Condition cond, + ConditionalSelectOp op) { + VIXL_ASSERT(rd.GetSizeInBits() == rn.GetSizeInBits()); + VIXL_ASSERT(rd.GetSizeInBits() == rm.GetSizeInBits()); + Emit(SF(rd) | op | Rm(rm) | Cond(cond) | Rn(rn) | Rd(rd)); +} + + +void Assembler::ccmn(const Register& rn, + const Operand& operand, + StatusFlags nzcv, + Condition cond) { + ConditionalCompare(rn, operand, nzcv, cond, CCMN); +} + + +void Assembler::ccmp(const Register& rn, + const Operand& operand, + StatusFlags nzcv, + Condition cond) { + ConditionalCompare(rn, operand, 
nzcv, cond, CCMP); +} + + +void Assembler::DataProcessing3Source(const Register& rd, + const Register& rn, + const Register& rm, + const Register& ra, + DataProcessing3SourceOp op) { + Emit(SF(rd) | op | Rm(rm) | Ra(ra) | Rn(rn) | Rd(rd)); +} + + +void Assembler::crc32b(const Register& wd, + const Register& wn, + const Register& wm) { + VIXL_ASSERT(CPUHas(CPUFeatures::kCRC32)); + VIXL_ASSERT(wd.Is32Bits() && wn.Is32Bits() && wm.Is32Bits()); + Emit(SF(wm) | Rm(wm) | CRC32B | Rn(wn) | Rd(wd)); +} + + +void Assembler::crc32h(const Register& wd, + const Register& wn, + const Register& wm) { + VIXL_ASSERT(CPUHas(CPUFeatures::kCRC32)); + VIXL_ASSERT(wd.Is32Bits() && wn.Is32Bits() && wm.Is32Bits()); + Emit(SF(wm) | Rm(wm) | CRC32H | Rn(wn) | Rd(wd)); +} + + +void Assembler::crc32w(const Register& wd, + const Register& wn, + const Register& wm) { + VIXL_ASSERT(CPUHas(CPUFeatures::kCRC32)); + VIXL_ASSERT(wd.Is32Bits() && wn.Is32Bits() && wm.Is32Bits()); + Emit(SF(wm) | Rm(wm) | CRC32W | Rn(wn) | Rd(wd)); +} + + +void Assembler::crc32x(const Register& wd, + const Register& wn, + const Register& xm) { + VIXL_ASSERT(CPUHas(CPUFeatures::kCRC32)); + VIXL_ASSERT(wd.Is32Bits() && wn.Is32Bits() && xm.Is64Bits()); + Emit(SF(xm) | Rm(xm) | CRC32X | Rn(wn) | Rd(wd)); +} + + +void Assembler::crc32cb(const Register& wd, + const Register& wn, + const Register& wm) { + VIXL_ASSERT(CPUHas(CPUFeatures::kCRC32)); + VIXL_ASSERT(wd.Is32Bits() && wn.Is32Bits() && wm.Is32Bits()); + Emit(SF(wm) | Rm(wm) | CRC32CB | Rn(wn) | Rd(wd)); +} + + +void Assembler::crc32ch(const Register& wd, + const Register& wn, + const Register& wm) { + VIXL_ASSERT(CPUHas(CPUFeatures::kCRC32)); + VIXL_ASSERT(wd.Is32Bits() && wn.Is32Bits() && wm.Is32Bits()); + Emit(SF(wm) | Rm(wm) | CRC32CH | Rn(wn) | Rd(wd)); +} + + +void Assembler::crc32cw(const Register& wd, + const Register& wn, + const Register& wm) { + VIXL_ASSERT(CPUHas(CPUFeatures::kCRC32)); + VIXL_ASSERT(wd.Is32Bits() && wn.Is32Bits() && wm.Is32Bits()); + Emit(SF(wm) | Rm(wm) | CRC32CW | Rn(wn) | Rd(wd)); +} + + +void Assembler::crc32cx(const Register& wd, + const Register& wn, + const Register& xm) { + VIXL_ASSERT(CPUHas(CPUFeatures::kCRC32)); + VIXL_ASSERT(wd.Is32Bits() && wn.Is32Bits() && xm.Is64Bits()); + Emit(SF(xm) | Rm(xm) | CRC32CX | Rn(wn) | Rd(wd)); +} + + +void Assembler::mul(const Register& rd, + const Register& rn, + const Register& rm) { + VIXL_ASSERT(AreSameSizeAndType(rd, rn, rm)); + DataProcessing3Source(rd, rn, rm, AppropriateZeroRegFor(rd), MADD); +} + + +void Assembler::madd(const Register& rd, + const Register& rn, + const Register& rm, + const Register& ra) { + DataProcessing3Source(rd, rn, rm, ra, MADD); +} + + +void Assembler::mneg(const Register& rd, + const Register& rn, + const Register& rm) { + VIXL_ASSERT(AreSameSizeAndType(rd, rn, rm)); + DataProcessing3Source(rd, rn, rm, AppropriateZeroRegFor(rd), MSUB); +} + + +void Assembler::msub(const Register& rd, + const Register& rn, + const Register& rm, + const Register& ra) { + DataProcessing3Source(rd, rn, rm, ra, MSUB); +} + + +void Assembler::umaddl(const Register& xd, + const Register& wn, + const Register& wm, + const Register& xa) { + VIXL_ASSERT(xd.Is64Bits() && xa.Is64Bits()); + VIXL_ASSERT(wn.Is32Bits() && wm.Is32Bits()); + DataProcessing3Source(xd, wn, wm, xa, UMADDL_x); +} + + +void Assembler::smaddl(const Register& xd, + const Register& wn, + const Register& wm, + const Register& xa) { + VIXL_ASSERT(xd.Is64Bits() && xa.Is64Bits()); + VIXL_ASSERT(wn.Is32Bits() && wm.Is32Bits()); + 
DataProcessing3Source(xd, wn, wm, xa, SMADDL_x); +} + + +void Assembler::umsubl(const Register& xd, + const Register& wn, + const Register& wm, + const Register& xa) { + VIXL_ASSERT(xd.Is64Bits() && xa.Is64Bits()); + VIXL_ASSERT(wn.Is32Bits() && wm.Is32Bits()); + DataProcessing3Source(xd, wn, wm, xa, UMSUBL_x); +} + + +void Assembler::smsubl(const Register& xd, + const Register& wn, + const Register& wm, + const Register& xa) { + VIXL_ASSERT(xd.Is64Bits() && xa.Is64Bits()); + VIXL_ASSERT(wn.Is32Bits() && wm.Is32Bits()); + DataProcessing3Source(xd, wn, wm, xa, SMSUBL_x); +} + + +void Assembler::smull(const Register& xd, + const Register& wn, + const Register& wm) { + VIXL_ASSERT(xd.Is64Bits()); + VIXL_ASSERT(wn.Is32Bits() && wm.Is32Bits()); + DataProcessing3Source(xd, wn, wm, xzr, SMADDL_x); +} + + +void Assembler::sdiv(const Register& rd, + const Register& rn, + const Register& rm) { + VIXL_ASSERT(rd.GetSizeInBits() == rn.GetSizeInBits()); + VIXL_ASSERT(rd.GetSizeInBits() == rm.GetSizeInBits()); + Emit(SF(rd) | SDIV | Rm(rm) | Rn(rn) | Rd(rd)); +} + + +void Assembler::smulh(const Register& xd, + const Register& xn, + const Register& xm) { + VIXL_ASSERT(xd.Is64Bits() && xn.Is64Bits() && xm.Is64Bits()); + DataProcessing3Source(xd, xn, xm, xzr, SMULH_x); +} + + +void Assembler::umulh(const Register& xd, + const Register& xn, + const Register& xm) { + VIXL_ASSERT(xd.Is64Bits() && xn.Is64Bits() && xm.Is64Bits()); + DataProcessing3Source(xd, xn, xm, xzr, UMULH_x); +} + + +void Assembler::udiv(const Register& rd, + const Register& rn, + const Register& rm) { + VIXL_ASSERT(rd.GetSizeInBits() == rn.GetSizeInBits()); + VIXL_ASSERT(rd.GetSizeInBits() == rm.GetSizeInBits()); + Emit(SF(rd) | UDIV | Rm(rm) | Rn(rn) | Rd(rd)); +} + + +void Assembler::rbit(const Register& rd, const Register& rn) { + DataProcessing1Source(rd, rn, RBIT); +} + + +void Assembler::rev16(const Register& rd, const Register& rn) { + DataProcessing1Source(rd, rn, REV16); +} + + +void Assembler::rev32(const Register& xd, const Register& xn) { + VIXL_ASSERT(xd.Is64Bits()); + DataProcessing1Source(xd, xn, REV); +} + + +void Assembler::rev(const Register& rd, const Register& rn) { + DataProcessing1Source(rd, rn, rd.Is64Bits() ? 
REV_x : REV_w); +} + + +void Assembler::clz(const Register& rd, const Register& rn) { + DataProcessing1Source(rd, rn, CLZ); +} + + +void Assembler::cls(const Register& rd, const Register& rn) { + DataProcessing1Source(rd, rn, CLS); +} + +#define PAUTH_VARIATIONS(V) \ + V(paci, PACI) \ + V(pacd, PACD) \ + V(auti, AUTI) \ + V(autd, AUTD) + +#define VIXL_DEFINE_ASM_FUNC(PRE, OP) \ + void Assembler::PRE##a(const Register& xd, const Register& xn) { \ + VIXL_ASSERT(CPUHas(CPUFeatures::kPAuth)); \ + VIXL_ASSERT(xd.Is64Bits() && xn.Is64Bits()); \ + Emit(SF(xd) | OP##A | Rd(xd) | RnSP(xn)); \ + } \ + \ + void Assembler::PRE##za(const Register& xd) { \ + VIXL_ASSERT(CPUHas(CPUFeatures::kPAuth)); \ + VIXL_ASSERT(xd.Is64Bits()); \ + Emit(SF(xd) | OP##ZA | Rd(xd) | Rn(xzr)); \ + } \ + \ + void Assembler::PRE##b(const Register& xd, const Register& xn) { \ + VIXL_ASSERT(CPUHas(CPUFeatures::kPAuth)); \ + VIXL_ASSERT(xd.Is64Bits() && xn.Is64Bits()); \ + Emit(SF(xd) | OP##B | Rd(xd) | RnSP(xn)); \ + } \ + \ + void Assembler::PRE##zb(const Register& xd) { \ + VIXL_ASSERT(CPUHas(CPUFeatures::kPAuth)); \ + VIXL_ASSERT(xd.Is64Bits()); \ + Emit(SF(xd) | OP##ZB | Rd(xd) | Rn(xzr)); \ + } + +PAUTH_VARIATIONS(VIXL_DEFINE_ASM_FUNC) +#undef VIXL_DEFINE_ASM_FUNC + +void Assembler::pacga(const Register& xd, + const Register& xn, + const Register& xm) { + VIXL_ASSERT(CPUHas(CPUFeatures::kPAuth, CPUFeatures::kPAuthGeneric)); + VIXL_ASSERT(xd.Is64Bits() && xn.Is64Bits() && xm.Is64Bits()); + Emit(SF(xd) | PACGA | Rd(xd) | Rn(xn) | RmSP(xm)); +} + +void Assembler::xpaci(const Register& xd) { + VIXL_ASSERT(CPUHas(CPUFeatures::kPAuth)); + VIXL_ASSERT(xd.Is64Bits()); + Emit(SF(xd) | XPACI | Rd(xd) | Rn(xzr)); +} + +void Assembler::xpacd(const Register& xd) { + VIXL_ASSERT(CPUHas(CPUFeatures::kPAuth)); + VIXL_ASSERT(xd.Is64Bits()); + Emit(SF(xd) | XPACD | Rd(xd) | Rn(xzr)); +} + + +void Assembler::ldp(const CPURegister& rt, + const CPURegister& rt2, + const MemOperand& src) { + LoadStorePair(rt, rt2, src, LoadPairOpFor(rt, rt2)); +} + + +void Assembler::stp(const CPURegister& rt, + const CPURegister& rt2, + const MemOperand& dst) { + LoadStorePair(rt, rt2, dst, StorePairOpFor(rt, rt2)); +} + + +void Assembler::ldpsw(const Register& xt, + const Register& xt2, + const MemOperand& src) { + VIXL_ASSERT(xt.Is64Bits() && xt2.Is64Bits()); + LoadStorePair(xt, xt2, src, LDPSW_x); +} + + +void Assembler::LoadStorePair(const CPURegister& rt, + const CPURegister& rt2, + const MemOperand& addr, + LoadStorePairOp op) { + VIXL_ASSERT(CPUHas(rt, rt2)); + + // 'rt' and 'rt2' can only be aliased for stores. + VIXL_ASSERT(((op & LoadStorePairLBit) == 0) || !rt.Is(rt2)); + VIXL_ASSERT(AreSameSizeAndType(rt, rt2)); + VIXL_ASSERT(IsImmLSPair(addr.GetOffset(), CalcLSPairDataSize(op))); + + int offset = static_cast(addr.GetOffset()); + Instr memop = op | Rt(rt) | Rt2(rt2) | RnSP(addr.GetBaseRegister()) | + ImmLSPair(offset, CalcLSPairDataSize(op)); + + Instr addrmodeop; + if (addr.IsImmediateOffset()) { + addrmodeop = LoadStorePairOffsetFixed; + } else { + if (addr.IsImmediatePreIndex()) { + addrmodeop = LoadStorePairPreIndexFixed; + } else { + VIXL_ASSERT(addr.IsImmediatePostIndex()); + addrmodeop = LoadStorePairPostIndexFixed; + } + } + + Instr emitop = addrmodeop | memop; + + // Only X registers may be specified for ldpsw. 
+ VIXL_ASSERT(((emitop & LoadStorePairMask) != LDPSW_x) || rt.IsX()); + + Emit(emitop); +} + + +void Assembler::ldnp(const CPURegister& rt, + const CPURegister& rt2, + const MemOperand& src) { + LoadStorePairNonTemporal(rt, rt2, src, LoadPairNonTemporalOpFor(rt, rt2)); +} + + +void Assembler::stnp(const CPURegister& rt, + const CPURegister& rt2, + const MemOperand& dst) { + LoadStorePairNonTemporal(rt, rt2, dst, StorePairNonTemporalOpFor(rt, rt2)); +} + + +void Assembler::LoadStorePairNonTemporal(const CPURegister& rt, + const CPURegister& rt2, + const MemOperand& addr, + LoadStorePairNonTemporalOp op) { + VIXL_ASSERT(CPUHas(rt, rt2)); + + VIXL_ASSERT(!rt.Is(rt2)); + VIXL_ASSERT(AreSameSizeAndType(rt, rt2)); + VIXL_ASSERT(addr.IsImmediateOffset()); + + unsigned size = + CalcLSPairDataSize(static_cast( + static_cast(op) & static_cast(LoadStorePairMask))); + VIXL_ASSERT(IsImmLSPair(addr.GetOffset(), size)); + int offset = static_cast(addr.GetOffset()); + Emit(op | Rt(rt) | Rt2(rt2) | RnSP(addr.GetBaseRegister()) | + ImmLSPair(offset, size)); +} + + +// Memory instructions. +void Assembler::ldrb(const Register& rt, + const MemOperand& src, + LoadStoreScalingOption option) { + VIXL_ASSERT(option != RequireUnscaledOffset); + VIXL_ASSERT(option != PreferUnscaledOffset); + LoadStore(rt, src, LDRB_w, option); +} + + +void Assembler::strb(const Register& rt, + const MemOperand& dst, + LoadStoreScalingOption option) { + VIXL_ASSERT(option != RequireUnscaledOffset); + VIXL_ASSERT(option != PreferUnscaledOffset); + LoadStore(rt, dst, STRB_w, option); +} + + +void Assembler::ldrsb(const Register& rt, + const MemOperand& src, + LoadStoreScalingOption option) { + VIXL_ASSERT(option != RequireUnscaledOffset); + VIXL_ASSERT(option != PreferUnscaledOffset); + LoadStore(rt, src, rt.Is64Bits() ? LDRSB_x : LDRSB_w, option); +} + + +void Assembler::ldrh(const Register& rt, + const MemOperand& src, + LoadStoreScalingOption option) { + VIXL_ASSERT(option != RequireUnscaledOffset); + VIXL_ASSERT(option != PreferUnscaledOffset); + LoadStore(rt, src, LDRH_w, option); +} + + +void Assembler::strh(const Register& rt, + const MemOperand& dst, + LoadStoreScalingOption option) { + VIXL_ASSERT(option != RequireUnscaledOffset); + VIXL_ASSERT(option != PreferUnscaledOffset); + LoadStore(rt, dst, STRH_w, option); +} + + +void Assembler::ldrsh(const Register& rt, + const MemOperand& src, + LoadStoreScalingOption option) { + VIXL_ASSERT(option != RequireUnscaledOffset); + VIXL_ASSERT(option != PreferUnscaledOffset); + LoadStore(rt, src, rt.Is64Bits() ? 
LDRSH_x : LDRSH_w, option); +} + + +void Assembler::ldr(const CPURegister& rt, + const MemOperand& src, + LoadStoreScalingOption option) { + VIXL_ASSERT(option != RequireUnscaledOffset); + VIXL_ASSERT(option != PreferUnscaledOffset); + LoadStore(rt, src, LoadOpFor(rt), option); +} + + +void Assembler::str(const CPURegister& rt, + const MemOperand& dst, + LoadStoreScalingOption option) { + VIXL_ASSERT(option != RequireUnscaledOffset); + VIXL_ASSERT(option != PreferUnscaledOffset); + LoadStore(rt, dst, StoreOpFor(rt), option); +} + + +void Assembler::ldrsw(const Register& xt, + const MemOperand& src, + LoadStoreScalingOption option) { + VIXL_ASSERT(xt.Is64Bits()); + VIXL_ASSERT(option != RequireUnscaledOffset); + VIXL_ASSERT(option != PreferUnscaledOffset); + LoadStore(xt, src, LDRSW_x, option); +} + + +void Assembler::ldurb(const Register& rt, + const MemOperand& src, + LoadStoreScalingOption option) { + VIXL_ASSERT(option != RequireScaledOffset); + VIXL_ASSERT(option != PreferScaledOffset); + LoadStore(rt, src, LDRB_w, option); +} + + +void Assembler::sturb(const Register& rt, + const MemOperand& dst, + LoadStoreScalingOption option) { + VIXL_ASSERT(option != RequireScaledOffset); + VIXL_ASSERT(option != PreferScaledOffset); + LoadStore(rt, dst, STRB_w, option); +} + + +void Assembler::ldursb(const Register& rt, + const MemOperand& src, + LoadStoreScalingOption option) { + VIXL_ASSERT(option != RequireScaledOffset); + VIXL_ASSERT(option != PreferScaledOffset); + LoadStore(rt, src, rt.Is64Bits() ? LDRSB_x : LDRSB_w, option); +} + + +void Assembler::ldurh(const Register& rt, + const MemOperand& src, + LoadStoreScalingOption option) { + VIXL_ASSERT(option != RequireScaledOffset); + VIXL_ASSERT(option != PreferScaledOffset); + LoadStore(rt, src, LDRH_w, option); +} + + +void Assembler::sturh(const Register& rt, + const MemOperand& dst, + LoadStoreScalingOption option) { + VIXL_ASSERT(option != RequireScaledOffset); + VIXL_ASSERT(option != PreferScaledOffset); + LoadStore(rt, dst, STRH_w, option); +} + + +void Assembler::ldursh(const Register& rt, + const MemOperand& src, + LoadStoreScalingOption option) { + VIXL_ASSERT(option != RequireScaledOffset); + VIXL_ASSERT(option != PreferScaledOffset); + LoadStore(rt, src, rt.Is64Bits() ? 
LDRSH_x : LDRSH_w, option); +} + + +void Assembler::ldur(const CPURegister& rt, + const MemOperand& src, + LoadStoreScalingOption option) { + VIXL_ASSERT(option != RequireScaledOffset); + VIXL_ASSERT(option != PreferScaledOffset); + LoadStore(rt, src, LoadOpFor(rt), option); +} + + +void Assembler::stur(const CPURegister& rt, + const MemOperand& dst, + LoadStoreScalingOption option) { + VIXL_ASSERT(option != RequireScaledOffset); + VIXL_ASSERT(option != PreferScaledOffset); + LoadStore(rt, dst, StoreOpFor(rt), option); +} + + +void Assembler::ldursw(const Register& xt, + const MemOperand& src, + LoadStoreScalingOption option) { + VIXL_ASSERT(xt.Is64Bits()); + VIXL_ASSERT(option != RequireScaledOffset); + VIXL_ASSERT(option != PreferScaledOffset); + LoadStore(xt, src, LDRSW_x, option); +} + + +void Assembler::ldraa(const Register& xt, const MemOperand& src) { + VIXL_ASSERT(CPUHas(CPUFeatures::kPAuth)); + LoadStorePAC(xt, src, LDRAA); +} + + +void Assembler::ldrab(const Register& xt, const MemOperand& src) { + VIXL_ASSERT(CPUHas(CPUFeatures::kPAuth)); + LoadStorePAC(xt, src, LDRAB); +} + + +void Assembler::ldrsw(const Register& xt, RawLiteral* literal) { + VIXL_ASSERT(xt.Is64Bits()); + VIXL_ASSERT(literal->GetSize() == kWRegSizeInBytes); + ldrsw(xt, static_cast(LinkAndGetWordOffsetTo(literal))); +} + + +void Assembler::ldr(const CPURegister& rt, RawLiteral* literal) { + VIXL_ASSERT(CPUHas(rt)); + VIXL_ASSERT(literal->GetSize() == static_cast(rt.GetSizeInBytes())); + ldr(rt, static_cast(LinkAndGetWordOffsetTo(literal))); +} + + +void Assembler::ldrsw(const Register& rt, int64_t imm19) { + Emit(LDRSW_x_lit | ImmLLiteral(imm19) | Rt(rt)); +} + + +void Assembler::ldr(const CPURegister& rt, int64_t imm19) { + VIXL_ASSERT(CPUHas(rt)); + LoadLiteralOp op = LoadLiteralOpFor(rt); + Emit(op | ImmLLiteral(imm19) | Rt(rt)); +} + + +void Assembler::prfm(int op, int64_t imm19) { + Emit(PRFM_lit | ImmPrefetchOperation(op) | ImmLLiteral(imm19)); +} + +void Assembler::prfm(PrefetchOperation op, int64_t imm19) { + // Passing unnamed values in 'op' is undefined behaviour in C++. + VIXL_ASSERT(IsNamedPrefetchOperation(op)); + prfm(static_cast(op), imm19); +} + + +// Exclusive-access instructions. +void Assembler::stxrb(const Register& rs, + const Register& rt, + const MemOperand& dst) { + VIXL_ASSERT(dst.IsImmediateOffset() && (dst.GetOffset() == 0)); + Emit(STXRB_w | Rs(rs) | Rt(rt) | Rt2_mask | RnSP(dst.GetBaseRegister())); +} + + +void Assembler::stxrh(const Register& rs, + const Register& rt, + const MemOperand& dst) { + VIXL_ASSERT(dst.IsImmediateOffset() && (dst.GetOffset() == 0)); + Emit(STXRH_w | Rs(rs) | Rt(rt) | Rt2_mask | RnSP(dst.GetBaseRegister())); +} + + +void Assembler::stxr(const Register& rs, + const Register& rt, + const MemOperand& dst) { + VIXL_ASSERT(dst.IsImmediateOffset() && (dst.GetOffset() == 0)); + LoadStoreExclusive op = rt.Is64Bits() ? 
STXR_x : STXR_w; + Emit(op | Rs(rs) | Rt(rt) | Rt2_mask | RnSP(dst.GetBaseRegister())); +} + + +void Assembler::ldxrb(const Register& rt, const MemOperand& src) { + VIXL_ASSERT(src.IsImmediateOffset() && (src.GetOffset() == 0)); + Emit(LDXRB_w | Rs_mask | Rt(rt) | Rt2_mask | RnSP(src.GetBaseRegister())); +} + + +void Assembler::ldxrh(const Register& rt, const MemOperand& src) { + VIXL_ASSERT(src.IsImmediateOffset() && (src.GetOffset() == 0)); + Emit(LDXRH_w | Rs_mask | Rt(rt) | Rt2_mask | RnSP(src.GetBaseRegister())); +} + + +void Assembler::ldxr(const Register& rt, const MemOperand& src) { + VIXL_ASSERT(src.IsImmediateOffset() && (src.GetOffset() == 0)); + LoadStoreExclusive op = rt.Is64Bits() ? LDXR_x : LDXR_w; + Emit(op | Rs_mask | Rt(rt) | Rt2_mask | RnSP(src.GetBaseRegister())); +} + + +void Assembler::stxp(const Register& rs, + const Register& rt, + const Register& rt2, + const MemOperand& dst) { + VIXL_ASSERT(rt.GetSizeInBits() == rt2.GetSizeInBits()); + VIXL_ASSERT(dst.IsImmediateOffset() && (dst.GetOffset() == 0)); + LoadStoreExclusive op = rt.Is64Bits() ? STXP_x : STXP_w; + Emit(op | Rs(rs) | Rt(rt) | Rt2(rt2) | RnSP(dst.GetBaseRegister())); +} + + +void Assembler::ldxp(const Register& rt, + const Register& rt2, + const MemOperand& src) { + VIXL_ASSERT(rt.GetSizeInBits() == rt2.GetSizeInBits()); + VIXL_ASSERT(src.IsImmediateOffset() && (src.GetOffset() == 0)); + LoadStoreExclusive op = rt.Is64Bits() ? LDXP_x : LDXP_w; + Emit(op | Rs_mask | Rt(rt) | Rt2(rt2) | RnSP(src.GetBaseRegister())); +} + + +void Assembler::stlxrb(const Register& rs, + const Register& rt, + const MemOperand& dst) { + VIXL_ASSERT(dst.IsImmediateOffset() && (dst.GetOffset() == 0)); + Emit(STLXRB_w | Rs(rs) | Rt(rt) | Rt2_mask | RnSP(dst.GetBaseRegister())); +} + + +void Assembler::stlxrh(const Register& rs, + const Register& rt, + const MemOperand& dst) { + VIXL_ASSERT(dst.IsImmediateOffset() && (dst.GetOffset() == 0)); + Emit(STLXRH_w | Rs(rs) | Rt(rt) | Rt2_mask | RnSP(dst.GetBaseRegister())); +} + + +void Assembler::stlxr(const Register& rs, + const Register& rt, + const MemOperand& dst) { + VIXL_ASSERT(dst.IsImmediateOffset() && (dst.GetOffset() == 0)); + LoadStoreExclusive op = rt.Is64Bits() ? STLXR_x : STLXR_w; + Emit(op | Rs(rs) | Rt(rt) | Rt2_mask | RnSP(dst.GetBaseRegister())); +} + + +void Assembler::ldaxrb(const Register& rt, const MemOperand& src) { + VIXL_ASSERT(src.IsImmediateOffset() && (src.GetOffset() == 0)); + Emit(LDAXRB_w | Rs_mask | Rt(rt) | Rt2_mask | RnSP(src.GetBaseRegister())); +} + + +void Assembler::ldaxrh(const Register& rt, const MemOperand& src) { + VIXL_ASSERT(src.IsImmediateOffset() && (src.GetOffset() == 0)); + Emit(LDAXRH_w | Rs_mask | Rt(rt) | Rt2_mask | RnSP(src.GetBaseRegister())); +} + + +void Assembler::ldaxr(const Register& rt, const MemOperand& src) { + VIXL_ASSERT(src.IsImmediateOffset() && (src.GetOffset() == 0)); + LoadStoreExclusive op = rt.Is64Bits() ? LDAXR_x : LDAXR_w; + Emit(op | Rs_mask | Rt(rt) | Rt2_mask | RnSP(src.GetBaseRegister())); +} + + +void Assembler::stlxp(const Register& rs, + const Register& rt, + const Register& rt2, + const MemOperand& dst) { + VIXL_ASSERT(rt.GetSizeInBits() == rt2.GetSizeInBits()); + VIXL_ASSERT(dst.IsImmediateOffset() && (dst.GetOffset() == 0)); + LoadStoreExclusive op = rt.Is64Bits() ? 
STLXP_x : STLXP_w; + Emit(op | Rs(rs) | Rt(rt) | Rt2(rt2) | RnSP(dst.GetBaseRegister())); +} + + +void Assembler::ldaxp(const Register& rt, + const Register& rt2, + const MemOperand& src) { + VIXL_ASSERT(rt.GetSizeInBits() == rt2.GetSizeInBits()); + VIXL_ASSERT(src.IsImmediateOffset() && (src.GetOffset() == 0)); + LoadStoreExclusive op = rt.Is64Bits() ? LDAXP_x : LDAXP_w; + Emit(op | Rs_mask | Rt(rt) | Rt2(rt2) | RnSP(src.GetBaseRegister())); +} + + +void Assembler::stlrb(const Register& rt, const MemOperand& dst) { + VIXL_ASSERT(dst.IsImmediateOffset() && (dst.GetOffset() == 0)); + Emit(STLRB_w | Rs_mask | Rt(rt) | Rt2_mask | RnSP(dst.GetBaseRegister())); +} + +void Assembler::stlurb(const Register& rt, const MemOperand& dst) { + VIXL_ASSERT(CPUHas(CPUFeatures::kRCpc, CPUFeatures::kRCpcImm)); + VIXL_ASSERT(dst.IsImmediateOffset() && IsImmLSUnscaled(dst.GetOffset())); + + Instr base = RnSP(dst.GetBaseRegister()); + int64_t offset = dst.GetOffset(); + Emit(STLURB | Rt(rt) | base | ImmLS(static_cast(offset))); +} + + +void Assembler::stlrh(const Register& rt, const MemOperand& dst) { + VIXL_ASSERT(dst.IsImmediateOffset() && (dst.GetOffset() == 0)); + Emit(STLRH_w | Rs_mask | Rt(rt) | Rt2_mask | RnSP(dst.GetBaseRegister())); +} + +void Assembler::stlurh(const Register& rt, const MemOperand& dst) { + VIXL_ASSERT(CPUHas(CPUFeatures::kRCpc, CPUFeatures::kRCpcImm)); + VIXL_ASSERT(dst.IsImmediateOffset() && IsImmLSUnscaled(dst.GetOffset())); + + Instr base = RnSP(dst.GetBaseRegister()); + int64_t offset = dst.GetOffset(); + Emit(STLURH | Rt(rt) | base | ImmLS(static_cast(offset))); +} + + +void Assembler::stlr(const Register& rt, const MemOperand& dst) { + VIXL_ASSERT(dst.IsImmediateOffset() && (dst.GetOffset() == 0)); + LoadStoreExclusive op = rt.Is64Bits() ? STLR_x : STLR_w; + Emit(op | Rs_mask | Rt(rt) | Rt2_mask | RnSP(dst.GetBaseRegister())); +} + +void Assembler::stlur(const Register& rt, const MemOperand& dst) { + VIXL_ASSERT(CPUHas(CPUFeatures::kRCpc, CPUFeatures::kRCpcImm)); + VIXL_ASSERT(dst.IsImmediateOffset() && IsImmLSUnscaled(dst.GetOffset())); + + Instr base = RnSP(dst.GetBaseRegister()); + int64_t offset = dst.GetOffset(); + Instr op = rt.Is64Bits() ? STLUR_x : STLUR_w; + Emit(op | Rt(rt) | base | ImmLS(static_cast(offset))); +} + + +void Assembler::ldarb(const Register& rt, const MemOperand& src) { + VIXL_ASSERT(src.IsImmediateOffset() && (src.GetOffset() == 0)); + Emit(LDARB_w | Rs_mask | Rt(rt) | Rt2_mask | RnSP(src.GetBaseRegister())); +} + + +void Assembler::ldarh(const Register& rt, const MemOperand& src) { + VIXL_ASSERT(src.IsImmediateOffset() && (src.GetOffset() == 0)); + Emit(LDARH_w | Rs_mask | Rt(rt) | Rt2_mask | RnSP(src.GetBaseRegister())); +} + + +void Assembler::ldar(const Register& rt, const MemOperand& src) { + VIXL_ASSERT(src.IsImmediateOffset() && (src.GetOffset() == 0)); + LoadStoreExclusive op = rt.Is64Bits() ? 
LDAR_x : LDAR_w; + Emit(op | Rs_mask | Rt(rt) | Rt2_mask | RnSP(src.GetBaseRegister())); +} + + +void Assembler::stllrb(const Register& rt, const MemOperand& dst) { + VIXL_ASSERT(CPUHas(CPUFeatures::kLORegions)); + VIXL_ASSERT(dst.IsImmediateOffset() && (dst.GetOffset() == 0)); + Emit(STLLRB | Rs_mask | Rt(rt) | Rt2_mask | RnSP(dst.GetBaseRegister())); +} + + +void Assembler::stllrh(const Register& rt, const MemOperand& dst) { + VIXL_ASSERT(CPUHas(CPUFeatures::kLORegions)); + VIXL_ASSERT(dst.IsImmediateOffset() && (dst.GetOffset() == 0)); + Emit(STLLRH | Rs_mask | Rt(rt) | Rt2_mask | RnSP(dst.GetBaseRegister())); +} + + +void Assembler::stllr(const Register& rt, const MemOperand& dst) { + VIXL_ASSERT(CPUHas(CPUFeatures::kLORegions)); + VIXL_ASSERT(dst.IsImmediateOffset() && (dst.GetOffset() == 0)); + LoadStoreExclusive op = rt.Is64Bits() ? STLLR_x : STLLR_w; + Emit(op | Rs_mask | Rt(rt) | Rt2_mask | RnSP(dst.GetBaseRegister())); +} + + +void Assembler::ldlarb(const Register& rt, const MemOperand& src) { + VIXL_ASSERT(CPUHas(CPUFeatures::kLORegions)); + VIXL_ASSERT(src.IsImmediateOffset() && (src.GetOffset() == 0)); + Emit(LDLARB | Rs_mask | Rt(rt) | Rt2_mask | RnSP(src.GetBaseRegister())); +} + + +void Assembler::ldlarh(const Register& rt, const MemOperand& src) { + VIXL_ASSERT(CPUHas(CPUFeatures::kLORegions)); + VIXL_ASSERT(src.IsImmediateOffset() && (src.GetOffset() == 0)); + Emit(LDLARH | Rs_mask | Rt(rt) | Rt2_mask | RnSP(src.GetBaseRegister())); +} + + +void Assembler::ldlar(const Register& rt, const MemOperand& src) { + VIXL_ASSERT(CPUHas(CPUFeatures::kLORegions)); + VIXL_ASSERT(src.IsImmediateOffset() && (src.GetOffset() == 0)); + LoadStoreExclusive op = rt.Is64Bits() ? LDLAR_x : LDLAR_w; + Emit(op | Rs_mask | Rt(rt) | Rt2_mask | RnSP(src.GetBaseRegister())); +} + + +// clang-format off +#define COMPARE_AND_SWAP_W_X_LIST(V) \ + V(cas, CAS) \ + V(casa, CASA) \ + V(casl, CASL) \ + V(casal, CASAL) +// clang-format on + +#define VIXL_DEFINE_ASM_FUNC(FN, OP) \ + void Assembler::FN(const Register& rs, \ + const Register& rt, \ + const MemOperand& src) { \ + VIXL_ASSERT(CPUHas(CPUFeatures::kAtomics)); \ + VIXL_ASSERT(src.IsImmediateOffset() && (src.GetOffset() == 0)); \ + VIXL_ASSERT(AreSameFormat(rs, rt)); \ + LoadStoreExclusive op = rt.Is64Bits() ? 
OP##_x : OP##_w; \ + Emit(op | Rs(rs) | Rt(rt) | Rt2_mask | RnSP(src.GetBaseRegister())); \ + } +COMPARE_AND_SWAP_W_X_LIST(VIXL_DEFINE_ASM_FUNC) +#undef VIXL_DEFINE_ASM_FUNC + +// clang-format off +#define COMPARE_AND_SWAP_W_LIST(V) \ + V(casb, CASB) \ + V(casab, CASAB) \ + V(caslb, CASLB) \ + V(casalb, CASALB) \ + V(cash, CASH) \ + V(casah, CASAH) \ + V(caslh, CASLH) \ + V(casalh, CASALH) +// clang-format on + +#define VIXL_DEFINE_ASM_FUNC(FN, OP) \ + void Assembler::FN(const Register& rs, \ + const Register& rt, \ + const MemOperand& src) { \ + VIXL_ASSERT(CPUHas(CPUFeatures::kAtomics)); \ + VIXL_ASSERT(src.IsImmediateOffset() && (src.GetOffset() == 0)); \ + Emit(OP | Rs(rs) | Rt(rt) | Rt2_mask | RnSP(src.GetBaseRegister())); \ + } +COMPARE_AND_SWAP_W_LIST(VIXL_DEFINE_ASM_FUNC) +#undef VIXL_DEFINE_ASM_FUNC + + +// clang-format off +#define COMPARE_AND_SWAP_PAIR_LIST(V) \ + V(casp, CASP) \ + V(caspa, CASPA) \ + V(caspl, CASPL) \ + V(caspal, CASPAL) +// clang-format on + +#define VIXL_DEFINE_ASM_FUNC(FN, OP) \ + void Assembler::FN(const Register& rs, \ + const Register& rs1, \ + const Register& rt, \ + const Register& rt1, \ + const MemOperand& src) { \ + VIXL_ASSERT(CPUHas(CPUFeatures::kAtomics)); \ + USE(rs1, rt1); \ + VIXL_ASSERT(src.IsImmediateOffset() && (src.GetOffset() == 0)); \ + VIXL_ASSERT(AreEven(rs, rt)); \ + VIXL_ASSERT(AreConsecutive(rs, rs1)); \ + VIXL_ASSERT(AreConsecutive(rt, rt1)); \ + VIXL_ASSERT(AreSameFormat(rs, rs1, rt, rt1)); \ + LoadStoreExclusive op = rt.Is64Bits() ? OP##_x : OP##_w; \ + Emit(op | Rs(rs) | Rt(rt) | Rt2_mask | RnSP(src.GetBaseRegister())); \ + } +COMPARE_AND_SWAP_PAIR_LIST(VIXL_DEFINE_ASM_FUNC) +#undef VIXL_DEFINE_ASM_FUNC + +// These macros generate all the variations of the atomic memory operations, +// e.g. ldadd, ldadda, ldaddb, staddl, etc. +// For a full list of the methods with comments, see the assembler header file. + +// clang-format off +#define ATOMIC_MEMORY_SIMPLE_OPERATION_LIST(V, DEF) \ + V(DEF, add, LDADD) \ + V(DEF, clr, LDCLR) \ + V(DEF, eor, LDEOR) \ + V(DEF, set, LDSET) \ + V(DEF, smax, LDSMAX) \ + V(DEF, smin, LDSMIN) \ + V(DEF, umax, LDUMAX) \ + V(DEF, umin, LDUMIN) + +#define ATOMIC_MEMORY_STORE_MODES(V, NAME, OP) \ + V(NAME, OP##_x, OP##_w) \ + V(NAME##l, OP##L_x, OP##L_w) \ + V(NAME##b, OP##B, OP##B) \ + V(NAME##lb, OP##LB, OP##LB) \ + V(NAME##h, OP##H, OP##H) \ + V(NAME##lh, OP##LH, OP##LH) + +#define ATOMIC_MEMORY_LOAD_MODES(V, NAME, OP) \ + ATOMIC_MEMORY_STORE_MODES(V, NAME, OP) \ + V(NAME##a, OP##A_x, OP##A_w) \ + V(NAME##al, OP##AL_x, OP##AL_w) \ + V(NAME##ab, OP##AB, OP##AB) \ + V(NAME##alb, OP##ALB, OP##ALB) \ + V(NAME##ah, OP##AH, OP##AH) \ + V(NAME##alh, OP##ALH, OP##ALH) +// clang-format on + +#define DEFINE_ASM_LOAD_FUNC(FN, OP_X, OP_W) \ + void Assembler::ld##FN(const Register& rs, \ + const Register& rt, \ + const MemOperand& src) { \ + VIXL_ASSERT(CPUHas(CPUFeatures::kAtomics)); \ + VIXL_ASSERT(src.IsImmediateOffset() && (src.GetOffset() == 0)); \ + AtomicMemoryOp op = rt.Is64Bits() ? 
OP_X : OP_W;                                                       \
+    Emit(op | Rs(rs) | Rt(rt) | RnSP(src.GetBaseRegister()));          \
+  }
+#define DEFINE_ASM_STORE_FUNC(FN, OP_X, OP_W)                          \
+  void Assembler::st##FN(const Register& rs, const MemOperand& src) {  \
+    VIXL_ASSERT(CPUHas(CPUFeatures::kAtomics));                        \
+    ld##FN(rs, AppropriateZeroRegFor(rs), src);                        \
+  }
+
+ATOMIC_MEMORY_SIMPLE_OPERATION_LIST(ATOMIC_MEMORY_LOAD_MODES,
+                                    DEFINE_ASM_LOAD_FUNC)
+ATOMIC_MEMORY_SIMPLE_OPERATION_LIST(ATOMIC_MEMORY_STORE_MODES,
+                                    DEFINE_ASM_STORE_FUNC)
+
+#define DEFINE_ASM_SWP_FUNC(FN, OP_X, OP_W)                          \
+  void Assembler::FN(const Register& rs,                             \
+                     const Register& rt,                             \
+                     const MemOperand& src) {                        \
+    VIXL_ASSERT(CPUHas(CPUFeatures::kAtomics));                      \
+    VIXL_ASSERT(src.IsImmediateOffset() && (src.GetOffset() == 0));  \
+    AtomicMemoryOp op = rt.Is64Bits() ? OP_X : OP_W;                 \
+    Emit(op | Rs(rs) | Rt(rt) | RnSP(src.GetBaseRegister()));        \
+  }
+
+ATOMIC_MEMORY_LOAD_MODES(DEFINE_ASM_SWP_FUNC, swp, SWP)
+
+#undef DEFINE_ASM_LOAD_FUNC
+#undef DEFINE_ASM_STORE_FUNC
+#undef DEFINE_ASM_SWP_FUNC
+
+
+void Assembler::ldaprb(const Register& rt, const MemOperand& src) {
+  VIXL_ASSERT(CPUHas(CPUFeatures::kRCpc));
+  VIXL_ASSERT(src.IsImmediateOffset() && (src.GetOffset() == 0));
+  AtomicMemoryOp op = LDAPRB;
+  Emit(op | Rs(xzr) | Rt(rt) | RnSP(src.GetBaseRegister()));
+}
+
+void Assembler::ldapurb(const Register& rt, const MemOperand& src) {
+  VIXL_ASSERT(CPUHas(CPUFeatures::kRCpc, CPUFeatures::kRCpcImm));
+  VIXL_ASSERT(src.IsImmediateOffset() && IsImmLSUnscaled(src.GetOffset()));
+
+  Instr base = RnSP(src.GetBaseRegister());
+  int64_t offset = src.GetOffset();
+  Emit(LDAPURB | Rt(rt) | base | ImmLS(static_cast<int>(offset)));
+}
+
+void Assembler::ldapursb(const Register& rt, const MemOperand& src) {
+  VIXL_ASSERT(CPUHas(CPUFeatures::kRCpc, CPUFeatures::kRCpcImm));
+  VIXL_ASSERT(src.IsImmediateOffset() && IsImmLSUnscaled(src.GetOffset()));
+
+  Instr base = RnSP(src.GetBaseRegister());
+  int64_t offset = src.GetOffset();
+  Instr op = rt.Is64Bits() ? LDAPURSB_x : LDAPURSB_w;
+  Emit(op | Rt(rt) | base | ImmLS(static_cast<int>(offset)));
+}
+
+void Assembler::ldaprh(const Register& rt, const MemOperand& src) {
+  VIXL_ASSERT(CPUHas(CPUFeatures::kRCpc));
+  VIXL_ASSERT(src.IsImmediateOffset() && (src.GetOffset() == 0));
+  AtomicMemoryOp op = LDAPRH;
+  Emit(op | Rs(xzr) | Rt(rt) | RnSP(src.GetBaseRegister()));
+}
+
+void Assembler::ldapurh(const Register& rt, const MemOperand& src) {
+  VIXL_ASSERT(CPUHas(CPUFeatures::kRCpc, CPUFeatures::kRCpcImm));
+  VIXL_ASSERT(src.IsImmediateOffset() && IsImmLSUnscaled(src.GetOffset()));
+
+  Instr base = RnSP(src.GetBaseRegister());
+  int64_t offset = src.GetOffset();
+  Emit(LDAPURH | Rt(rt) | base | ImmLS(static_cast<int>(offset)));
+}
+
+void Assembler::ldapursh(const Register& rt, const MemOperand& src) {
+  VIXL_ASSERT(CPUHas(CPUFeatures::kRCpc, CPUFeatures::kRCpcImm));
+  VIXL_ASSERT(src.IsImmediateOffset() && IsImmLSUnscaled(src.GetOffset()));
+
+  Instr base = RnSP(src.GetBaseRegister());
+  int64_t offset = src.GetOffset();
+  LoadStoreRCpcUnscaledOffsetOp op = rt.Is64Bits() ? LDAPURSH_x : LDAPURSH_w;
+  Emit(op | Rt(rt) | base | ImmLS(static_cast<int>(offset)));
+}
+
+void Assembler::ldapr(const Register& rt, const MemOperand& src) {
+  VIXL_ASSERT(CPUHas(CPUFeatures::kRCpc));
+  VIXL_ASSERT(src.IsImmediateOffset() && (src.GetOffset() == 0));
+  AtomicMemoryOp op = rt.Is64Bits() ? LDAPR_x : LDAPR_w;
+  Emit(op | Rs(xzr) | Rt(rt) | RnSP(src.GetBaseRegister()));
+}
+
+void Assembler::ldapur(const Register& rt, const MemOperand& src) {
+  VIXL_ASSERT(CPUHas(CPUFeatures::kRCpc, CPUFeatures::kRCpcImm));
+  VIXL_ASSERT(src.IsImmediateOffset() && IsImmLSUnscaled(src.GetOffset()));
+
+  Instr base = RnSP(src.GetBaseRegister());
+  int64_t offset = src.GetOffset();
+  LoadStoreRCpcUnscaledOffsetOp op = rt.Is64Bits() ? LDAPUR_x : LDAPUR_w;
+  Emit(op | Rt(rt) | base | ImmLS(static_cast<int>(offset)));
+}
+
+void Assembler::ldapursw(const Register& rt, const MemOperand& src) {
+  VIXL_ASSERT(CPUHas(CPUFeatures::kRCpc, CPUFeatures::kRCpcImm));
+  VIXL_ASSERT(rt.Is64Bits());
+  VIXL_ASSERT(src.IsImmediateOffset() && IsImmLSUnscaled(src.GetOffset()));
+
+  Instr base = RnSP(src.GetBaseRegister());
+  int64_t offset = src.GetOffset();
+  Emit(LDAPURSW | Rt(rt) | base | ImmLS(static_cast<int>(offset)));
+}
+
+void Assembler::prfm(int op,
+                     const MemOperand& address,
+                     LoadStoreScalingOption option) {
+  VIXL_ASSERT(option != RequireUnscaledOffset);
+  VIXL_ASSERT(option != PreferUnscaledOffset);
+  Prefetch(op, address, option);
+}
+
+void Assembler::prfm(PrefetchOperation op,
+                     const MemOperand& address,
+                     LoadStoreScalingOption option) {
+  // Passing unnamed values in 'op' is undefined behaviour in C++.
+  VIXL_ASSERT(IsNamedPrefetchOperation(op));
+  prfm(static_cast<int>(op), address, option);
+}
+
+
+void Assembler::prfum(int op,
+                      const MemOperand& address,
+                      LoadStoreScalingOption option) {
+  VIXL_ASSERT(option != RequireScaledOffset);
+  VIXL_ASSERT(option != PreferScaledOffset);
+  Prefetch(op, address, option);
+}
+
+void Assembler::prfum(PrefetchOperation op,
+                      const MemOperand& address,
+                      LoadStoreScalingOption option) {
+  // Passing unnamed values in 'op' is undefined behaviour in C++.
+  VIXL_ASSERT(IsNamedPrefetchOperation(op));
+  prfum(static_cast<int>(op), address, option);
+}
+
+
+void Assembler::prfm(int op, RawLiteral* literal) {
+  prfm(op, static_cast<int>(LinkAndGetWordOffsetTo(literal)));
+}
+
+void Assembler::prfm(PrefetchOperation op, RawLiteral* literal) {
+  // Passing unnamed values in 'op' is undefined behaviour in C++.
+  VIXL_ASSERT(IsNamedPrefetchOperation(op));
+  prfm(static_cast<int>(op), literal);
+}
+
+
+void Assembler::sys(int op1, int crn, int crm, int op2, const Register& xt) {
+  VIXL_ASSERT(xt.Is64Bits());
+  Emit(SYS | ImmSysOp1(op1) | CRn(crn) | CRm(crm) | ImmSysOp2(op2) | Rt(xt));
+}
+
+
+void Assembler::sys(int op, const Register& xt) {
+  VIXL_ASSERT(xt.Is64Bits());
+  Emit(SYS | SysOp(op) | Rt(xt));
+}
+
+
+void Assembler::dc(DataCacheOp op, const Register& rt) {
+  if (op == CVAP) VIXL_ASSERT(CPUHas(CPUFeatures::kDCPoP));
+  if (op == CVADP) VIXL_ASSERT(CPUHas(CPUFeatures::kDCCVADP));
+  sys(op, rt);
+}
+
+
+void Assembler::ic(InstructionCacheOp op, const Register& rt) {
+  VIXL_ASSERT(op == IVAU);
+  sys(op, rt);
+}
+
+
+void Assembler::hint(SystemHint code) { hint(static_cast<int>(code)); }
+
+
+void Assembler::hint(int imm7) {
+  VIXL_ASSERT(IsUint7(imm7));
+  Emit(HINT | ImmHint(imm7) | Rt(xzr));
+}
+
+
+// MTE.
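+// The MTE helpers below all scale their immediate offsets by the tag granule:
+// offsets passed to addg/subg/ldg/stg/st2g/stzg/stz2g/stgp must be multiples
+// of kMTETagGranuleInBytes and are encoded as offset / kMTETagGranuleInBytes.
+// A minimal illustrative sketch of how these emitters combine, assuming an
+// Assembler 'assm' with a backing code buffer (the name and the register
+// choices are arbitrary, not part of the VIXL API):
+//
+//   assm.irg(x0, sp, xzr);          // x0 = sp with a random logical tag.
+//   assm.addg(x1, x0, 16, 0);       // x1 = x0 + 16 bytes (one granule),
+//                                   // keeping the same tag.
+//   assm.stg(x0, MemOperand(x0));   // Store x0's tag for the granule at [x0].
+//   assm.st2g(x1, MemOperand(x1));  // Tag two granules starting at [x1].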
+
+void Assembler::addg(const Register& xd,
+                     const Register& xn,
+                     int offset,
+                     int tag_offset) {
+  VIXL_ASSERT(CPUHas(CPUFeatures::kMTE));
+  VIXL_ASSERT(IsMultiple(offset, kMTETagGranuleInBytes));
+
+  Emit(0x91800000 | RdSP(xd) | RnSP(xn) |
+       ImmUnsignedField<21, 16>(offset / kMTETagGranuleInBytes) |
+       ImmUnsignedField<13, 10>(tag_offset));
+}
+
+void Assembler::gmi(const Register& xd,
+                    const Register& xn,
+                    const Register& xm) {
+  VIXL_ASSERT(CPUHas(CPUFeatures::kMTE));
+
+  Emit(0x9ac01400 | Rd(xd) | RnSP(xn) | Rm(xm));
+}
+
+void Assembler::irg(const Register& xd,
+                    const Register& xn,
+                    const Register& xm) {
+  VIXL_ASSERT(CPUHas(CPUFeatures::kMTE));
+
+  Emit(0x9ac01000 | RdSP(xd) | RnSP(xn) | Rm(xm));
+}
+
+void Assembler::ldg(const Register& xt, const MemOperand& addr) {
+  VIXL_ASSERT(CPUHas(CPUFeatures::kMTE));
+  VIXL_ASSERT(addr.IsImmediateOffset());
+  int offset = static_cast<int>(addr.GetOffset());
+  VIXL_ASSERT(IsMultiple(offset, kMTETagGranuleInBytes));
+
+  Emit(0xd9600000 | Rt(xt) | RnSP(addr.GetBaseRegister()) |
+       ImmField<20, 12>(offset / static_cast<int>(kMTETagGranuleInBytes)));
+}
+
+void Assembler::StoreTagHelper(const Register& xt,
+                               const MemOperand& addr,
+                               Instr op) {
+  int offset = static_cast<int>(addr.GetOffset());
+  VIXL_ASSERT(IsMultiple(offset, kMTETagGranuleInBytes));
+
+  Instr addr_mode;
+  if (addr.IsImmediateOffset()) {
+    addr_mode = 2;
+  } else if (addr.IsImmediatePreIndex()) {
+    addr_mode = 3;
+  } else {
+    VIXL_ASSERT(addr.IsImmediatePostIndex());
+    addr_mode = 1;
+  }
+
+  Emit(op | RdSP(xt) | RnSP(addr.GetBaseRegister()) | (addr_mode << 10) |
+       ImmField<20, 12>(offset / static_cast<int>(kMTETagGranuleInBytes)));
+}
+
+void Assembler::st2g(const Register& xt, const MemOperand& addr) {
+  VIXL_ASSERT(CPUHas(CPUFeatures::kMTE));
+  StoreTagHelper(xt, addr, 0xd9a00000);
+}
+
+void Assembler::stg(const Register& xt, const MemOperand& addr) {
+  VIXL_ASSERT(CPUHas(CPUFeatures::kMTE));
+  StoreTagHelper(xt, addr, 0xd9200000);
+}
+
+void Assembler::stgp(const Register& xt1,
+                     const Register& xt2,
+                     const MemOperand& addr) {
+  VIXL_ASSERT(CPUHas(CPUFeatures::kMTE));
+  int offset = static_cast<int>(addr.GetOffset());
+  VIXL_ASSERT(IsMultiple(offset, kMTETagGranuleInBytes));
+
+  Instr addr_mode;
+  if (addr.IsImmediateOffset()) {
+    addr_mode = 2;
+  } else if (addr.IsImmediatePreIndex()) {
+    addr_mode = 3;
+  } else {
+    VIXL_ASSERT(addr.IsImmediatePostIndex());
+    addr_mode = 1;
+  }
+
+  Emit(0x68000000 | RnSP(addr.GetBaseRegister()) | (addr_mode << 23) |
+       ImmField<21, 15>(offset / static_cast<int>(kMTETagGranuleInBytes)) |
+       Rt2(xt2) | Rt(xt1));
+}
+
+void Assembler::stz2g(const Register& xt, const MemOperand& addr) {
+  VIXL_ASSERT(CPUHas(CPUFeatures::kMTE));
+  StoreTagHelper(xt, addr, 0xd9e00000);
+}
+
+void Assembler::stzg(const Register& xt, const MemOperand& addr) {
+  VIXL_ASSERT(CPUHas(CPUFeatures::kMTE));
+  StoreTagHelper(xt, addr, 0xd9600000);
+}
+
+void Assembler::subg(const Register& xd,
+                     const Register& xn,
+                     int offset,
+                     int tag_offset) {
+  VIXL_ASSERT(CPUHas(CPUFeatures::kMTE));
+  VIXL_ASSERT(IsMultiple(offset, kMTETagGranuleInBytes));
+
+  Emit(0xd1800000 | RdSP(xd) | RnSP(xn) |
+       ImmUnsignedField<21, 16>(offset / kMTETagGranuleInBytes) |
+       ImmUnsignedField<13, 10>(tag_offset));
+}
+
+void Assembler::subp(const Register& xd,
+                     const Register& xn,
+                     const Register& xm) {
+  VIXL_ASSERT(CPUHas(CPUFeatures::kMTE));
+
+  Emit(0x9ac00000 | Rd(xd) | RnSP(xn) | RmSP(xm));
+}
+
+void Assembler::subps(const Register& xd,
+                      const Register& xn,
+                      const Register& xm) {
VIXL_ASSERT(CPUHas(CPUFeatures::kMTE)); + + Emit(0xbac00000 | Rd(xd) | RnSP(xn) | RmSP(xm)); +} + +void Assembler::cpye(const Register& rd, + const Register& rs, + const Register& rn) { + VIXL_ASSERT(CPUHas(CPUFeatures::kMOPS)); + VIXL_ASSERT(!AreAliased(rd, rn, rs)); + VIXL_ASSERT(!rd.IsZero() && !rn.IsZero() && !rs.IsZero()); + + Emit(0x1d800400 | Rd(rd) | Rn(rn) | Rs(rs)); +} + +void Assembler::cpyen(const Register& rd, + const Register& rs, + const Register& rn) { + VIXL_ASSERT(CPUHas(CPUFeatures::kMOPS)); + VIXL_ASSERT(!AreAliased(rd, rn, rs)); + VIXL_ASSERT(!rd.IsZero() && !rn.IsZero() && !rs.IsZero()); + + Emit(0x1d80c400 | Rd(rd) | Rn(rn) | Rs(rs)); +} + +void Assembler::cpyern(const Register& rd, + const Register& rs, + const Register& rn) { + VIXL_ASSERT(CPUHas(CPUFeatures::kMOPS)); + VIXL_ASSERT(!AreAliased(rd, rn, rs)); + VIXL_ASSERT(!rd.IsZero() && !rn.IsZero() && !rs.IsZero()); + + Emit(0x1d808400 | Rd(rd) | Rn(rn) | Rs(rs)); +} + +void Assembler::cpyewn(const Register& rd, + const Register& rs, + const Register& rn) { + VIXL_ASSERT(CPUHas(CPUFeatures::kMOPS)); + VIXL_ASSERT(!AreAliased(rd, rn, rs)); + VIXL_ASSERT(!rd.IsZero() && !rn.IsZero() && !rs.IsZero()); + + Emit(0x1d804400 | Rd(rd) | Rn(rn) | Rs(rs)); +} + +void Assembler::cpyfe(const Register& rd, + const Register& rs, + const Register& rn) { + VIXL_ASSERT(CPUHas(CPUFeatures::kMOPS)); + VIXL_ASSERT(!AreAliased(rd, rn, rs)); + VIXL_ASSERT(!rd.IsZero() && !rn.IsZero() && !rs.IsZero()); + + Emit(0x19800400 | Rd(rd) | Rn(rn) | Rs(rs)); +} + +void Assembler::cpyfen(const Register& rd, + const Register& rs, + const Register& rn) { + VIXL_ASSERT(CPUHas(CPUFeatures::kMOPS)); + VIXL_ASSERT(!AreAliased(rd, rn, rs)); + VIXL_ASSERT(!rd.IsZero() && !rn.IsZero() && !rs.IsZero()); + + Emit(0x1980c400 | Rd(rd) | Rn(rn) | Rs(rs)); +} + +void Assembler::cpyfern(const Register& rd, + const Register& rs, + const Register& rn) { + VIXL_ASSERT(CPUHas(CPUFeatures::kMOPS)); + VIXL_ASSERT(!AreAliased(rd, rn, rs)); + VIXL_ASSERT(!rd.IsZero() && !rn.IsZero() && !rs.IsZero()); + + Emit(0x19808400 | Rd(rd) | Rn(rn) | Rs(rs)); +} + +void Assembler::cpyfewn(const Register& rd, + const Register& rs, + const Register& rn) { + VIXL_ASSERT(CPUHas(CPUFeatures::kMOPS)); + VIXL_ASSERT(!AreAliased(rd, rn, rs)); + VIXL_ASSERT(!rd.IsZero() && !rn.IsZero() && !rs.IsZero()); + + Emit(0x19804400 | Rd(rd) | Rn(rn) | Rs(rs)); +} + +void Assembler::cpyfm(const Register& rd, + const Register& rs, + const Register& rn) { + VIXL_ASSERT(CPUHas(CPUFeatures::kMOPS)); + VIXL_ASSERT(!AreAliased(rd, rn, rs)); + VIXL_ASSERT(!rd.IsZero() && !rn.IsZero() && !rs.IsZero()); + + Emit(0x19400400 | Rd(rd) | Rn(rn) | Rs(rs)); +} + +void Assembler::cpyfmn(const Register& rd, + const Register& rs, + const Register& rn) { + VIXL_ASSERT(CPUHas(CPUFeatures::kMOPS)); + VIXL_ASSERT(!AreAliased(rd, rn, rs)); + VIXL_ASSERT(!rd.IsZero() && !rn.IsZero() && !rs.IsZero()); + + Emit(0x1940c400 | Rd(rd) | Rn(rn) | Rs(rs)); +} + +void Assembler::cpyfmrn(const Register& rd, + const Register& rs, + const Register& rn) { + VIXL_ASSERT(CPUHas(CPUFeatures::kMOPS)); + VIXL_ASSERT(!AreAliased(rd, rn, rs)); + VIXL_ASSERT(!rd.IsZero() && !rn.IsZero() && !rs.IsZero()); + + Emit(0x19408400 | Rd(rd) | Rn(rn) | Rs(rs)); +} + +void Assembler::cpyfmwn(const Register& rd, + const Register& rs, + const Register& rn) { + VIXL_ASSERT(CPUHas(CPUFeatures::kMOPS)); + VIXL_ASSERT(!AreAliased(rd, rn, rs)); + VIXL_ASSERT(!rd.IsZero() && !rn.IsZero() && !rs.IsZero()); + + Emit(0x19404400 | Rd(rd) | Rn(rn) | Rs(rs)); +} + 
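+// The CPY* and CPYF* emitters implement the FEAT_MOPS memory-copy family.
+// Architecturally these instructions are only used as a prologue/main/
+// epilogue triple (P, M, E) issued back to back with the same destination,
+// source and size registers. An illustrative sketch, assuming an Assembler
+// 'assm' with a backing code buffer (the name and the register choices are
+// arbitrary; operand roles follow the Rd/Rs/Rn fields used in Emit above):
+//
+//   // x0 = destination address, x1 = source address, x2 = byte count.
+//   assm.cpyfp(x0, x1, x2);  // Prologue: set up the forward copy.
+//   assm.cpyfm(x0, x1, x2);  // Main: copy the bulk of the data.
+//   assm.cpyfe(x0, x1, x2);  // Epilogue: copy any remaining tail.
+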
+void Assembler::cpyfp(const Register& rd, + const Register& rs, + const Register& rn) { + VIXL_ASSERT(CPUHas(CPUFeatures::kMOPS)); + VIXL_ASSERT(!AreAliased(rd, rn, rs)); + VIXL_ASSERT(!rd.IsZero() && !rn.IsZero() && !rs.IsZero()); + + Emit(0x19000400 | Rd(rd) | Rn(rn) | Rs(rs)); +} + +void Assembler::cpyfpn(const Register& rd, + const Register& rs, + const Register& rn) { + VIXL_ASSERT(CPUHas(CPUFeatures::kMOPS)); + VIXL_ASSERT(!AreAliased(rd, rn, rs)); + VIXL_ASSERT(!rd.IsZero() && !rn.IsZero() && !rs.IsZero()); + + Emit(0x1900c400 | Rd(rd) | Rn(rn) | Rs(rs)); +} + +void Assembler::cpyfprn(const Register& rd, + const Register& rs, + const Register& rn) { + VIXL_ASSERT(CPUHas(CPUFeatures::kMOPS)); + VIXL_ASSERT(!AreAliased(rd, rn, rs)); + VIXL_ASSERT(!rd.IsZero() && !rn.IsZero() && !rs.IsZero()); + + Emit(0x19008400 | Rd(rd) | Rn(rn) | Rs(rs)); +} + +void Assembler::cpyfpwn(const Register& rd, + const Register& rs, + const Register& rn) { + VIXL_ASSERT(CPUHas(CPUFeatures::kMOPS)); + VIXL_ASSERT(!AreAliased(rd, rn, rs)); + VIXL_ASSERT(!rd.IsZero() && !rn.IsZero() && !rs.IsZero()); + + Emit(0x19004400 | Rd(rd) | Rn(rn) | Rs(rs)); +} + +void Assembler::cpym(const Register& rd, + const Register& rs, + const Register& rn) { + VIXL_ASSERT(CPUHas(CPUFeatures::kMOPS)); + VIXL_ASSERT(!AreAliased(rd, rn, rs)); + VIXL_ASSERT(!rd.IsZero() && !rn.IsZero() && !rs.IsZero()); + + Emit(0x1d400400 | Rd(rd) | Rn(rn) | Rs(rs)); +} + +void Assembler::cpymn(const Register& rd, + const Register& rs, + const Register& rn) { + VIXL_ASSERT(CPUHas(CPUFeatures::kMOPS)); + VIXL_ASSERT(!AreAliased(rd, rn, rs)); + VIXL_ASSERT(!rd.IsZero() && !rn.IsZero() && !rs.IsZero()); + + Emit(0x1d40c400 | Rd(rd) | Rn(rn) | Rs(rs)); +} + +void Assembler::cpymrn(const Register& rd, + const Register& rs, + const Register& rn) { + VIXL_ASSERT(CPUHas(CPUFeatures::kMOPS)); + VIXL_ASSERT(!AreAliased(rd, rn, rs)); + VIXL_ASSERT(!rd.IsZero() && !rn.IsZero() && !rs.IsZero()); + + Emit(0x1d408400 | Rd(rd) | Rn(rn) | Rs(rs)); +} + +void Assembler::cpymwn(const Register& rd, + const Register& rs, + const Register& rn) { + VIXL_ASSERT(CPUHas(CPUFeatures::kMOPS)); + VIXL_ASSERT(!AreAliased(rd, rn, rs)); + VIXL_ASSERT(!rd.IsZero() && !rn.IsZero() && !rs.IsZero()); + + Emit(0x1d404400 | Rd(rd) | Rn(rn) | Rs(rs)); +} + +void Assembler::cpyp(const Register& rd, + const Register& rs, + const Register& rn) { + VIXL_ASSERT(CPUHas(CPUFeatures::kMOPS)); + VIXL_ASSERT(!AreAliased(rd, rn, rs)); + VIXL_ASSERT(!rd.IsZero() && !rn.IsZero() && !rs.IsZero()); + + Emit(0x1d000400 | Rd(rd) | Rn(rn) | Rs(rs)); +} + +void Assembler::cpypn(const Register& rd, + const Register& rs, + const Register& rn) { + VIXL_ASSERT(CPUHas(CPUFeatures::kMOPS)); + VIXL_ASSERT(!AreAliased(rd, rn, rs)); + VIXL_ASSERT(!rd.IsZero() && !rn.IsZero() && !rs.IsZero()); + + Emit(0x1d00c400 | Rd(rd) | Rn(rn) | Rs(rs)); +} + +void Assembler::cpyprn(const Register& rd, + const Register& rs, + const Register& rn) { + VIXL_ASSERT(CPUHas(CPUFeatures::kMOPS)); + VIXL_ASSERT(!AreAliased(rd, rn, rs)); + VIXL_ASSERT(!rd.IsZero() && !rn.IsZero() && !rs.IsZero()); + + Emit(0x1d008400 | Rd(rd) | Rn(rn) | Rs(rs)); +} + +void Assembler::cpypwn(const Register& rd, + const Register& rs, + const Register& rn) { + VIXL_ASSERT(CPUHas(CPUFeatures::kMOPS)); + VIXL_ASSERT(!AreAliased(rd, rn, rs)); + VIXL_ASSERT(!rd.IsZero() && !rn.IsZero() && !rs.IsZero()); + + Emit(0x1d004400 | Rd(rd) | Rn(rn) | Rs(rs)); +} + +void Assembler::sete(const Register& rd, + const Register& rn, + const Register& rs) { + 
VIXL_ASSERT(CPUHas(CPUFeatures::kMOPS)); + VIXL_ASSERT(!AreAliased(rd, rn, rs)); + VIXL_ASSERT(!rd.IsZero() && !rn.IsZero()); + + Emit(0x19c08400 | Rd(rd) | Rn(rn) | Rs(rs)); +} + +void Assembler::seten(const Register& rd, + const Register& rn, + const Register& rs) { + VIXL_ASSERT(CPUHas(CPUFeatures::kMOPS)); + VIXL_ASSERT(!AreAliased(rd, rn, rs)); + VIXL_ASSERT(!rd.IsZero() && !rn.IsZero()); + + Emit(0x19c0a400 | Rd(rd) | Rn(rn) | Rs(rs)); +} + +void Assembler::setge(const Register& rd, + const Register& rn, + const Register& rs) { + VIXL_ASSERT(CPUHas(CPUFeatures::kMOPS)); + VIXL_ASSERT(!AreAliased(rd, rn, rs)); + VIXL_ASSERT(!rd.IsZero() && !rn.IsZero()); + + Emit(0x1dc08400 | Rd(rd) | Rn(rn) | Rs(rs)); +} + +void Assembler::setgen(const Register& rd, + const Register& rn, + const Register& rs) { + VIXL_ASSERT(CPUHas(CPUFeatures::kMOPS)); + VIXL_ASSERT(!AreAliased(rd, rn, rs)); + VIXL_ASSERT(!rd.IsZero() && !rn.IsZero()); + + Emit(0x1dc0a400 | Rd(rd) | Rn(rn) | Rs(rs)); +} + +void Assembler::setgm(const Register& rd, + const Register& rn, + const Register& rs) { + VIXL_ASSERT(CPUHas(CPUFeatures::kMOPS)); + VIXL_ASSERT(!AreAliased(rd, rn, rs)); + VIXL_ASSERT(!rd.IsZero() && !rn.IsZero()); + + Emit(0x1dc04400 | Rd(rd) | Rn(rn) | Rs(rs)); +} + +void Assembler::setgmn(const Register& rd, + const Register& rn, + const Register& rs) { + VIXL_ASSERT(CPUHas(CPUFeatures::kMOPS)); + VIXL_ASSERT(!AreAliased(rd, rn, rs)); + VIXL_ASSERT(!rd.IsZero() && !rn.IsZero()); + + Emit(0x1dc06400 | Rd(rd) | Rn(rn) | Rs(rs)); +} + +void Assembler::setgp(const Register& rd, + const Register& rn, + const Register& rs) { + VIXL_ASSERT(CPUHas(CPUFeatures::kMOPS)); + VIXL_ASSERT(!AreAliased(rd, rn, rs)); + VIXL_ASSERT(!rd.IsZero() && !rn.IsZero()); + + Emit(0x1dc00400 | Rd(rd) | Rn(rn) | Rs(rs)); +} + +void Assembler::setgpn(const Register& rd, + const Register& rn, + const Register& rs) { + VIXL_ASSERT(CPUHas(CPUFeatures::kMOPS)); + VIXL_ASSERT(!AreAliased(rd, rn, rs)); + VIXL_ASSERT(!rd.IsZero() && !rn.IsZero()); + + Emit(0x1dc02400 | Rd(rd) | Rn(rn) | Rs(rs)); +} + +void Assembler::setm(const Register& rd, + const Register& rn, + const Register& rs) { + VIXL_ASSERT(CPUHas(CPUFeatures::kMOPS)); + VIXL_ASSERT(!AreAliased(rd, rn, rs)); + VIXL_ASSERT(!rd.IsZero() && !rn.IsZero()); + + Emit(0x19c04400 | Rd(rd) | Rn(rn) | Rs(rs)); +} + +void Assembler::setmn(const Register& rd, + const Register& rn, + const Register& rs) { + VIXL_ASSERT(CPUHas(CPUFeatures::kMOPS)); + VIXL_ASSERT(!AreAliased(rd, rn, rs)); + VIXL_ASSERT(!rd.IsZero() && !rn.IsZero()); + + Emit(0x19c06400 | Rd(rd) | Rn(rn) | Rs(rs)); +} + +void Assembler::setp(const Register& rd, + const Register& rn, + const Register& rs) { + VIXL_ASSERT(CPUHas(CPUFeatures::kMOPS)); + VIXL_ASSERT(!AreAliased(rd, rn, rs)); + VIXL_ASSERT(!rd.IsZero() && !rn.IsZero()); + + Emit(0x19c00400 | Rd(rd) | Rn(rn) | Rs(rs)); +} + +void Assembler::setpn(const Register& rd, + const Register& rn, + const Register& rs) { + VIXL_ASSERT(CPUHas(CPUFeatures::kMOPS)); + VIXL_ASSERT(!AreAliased(rd, rn, rs)); + VIXL_ASSERT(!rd.IsZero() && !rn.IsZero()); + + Emit(0x19c02400 | Rd(rd) | Rn(rn) | Rs(rs)); +} + +void Assembler::abs(const Register& rd, const Register& rn) { + VIXL_ASSERT(CPUHas(CPUFeatures::kCSSC)); + VIXL_ASSERT(rd.IsSameSizeAndType(rn)); + + Emit(0x5ac02000 | SF(rd) | Rd(rd) | Rn(rn)); +} + +void Assembler::cnt(const Register& rd, const Register& rn) { + VIXL_ASSERT(CPUHas(CPUFeatures::kCSSC)); + VIXL_ASSERT(rd.IsSameSizeAndType(rn)); + + Emit(0x5ac01c00 | SF(rd) | Rd(rd) | 
Rn(rn)); +} + +void Assembler::ctz(const Register& rd, const Register& rn) { + VIXL_ASSERT(CPUHas(CPUFeatures::kCSSC)); + VIXL_ASSERT(rd.IsSameSizeAndType(rn)); + + Emit(0x5ac01800 | SF(rd) | Rd(rd) | Rn(rn)); +} + +#define MINMAX(V) \ + V(smax, 0x11c00000, 0x1ac06000, true) \ + V(smin, 0x11c80000, 0x1ac06800, true) \ + V(umax, 0x11c40000, 0x1ac06400, false) \ + V(umin, 0x11cc0000, 0x1ac06c00, false) + +#define VIXL_DEFINE_ASM_FUNC(FN, IMMOP, REGOP, SIGNED) \ + void Assembler::FN(const Register& rd, \ + const Register& rn, \ + const Operand& op) { \ + VIXL_ASSERT(rd.IsSameSizeAndType(rn)); \ + Instr i = SF(rd) | Rd(rd) | Rn(rn); \ + if (op.IsImmediate()) { \ + int64_t imm = op.GetImmediate(); \ + i |= SIGNED ? ImmField<17, 10>(imm) : ImmUnsignedField<17, 10>(imm); \ + Emit(IMMOP | i); \ + } else { \ + VIXL_ASSERT(op.IsPlainRegister()); \ + VIXL_ASSERT(op.GetRegister().IsSameSizeAndType(rd)); \ + Emit(REGOP | i | Rm(op.GetRegister())); \ + } \ + } +MINMAX(VIXL_DEFINE_ASM_FUNC) +#undef VIXL_DEFINE_ASM_FUNC + +// NEON structure loads and stores. +Instr Assembler::LoadStoreStructAddrModeField(const MemOperand& addr) { + Instr addr_field = RnSP(addr.GetBaseRegister()); + + if (addr.IsPostIndex()) { + VIXL_STATIC_ASSERT(NEONLoadStoreMultiStructPostIndex == + static_cast( + NEONLoadStoreSingleStructPostIndex)); + + addr_field |= NEONLoadStoreMultiStructPostIndex; + if (addr.GetOffset() == 0) { + addr_field |= RmNot31(addr.GetRegisterOffset()); + } else { + // The immediate post index addressing mode is indicated by rm = 31. + // The immediate is implied by the number of vector registers used. + addr_field |= (0x1f << Rm_offset); + } + } else { + VIXL_ASSERT(addr.IsImmediateOffset() && (addr.GetOffset() == 0)); + } + return addr_field; +} + +void Assembler::LoadStoreStructVerify(const VRegister& vt, + const MemOperand& addr, + Instr op) { +#ifdef VIXL_DEBUG + // Assert that addressing mode is either offset (with immediate 0), post + // index by immediate of the size of the register list, or post index by a + // value in a core register. 
+ VIXL_ASSERT(vt.HasSize() && vt.HasLaneSize()); + if (addr.IsImmediateOffset()) { + VIXL_ASSERT(addr.GetOffset() == 0); + } else { + int offset = vt.GetSizeInBytes(); + switch (op) { + case NEON_LD1_1v: + case NEON_ST1_1v: + offset *= 1; + break; + case NEONLoadStoreSingleStructLoad1: + case NEONLoadStoreSingleStructStore1: + case NEON_LD1R: + offset = (offset / vt.GetLanes()) * 1; + break; + + case NEON_LD1_2v: + case NEON_ST1_2v: + case NEON_LD2: + case NEON_ST2: + offset *= 2; + break; + case NEONLoadStoreSingleStructLoad2: + case NEONLoadStoreSingleStructStore2: + case NEON_LD2R: + offset = (offset / vt.GetLanes()) * 2; + break; + + case NEON_LD1_3v: + case NEON_ST1_3v: + case NEON_LD3: + case NEON_ST3: + offset *= 3; + break; + case NEONLoadStoreSingleStructLoad3: + case NEONLoadStoreSingleStructStore3: + case NEON_LD3R: + offset = (offset / vt.GetLanes()) * 3; + break; + + case NEON_LD1_4v: + case NEON_ST1_4v: + case NEON_LD4: + case NEON_ST4: + offset *= 4; + break; + case NEONLoadStoreSingleStructLoad4: + case NEONLoadStoreSingleStructStore4: + case NEON_LD4R: + offset = (offset / vt.GetLanes()) * 4; + break; + default: + VIXL_UNREACHABLE(); + } + VIXL_ASSERT(!addr.GetRegisterOffset().Is(NoReg) || + addr.GetOffset() == offset); + } +#else + USE(vt, addr, op); +#endif +} + +void Assembler::LoadStoreStruct(const VRegister& vt, + const MemOperand& addr, + NEONLoadStoreMultiStructOp op) { + LoadStoreStructVerify(vt, addr, op); + VIXL_ASSERT(vt.IsVector() || vt.Is1D()); + Emit(op | LoadStoreStructAddrModeField(addr) | LSVFormat(vt) | Rt(vt)); +} + + +void Assembler::LoadStoreStructSingleAllLanes(const VRegister& vt, + const MemOperand& addr, + NEONLoadStoreSingleStructOp op) { + LoadStoreStructVerify(vt, addr, op); + Emit(op | LoadStoreStructAddrModeField(addr) | LSVFormat(vt) | Rt(vt)); +} + + +void Assembler::ld1(const VRegister& vt, const MemOperand& src) { + VIXL_ASSERT(CPUHas(CPUFeatures::kNEON)); + LoadStoreStruct(vt, src, NEON_LD1_1v); +} + + +void Assembler::ld1(const VRegister& vt, + const VRegister& vt2, + const MemOperand& src) { + USE(vt2); + VIXL_ASSERT(CPUHas(CPUFeatures::kNEON)); + VIXL_ASSERT(AreSameFormat(vt, vt2)); + VIXL_ASSERT(AreConsecutive(vt, vt2)); + LoadStoreStruct(vt, src, NEON_LD1_2v); +} + + +void Assembler::ld1(const VRegister& vt, + const VRegister& vt2, + const VRegister& vt3, + const MemOperand& src) { + USE(vt2, vt3); + VIXL_ASSERT(CPUHas(CPUFeatures::kNEON)); + VIXL_ASSERT(AreSameFormat(vt, vt2, vt3)); + VIXL_ASSERT(AreConsecutive(vt, vt2, vt3)); + LoadStoreStruct(vt, src, NEON_LD1_3v); +} + + +void Assembler::ld1(const VRegister& vt, + const VRegister& vt2, + const VRegister& vt3, + const VRegister& vt4, + const MemOperand& src) { + USE(vt2, vt3, vt4); + VIXL_ASSERT(CPUHas(CPUFeatures::kNEON)); + VIXL_ASSERT(AreSameFormat(vt, vt2, vt3, vt4)); + VIXL_ASSERT(AreConsecutive(vt, vt2, vt3, vt4)); + LoadStoreStruct(vt, src, NEON_LD1_4v); +} + + +void Assembler::ld2(const VRegister& vt, + const VRegister& vt2, + const MemOperand& src) { + USE(vt2); + VIXL_ASSERT(CPUHas(CPUFeatures::kNEON)); + VIXL_ASSERT(AreSameFormat(vt, vt2)); + VIXL_ASSERT(AreConsecutive(vt, vt2)); + LoadStoreStruct(vt, src, NEON_LD2); +} + + +void Assembler::ld2(const VRegister& vt, + const VRegister& vt2, + int lane, + const MemOperand& src) { + USE(vt2); + VIXL_ASSERT(CPUHas(CPUFeatures::kNEON)); + VIXL_ASSERT(AreSameFormat(vt, vt2)); + VIXL_ASSERT(AreConsecutive(vt, vt2)); + LoadStoreStructSingle(vt, lane, src, NEONLoadStoreSingleStructLoad2); +} + + +void Assembler::ld2r(const 
VRegister& vt, + const VRegister& vt2, + const MemOperand& src) { + USE(vt2); + VIXL_ASSERT(CPUHas(CPUFeatures::kNEON)); + VIXL_ASSERT(AreSameFormat(vt, vt2)); + VIXL_ASSERT(AreConsecutive(vt, vt2)); + LoadStoreStructSingleAllLanes(vt, src, NEON_LD2R); +} + + +void Assembler::ld3(const VRegister& vt, + const VRegister& vt2, + const VRegister& vt3, + const MemOperand& src) { + USE(vt2, vt3); + VIXL_ASSERT(CPUHas(CPUFeatures::kNEON)); + VIXL_ASSERT(AreSameFormat(vt, vt2, vt3)); + VIXL_ASSERT(AreConsecutive(vt, vt2, vt3)); + LoadStoreStruct(vt, src, NEON_LD3); +} + + +void Assembler::ld3(const VRegister& vt, + const VRegister& vt2, + const VRegister& vt3, + int lane, + const MemOperand& src) { + USE(vt2, vt3); + VIXL_ASSERT(CPUHas(CPUFeatures::kNEON)); + VIXL_ASSERT(AreSameFormat(vt, vt2, vt3)); + VIXL_ASSERT(AreConsecutive(vt, vt2, vt3)); + LoadStoreStructSingle(vt, lane, src, NEONLoadStoreSingleStructLoad3); +} + + +void Assembler::ld3r(const VRegister& vt, + const VRegister& vt2, + const VRegister& vt3, + const MemOperand& src) { + USE(vt2, vt3); + VIXL_ASSERT(CPUHas(CPUFeatures::kNEON)); + VIXL_ASSERT(AreSameFormat(vt, vt2, vt3)); + VIXL_ASSERT(AreConsecutive(vt, vt2, vt3)); + LoadStoreStructSingleAllLanes(vt, src, NEON_LD3R); +} + + +void Assembler::ld4(const VRegister& vt, + const VRegister& vt2, + const VRegister& vt3, + const VRegister& vt4, + const MemOperand& src) { + USE(vt2, vt3, vt4); + VIXL_ASSERT(CPUHas(CPUFeatures::kNEON)); + VIXL_ASSERT(AreSameFormat(vt, vt2, vt3, vt4)); + VIXL_ASSERT(AreConsecutive(vt, vt2, vt3, vt4)); + LoadStoreStruct(vt, src, NEON_LD4); +} + + +void Assembler::ld4(const VRegister& vt, + const VRegister& vt2, + const VRegister& vt3, + const VRegister& vt4, + int lane, + const MemOperand& src) { + USE(vt2, vt3, vt4); + VIXL_ASSERT(CPUHas(CPUFeatures::kNEON)); + VIXL_ASSERT(AreSameFormat(vt, vt2, vt3, vt4)); + VIXL_ASSERT(AreConsecutive(vt, vt2, vt3, vt4)); + LoadStoreStructSingle(vt, lane, src, NEONLoadStoreSingleStructLoad4); +} + + +void Assembler::ld4r(const VRegister& vt, + const VRegister& vt2, + const VRegister& vt3, + const VRegister& vt4, + const MemOperand& src) { + USE(vt2, vt3, vt4); + VIXL_ASSERT(CPUHas(CPUFeatures::kNEON)); + VIXL_ASSERT(AreSameFormat(vt, vt2, vt3, vt4)); + VIXL_ASSERT(AreConsecutive(vt, vt2, vt3, vt4)); + LoadStoreStructSingleAllLanes(vt, src, NEON_LD4R); +} + + +void Assembler::st1(const VRegister& vt, const MemOperand& src) { + VIXL_ASSERT(CPUHas(CPUFeatures::kNEON)); + LoadStoreStruct(vt, src, NEON_ST1_1v); +} + + +void Assembler::st1(const VRegister& vt, + const VRegister& vt2, + const MemOperand& src) { + USE(vt2); + VIXL_ASSERT(CPUHas(CPUFeatures::kNEON)); + VIXL_ASSERT(AreSameFormat(vt, vt2)); + VIXL_ASSERT(AreConsecutive(vt, vt2)); + LoadStoreStruct(vt, src, NEON_ST1_2v); +} + + +void Assembler::st1(const VRegister& vt, + const VRegister& vt2, + const VRegister& vt3, + const MemOperand& src) { + USE(vt2, vt3); + VIXL_ASSERT(CPUHas(CPUFeatures::kNEON)); + VIXL_ASSERT(AreSameFormat(vt, vt2, vt3)); + VIXL_ASSERT(AreConsecutive(vt, vt2, vt3)); + LoadStoreStruct(vt, src, NEON_ST1_3v); +} + + +void Assembler::st1(const VRegister& vt, + const VRegister& vt2, + const VRegister& vt3, + const VRegister& vt4, + const MemOperand& src) { + USE(vt2, vt3, vt4); + VIXL_ASSERT(CPUHas(CPUFeatures::kNEON)); + VIXL_ASSERT(AreSameFormat(vt, vt2, vt3, vt4)); + VIXL_ASSERT(AreConsecutive(vt, vt2, vt3, vt4)); + LoadStoreStruct(vt, src, NEON_ST1_4v); +} + + +void Assembler::st2(const VRegister& vt, + const VRegister& vt2, + const MemOperand& 
dst) { + USE(vt2); + VIXL_ASSERT(CPUHas(CPUFeatures::kNEON)); + VIXL_ASSERT(AreSameFormat(vt, vt2)); + VIXL_ASSERT(AreConsecutive(vt, vt2)); + LoadStoreStruct(vt, dst, NEON_ST2); +} + + +void Assembler::st2(const VRegister& vt, + const VRegister& vt2, + int lane, + const MemOperand& dst) { + USE(vt2); + VIXL_ASSERT(CPUHas(CPUFeatures::kNEON)); + VIXL_ASSERT(AreSameFormat(vt, vt2)); + VIXL_ASSERT(AreConsecutive(vt, vt2)); + LoadStoreStructSingle(vt, lane, dst, NEONLoadStoreSingleStructStore2); +} + + +void Assembler::st3(const VRegister& vt, + const VRegister& vt2, + const VRegister& vt3, + const MemOperand& dst) { + USE(vt2, vt3); + VIXL_ASSERT(CPUHas(CPUFeatures::kNEON)); + VIXL_ASSERT(AreSameFormat(vt, vt2, vt3)); + VIXL_ASSERT(AreConsecutive(vt, vt2, vt3)); + LoadStoreStruct(vt, dst, NEON_ST3); +} + + +void Assembler::st3(const VRegister& vt, + const VRegister& vt2, + const VRegister& vt3, + int lane, + const MemOperand& dst) { + USE(vt2, vt3); + VIXL_ASSERT(CPUHas(CPUFeatures::kNEON)); + VIXL_ASSERT(AreSameFormat(vt, vt2, vt3)); + VIXL_ASSERT(AreConsecutive(vt, vt2, vt3)); + LoadStoreStructSingle(vt, lane, dst, NEONLoadStoreSingleStructStore3); +} + + +void Assembler::st4(const VRegister& vt, + const VRegister& vt2, + const VRegister& vt3, + const VRegister& vt4, + const MemOperand& dst) { + USE(vt2, vt3, vt4); + VIXL_ASSERT(CPUHas(CPUFeatures::kNEON)); + VIXL_ASSERT(AreSameFormat(vt, vt2, vt3, vt4)); + VIXL_ASSERT(AreConsecutive(vt, vt2, vt3, vt4)); + LoadStoreStruct(vt, dst, NEON_ST4); +} + + +void Assembler::st4(const VRegister& vt, + const VRegister& vt2, + const VRegister& vt3, + const VRegister& vt4, + int lane, + const MemOperand& dst) { + USE(vt2, vt3, vt4); + VIXL_ASSERT(CPUHas(CPUFeatures::kNEON)); + VIXL_ASSERT(AreSameFormat(vt, vt2, vt3, vt4)); + VIXL_ASSERT(AreConsecutive(vt, vt2, vt3, vt4)); + LoadStoreStructSingle(vt, lane, dst, NEONLoadStoreSingleStructStore4); +} + + +void Assembler::LoadStoreStructSingle(const VRegister& vt, + uint32_t lane, + const MemOperand& addr, + NEONLoadStoreSingleStructOp op) { + LoadStoreStructVerify(vt, addr, op); + + // We support vt arguments of the form vt.VxT() or vt.T(), where x is the + // number of lanes, and T is b, h, s or d. + unsigned lane_size = vt.GetLaneSizeInBytes(); + VIXL_ASSERT(lane_size > 0); + VIXL_ASSERT(lane < (kQRegSizeInBytes / lane_size)); + + // Lane size is encoded in the opcode field. Lane index is encoded in the Q, + // S and size fields. 
+ lane *= lane_size; + if (lane_size == 8) lane++; + + Instr size = (lane << NEONLSSize_offset) & NEONLSSize_mask; + Instr s = (lane << (NEONS_offset - 2)) & NEONS_mask; + Instr q = (lane << (NEONQ_offset - 3)) & NEONQ_mask; + + Instr instr = op; + switch (lane_size) { + case 1: + instr |= NEONLoadStoreSingle_b; + break; + case 2: + instr |= NEONLoadStoreSingle_h; + break; + case 4: + instr |= NEONLoadStoreSingle_s; + break; + default: + VIXL_ASSERT(lane_size == 8); + instr |= NEONLoadStoreSingle_d; + } + + Emit(instr | LoadStoreStructAddrModeField(addr) | q | size | s | Rt(vt)); +} + + +void Assembler::ld1(const VRegister& vt, int lane, const MemOperand& src) { + VIXL_ASSERT(CPUHas(CPUFeatures::kNEON)); + LoadStoreStructSingle(vt, lane, src, NEONLoadStoreSingleStructLoad1); +} + + +void Assembler::ld1r(const VRegister& vt, const MemOperand& src) { + VIXL_ASSERT(CPUHas(CPUFeatures::kNEON)); + LoadStoreStructSingleAllLanes(vt, src, NEON_LD1R); +} + + +void Assembler::st1(const VRegister& vt, int lane, const MemOperand& dst) { + VIXL_ASSERT(CPUHas(CPUFeatures::kNEON)); + LoadStoreStructSingle(vt, lane, dst, NEONLoadStoreSingleStructStore1); +} + + +void Assembler::NEON3DifferentL(const VRegister& vd, + const VRegister& vn, + const VRegister& vm, + NEON3DifferentOp vop) { + VIXL_ASSERT(AreSameFormat(vn, vm)); + VIXL_ASSERT((vn.Is1H() && vd.Is1S()) || (vn.Is1S() && vd.Is1D()) || + (vn.Is8B() && vd.Is8H()) || (vn.Is4H() && vd.Is4S()) || + (vn.Is2S() && vd.Is2D()) || (vn.Is16B() && vd.Is8H()) || + (vn.Is8H() && vd.Is4S()) || (vn.Is4S() && vd.Is2D())); + Instr format, op = vop; + if (vd.IsScalar()) { + op |= static_cast(NEON_Q) | static_cast(NEONScalar); + format = SFormat(vn); + } else { + format = VFormat(vn); + } + Emit(format | op | Rm(vm) | Rn(vn) | Rd(vd)); +} + + +void Assembler::NEON3DifferentW(const VRegister& vd, + const VRegister& vn, + const VRegister& vm, + NEON3DifferentOp vop) { + VIXL_ASSERT(AreSameFormat(vd, vn)); + VIXL_ASSERT((vm.Is8B() && vd.Is8H()) || (vm.Is4H() && vd.Is4S()) || + (vm.Is2S() && vd.Is2D()) || (vm.Is16B() && vd.Is8H()) || + (vm.Is8H() && vd.Is4S()) || (vm.Is4S() && vd.Is2D())); + Emit(VFormat(vm) | vop | Rm(vm) | Rn(vn) | Rd(vd)); +} + + +void Assembler::NEON3DifferentHN(const VRegister& vd, + const VRegister& vn, + const VRegister& vm, + NEON3DifferentOp vop) { + VIXL_ASSERT(AreSameFormat(vm, vn)); + VIXL_ASSERT((vd.Is8B() && vn.Is8H()) || (vd.Is4H() && vn.Is4S()) || + (vd.Is2S() && vn.Is2D()) || (vd.Is16B() && vn.Is8H()) || + (vd.Is8H() && vn.Is4S()) || (vd.Is4S() && vn.Is2D())); + Emit(VFormat(vd) | vop | Rm(vm) | Rn(vn) | Rd(vd)); +} + + +// clang-format off +#define NEON_3DIFF_LONG_LIST(V) \ + V(pmull, NEON_PMULL, vn.IsVector() && vn.Is8B()) \ + V(pmull2, NEON_PMULL2, vn.IsVector() && vn.Is16B()) \ + V(saddl, NEON_SADDL, vn.IsVector() && vn.IsD()) \ + V(saddl2, NEON_SADDL2, vn.IsVector() && vn.IsQ()) \ + V(sabal, NEON_SABAL, vn.IsVector() && vn.IsD()) \ + V(sabal2, NEON_SABAL2, vn.IsVector() && vn.IsQ()) \ + V(uabal, NEON_UABAL, vn.IsVector() && vn.IsD()) \ + V(uabal2, NEON_UABAL2, vn.IsVector() && vn.IsQ()) \ + V(sabdl, NEON_SABDL, vn.IsVector() && vn.IsD()) \ + V(sabdl2, NEON_SABDL2, vn.IsVector() && vn.IsQ()) \ + V(uabdl, NEON_UABDL, vn.IsVector() && vn.IsD()) \ + V(uabdl2, NEON_UABDL2, vn.IsVector() && vn.IsQ()) \ + V(smlal, NEON_SMLAL, vn.IsVector() && vn.IsD()) \ + V(smlal2, NEON_SMLAL2, vn.IsVector() && vn.IsQ()) \ + V(umlal, NEON_UMLAL, vn.IsVector() && vn.IsD()) \ + V(umlal2, NEON_UMLAL2, vn.IsVector() && vn.IsQ()) \ + V(smlsl, NEON_SMLSL, 
vn.IsVector() && vn.IsD()) \ + V(smlsl2, NEON_SMLSL2, vn.IsVector() && vn.IsQ()) \ + V(umlsl, NEON_UMLSL, vn.IsVector() && vn.IsD()) \ + V(umlsl2, NEON_UMLSL2, vn.IsVector() && vn.IsQ()) \ + V(smull, NEON_SMULL, vn.IsVector() && vn.IsD()) \ + V(smull2, NEON_SMULL2, vn.IsVector() && vn.IsQ()) \ + V(umull, NEON_UMULL, vn.IsVector() && vn.IsD()) \ + V(umull2, NEON_UMULL2, vn.IsVector() && vn.IsQ()) \ + V(ssubl, NEON_SSUBL, vn.IsVector() && vn.IsD()) \ + V(ssubl2, NEON_SSUBL2, vn.IsVector() && vn.IsQ()) \ + V(uaddl, NEON_UADDL, vn.IsVector() && vn.IsD()) \ + V(uaddl2, NEON_UADDL2, vn.IsVector() && vn.IsQ()) \ + V(usubl, NEON_USUBL, vn.IsVector() && vn.IsD()) \ + V(usubl2, NEON_USUBL2, vn.IsVector() && vn.IsQ()) \ + V(sqdmlal, NEON_SQDMLAL, vn.Is1H() || vn.Is1S() || vn.Is4H() || vn.Is2S()) \ + V(sqdmlal2, NEON_SQDMLAL2, vn.Is1H() || vn.Is1S() || vn.Is8H() || vn.Is4S()) \ + V(sqdmlsl, NEON_SQDMLSL, vn.Is1H() || vn.Is1S() || vn.Is4H() || vn.Is2S()) \ + V(sqdmlsl2, NEON_SQDMLSL2, vn.Is1H() || vn.Is1S() || vn.Is8H() || vn.Is4S()) \ + V(sqdmull, NEON_SQDMULL, vn.Is1H() || vn.Is1S() || vn.Is4H() || vn.Is2S()) \ + V(sqdmull2, NEON_SQDMULL2, vn.Is1H() || vn.Is1S() || vn.Is8H() || vn.Is4S()) \ +// clang-format on + + +#define VIXL_DEFINE_ASM_FUNC(FN, OP, AS) \ +void Assembler::FN(const VRegister& vd, \ + const VRegister& vn, \ + const VRegister& vm) { \ + VIXL_ASSERT(CPUHas(CPUFeatures::kNEON)); \ + VIXL_ASSERT(AS); \ + NEON3DifferentL(vd, vn, vm, OP); \ +} +NEON_3DIFF_LONG_LIST(VIXL_DEFINE_ASM_FUNC) +#undef VIXL_DEFINE_ASM_FUNC + +// clang-format off +#define NEON_3DIFF_HN_LIST(V) \ + V(addhn, NEON_ADDHN, vd.IsD()) \ + V(addhn2, NEON_ADDHN2, vd.IsQ()) \ + V(raddhn, NEON_RADDHN, vd.IsD()) \ + V(raddhn2, NEON_RADDHN2, vd.IsQ()) \ + V(subhn, NEON_SUBHN, vd.IsD()) \ + V(subhn2, NEON_SUBHN2, vd.IsQ()) \ + V(rsubhn, NEON_RSUBHN, vd.IsD()) \ + V(rsubhn2, NEON_RSUBHN2, vd.IsQ()) +// clang-format on + +#define VIXL_DEFINE_ASM_FUNC(FN, OP, AS) \ + void Assembler::FN(const VRegister& vd, \ + const VRegister& vn, \ + const VRegister& vm) { \ + VIXL_ASSERT(CPUHas(CPUFeatures::kNEON)); \ + VIXL_ASSERT(AS); \ + NEON3DifferentHN(vd, vn, vm, OP); \ + } +NEON_3DIFF_HN_LIST(VIXL_DEFINE_ASM_FUNC) +#undef VIXL_DEFINE_ASM_FUNC + +void Assembler::uaddw(const VRegister& vd, + const VRegister& vn, + const VRegister& vm) { + VIXL_ASSERT(CPUHas(CPUFeatures::kNEON)); + VIXL_ASSERT(vm.IsD()); + NEON3DifferentW(vd, vn, vm, NEON_UADDW); +} + + +void Assembler::uaddw2(const VRegister& vd, + const VRegister& vn, + const VRegister& vm) { + VIXL_ASSERT(CPUHas(CPUFeatures::kNEON)); + VIXL_ASSERT(vm.IsQ()); + NEON3DifferentW(vd, vn, vm, NEON_UADDW2); +} + + +void Assembler::saddw(const VRegister& vd, + const VRegister& vn, + const VRegister& vm) { + VIXL_ASSERT(CPUHas(CPUFeatures::kNEON)); + VIXL_ASSERT(vm.IsD()); + NEON3DifferentW(vd, vn, vm, NEON_SADDW); +} + + +void Assembler::saddw2(const VRegister& vd, + const VRegister& vn, + const VRegister& vm) { + VIXL_ASSERT(CPUHas(CPUFeatures::kNEON)); + VIXL_ASSERT(vm.IsQ()); + NEON3DifferentW(vd, vn, vm, NEON_SADDW2); +} + + +void Assembler::usubw(const VRegister& vd, + const VRegister& vn, + const VRegister& vm) { + VIXL_ASSERT(CPUHas(CPUFeatures::kNEON)); + VIXL_ASSERT(vm.IsD()); + NEON3DifferentW(vd, vn, vm, NEON_USUBW); +} + + +void Assembler::usubw2(const VRegister& vd, + const VRegister& vn, + const VRegister& vm) { + VIXL_ASSERT(CPUHas(CPUFeatures::kNEON)); + VIXL_ASSERT(vm.IsQ()); + NEON3DifferentW(vd, vn, vm, NEON_USUBW2); +} + + +void Assembler::ssubw(const VRegister& vd, + const 
VRegister& vn, + const VRegister& vm) { + VIXL_ASSERT(CPUHas(CPUFeatures::kNEON)); + VIXL_ASSERT(vm.IsD()); + NEON3DifferentW(vd, vn, vm, NEON_SSUBW); +} + + +void Assembler::ssubw2(const VRegister& vd, + const VRegister& vn, + const VRegister& vm) { + VIXL_ASSERT(CPUHas(CPUFeatures::kNEON)); + VIXL_ASSERT(vm.IsQ()); + NEON3DifferentW(vd, vn, vm, NEON_SSUBW2); +} + + +void Assembler::mov(const Register& rd, const Register& rm) { + // Moves involving the stack pointer are encoded as add immediate with + // second operand of zero. Otherwise, orr with first operand zr is + // used. + if (rd.IsSP() || rm.IsSP()) { + add(rd, rm, 0); + } else { + orr(rd, AppropriateZeroRegFor(rd), rm); + } +} + +void Assembler::xpaclri() { + VIXL_ASSERT(CPUHas(CPUFeatures::kPAuth)); + Emit(XPACLRI); +} + +void Assembler::pacia1716() { + VIXL_ASSERT(CPUHas(CPUFeatures::kPAuth)); + Emit(PACIA1716); +} + +void Assembler::pacib1716() { + VIXL_ASSERT(CPUHas(CPUFeatures::kPAuth)); + Emit(PACIB1716); +} + +void Assembler::autia1716() { + VIXL_ASSERT(CPUHas(CPUFeatures::kPAuth)); + Emit(AUTIA1716); +} + +void Assembler::autib1716() { + VIXL_ASSERT(CPUHas(CPUFeatures::kPAuth)); + Emit(AUTIB1716); +} + +void Assembler::paciaz() { + VIXL_ASSERT(CPUHas(CPUFeatures::kPAuth)); + Emit(PACIAZ); +} + +void Assembler::pacibz() { + VIXL_ASSERT(CPUHas(CPUFeatures::kPAuth)); + Emit(PACIBZ); +} + +void Assembler::autiaz() { + VIXL_ASSERT(CPUHas(CPUFeatures::kPAuth)); + Emit(AUTIAZ); +} + +void Assembler::autibz() { + VIXL_ASSERT(CPUHas(CPUFeatures::kPAuth)); + Emit(AUTIBZ); +} + +void Assembler::paciasp() { + VIXL_ASSERT(CPUHas(CPUFeatures::kPAuth)); + Emit(PACIASP); +} + +void Assembler::pacibsp() { + VIXL_ASSERT(CPUHas(CPUFeatures::kPAuth)); + Emit(PACIBSP); +} + +void Assembler::autiasp() { + VIXL_ASSERT(CPUHas(CPUFeatures::kPAuth)); + Emit(AUTIASP); +} + +void Assembler::autibsp() { + VIXL_ASSERT(CPUHas(CPUFeatures::kPAuth)); + Emit(AUTIBSP); +} + +void Assembler::bti(BranchTargetIdentifier id) { + VIXL_ASSERT((id != EmitPACIASP) && (id != EmitPACIBSP)); // Not modes of Bti. + VIXL_ASSERT(id != EmitBTI_none); // Always generate an instruction. 
+ VIXL_ASSERT(CPUHas(CPUFeatures::kBTI)); + hint(static_cast(id)); +} + +void Assembler::mvn(const Register& rd, const Operand& operand) { + orn(rd, AppropriateZeroRegFor(rd), operand); +} + + +void Assembler::mrs(const Register& xt, SystemRegister sysreg) { + VIXL_ASSERT(xt.Is64Bits()); + VIXL_ASSERT(CPUHas(sysreg)); + Emit(MRS | ImmSystemRegister(sysreg) | Rt(xt)); +} + + +void Assembler::msr(SystemRegister sysreg, const Register& xt) { + VIXL_ASSERT(xt.Is64Bits()); + VIXL_ASSERT(CPUHas(sysreg)); + Emit(MSR | Rt(xt) | ImmSystemRegister(sysreg)); +} + + +void Assembler::cfinv() { + VIXL_ASSERT(CPUHas(CPUFeatures::kFlagM)); + Emit(CFINV); +} + + +void Assembler::axflag() { + VIXL_ASSERT(CPUHas(CPUFeatures::kAXFlag)); + Emit(AXFLAG); +} + + +void Assembler::xaflag() { + VIXL_ASSERT(CPUHas(CPUFeatures::kAXFlag)); + Emit(XAFLAG); +} + + +void Assembler::clrex(int imm4) { Emit(CLREX | CRm(imm4)); } + + +void Assembler::dmb(BarrierDomain domain, BarrierType type) { + Emit(DMB | ImmBarrierDomain(domain) | ImmBarrierType(type)); +} + + +void Assembler::dsb(BarrierDomain domain, BarrierType type) { + Emit(DSB | ImmBarrierDomain(domain) | ImmBarrierType(type)); +} + + +void Assembler::isb() { + Emit(ISB | ImmBarrierDomain(FullSystem) | ImmBarrierType(BarrierAll)); +} + +void Assembler::esb() { + VIXL_ASSERT(CPUHas(CPUFeatures::kRAS)); + hint(ESB); +} + +void Assembler::csdb() { hint(CSDB); } + +void Assembler::fmov(const VRegister& vd, double imm) { + VIXL_ASSERT(CPUHas(CPUFeatures::kFP)); + if (vd.IsScalar()) { + VIXL_ASSERT(vd.Is1D()); + Emit(FMOV_d_imm | Rd(vd) | ImmFP64(imm)); + } else { + VIXL_ASSERT(CPUHas(CPUFeatures::kNEON)); + VIXL_ASSERT(vd.Is2D()); + Instr op = NEONModifiedImmediate_MOVI | NEONModifiedImmediateOpBit; + Instr q = NEON_Q; + uint32_t encoded_imm = FP64ToImm8(imm); + Emit(q | op | ImmNEONabcdefgh(encoded_imm) | NEONCmode(0xf) | Rd(vd)); + } +} + + +void Assembler::fmov(const VRegister& vd, float imm) { + VIXL_ASSERT(CPUHas(CPUFeatures::kFP)); + if (vd.IsScalar()) { + VIXL_ASSERT(vd.Is1S()); + Emit(FMOV_s_imm | Rd(vd) | ImmFP32(imm)); + } else { + VIXL_ASSERT(CPUHas(CPUFeatures::kNEON)); + VIXL_ASSERT(vd.Is2S() || vd.Is4S()); + Instr op = NEONModifiedImmediate_MOVI; + Instr q = vd.Is4S() ? NEON_Q : 0; + uint32_t encoded_imm = FP32ToImm8(imm); + Emit(q | op | ImmNEONabcdefgh(encoded_imm) | NEONCmode(0xf) | Rd(vd)); + } +} + + +void Assembler::fmov(const VRegister& vd, Float16 imm) { + VIXL_ASSERT(CPUHas(CPUFeatures::kFP)); + if (vd.IsScalar()) { + VIXL_ASSERT(CPUHas(CPUFeatures::kFPHalf)); + VIXL_ASSERT(vd.Is1H()); + Emit(FMOV_h_imm | Rd(vd) | ImmFP16(imm)); + } else { + VIXL_ASSERT(CPUHas(CPUFeatures::kNEON, CPUFeatures::kNEONHalf)); + VIXL_ASSERT(vd.Is4H() || vd.Is8H()); + Instr q = vd.Is8H() ? NEON_Q : 0; + uint32_t encoded_imm = FP16ToImm8(imm); + Emit(q | NEONModifiedImmediate_FMOV | ImmNEONabcdefgh(encoded_imm) | + NEONCmode(0xf) | Rd(vd)); + } +} + + +void Assembler::fmov(const Register& rd, const VRegister& vn) { + VIXL_ASSERT(CPUHas(CPUFeatures::kFP)); + VIXL_ASSERT(vn.Is1H() || vn.Is1S() || vn.Is1D()); + VIXL_ASSERT((rd.GetSizeInBits() == vn.GetSizeInBits()) || vn.Is1H()); + FPIntegerConvertOp op; + switch (vn.GetSizeInBits()) { + case 16: + VIXL_ASSERT(CPUHas(CPUFeatures::kFPHalf)); + op = rd.Is64Bits() ? 
FMOV_xh : FMOV_wh; + break; + case 32: + op = FMOV_ws; + break; + default: + op = FMOV_xd; + } + Emit(op | Rd(rd) | Rn(vn)); +} + + +void Assembler::fmov(const VRegister& vd, const Register& rn) { + VIXL_ASSERT(CPUHas(CPUFeatures::kFP) || + (vd.Is1D() && CPUHas(CPUFeatures::kNEON))); + VIXL_ASSERT(vd.Is1H() || vd.Is1S() || vd.Is1D()); + VIXL_ASSERT((vd.GetSizeInBits() == rn.GetSizeInBits()) || vd.Is1H()); + FPIntegerConvertOp op; + switch (vd.GetSizeInBits()) { + case 16: + VIXL_ASSERT(CPUHas(CPUFeatures::kFPHalf)); + op = rn.Is64Bits() ? FMOV_hx : FMOV_hw; + break; + case 32: + op = FMOV_sw; + break; + default: + op = FMOV_dx; + } + Emit(op | Rd(vd) | Rn(rn)); +} + + +void Assembler::fmov(const VRegister& vd, const VRegister& vn) { + VIXL_ASSERT(CPUHas(CPUFeatures::kFP)); + if (vd.Is1H()) { + VIXL_ASSERT(CPUHas(CPUFeatures::kFPHalf)); + } + VIXL_ASSERT(vd.Is1H() || vd.Is1S() || vd.Is1D()); + VIXL_ASSERT(vd.IsSameFormat(vn)); + Emit(FPType(vd) | FMOV | Rd(vd) | Rn(vn)); +} + + +void Assembler::fmov(const VRegister& vd, int index, const Register& rn) { + VIXL_ASSERT(CPUHas(CPUFeatures::kNEON, CPUFeatures::kFP)); + VIXL_ASSERT((index == 1) && vd.Is1D() && rn.IsX()); + USE(index); + Emit(FMOV_d1_x | Rd(vd) | Rn(rn)); +} + + +void Assembler::fmov(const Register& rd, const VRegister& vn, int index) { + VIXL_ASSERT(CPUHas(CPUFeatures::kNEON, CPUFeatures::kFP)); + VIXL_ASSERT((index == 1) && vn.Is1D() && rd.IsX()); + USE(index); + Emit(FMOV_x_d1 | Rd(rd) | Rn(vn)); +} + + +void Assembler::fmadd(const VRegister& vd, + const VRegister& vn, + const VRegister& vm, + const VRegister& va) { + VIXL_ASSERT(CPUHas(CPUFeatures::kFP)); + FPDataProcessing3SourceOp op; + if (vd.Is1H()) { + VIXL_ASSERT(CPUHas(CPUFeatures::kFPHalf)); + op = FMADD_h; + } else if (vd.Is1S()) { + op = FMADD_s; + } else { + VIXL_ASSERT(vd.Is1D()); + op = FMADD_d; + } + FPDataProcessing3Source(vd, vn, vm, va, op); +} + + +void Assembler::fmsub(const VRegister& vd, + const VRegister& vn, + const VRegister& vm, + const VRegister& va) { + VIXL_ASSERT(CPUHas(CPUFeatures::kFP)); + FPDataProcessing3SourceOp op; + if (vd.Is1H()) { + VIXL_ASSERT(CPUHas(CPUFeatures::kFPHalf)); + op = FMSUB_h; + } else if (vd.Is1S()) { + op = FMSUB_s; + } else { + VIXL_ASSERT(vd.Is1D()); + op = FMSUB_d; + } + FPDataProcessing3Source(vd, vn, vm, va, op); +} + + +void Assembler::fnmadd(const VRegister& vd, + const VRegister& vn, + const VRegister& vm, + const VRegister& va) { + VIXL_ASSERT(CPUHas(CPUFeatures::kFP)); + FPDataProcessing3SourceOp op; + if (vd.Is1H()) { + VIXL_ASSERT(CPUHas(CPUFeatures::kFPHalf)); + op = FNMADD_h; + } else if (vd.Is1S()) { + op = FNMADD_s; + } else { + VIXL_ASSERT(vd.Is1D()); + op = FNMADD_d; + } + FPDataProcessing3Source(vd, vn, vm, va, op); +} + + +void Assembler::fnmsub(const VRegister& vd, + const VRegister& vn, + const VRegister& vm, + const VRegister& va) { + VIXL_ASSERT(CPUHas(CPUFeatures::kFP)); + FPDataProcessing3SourceOp op; + if (vd.Is1H()) { + VIXL_ASSERT(CPUHas(CPUFeatures::kFPHalf)); + op = FNMSUB_h; + } else if (vd.Is1S()) { + op = FNMSUB_s; + } else { + VIXL_ASSERT(vd.Is1D()); + op = FNMSUB_d; + } + FPDataProcessing3Source(vd, vn, vm, va, op); +} + + +void Assembler::fnmul(const VRegister& vd, + const VRegister& vn, + const VRegister& vm) { + VIXL_ASSERT(CPUHas(CPUFeatures::kFP)); + VIXL_ASSERT(AreSameSizeAndType(vd, vn, vm)); + Instr op; + if (vd.Is1H()) { + VIXL_ASSERT(CPUHas(CPUFeatures::kFPHalf)); + op = FNMUL_h; + } else if (vd.Is1S()) { + op = FNMUL_s; + } else { + VIXL_ASSERT(vd.Is1D()); + op = FNMUL_d; + } + 
Emit(FPType(vd) | op | Rm(vm) | Rn(vn) | Rd(vd)); +} + + +void Assembler::FPCompareMacro(const VRegister& vn, + double value, + FPTrapFlags trap) { + USE(value); + // Although the fcmp{e} instructions can strictly only take an immediate + // value of +0.0, we don't need to check for -0.0 because the sign of 0.0 + // doesn't affect the result of the comparison. + VIXL_ASSERT(value == 0.0); + VIXL_ASSERT(vn.Is1H() || vn.Is1S() || vn.Is1D()); + Instr op = (trap == EnableTrap) ? FCMPE_zero : FCMP_zero; + Emit(FPType(vn) | op | Rn(vn)); +} + + +void Assembler::FPCompareMacro(const VRegister& vn, + const VRegister& vm, + FPTrapFlags trap) { + VIXL_ASSERT(vn.Is1H() || vn.Is1S() || vn.Is1D()); + VIXL_ASSERT(vn.IsSameSizeAndType(vm)); + Instr op = (trap == EnableTrap) ? FCMPE : FCMP; + Emit(FPType(vn) | op | Rm(vm) | Rn(vn)); +} + + +void Assembler::fcmp(const VRegister& vn, const VRegister& vm) { + VIXL_ASSERT(CPUHas(CPUFeatures::kFP)); + if (vn.Is1H()) VIXL_ASSERT(CPUHas(CPUFeatures::kFPHalf)); + FPCompareMacro(vn, vm, DisableTrap); +} + + +void Assembler::fcmpe(const VRegister& vn, const VRegister& vm) { + VIXL_ASSERT(CPUHas(CPUFeatures::kFP)); + if (vn.Is1H()) VIXL_ASSERT(CPUHas(CPUFeatures::kFPHalf)); + FPCompareMacro(vn, vm, EnableTrap); +} + + +void Assembler::fcmp(const VRegister& vn, double value) { + VIXL_ASSERT(CPUHas(CPUFeatures::kFP)); + if (vn.Is1H()) VIXL_ASSERT(CPUHas(CPUFeatures::kFPHalf)); + FPCompareMacro(vn, value, DisableTrap); +} + + +void Assembler::fcmpe(const VRegister& vn, double value) { + VIXL_ASSERT(CPUHas(CPUFeatures::kFP)); + if (vn.Is1H()) VIXL_ASSERT(CPUHas(CPUFeatures::kFPHalf)); + FPCompareMacro(vn, value, EnableTrap); +} + + +void Assembler::FPCCompareMacro(const VRegister& vn, + const VRegister& vm, + StatusFlags nzcv, + Condition cond, + FPTrapFlags trap) { + VIXL_ASSERT(vn.Is1H() || vn.Is1S() || vn.Is1D()); + VIXL_ASSERT(vn.IsSameSizeAndType(vm)); + Instr op = (trap == EnableTrap) ? FCCMPE : FCCMP; + Emit(FPType(vn) | op | Rm(vm) | Cond(cond) | Rn(vn) | Nzcv(nzcv)); +} + +void Assembler::fccmp(const VRegister& vn, + const VRegister& vm, + StatusFlags nzcv, + Condition cond) { + VIXL_ASSERT(CPUHas(CPUFeatures::kFP)); + if (vn.Is1H()) VIXL_ASSERT(CPUHas(CPUFeatures::kFPHalf)); + FPCCompareMacro(vn, vm, nzcv, cond, DisableTrap); +} + + +void Assembler::fccmpe(const VRegister& vn, + const VRegister& vm, + StatusFlags nzcv, + Condition cond) { + VIXL_ASSERT(CPUHas(CPUFeatures::kFP)); + if (vn.Is1H()) VIXL_ASSERT(CPUHas(CPUFeatures::kFPHalf)); + FPCCompareMacro(vn, vm, nzcv, cond, EnableTrap); +} + + +void Assembler::fcsel(const VRegister& vd, + const VRegister& vn, + const VRegister& vm, + Condition cond) { + VIXL_ASSERT(CPUHas(CPUFeatures::kFP)); + if (vd.Is1H()) VIXL_ASSERT(CPUHas(CPUFeatures::kFPHalf)); + VIXL_ASSERT(vd.Is1H() || vd.Is1S() || vd.Is1D()); + VIXL_ASSERT(AreSameFormat(vd, vn, vm)); + Emit(FPType(vd) | FCSEL | Rm(vm) | Cond(cond) | Rn(vn) | Rd(vd)); +} + + +void Assembler::fcvt(const VRegister& vd, const VRegister& vn) { + VIXL_ASSERT(CPUHas(CPUFeatures::kFP)); + FPDataProcessing1SourceOp op; + // The half-precision variants belong to base FP, and do not require kFPHalf. + if (vd.Is1D()) { + VIXL_ASSERT(vn.Is1S() || vn.Is1H()); + op = vn.Is1S() ? FCVT_ds : FCVT_dh; + } else if (vd.Is1S()) { + VIXL_ASSERT(vn.Is1D() || vn.Is1H()); + op = vn.Is1D() ? FCVT_sd : FCVT_sh; + } else { + VIXL_ASSERT(vd.Is1H()); + VIXL_ASSERT(vn.Is1D() || vn.Is1S()); + op = vn.Is1D() ? 
FCVT_hd : FCVT_hs; + } + FPDataProcessing1Source(vd, vn, op); +} + + +void Assembler::fcvtl(const VRegister& vd, const VRegister& vn) { + VIXL_ASSERT(CPUHas(CPUFeatures::kFP, CPUFeatures::kNEON)); + VIXL_ASSERT((vd.Is4S() && vn.Is4H()) || (vd.Is2D() && vn.Is2S())); + // The half-precision variants belong to base FP, and do not require kFPHalf. + Instr format = vd.Is2D() ? (1 << NEONSize_offset) : 0; + Emit(format | NEON_FCVTL | Rn(vn) | Rd(vd)); +} + + +void Assembler::fcvtl2(const VRegister& vd, const VRegister& vn) { + VIXL_ASSERT(CPUHas(CPUFeatures::kFP, CPUFeatures::kNEON)); + VIXL_ASSERT((vd.Is4S() && vn.Is8H()) || (vd.Is2D() && vn.Is4S())); + // The half-precision variants belong to base FP, and do not require kFPHalf. + Instr format = vd.Is2D() ? (1 << NEONSize_offset) : 0; + Emit(NEON_Q | format | NEON_FCVTL | Rn(vn) | Rd(vd)); +} + + +void Assembler::fcvtn(const VRegister& vd, const VRegister& vn) { + VIXL_ASSERT(CPUHas(CPUFeatures::kFP, CPUFeatures::kNEON)); + VIXL_ASSERT((vn.Is4S() && vd.Is4H()) || (vn.Is2D() && vd.Is2S())); + // The half-precision variants belong to base FP, and do not require kFPHalf. + Instr format = vn.Is2D() ? (1 << NEONSize_offset) : 0; + Emit(format | NEON_FCVTN | Rn(vn) | Rd(vd)); +} + + +void Assembler::fcvtn2(const VRegister& vd, const VRegister& vn) { + VIXL_ASSERT(CPUHas(CPUFeatures::kFP, CPUFeatures::kNEON)); + VIXL_ASSERT((vn.Is4S() && vd.Is8H()) || (vn.Is2D() && vd.Is4S())); + // The half-precision variants belong to base FP, and do not require kFPHalf. + Instr format = vn.Is2D() ? (1 << NEONSize_offset) : 0; + Emit(NEON_Q | format | NEON_FCVTN | Rn(vn) | Rd(vd)); +} + + +void Assembler::fcvtxn(const VRegister& vd, const VRegister& vn) { + VIXL_ASSERT(CPUHas(CPUFeatures::kFP, CPUFeatures::kNEON)); + Instr format = 1 << NEONSize_offset; + if (vd.IsScalar()) { + VIXL_ASSERT(vd.Is1S() && vn.Is1D()); + Emit(format | NEON_FCVTXN_scalar | Rn(vn) | Rd(vd)); + } else { + VIXL_ASSERT(vd.Is2S() && vn.Is2D()); + Emit(format | NEON_FCVTXN | Rn(vn) | Rd(vd)); + } +} + + +void Assembler::fcvtxn2(const VRegister& vd, const VRegister& vn) { + VIXL_ASSERT(CPUHas(CPUFeatures::kFP, CPUFeatures::kNEON)); + VIXL_ASSERT(vd.Is4S() && vn.Is2D()); + Instr format = 1 << NEONSize_offset; + Emit(NEON_Q | format | NEON_FCVTXN | Rn(vn) | Rd(vd)); +} + +void Assembler::fjcvtzs(const Register& rd, const VRegister& vn) { + VIXL_ASSERT(CPUHas(CPUFeatures::kFP, CPUFeatures::kJSCVT)); + VIXL_ASSERT(rd.IsW() && vn.Is1D()); + Emit(FJCVTZS | Rn(vn) | Rd(rd)); +} + + +void Assembler::NEONFPConvertToInt(const Register& rd, + const VRegister& vn, + Instr op) { + Emit(SF(rd) | FPType(vn) | op | Rn(vn) | Rd(rd)); +} + + +void Assembler::NEONFPConvertToInt(const VRegister& vd, + const VRegister& vn, + Instr op) { + if (vn.IsScalar()) { + VIXL_ASSERT((vd.Is1S() && vn.Is1S()) || (vd.Is1D() && vn.Is1D())); + op |= static_cast(NEON_Q) | static_cast(NEONScalar); + } + Emit(FPFormat(vn) | op | Rn(vn) | Rd(vd)); +} + + +void Assembler::NEONFP16ConvertToInt(const VRegister& vd, + const VRegister& vn, + Instr op) { + VIXL_ASSERT(AreSameFormat(vd, vn)); + VIXL_ASSERT(vn.IsLaneSizeH()); + if (vn.IsScalar()) { + op |= static_cast(NEON_Q) | static_cast(NEONScalar); + } else if (vn.Is8H()) { + op |= static_cast(NEON_Q); + } + Emit(op | Rn(vn) | Rd(vd)); +} + + +#define NEON_FP2REGMISC_FCVT_LIST(V) \ + V(fcvtnu, NEON_FCVTNU, FCVTNU) \ + V(fcvtns, NEON_FCVTNS, FCVTNS) \ + V(fcvtpu, NEON_FCVTPU, FCVTPU) \ + V(fcvtps, NEON_FCVTPS, FCVTPS) \ + V(fcvtmu, NEON_FCVTMU, FCVTMU) \ + V(fcvtms, NEON_FCVTMS, FCVTMS) \ 
+ V(fcvtau, NEON_FCVTAU, FCVTAU) \ + V(fcvtas, NEON_FCVTAS, FCVTAS) + +#define VIXL_DEFINE_ASM_FUNC(FN, VEC_OP, SCA_OP) \ + void Assembler::FN(const Register& rd, const VRegister& vn) { \ + VIXL_ASSERT(CPUHas(CPUFeatures::kFP)); \ + if (vn.IsH()) VIXL_ASSERT(CPUHas(CPUFeatures::kFPHalf)); \ + NEONFPConvertToInt(rd, vn, SCA_OP); \ + } \ + void Assembler::FN(const VRegister& vd, const VRegister& vn) { \ + VIXL_ASSERT(CPUHas(CPUFeatures::kFP, CPUFeatures::kNEON)); \ + if (vd.IsLaneSizeH()) { \ + VIXL_ASSERT(CPUHas(CPUFeatures::kNEONHalf)); \ + NEONFP16ConvertToInt(vd, vn, VEC_OP##_H); \ + } else { \ + NEONFPConvertToInt(vd, vn, VEC_OP); \ + } \ + } +NEON_FP2REGMISC_FCVT_LIST(VIXL_DEFINE_ASM_FUNC) +#undef VIXL_DEFINE_ASM_FUNC + + +void Assembler::fcvtzs(const Register& rd, const VRegister& vn, int fbits) { + VIXL_ASSERT(CPUHas(CPUFeatures::kFP)); + if (vn.Is1H()) VIXL_ASSERT(CPUHas(CPUFeatures::kFPHalf)); + VIXL_ASSERT(vn.Is1H() || vn.Is1S() || vn.Is1D()); + VIXL_ASSERT((fbits >= 0) && (fbits <= rd.GetSizeInBits())); + if (fbits == 0) { + Emit(SF(rd) | FPType(vn) | FCVTZS | Rn(vn) | Rd(rd)); + } else { + Emit(SF(rd) | FPType(vn) | FCVTZS_fixed | FPScale(64 - fbits) | Rn(vn) | + Rd(rd)); + } +} + + +void Assembler::fcvtzs(const VRegister& vd, const VRegister& vn, int fbits) { + // This form is a NEON scalar FP instruction. + VIXL_ASSERT(CPUHas(CPUFeatures::kFP, CPUFeatures::kNEON)); + if (vn.IsLaneSizeH()) VIXL_ASSERT(CPUHas(CPUFeatures::kNEONHalf)); + VIXL_ASSERT(fbits >= 0); + if (fbits == 0) { + if (vd.IsLaneSizeH()) { + NEONFP2RegMiscFP16(vd, vn, NEON_FCVTZS_H); + } else { + NEONFP2RegMisc(vd, vn, NEON_FCVTZS); + } + } else { + VIXL_ASSERT(vd.Is1D() || vd.Is1S() || vd.Is2D() || vd.Is2S() || vd.Is4S() || + vd.Is1H() || vd.Is4H() || vd.Is8H()); + NEONShiftRightImmediate(vd, vn, fbits, NEON_FCVTZS_imm); + } +} + + +void Assembler::fcvtzu(const Register& rd, const VRegister& vn, int fbits) { + VIXL_ASSERT(CPUHas(CPUFeatures::kFP)); + if (vn.Is1H()) VIXL_ASSERT(CPUHas(CPUFeatures::kFPHalf)); + VIXL_ASSERT(vn.Is1H() || vn.Is1S() || vn.Is1D()); + VIXL_ASSERT((fbits >= 0) && (fbits <= rd.GetSizeInBits())); + if (fbits == 0) { + Emit(SF(rd) | FPType(vn) | FCVTZU | Rn(vn) | Rd(rd)); + } else { + Emit(SF(rd) | FPType(vn) | FCVTZU_fixed | FPScale(64 - fbits) | Rn(vn) | + Rd(rd)); + } +} + + +void Assembler::fcvtzu(const VRegister& vd, const VRegister& vn, int fbits) { + // This form is a NEON scalar FP instruction. + VIXL_ASSERT(CPUHas(CPUFeatures::kFP, CPUFeatures::kNEON)); + if (vn.IsLaneSizeH()) VIXL_ASSERT(CPUHas(CPUFeatures::kNEONHalf)); + VIXL_ASSERT(fbits >= 0); + if (fbits == 0) { + if (vd.IsLaneSizeH()) { + NEONFP2RegMiscFP16(vd, vn, NEON_FCVTZU_H); + } else { + NEONFP2RegMisc(vd, vn, NEON_FCVTZU); + } + } else { + VIXL_ASSERT(vd.Is1D() || vd.Is1S() || vd.Is2D() || vd.Is2S() || vd.Is4S() || + vd.Is1H() || vd.Is4H() || vd.Is8H()); + NEONShiftRightImmediate(vd, vn, fbits, NEON_FCVTZU_imm); + } +} + +void Assembler::ucvtf(const VRegister& vd, const VRegister& vn, int fbits) { + // This form is a NEON scalar FP instruction. 
+ VIXL_ASSERT(CPUHas(CPUFeatures::kFP, CPUFeatures::kNEON)); + if (vn.IsLaneSizeH()) VIXL_ASSERT(CPUHas(CPUFeatures::kNEONHalf)); + VIXL_ASSERT(fbits >= 0); + if (fbits == 0) { + if (vd.IsLaneSizeH()) { + NEONFP2RegMiscFP16(vd, vn, NEON_UCVTF_H); + } else { + NEONFP2RegMisc(vd, vn, NEON_UCVTF); + } + } else { + VIXL_ASSERT(vd.Is1D() || vd.Is1S() || vd.Is2D() || vd.Is2S() || vd.Is4S() || + vd.Is1H() || vd.Is4H() || vd.Is8H()); + NEONShiftRightImmediate(vd, vn, fbits, NEON_UCVTF_imm); + } +} + +void Assembler::scvtf(const VRegister& vd, const VRegister& vn, int fbits) { + // This form is a NEON scalar FP instruction. + VIXL_ASSERT(CPUHas(CPUFeatures::kFP, CPUFeatures::kNEON)); + if (vn.IsLaneSizeH()) VIXL_ASSERT(CPUHas(CPUFeatures::kNEONHalf)); + VIXL_ASSERT(fbits >= 0); + if (fbits == 0) { + if (vd.IsLaneSizeH()) { + NEONFP2RegMiscFP16(vd, vn, NEON_SCVTF_H); + } else { + NEONFP2RegMisc(vd, vn, NEON_SCVTF); + } + } else { + VIXL_ASSERT(vd.Is1D() || vd.Is1S() || vd.Is2D() || vd.Is2S() || vd.Is4S() || + vd.Is1H() || vd.Is4H() || vd.Is8H()); + NEONShiftRightImmediate(vd, vn, fbits, NEON_SCVTF_imm); + } +} + + +void Assembler::scvtf(const VRegister& vd, const Register& rn, int fbits) { + VIXL_ASSERT(CPUHas(CPUFeatures::kFP)); + if (vd.Is1H()) VIXL_ASSERT(CPUHas(CPUFeatures::kFPHalf)); + VIXL_ASSERT(vd.Is1H() || vd.Is1S() || vd.Is1D()); + VIXL_ASSERT(fbits >= 0); + if (fbits == 0) { + Emit(SF(rn) | FPType(vd) | SCVTF | Rn(rn) | Rd(vd)); + } else { + Emit(SF(rn) | FPType(vd) | SCVTF_fixed | FPScale(64 - fbits) | Rn(rn) | + Rd(vd)); + } +} + + +void Assembler::ucvtf(const VRegister& vd, const Register& rn, int fbits) { + VIXL_ASSERT(CPUHas(CPUFeatures::kFP)); + if (vd.Is1H()) VIXL_ASSERT(CPUHas(CPUFeatures::kFPHalf)); + VIXL_ASSERT(vd.Is1H() || vd.Is1S() || vd.Is1D()); + VIXL_ASSERT(fbits >= 0); + if (fbits == 0) { + Emit(SF(rn) | FPType(vd) | UCVTF | Rn(rn) | Rd(vd)); + } else { + Emit(SF(rn) | FPType(vd) | UCVTF_fixed | FPScale(64 - fbits) | Rn(rn) | + Rd(vd)); + } +} + + +void Assembler::NEON3Same(const VRegister& vd, + const VRegister& vn, + const VRegister& vm, + NEON3SameOp vop) { + VIXL_ASSERT(AreSameFormat(vd, vn, vm)); + VIXL_ASSERT(vd.IsVector() || !vd.IsQ()); + + Instr format, op = vop; + if (vd.IsScalar()) { + op |= static_cast(NEON_Q) | static_cast(NEONScalar); + format = SFormat(vd); + } else { + format = VFormat(vd); + } + + Emit(format | op | Rm(vm) | Rn(vn) | Rd(vd)); +} + + +void Assembler::NEONFP3Same(const VRegister& vd, + const VRegister& vn, + const VRegister& vm, + Instr op) { + VIXL_ASSERT(AreSameFormat(vd, vn, vm)); + Emit(FPFormat(vd) | op | Rm(vm) | Rn(vn) | Rd(vd)); +} + + +void Assembler::NEON3SameFP16(const VRegister& vd, + const VRegister& vn, + const VRegister& vm, + Instr op) { + VIXL_ASSERT(AreSameFormat(vd, vn, vm)); + VIXL_ASSERT(vd.GetLaneSizeInBytes() == kHRegSizeInBytes); + if (vd.Is8H()) op |= NEON_Q; + Emit(op | Rm(vm) | Rn(vn) | Rd(vd)); +} + + +// clang-format off +#define NEON_FP2REGMISC_LIST(V) \ + V(fabs, NEON_FABS, FABS, FABS_h) \ + V(fneg, NEON_FNEG, FNEG, FNEG_h) \ + V(fsqrt, NEON_FSQRT, FSQRT, FSQRT_h) \ + V(frintn, NEON_FRINTN, FRINTN, FRINTN_h) \ + V(frinta, NEON_FRINTA, FRINTA, FRINTA_h) \ + V(frintp, NEON_FRINTP, FRINTP, FRINTP_h) \ + V(frintm, NEON_FRINTM, FRINTM, FRINTM_h) \ + V(frintx, NEON_FRINTX, FRINTX, FRINTX_h) \ + V(frintz, NEON_FRINTZ, FRINTZ, FRINTZ_h) \ + V(frinti, NEON_FRINTI, FRINTI, FRINTI_h) \ + V(frsqrte, NEON_FRSQRTE, NEON_FRSQRTE_scalar, NEON_FRSQRTE_H_scalar) \ + V(frecpe, NEON_FRECPE, NEON_FRECPE_scalar, 
NEON_FRECPE_H_scalar) +// clang-format on + +#define VIXL_DEFINE_ASM_FUNC(FN, VEC_OP, SCA_OP, SCA_OP_H) \ + void Assembler::FN(const VRegister& vd, const VRegister& vn) { \ + VIXL_ASSERT(CPUHas(CPUFeatures::kFP)); \ + Instr op; \ + if (vd.IsScalar()) { \ + if (vd.Is1H()) { \ + if ((static_cast(SCA_OP_H) & \ + static_cast(NEONScalar2RegMiscFP16FMask)) == \ + static_cast(NEONScalar2RegMiscFP16Fixed)) { \ + VIXL_ASSERT(CPUHas(CPUFeatures::kNEON, CPUFeatures::kNEONHalf)); \ + } else { \ + VIXL_ASSERT(CPUHas(CPUFeatures::kFPHalf)); \ + } \ + op = SCA_OP_H; \ + } else { \ + if ((static_cast(SCA_OP) & \ + static_cast(NEONScalar2RegMiscFMask)) == \ + static_cast(NEONScalar2RegMiscFixed)) { \ + VIXL_ASSERT(CPUHas(CPUFeatures::kNEON)); \ + } \ + VIXL_ASSERT(vd.Is1S() || vd.Is1D()); \ + op = SCA_OP; \ + } \ + } else { \ + VIXL_ASSERT(CPUHas(CPUFeatures::kNEON)); \ + VIXL_ASSERT(vd.Is4H() || vd.Is8H() || vd.Is2S() || vd.Is2D() || \ + vd.Is4S()); \ + if (vd.IsLaneSizeH()) { \ + VIXL_ASSERT(CPUHas(CPUFeatures::kNEONHalf)); \ + op = VEC_OP##_H; \ + if (vd.Is8H()) { \ + op |= static_cast(NEON_Q); \ + } \ + } else { \ + op = VEC_OP; \ + } \ + } \ + if (vd.IsLaneSizeH()) { \ + NEONFP2RegMiscFP16(vd, vn, op); \ + } else { \ + NEONFP2RegMisc(vd, vn, op); \ + } \ + } +NEON_FP2REGMISC_LIST(VIXL_DEFINE_ASM_FUNC) +#undef VIXL_DEFINE_ASM_FUNC + +// clang-format off +#define NEON_FP2REGMISC_V85_LIST(V) \ + V(frint32x, NEON_FRINT32X, FRINT32X) \ + V(frint32z, NEON_FRINT32Z, FRINT32Z) \ + V(frint64x, NEON_FRINT64X, FRINT64X) \ + V(frint64z, NEON_FRINT64Z, FRINT64Z) +// clang-format on + +#define VIXL_DEFINE_ASM_FUNC(FN, VEC_OP, SCA_OP) \ + void Assembler::FN(const VRegister& vd, const VRegister& vn) { \ + VIXL_ASSERT(CPUHas(CPUFeatures::kFP, CPUFeatures::kFrintToFixedSizedInt)); \ + Instr op; \ + if (vd.IsScalar()) { \ + VIXL_ASSERT(vd.Is1S() || vd.Is1D()); \ + op = SCA_OP; \ + } else { \ + VIXL_ASSERT(CPUHas(CPUFeatures::kNEON)); \ + VIXL_ASSERT(vd.Is2S() || vd.Is2D() || vd.Is4S()); \ + op = VEC_OP; \ + } \ + NEONFP2RegMisc(vd, vn, op); \ + } +NEON_FP2REGMISC_V85_LIST(VIXL_DEFINE_ASM_FUNC) +#undef VIXL_DEFINE_ASM_FUNC + +void Assembler::NEONFP2RegMiscFP16(const VRegister& vd, + const VRegister& vn, + Instr op) { + VIXL_ASSERT(AreSameFormat(vd, vn)); + Emit(op | Rn(vn) | Rd(vd)); +} + + +void Assembler::NEONFP2RegMisc(const VRegister& vd, + const VRegister& vn, + Instr op) { + VIXL_ASSERT(AreSameFormat(vd, vn)); + Emit(FPFormat(vd) | op | Rn(vn) | Rd(vd)); +} + + +void Assembler::NEON2RegMisc(const VRegister& vd, + const VRegister& vn, + NEON2RegMiscOp vop, + int value) { + VIXL_ASSERT(AreSameFormat(vd, vn)); + VIXL_ASSERT(value == 0); + USE(value); + + Instr format, op = vop; + if (vd.IsScalar()) { + op |= static_cast(NEON_Q) | static_cast(NEONScalar); + format = SFormat(vd); + } else { + format = VFormat(vd); + } + + Emit(format | op | Rn(vn) | Rd(vd)); +} + + +void Assembler::cmeq(const VRegister& vd, const VRegister& vn, int value) { + VIXL_ASSERT(CPUHas(CPUFeatures::kNEON)); + VIXL_ASSERT(vd.IsVector() || vd.Is1D()); + NEON2RegMisc(vd, vn, NEON_CMEQ_zero, value); +} + + +void Assembler::cmge(const VRegister& vd, const VRegister& vn, int value) { + VIXL_ASSERT(CPUHas(CPUFeatures::kNEON)); + VIXL_ASSERT(vd.IsVector() || vd.Is1D()); + NEON2RegMisc(vd, vn, NEON_CMGE_zero, value); +} + + +void Assembler::cmgt(const VRegister& vd, const VRegister& vn, int value) { + VIXL_ASSERT(CPUHas(CPUFeatures::kNEON)); + VIXL_ASSERT(vd.IsVector() || vd.Is1D()); + NEON2RegMisc(vd, vn, NEON_CMGT_zero, value); +} + + +void 
Assembler::cmle(const VRegister& vd, const VRegister& vn, int value) { + VIXL_ASSERT(vd.IsVector() || vd.Is1D()); + VIXL_ASSERT(CPUHas(CPUFeatures::kNEON)); + NEON2RegMisc(vd, vn, NEON_CMLE_zero, value); +} + + +void Assembler::cmlt(const VRegister& vd, const VRegister& vn, int value) { + VIXL_ASSERT(CPUHas(CPUFeatures::kNEON)); + VIXL_ASSERT(vd.IsVector() || vd.Is1D()); + NEON2RegMisc(vd, vn, NEON_CMLT_zero, value); +} + + +void Assembler::shll(const VRegister& vd, const VRegister& vn, int shift) { + USE(shift); + VIXL_ASSERT(CPUHas(CPUFeatures::kNEON)); + VIXL_ASSERT((vd.Is8H() && vn.Is8B() && shift == 8) || + (vd.Is4S() && vn.Is4H() && shift == 16) || + (vd.Is2D() && vn.Is2S() && shift == 32)); + Emit(VFormat(vn) | NEON_SHLL | Rn(vn) | Rd(vd)); +} + + +void Assembler::shll2(const VRegister& vd, const VRegister& vn, int shift) { + USE(shift); + VIXL_ASSERT(CPUHas(CPUFeatures::kNEON)); + VIXL_ASSERT((vd.Is8H() && vn.Is16B() && shift == 8) || + (vd.Is4S() && vn.Is8H() && shift == 16) || + (vd.Is2D() && vn.Is4S() && shift == 32)); + Emit(VFormat(vn) | NEON_SHLL | Rn(vn) | Rd(vd)); +} + + +void Assembler::NEONFP2RegMisc(const VRegister& vd, + const VRegister& vn, + NEON2RegMiscOp vop, + double value) { + VIXL_ASSERT(AreSameFormat(vd, vn)); + VIXL_ASSERT(value == 0.0); + USE(value); + + Instr op = vop; + if (vd.IsScalar()) { + VIXL_ASSERT(vd.Is1S() || vd.Is1D()); + op |= static_cast(NEON_Q) | static_cast(NEONScalar); + } else { + VIXL_ASSERT(vd.Is2S() || vd.Is2D() || vd.Is4S()); + } + + Emit(FPFormat(vd) | op | Rn(vn) | Rd(vd)); +} + + +void Assembler::NEONFP2RegMiscFP16(const VRegister& vd, + const VRegister& vn, + NEON2RegMiscFP16Op vop, + double value) { + VIXL_ASSERT(AreSameFormat(vd, vn)); + VIXL_ASSERT(value == 0.0); + USE(value); + + Instr op = vop; + if (vd.IsScalar()) { + VIXL_ASSERT(vd.Is1H()); + op |= static_cast(NEON_Q) | static_cast(NEONScalar); + } else { + VIXL_ASSERT(vd.Is4H() || vd.Is8H()); + if (vd.Is8H()) { + op |= static_cast(NEON_Q); + } + } + + Emit(op | Rn(vn) | Rd(vd)); +} + + +void Assembler::fcmeq(const VRegister& vd, const VRegister& vn, double value) { + VIXL_ASSERT(CPUHas(CPUFeatures::kFP, CPUFeatures::kNEON)); + if (vd.IsLaneSizeH()) { + VIXL_ASSERT(CPUHas(CPUFeatures::kNEONHalf)); + NEONFP2RegMiscFP16(vd, vn, NEON_FCMEQ_H_zero, value); + } else { + NEONFP2RegMisc(vd, vn, NEON_FCMEQ_zero, value); + } +} + + +void Assembler::fcmge(const VRegister& vd, const VRegister& vn, double value) { + VIXL_ASSERT(CPUHas(CPUFeatures::kFP, CPUFeatures::kNEON)); + if (vd.IsLaneSizeH()) { + VIXL_ASSERT(CPUHas(CPUFeatures::kNEONHalf)); + NEONFP2RegMiscFP16(vd, vn, NEON_FCMGE_H_zero, value); + } else { + NEONFP2RegMisc(vd, vn, NEON_FCMGE_zero, value); + } +} + + +void Assembler::fcmgt(const VRegister& vd, const VRegister& vn, double value) { + VIXL_ASSERT(CPUHas(CPUFeatures::kFP, CPUFeatures::kNEON)); + if (vd.IsLaneSizeH()) { + VIXL_ASSERT(CPUHas(CPUFeatures::kNEONHalf)); + NEONFP2RegMiscFP16(vd, vn, NEON_FCMGT_H_zero, value); + } else { + NEONFP2RegMisc(vd, vn, NEON_FCMGT_zero, value); + } +} + + +void Assembler::fcmle(const VRegister& vd, const VRegister& vn, double value) { + VIXL_ASSERT(CPUHas(CPUFeatures::kFP, CPUFeatures::kNEON)); + if (vd.IsLaneSizeH()) { + VIXL_ASSERT(CPUHas(CPUFeatures::kNEONHalf)); + NEONFP2RegMiscFP16(vd, vn, NEON_FCMLE_H_zero, value); + } else { + NEONFP2RegMisc(vd, vn, NEON_FCMLE_zero, value); + } +} + + +void Assembler::fcmlt(const VRegister& vd, const VRegister& vn, double value) { + VIXL_ASSERT(CPUHas(CPUFeatures::kFP, CPUFeatures::kNEON)); + if 
(vd.IsLaneSizeH()) { + VIXL_ASSERT(CPUHas(CPUFeatures::kNEONHalf)); + NEONFP2RegMiscFP16(vd, vn, NEON_FCMLT_H_zero, value); + } else { + NEONFP2RegMisc(vd, vn, NEON_FCMLT_zero, value); + } +} + + +void Assembler::frecpx(const VRegister& vd, const VRegister& vn) { + VIXL_ASSERT(CPUHas(CPUFeatures::kFP, CPUFeatures::kNEON)); + VIXL_ASSERT(vd.IsScalar()); + VIXL_ASSERT(AreSameFormat(vd, vn)); + Instr op; + if (vd.Is1H()) { + VIXL_ASSERT(CPUHas(CPUFeatures::kNEONHalf)); + op = NEON_FRECPX_H_scalar; + } else { + VIXL_ASSERT(vd.Is1S() || vd.Is1D()); + op = NEON_FRECPX_scalar; + } + Emit(FPFormat(vd) | op | Rn(vn) | Rd(vd)); +} + + +// clang-format off +#define NEON_3SAME_LIST(V) \ + V(add, NEON_ADD, vd.IsVector() || vd.Is1D()) \ + V(addp, NEON_ADDP, vd.IsVector() || vd.Is1D()) \ + V(sub, NEON_SUB, vd.IsVector() || vd.Is1D()) \ + V(cmeq, NEON_CMEQ, vd.IsVector() || vd.Is1D()) \ + V(cmge, NEON_CMGE, vd.IsVector() || vd.Is1D()) \ + V(cmgt, NEON_CMGT, vd.IsVector() || vd.Is1D()) \ + V(cmhi, NEON_CMHI, vd.IsVector() || vd.Is1D()) \ + V(cmhs, NEON_CMHS, vd.IsVector() || vd.Is1D()) \ + V(cmtst, NEON_CMTST, vd.IsVector() || vd.Is1D()) \ + V(sshl, NEON_SSHL, vd.IsVector() || vd.Is1D()) \ + V(ushl, NEON_USHL, vd.IsVector() || vd.Is1D()) \ + V(srshl, NEON_SRSHL, vd.IsVector() || vd.Is1D()) \ + V(urshl, NEON_URSHL, vd.IsVector() || vd.Is1D()) \ + V(sqdmulh, NEON_SQDMULH, vd.IsLaneSizeH() || vd.IsLaneSizeS()) \ + V(sqrdmulh, NEON_SQRDMULH, vd.IsLaneSizeH() || vd.IsLaneSizeS()) \ + V(shadd, NEON_SHADD, vd.IsVector() && !vd.IsLaneSizeD()) \ + V(uhadd, NEON_UHADD, vd.IsVector() && !vd.IsLaneSizeD()) \ + V(srhadd, NEON_SRHADD, vd.IsVector() && !vd.IsLaneSizeD()) \ + V(urhadd, NEON_URHADD, vd.IsVector() && !vd.IsLaneSizeD()) \ + V(shsub, NEON_SHSUB, vd.IsVector() && !vd.IsLaneSizeD()) \ + V(uhsub, NEON_UHSUB, vd.IsVector() && !vd.IsLaneSizeD()) \ + V(smax, NEON_SMAX, vd.IsVector() && !vd.IsLaneSizeD()) \ + V(smaxp, NEON_SMAXP, vd.IsVector() && !vd.IsLaneSizeD()) \ + V(smin, NEON_SMIN, vd.IsVector() && !vd.IsLaneSizeD()) \ + V(sminp, NEON_SMINP, vd.IsVector() && !vd.IsLaneSizeD()) \ + V(umax, NEON_UMAX, vd.IsVector() && !vd.IsLaneSizeD()) \ + V(umaxp, NEON_UMAXP, vd.IsVector() && !vd.IsLaneSizeD()) \ + V(umin, NEON_UMIN, vd.IsVector() && !vd.IsLaneSizeD()) \ + V(uminp, NEON_UMINP, vd.IsVector() && !vd.IsLaneSizeD()) \ + V(saba, NEON_SABA, vd.IsVector() && !vd.IsLaneSizeD()) \ + V(sabd, NEON_SABD, vd.IsVector() && !vd.IsLaneSizeD()) \ + V(uaba, NEON_UABA, vd.IsVector() && !vd.IsLaneSizeD()) \ + V(uabd, NEON_UABD, vd.IsVector() && !vd.IsLaneSizeD()) \ + V(mla, NEON_MLA, vd.IsVector() && !vd.IsLaneSizeD()) \ + V(mls, NEON_MLS, vd.IsVector() && !vd.IsLaneSizeD()) \ + V(mul, NEON_MUL, vd.IsVector() && !vd.IsLaneSizeD()) \ + V(and_, NEON_AND, vd.Is8B() || vd.Is16B()) \ + V(orr, NEON_ORR, vd.Is8B() || vd.Is16B()) \ + V(orn, NEON_ORN, vd.Is8B() || vd.Is16B()) \ + V(eor, NEON_EOR, vd.Is8B() || vd.Is16B()) \ + V(bic, NEON_BIC, vd.Is8B() || vd.Is16B()) \ + V(bit, NEON_BIT, vd.Is8B() || vd.Is16B()) \ + V(bif, NEON_BIF, vd.Is8B() || vd.Is16B()) \ + V(bsl, NEON_BSL, vd.Is8B() || vd.Is16B()) \ + V(pmul, NEON_PMUL, vd.Is8B() || vd.Is16B()) \ + V(uqadd, NEON_UQADD, true) \ + V(sqadd, NEON_SQADD, true) \ + V(uqsub, NEON_UQSUB, true) \ + V(sqsub, NEON_SQSUB, true) \ + V(sqshl, NEON_SQSHL, true) \ + V(uqshl, NEON_UQSHL, true) \ + V(sqrshl, NEON_SQRSHL, true) \ + V(uqrshl, NEON_UQRSHL, true) +// clang-format on + +#define VIXL_DEFINE_ASM_FUNC(FN, OP, AS) \ + void Assembler::FN(const VRegister& vd, \ + const VRegister& vn, \ + const 
VRegister& vm) { \ + VIXL_ASSERT(CPUHas(CPUFeatures::kNEON)); \ + VIXL_ASSERT(AS); \ + NEON3Same(vd, vn, vm, OP); \ + } +NEON_3SAME_LIST(VIXL_DEFINE_ASM_FUNC) +#undef VIXL_DEFINE_ASM_FUNC + +// clang-format off +#define NEON_FP3SAME_OP_LIST(V) \ + V(fmulx, NEON_FMULX, NEON_FMULX_scalar, NEON_FMULX_H_scalar) \ + V(frecps, NEON_FRECPS, NEON_FRECPS_scalar, NEON_FRECPS_H_scalar) \ + V(frsqrts, NEON_FRSQRTS, NEON_FRSQRTS_scalar, NEON_FRSQRTS_H_scalar) \ + V(fabd, NEON_FABD, NEON_FABD_scalar, NEON_FABD_H_scalar) \ + V(fmla, NEON_FMLA, 0, 0) \ + V(fmls, NEON_FMLS, 0, 0) \ + V(facge, NEON_FACGE, NEON_FACGE_scalar, NEON_FACGE_H_scalar) \ + V(facgt, NEON_FACGT, NEON_FACGT_scalar, NEON_FACGT_H_scalar) \ + V(fcmeq, NEON_FCMEQ, NEON_FCMEQ_scalar, NEON_FCMEQ_H_scalar) \ + V(fcmge, NEON_FCMGE, NEON_FCMGE_scalar, NEON_FCMGE_H_scalar) \ + V(fcmgt, NEON_FCMGT, NEON_FCMGT_scalar, NEON_FCMGT_H_scalar) \ + V(faddp, NEON_FADDP, 0, 0) \ + V(fmaxp, NEON_FMAXP, 0, 0) \ + V(fminp, NEON_FMINP, 0, 0) \ + V(fmaxnmp, NEON_FMAXNMP, 0, 0) \ + V(fadd, NEON_FADD, FADD, 0) \ + V(fsub, NEON_FSUB, FSUB, 0) \ + V(fmul, NEON_FMUL, FMUL, 0) \ + V(fdiv, NEON_FDIV, FDIV, 0) \ + V(fmax, NEON_FMAX, FMAX, 0) \ + V(fmin, NEON_FMIN, FMIN, 0) \ + V(fmaxnm, NEON_FMAXNM, FMAXNM, 0) \ + V(fminnm, NEON_FMINNM, FMINNM, 0) \ + V(fminnmp, NEON_FMINNMP, 0, 0) +// clang-format on + +// TODO: This macro is complicated because it classifies the instructions in the +// macro list above, and treats each case differently. It could be somewhat +// simpler if we were to split the macro, at the cost of some duplication. +#define VIXL_DEFINE_ASM_FUNC(FN, VEC_OP, SCA_OP, SCA_OP_H) \ + void Assembler::FN(const VRegister& vd, \ + const VRegister& vn, \ + const VRegister& vm) { \ + VIXL_ASSERT(CPUHas(CPUFeatures::kFP)); \ + Instr op; \ + bool is_fp16 = false; \ + if ((SCA_OP != 0) && vd.IsScalar()) { \ + if ((SCA_OP_H != 0) && vd.Is1H()) { \ + VIXL_ASSERT(CPUHas(CPUFeatures::kNEON, CPUFeatures::kNEONHalf)); \ + is_fp16 = true; \ + op = SCA_OP_H; \ + } else { \ + VIXL_ASSERT(vd.Is1H() || vd.Is1S() || vd.Is1D()); \ + if ((static_cast(SCA_OP) & \ + static_cast(NEONScalar3SameFMask)) == \ + static_cast(NEONScalar3SameFixed)) { \ + VIXL_ASSERT(CPUHas(CPUFeatures::kNEON)); \ + if (vd.Is1H()) VIXL_ASSERT(CPUHas(CPUFeatures::kNEONHalf)); \ + } else if (vd.Is1H()) { \ + VIXL_ASSERT(CPUHas(CPUFeatures::kFPHalf)); \ + } \ + op = SCA_OP; \ + } \ + } else { \ + VIXL_ASSERT(CPUHas(CPUFeatures::kNEON)); \ + VIXL_ASSERT(vd.IsVector()); \ + if (vd.Is4H() || vd.Is8H()) { \ + VIXL_ASSERT(CPUHas(CPUFeatures::kNEONHalf)); \ + is_fp16 = true; \ + op = VEC_OP##_H; \ + } else { \ + VIXL_ASSERT(vd.Is2S() || vd.Is2D() || vd.Is4S()); \ + op = VEC_OP; \ + } \ + } \ + if (is_fp16) { \ + NEON3SameFP16(vd, vn, vm, op); \ + } else { \ + NEONFP3Same(vd, vn, vm, op); \ + } \ + } +NEON_FP3SAME_OP_LIST(VIXL_DEFINE_ASM_FUNC) +#undef VIXL_DEFINE_ASM_FUNC + + +// clang-format off +#define NEON_FHM_LIST(V) \ + V(fmlal, NEON_FMLAL) \ + V(fmlal2, NEON_FMLAL2) \ + V(fmlsl, NEON_FMLSL) \ + V(fmlsl2, NEON_FMLSL2) +// clang-format on + +#define VIXL_DEFINE_ASM_FUNC(FN, VEC_OP) \ + void Assembler::FN(const VRegister& vd, \ + const VRegister& vn, \ + const VRegister& vm) { \ + VIXL_ASSERT(CPUHas(CPUFeatures::kNEON, \ + CPUFeatures::kFP, \ + CPUFeatures::kNEONHalf, \ + CPUFeatures::kFHM)); \ + VIXL_ASSERT((vd.Is2S() && vn.Is2H() && vm.Is2H()) || \ + (vd.Is4S() && vn.Is4H() && vm.Is4H())); \ + Emit(FPFormat(vd) | VEC_OP | Rm(vm) | Rn(vn) | Rd(vd)); \ + } +NEON_FHM_LIST(VIXL_DEFINE_ASM_FUNC) +#undef 
VIXL_DEFINE_ASM_FUNC + + +void Assembler::addp(const VRegister& vd, const VRegister& vn) { + VIXL_ASSERT(CPUHas(CPUFeatures::kNEON)); + VIXL_ASSERT((vd.Is1D() && vn.Is2D())); + Emit(SFormat(vd) | NEON_ADDP_scalar | Rn(vn) | Rd(vd)); +} + + +void Assembler::sqrdmlah(const VRegister& vd, + const VRegister& vn, + const VRegister& vm) { + VIXL_ASSERT(CPUHas(CPUFeatures::kNEON, CPUFeatures::kRDM)); + VIXL_ASSERT(AreSameFormat(vd, vn, vm)); + VIXL_ASSERT(vd.IsVector() || !vd.IsQ()); + + Instr format, op = NEON_SQRDMLAH; + if (vd.IsScalar()) { + op |= static_cast(NEON_Q) | static_cast(NEONScalar); + format = SFormat(vd); + } else { + format = VFormat(vd); + } + + Emit(format | op | Rm(vm) | Rn(vn) | Rd(vd)); +} + + +void Assembler::sqrdmlsh(const VRegister& vd, + const VRegister& vn, + const VRegister& vm) { + VIXL_ASSERT(CPUHas(CPUFeatures::kNEON, CPUFeatures::kRDM)); + VIXL_ASSERT(AreSameFormat(vd, vn, vm)); + VIXL_ASSERT(vd.IsVector() || !vd.IsQ()); + + Instr format, op = NEON_SQRDMLSH; + if (vd.IsScalar()) { + op |= static_cast(NEON_Q) | static_cast(NEONScalar); + format = SFormat(vd); + } else { + format = VFormat(vd); + } + + Emit(format | op | Rm(vm) | Rn(vn) | Rd(vd)); +} + + +void Assembler::sdot(const VRegister& vd, + const VRegister& vn, + const VRegister& vm) { + VIXL_ASSERT(CPUHas(CPUFeatures::kNEON, CPUFeatures::kDotProduct)); + VIXL_ASSERT(AreSameFormat(vn, vm)); + VIXL_ASSERT((vd.Is2S() && vn.Is8B()) || (vd.Is4S() && vn.Is16B())); + + Emit(VFormat(vd) | NEON_SDOT | Rm(vm) | Rn(vn) | Rd(vd)); +} + + +void Assembler::udot(const VRegister& vd, + const VRegister& vn, + const VRegister& vm) { + VIXL_ASSERT(CPUHas(CPUFeatures::kNEON, CPUFeatures::kDotProduct)); + VIXL_ASSERT(AreSameFormat(vn, vm)); + VIXL_ASSERT((vd.Is2S() && vn.Is8B()) || (vd.Is4S() && vn.Is16B())); + + Emit(VFormat(vd) | NEON_UDOT | Rm(vm) | Rn(vn) | Rd(vd)); +} + +void Assembler::usdot(const VRegister& vd, + const VRegister& vn, + const VRegister& vm) { + VIXL_ASSERT(CPUHas(CPUFeatures::kNEON, CPUFeatures::kI8MM)); + VIXL_ASSERT(AreSameFormat(vn, vm)); + VIXL_ASSERT((vd.Is2S() && vn.Is8B()) || (vd.Is4S() && vn.Is16B())); + + Emit(VFormat(vd) | 0x0e809c00 | Rm(vm) | Rn(vn) | Rd(vd)); +} + +void Assembler::faddp(const VRegister& vd, const VRegister& vn) { + VIXL_ASSERT(CPUHas(CPUFeatures::kFP, CPUFeatures::kNEON)); + VIXL_ASSERT((vd.Is1S() && vn.Is2S()) || (vd.Is1D() && vn.Is2D()) || + (vd.Is1H() && vn.Is2H())); + if (vd.Is1H()) { + VIXL_ASSERT(CPUHas(CPUFeatures::kNEONHalf)); + Emit(NEON_FADDP_h_scalar | Rn(vn) | Rd(vd)); + } else { + Emit(FPFormat(vd) | NEON_FADDP_scalar | Rn(vn) | Rd(vd)); + } +} + + +void Assembler::fmaxp(const VRegister& vd, const VRegister& vn) { + VIXL_ASSERT(CPUHas(CPUFeatures::kFP, CPUFeatures::kNEON)); + VIXL_ASSERT((vd.Is1S() && vn.Is2S()) || (vd.Is1D() && vn.Is2D()) || + (vd.Is1H() && vn.Is2H())); + if (vd.Is1H()) { + VIXL_ASSERT(CPUHas(CPUFeatures::kNEONHalf)); + Emit(NEON_FMAXP_h_scalar | Rn(vn) | Rd(vd)); + } else { + Emit(FPFormat(vd) | NEON_FMAXP_scalar | Rn(vn) | Rd(vd)); + } +} + + +void Assembler::fminp(const VRegister& vd, const VRegister& vn) { + VIXL_ASSERT(CPUHas(CPUFeatures::kFP, CPUFeatures::kNEON)); + VIXL_ASSERT((vd.Is1S() && vn.Is2S()) || (vd.Is1D() && vn.Is2D()) || + (vd.Is1H() && vn.Is2H())); + if (vd.Is1H()) { + VIXL_ASSERT(CPUHas(CPUFeatures::kNEONHalf)); + Emit(NEON_FMINP_h_scalar | Rn(vn) | Rd(vd)); + } else { + Emit(FPFormat(vd) | NEON_FMINP_scalar | Rn(vn) | Rd(vd)); + } +} + + +void Assembler::fmaxnmp(const VRegister& vd, const VRegister& vn) { + 
VIXL_ASSERT(CPUHas(CPUFeatures::kFP, CPUFeatures::kNEON)); + VIXL_ASSERT((vd.Is1S() && vn.Is2S()) || (vd.Is1D() && vn.Is2D()) || + (vd.Is1H() && vn.Is2H())); + if (vd.Is1H()) { + VIXL_ASSERT(CPUHas(CPUFeatures::kNEONHalf)); + Emit(NEON_FMAXNMP_h_scalar | Rn(vn) | Rd(vd)); + } else { + Emit(FPFormat(vd) | NEON_FMAXNMP_scalar | Rn(vn) | Rd(vd)); + } +} + + +void Assembler::fminnmp(const VRegister& vd, const VRegister& vn) { + VIXL_ASSERT(CPUHas(CPUFeatures::kFP, CPUFeatures::kNEON)); + VIXL_ASSERT((vd.Is1S() && vn.Is2S()) || (vd.Is1D() && vn.Is2D()) || + (vd.Is1H() && vn.Is2H())); + if (vd.Is1H()) { + VIXL_ASSERT(CPUHas(CPUFeatures::kNEONHalf)); + Emit(NEON_FMINNMP_h_scalar | Rn(vn) | Rd(vd)); + } else { + Emit(FPFormat(vd) | NEON_FMINNMP_scalar | Rn(vn) | Rd(vd)); + } +} + + +// v8.3 complex numbers - floating-point complex multiply accumulate. +void Assembler::fcmla(const VRegister& vd, + const VRegister& vn, + const VRegister& vm, + int vm_index, + int rot) { + VIXL_ASSERT(CPUHas(CPUFeatures::kFP, CPUFeatures::kNEON, CPUFeatures::kFcma)); + VIXL_ASSERT(vd.IsVector() && AreSameFormat(vd, vn)); + VIXL_ASSERT((vm.IsH() && (vd.Is8H() || vd.Is4H())) || + (vm.IsS() && vd.Is4S())); + if (vd.IsLaneSizeH()) VIXL_ASSERT(CPUHas(CPUFeatures::kNEONHalf)); + int index_num_bits = vd.Is4S() ? 1 : 2; + Emit(VFormat(vd) | Rm(vm) | NEON_FCMLA_byelement | + ImmNEONHLM(vm_index, index_num_bits) | ImmRotFcmlaSca(rot) | Rn(vn) | + Rd(vd)); +} + + +void Assembler::fcmla(const VRegister& vd, + const VRegister& vn, + const VRegister& vm, + int rot) { + VIXL_ASSERT(CPUHas(CPUFeatures::kFP, CPUFeatures::kNEON, CPUFeatures::kFcma)); + VIXL_ASSERT(AreSameFormat(vd, vn, vm)); + VIXL_ASSERT(vd.IsVector() && !vd.IsLaneSizeB()); + if (vd.IsLaneSizeH()) VIXL_ASSERT(CPUHas(CPUFeatures::kNEONHalf)); + Emit(VFormat(vd) | Rm(vm) | NEON_FCMLA | ImmRotFcmlaVec(rot) | Rn(vn) | + Rd(vd)); +} + + +// v8.3 complex numbers - floating-point complex add. +void Assembler::fcadd(const VRegister& vd, + const VRegister& vn, + const VRegister& vm, + int rot) { + VIXL_ASSERT(CPUHas(CPUFeatures::kFP, CPUFeatures::kNEON, CPUFeatures::kFcma)); + VIXL_ASSERT(AreSameFormat(vd, vn, vm)); + VIXL_ASSERT(vd.IsVector() && !vd.IsLaneSizeB()); + if (vd.IsLaneSizeH()) VIXL_ASSERT(CPUHas(CPUFeatures::kNEONHalf)); + Emit(VFormat(vd) | Rm(vm) | NEON_FCADD | ImmRotFcadd(rot) | Rn(vn) | Rd(vd)); +} + + +void Assembler::orr(const VRegister& vd, const int imm8, const int left_shift) { + VIXL_ASSERT(CPUHas(CPUFeatures::kNEON)); + NEONModifiedImmShiftLsl(vd, imm8, left_shift, NEONModifiedImmediate_ORR); +} + + +void Assembler::mov(const VRegister& vd, const VRegister& vn) { + VIXL_ASSERT(CPUHas(CPUFeatures::kNEON)); + VIXL_ASSERT(AreSameFormat(vd, vn)); + if (vd.IsD()) { + orr(vd.V8B(), vn.V8B(), vn.V8B()); + } else { + VIXL_ASSERT(vd.IsQ()); + orr(vd.V16B(), vn.V16B(), vn.V16B()); + } +} + + +void Assembler::bic(const VRegister& vd, const int imm8, const int left_shift) { + VIXL_ASSERT(CPUHas(CPUFeatures::kNEON)); + NEONModifiedImmShiftLsl(vd, imm8, left_shift, NEONModifiedImmediate_BIC); +} + + +void Assembler::movi(const VRegister& vd, + const uint64_t imm, + Shift shift, + const int shift_amount) { + VIXL_ASSERT(CPUHas(CPUFeatures::kNEON)); + VIXL_ASSERT((shift == LSL) || (shift == MSL)); + if (vd.Is2D() || vd.Is1D()) { + VIXL_ASSERT(shift_amount == 0); + int imm8 = 0; + for (int i = 0; i < 8; ++i) { + int byte = (imm >> (i * 8)) & 0xff; + VIXL_ASSERT((byte == 0) || (byte == 0xff)); + if (byte == 0xff) { + imm8 |= (1 << i); + } + } + int q = vd.Is2D() ? 
NEON_Q : 0; + Emit(q | NEONModImmOp(1) | NEONModifiedImmediate_MOVI | + ImmNEONabcdefgh(imm8) | NEONCmode(0xe) | Rd(vd)); + } else if (shift == LSL) { + VIXL_ASSERT(IsUint8(imm)); + NEONModifiedImmShiftLsl(vd, + static_cast(imm), + shift_amount, + NEONModifiedImmediate_MOVI); + } else { + VIXL_ASSERT(IsUint8(imm)); + NEONModifiedImmShiftMsl(vd, + static_cast(imm), + shift_amount, + NEONModifiedImmediate_MOVI); + } +} + + +void Assembler::mvn(const VRegister& vd, const VRegister& vn) { + VIXL_ASSERT(CPUHas(CPUFeatures::kNEON)); + VIXL_ASSERT(AreSameFormat(vd, vn)); + if (vd.IsD()) { + not_(vd.V8B(), vn.V8B()); + } else { + VIXL_ASSERT(vd.IsQ()); + not_(vd.V16B(), vn.V16B()); + } +} + + +void Assembler::mvni(const VRegister& vd, + const int imm8, + Shift shift, + const int shift_amount) { + VIXL_ASSERT(CPUHas(CPUFeatures::kNEON)); + VIXL_ASSERT((shift == LSL) || (shift == MSL)); + if (shift == LSL) { + NEONModifiedImmShiftLsl(vd, imm8, shift_amount, NEONModifiedImmediate_MVNI); + } else { + NEONModifiedImmShiftMsl(vd, imm8, shift_amount, NEONModifiedImmediate_MVNI); + } +} + + +void Assembler::NEONFPByElement(const VRegister& vd, + const VRegister& vn, + const VRegister& vm, + int vm_index, + NEONByIndexedElementOp vop, + NEONByIndexedElementOp vop_half) { + VIXL_ASSERT(AreSameFormat(vd, vn)); + VIXL_ASSERT((vd.Is2S() && vm.Is1S()) || (vd.Is4S() && vm.Is1S()) || + (vd.Is1S() && vm.Is1S()) || (vd.Is2D() && vm.Is1D()) || + (vd.Is1D() && vm.Is1D()) || (vd.Is4H() && vm.Is1H()) || + (vd.Is8H() && vm.Is1H()) || (vd.Is1H() && vm.Is1H())); + VIXL_ASSERT((vm.Is1S() && (vm_index < 4)) || (vm.Is1D() && (vm_index < 2)) || + (vm.Is1H() && (vm.GetCode() < 16) && (vm_index < 8))); + + Instr op = vop; + int index_num_bits; + if (vm.Is1D()) { + index_num_bits = 1; + } else if (vm.Is1S()) { + index_num_bits = 2; + } else { + index_num_bits = 3; + op = vop_half; + } + + if (vd.IsScalar()) { + op |= static_cast(NEON_Q) | static_cast(NEONScalar); + } + + if (!vm.Is1H()) { + op |= FPFormat(vd); + } else if (vd.Is8H()) { + op |= static_cast(NEON_Q); + } + + Emit(op | ImmNEONHLM(vm_index, index_num_bits) | Rm(vm) | Rn(vn) | Rd(vd)); +} + + +void Assembler::NEONByElement(const VRegister& vd, + const VRegister& vn, + const VRegister& vm, + int vm_index, + NEONByIndexedElementOp vop) { + VIXL_ASSERT(AreSameFormat(vd, vn)); + VIXL_ASSERT((vd.Is4H() && vm.Is1H()) || (vd.Is8H() && vm.Is1H()) || + (vd.Is1H() && vm.Is1H()) || (vd.Is2S() && vm.Is1S()) || + (vd.Is4S() && vm.Is1S()) || (vd.Is1S() && vm.Is1S())); + VIXL_ASSERT((vm.Is1H() && (vm.GetCode() < 16) && (vm_index < 8)) || + (vm.Is1S() && (vm_index < 4))); + + Instr format, op = vop; + int index_num_bits = vm.Is1H() ? 3 : 2; + if (vd.IsScalar()) { + op |= static_cast(NEONScalar) | static_cast(NEON_Q); + format = SFormat(vn); + } else { + format = VFormat(vn); + } + Emit(format | op | ImmNEONHLM(vm_index, index_num_bits) | Rm(vm) | Rn(vn) | + Rd(vd)); +} + + +void Assembler::NEONByElementL(const VRegister& vd, + const VRegister& vn, + const VRegister& vm, + int vm_index, + NEONByIndexedElementOp vop) { + VIXL_ASSERT((vd.Is4S() && vn.Is4H() && vm.Is1H()) || + (vd.Is4S() && vn.Is8H() && vm.Is1H()) || + (vd.Is1S() && vn.Is1H() && vm.Is1H()) || + (vd.Is2D() && vn.Is2S() && vm.Is1S()) || + (vd.Is2D() && vn.Is4S() && vm.Is1S()) || + (vd.Is1D() && vn.Is1S() && vm.Is1S())); + + VIXL_ASSERT((vm.Is1H() && (vm.GetCode() < 16) && (vm_index < 8)) || + (vm.Is1S() && (vm_index < 4))); + + Instr format, op = vop; + int index_num_bits = vm.Is1H() ? 
3 : 2; + if (vd.IsScalar()) { + op |= static_cast(NEONScalar) | static_cast(NEON_Q); + format = SFormat(vn); + } else { + format = VFormat(vn); + } + Emit(format | op | ImmNEONHLM(vm_index, index_num_bits) | Rm(vm) | Rn(vn) | + Rd(vd)); +} + + +void Assembler::sdot(const VRegister& vd, + const VRegister& vn, + const VRegister& vm, + int vm_index) { + VIXL_ASSERT(CPUHas(CPUFeatures::kNEON, CPUFeatures::kDotProduct)); + VIXL_ASSERT((vd.Is2S() && vn.Is8B() && vm.Is1S4B()) || + (vd.Is4S() && vn.Is16B() && vm.Is1S4B())); + + int index_num_bits = 2; + Emit(VFormat(vd) | NEON_SDOT_byelement | + ImmNEONHLM(vm_index, index_num_bits) | Rm(vm) | Rn(vn) | Rd(vd)); +} + + +void Assembler::udot(const VRegister& vd, + const VRegister& vn, + const VRegister& vm, + int vm_index) { + VIXL_ASSERT(CPUHas(CPUFeatures::kNEON, CPUFeatures::kDotProduct)); + VIXL_ASSERT((vd.Is2S() && vn.Is8B() && vm.Is1S4B()) || + (vd.Is4S() && vn.Is16B() && vm.Is1S4B())); + + int index_num_bits = 2; + Emit(VFormat(vd) | NEON_UDOT_byelement | + ImmNEONHLM(vm_index, index_num_bits) | Rm(vm) | Rn(vn) | Rd(vd)); +} + +void Assembler::sudot(const VRegister& vd, + const VRegister& vn, + const VRegister& vm, + int vm_index) { + VIXL_ASSERT(CPUHas(CPUFeatures::kNEON, CPUFeatures::kI8MM)); + VIXL_ASSERT((vd.Is2S() && vn.Is8B() && vm.Is1S4B()) || + (vd.Is4S() && vn.Is16B() && vm.Is1S4B())); + int q = vd.Is4S() ? (1U << NEONQ_offset) : 0; + int index_num_bits = 2; + Emit(q | 0x0f00f000 | ImmNEONHLM(vm_index, index_num_bits) | Rm(vm) | Rn(vn) | + Rd(vd)); +} + + +void Assembler::usdot(const VRegister& vd, + const VRegister& vn, + const VRegister& vm, + int vm_index) { + VIXL_ASSERT(CPUHas(CPUFeatures::kNEON, CPUFeatures::kI8MM)); + VIXL_ASSERT((vd.Is2S() && vn.Is8B() && vm.Is1S4B()) || + (vd.Is4S() && vn.Is16B() && vm.Is1S4B())); + int q = vd.Is4S() ? 
(1U << NEONQ_offset) : 0; + int index_num_bits = 2; + Emit(q | 0x0f80f000 | ImmNEONHLM(vm_index, index_num_bits) | Rm(vm) | Rn(vn) | + Rd(vd)); +} + +// clang-format off +#define NEON_BYELEMENT_LIST(V) \ + V(mul, NEON_MUL_byelement, vn.IsVector()) \ + V(mla, NEON_MLA_byelement, vn.IsVector()) \ + V(mls, NEON_MLS_byelement, vn.IsVector()) \ + V(sqdmulh, NEON_SQDMULH_byelement, true) \ + V(sqrdmulh, NEON_SQRDMULH_byelement, true) \ +// clang-format on + +#define VIXL_DEFINE_ASM_FUNC(FN, OP, AS) \ + void Assembler::FN(const VRegister& vd, \ + const VRegister& vn, \ + const VRegister& vm, \ + int vm_index) { \ + VIXL_ASSERT(CPUHas(CPUFeatures::kNEON)); \ + VIXL_ASSERT(AS); \ + NEONByElement(vd, vn, vm, vm_index, OP); \ + } +NEON_BYELEMENT_LIST(VIXL_DEFINE_ASM_FUNC) +#undef VIXL_DEFINE_ASM_FUNC + + +// clang-format off +#define NEON_BYELEMENT_RDM_LIST(V) \ + V(sqrdmlah, NEON_SQRDMLAH_byelement) \ + V(sqrdmlsh, NEON_SQRDMLSH_byelement) +// clang-format on + +#define VIXL_DEFINE_ASM_FUNC(FN, OP) \ + void Assembler::FN(const VRegister& vd, \ + const VRegister& vn, \ + const VRegister& vm, \ + int vm_index) { \ + VIXL_ASSERT(CPUHas(CPUFeatures::kNEON, CPUFeatures::kRDM)); \ + NEONByElement(vd, vn, vm, vm_index, OP); \ + } +NEON_BYELEMENT_RDM_LIST(VIXL_DEFINE_ASM_FUNC) +#undef VIXL_DEFINE_ASM_FUNC + + +// clang-format off +#define NEON_FPBYELEMENT_LIST(V) \ + V(fmul, NEON_FMUL_byelement, NEON_FMUL_H_byelement) \ + V(fmla, NEON_FMLA_byelement, NEON_FMLA_H_byelement) \ + V(fmls, NEON_FMLS_byelement, NEON_FMLS_H_byelement) \ + V(fmulx, NEON_FMULX_byelement, NEON_FMULX_H_byelement) +// clang-format on + +#define VIXL_DEFINE_ASM_FUNC(FN, OP, OP_H) \ + void Assembler::FN(const VRegister& vd, \ + const VRegister& vn, \ + const VRegister& vm, \ + int vm_index) { \ + VIXL_ASSERT(CPUHas(CPUFeatures::kFP, CPUFeatures::kNEON)); \ + if (vd.IsLaneSizeH()) VIXL_ASSERT(CPUHas(CPUFeatures::kNEONHalf)); \ + NEONFPByElement(vd, vn, vm, vm_index, OP, OP_H); \ + } +NEON_FPBYELEMENT_LIST(VIXL_DEFINE_ASM_FUNC) +#undef VIXL_DEFINE_ASM_FUNC + + +// clang-format off +#define NEON_BYELEMENT_LONG_LIST(V) \ + V(sqdmull, NEON_SQDMULL_byelement, vn.IsScalar() || vn.IsD()) \ + V(sqdmull2, NEON_SQDMULL_byelement, vn.IsVector() && vn.IsQ()) \ + V(sqdmlal, NEON_SQDMLAL_byelement, vn.IsScalar() || vn.IsD()) \ + V(sqdmlal2, NEON_SQDMLAL_byelement, vn.IsVector() && vn.IsQ()) \ + V(sqdmlsl, NEON_SQDMLSL_byelement, vn.IsScalar() || vn.IsD()) \ + V(sqdmlsl2, NEON_SQDMLSL_byelement, vn.IsVector() && vn.IsQ()) \ + V(smull, NEON_SMULL_byelement, vn.IsVector() && vn.IsD()) \ + V(smull2, NEON_SMULL_byelement, vn.IsVector() && vn.IsQ()) \ + V(umull, NEON_UMULL_byelement, vn.IsVector() && vn.IsD()) \ + V(umull2, NEON_UMULL_byelement, vn.IsVector() && vn.IsQ()) \ + V(smlal, NEON_SMLAL_byelement, vn.IsVector() && vn.IsD()) \ + V(smlal2, NEON_SMLAL_byelement, vn.IsVector() && vn.IsQ()) \ + V(umlal, NEON_UMLAL_byelement, vn.IsVector() && vn.IsD()) \ + V(umlal2, NEON_UMLAL_byelement, vn.IsVector() && vn.IsQ()) \ + V(smlsl, NEON_SMLSL_byelement, vn.IsVector() && vn.IsD()) \ + V(smlsl2, NEON_SMLSL_byelement, vn.IsVector() && vn.IsQ()) \ + V(umlsl, NEON_UMLSL_byelement, vn.IsVector() && vn.IsD()) \ + V(umlsl2, NEON_UMLSL_byelement, vn.IsVector() && vn.IsQ()) +// clang-format on + + +#define VIXL_DEFINE_ASM_FUNC(FN, OP, AS) \ + void Assembler::FN(const VRegister& vd, \ + const VRegister& vn, \ + const VRegister& vm, \ + int vm_index) { \ + VIXL_ASSERT(CPUHas(CPUFeatures::kNEON)); \ + VIXL_ASSERT(AS); \ + NEONByElementL(vd, vn, vm, vm_index, OP); \ + } 
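// The by-element helpers above size the encoded lane index from vm's element
// type: H lanes use a 3-bit index (0-7), S lanes 2 bits (0-3) and D lanes a
// single bit (0-1), and for H lanes vm is further restricted to v0-v15, as the
// vm.GetCode() < 16 assertions above require. A minimal standalone sketch of
// that selection (illustrative names only, not part of the patch):

#include <cassert>

struct ByElementIndexInfo {
  int index_bits;   // number of index bits encoded via ImmNEONHLM
  int max_index;    // exclusive upper bound on vm_index
  int max_vm_code;  // exclusive upper bound on the vm register number
};

static ByElementIndexInfo IndexInfoForLaneBytes(int vm_lane_size_in_bytes) {
  switch (vm_lane_size_in_bytes) {
    case 2:
      return {3, 8, 16};  // H lanes.
    case 4:
      return {2, 4, 32};  // S lanes.
    default:
      assert(vm_lane_size_in_bytes == 8);
      return {1, 2, 32};  // D lanes.
  }
}

int main() {
  assert(IndexInfoForLaneBytes(2).index_bits == 3);
  assert(IndexInfoForLaneBytes(4).max_index == 4);
  assert(IndexInfoForLaneBytes(8).max_index == 2);
  return 0;
}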
+NEON_BYELEMENT_LONG_LIST(VIXL_DEFINE_ASM_FUNC) +#undef VIXL_DEFINE_ASM_FUNC + + +// clang-format off +#define NEON_BYELEMENT_FHM_LIST(V) \ + V(fmlal, NEON_FMLAL_H_byelement) \ + V(fmlal2, NEON_FMLAL2_H_byelement) \ + V(fmlsl, NEON_FMLSL_H_byelement) \ + V(fmlsl2, NEON_FMLSL2_H_byelement) +// clang-format on + + +#define VIXL_DEFINE_ASM_FUNC(FN, OP) \ + void Assembler::FN(const VRegister& vd, \ + const VRegister& vn, \ + const VRegister& vm, \ + int vm_index) { \ + VIXL_ASSERT(CPUHas(CPUFeatures::kNEON, \ + CPUFeatures::kFP, \ + CPUFeatures::kNEONHalf, \ + CPUFeatures::kFHM)); \ + VIXL_ASSERT((vd.Is2S() && vn.Is2H()) || (vd.Is4S() && vn.Is4H())); \ + VIXL_ASSERT(vm.IsH()); \ + VIXL_ASSERT((vm_index >= 0) && (vm_index < 8)); \ + /* Vm itself can only be in the bottom 16 registers. */ \ + VIXL_ASSERT(vm.GetCode() < 16); \ + Emit(FPFormat(vd) | OP | Rd(vd) | Rn(vn) | Rm(vm) | \ + ImmNEONHLM(vm_index, 3)); \ + } +NEON_BYELEMENT_FHM_LIST(VIXL_DEFINE_ASM_FUNC) +#undef VIXL_DEFINE_ASM_FUNC + +void Assembler::suqadd(const VRegister& vd, const VRegister& vn) { + VIXL_ASSERT(CPUHas(CPUFeatures::kNEON)); + NEON2RegMisc(vd, vn, NEON_SUQADD); +} + + +void Assembler::usqadd(const VRegister& vd, const VRegister& vn) { + VIXL_ASSERT(CPUHas(CPUFeatures::kNEON)); + NEON2RegMisc(vd, vn, NEON_USQADD); +} + + +void Assembler::abs(const VRegister& vd, const VRegister& vn) { + VIXL_ASSERT(CPUHas(CPUFeatures::kNEON)); + VIXL_ASSERT(vd.IsVector() || vd.Is1D()); + NEON2RegMisc(vd, vn, NEON_ABS); +} + + +void Assembler::sqabs(const VRegister& vd, const VRegister& vn) { + VIXL_ASSERT(CPUHas(CPUFeatures::kNEON)); + NEON2RegMisc(vd, vn, NEON_SQABS); +} + + +void Assembler::neg(const VRegister& vd, const VRegister& vn) { + VIXL_ASSERT(CPUHas(CPUFeatures::kNEON)); + VIXL_ASSERT(vd.IsVector() || vd.Is1D()); + NEON2RegMisc(vd, vn, NEON_NEG); +} + + +void Assembler::sqneg(const VRegister& vd, const VRegister& vn) { + VIXL_ASSERT(CPUHas(CPUFeatures::kNEON)); + NEON2RegMisc(vd, vn, NEON_SQNEG); +} + + +void Assembler::NEONXtn(const VRegister& vd, + const VRegister& vn, + NEON2RegMiscOp vop) { + VIXL_ASSERT(CPUHas(CPUFeatures::kNEON)); + Instr format, op = vop; + if (vd.IsScalar()) { + VIXL_ASSERT((vd.Is1B() && vn.Is1H()) || (vd.Is1H() && vn.Is1S()) || + (vd.Is1S() && vn.Is1D())); + op |= static_cast(NEON_Q) | static_cast(NEONScalar); + format = SFormat(vd); + } else { + VIXL_ASSERT((vd.Is8B() && vn.Is8H()) || (vd.Is4H() && vn.Is4S()) || + (vd.Is2S() && vn.Is2D()) || (vd.Is16B() && vn.Is8H()) || + (vd.Is8H() && vn.Is4S()) || (vd.Is4S() && vn.Is2D())); + format = VFormat(vd); + } + Emit(format | op | Rn(vn) | Rd(vd)); +} + + +void Assembler::xtn(const VRegister& vd, const VRegister& vn) { + VIXL_ASSERT(CPUHas(CPUFeatures::kNEON)); + VIXL_ASSERT(vd.IsVector() && vd.IsD()); + NEONXtn(vd, vn, NEON_XTN); +} + + +void Assembler::xtn2(const VRegister& vd, const VRegister& vn) { + VIXL_ASSERT(CPUHas(CPUFeatures::kNEON)); + VIXL_ASSERT(vd.IsVector() && vd.IsQ()); + NEONXtn(vd, vn, NEON_XTN); +} + + +void Assembler::sqxtn(const VRegister& vd, const VRegister& vn) { + VIXL_ASSERT(CPUHas(CPUFeatures::kNEON)); + VIXL_ASSERT(vd.IsScalar() || vd.IsD()); + NEONXtn(vd, vn, NEON_SQXTN); +} + + +void Assembler::sqxtn2(const VRegister& vd, const VRegister& vn) { + VIXL_ASSERT(CPUHas(CPUFeatures::kNEON)); + VIXL_ASSERT(vd.IsVector() && vd.IsQ()); + NEONXtn(vd, vn, NEON_SQXTN); +} + + +void Assembler::sqxtun(const VRegister& vd, const VRegister& vn) { + VIXL_ASSERT(CPUHas(CPUFeatures::kNEON)); + VIXL_ASSERT(vd.IsScalar() || vd.IsD()); + NEONXtn(vd, 
vn, NEON_SQXTUN); +} + + +void Assembler::sqxtun2(const VRegister& vd, const VRegister& vn) { + VIXL_ASSERT(CPUHas(CPUFeatures::kNEON)); + VIXL_ASSERT(vd.IsVector() && vd.IsQ()); + NEONXtn(vd, vn, NEON_SQXTUN); +} + + +void Assembler::uqxtn(const VRegister& vd, const VRegister& vn) { + VIXL_ASSERT(CPUHas(CPUFeatures::kNEON)); + VIXL_ASSERT(vd.IsScalar() || vd.IsD()); + NEONXtn(vd, vn, NEON_UQXTN); +} + + +void Assembler::uqxtn2(const VRegister& vd, const VRegister& vn) { + VIXL_ASSERT(CPUHas(CPUFeatures::kNEON)); + VIXL_ASSERT(vd.IsVector() && vd.IsQ()); + NEONXtn(vd, vn, NEON_UQXTN); +} + + +// NEON NOT and RBIT are distinguised by bit 22, the bottom bit of "size". +void Assembler::not_(const VRegister& vd, const VRegister& vn) { + VIXL_ASSERT(CPUHas(CPUFeatures::kNEON)); + VIXL_ASSERT(AreSameFormat(vd, vn)); + VIXL_ASSERT(vd.Is8B() || vd.Is16B()); + Emit(VFormat(vd) | NEON_RBIT_NOT | Rn(vn) | Rd(vd)); +} + + +void Assembler::rbit(const VRegister& vd, const VRegister& vn) { + VIXL_ASSERT(CPUHas(CPUFeatures::kNEON)); + VIXL_ASSERT(AreSameFormat(vd, vn)); + VIXL_ASSERT(vd.Is8B() || vd.Is16B()); + Emit(VFormat(vn) | (1 << NEONSize_offset) | NEON_RBIT_NOT | Rn(vn) | Rd(vd)); +} + + +void Assembler::ext(const VRegister& vd, + const VRegister& vn, + const VRegister& vm, + int index) { + VIXL_ASSERT(CPUHas(CPUFeatures::kNEON)); + VIXL_ASSERT(AreSameFormat(vd, vn, vm)); + VIXL_ASSERT(vd.Is8B() || vd.Is16B()); + VIXL_ASSERT((0 <= index) && (index < vd.GetLanes())); + Emit(VFormat(vd) | NEON_EXT | Rm(vm) | ImmNEONExt(index) | Rn(vn) | Rd(vd)); +} + + +void Assembler::dup(const VRegister& vd, const VRegister& vn, int vn_index) { + VIXL_ASSERT(CPUHas(CPUFeatures::kNEON)); + Instr q, scalar; + + // We support vn arguments of the form vn.VxT() or vn.T(), where x is the + // number of lanes, and T is b, h, s or d. + int lane_size = vn.GetLaneSizeInBytes(); + NEONFormatField format; + switch (lane_size) { + case 1: + format = NEON_16B; + break; + case 2: + format = NEON_8H; + break; + case 4: + format = NEON_4S; + break; + default: + VIXL_ASSERT(lane_size == 8); + format = NEON_2D; + break; + } + + if (vd.IsScalar()) { + q = NEON_Q; + scalar = NEONScalar; + } else { + VIXL_ASSERT(!vd.Is1D()); + q = vd.IsD() ? 0 : NEON_Q; + scalar = 0; + } + Emit(q | scalar | NEON_DUP_ELEMENT | ImmNEON5(format, vn_index) | Rn(vn) | + Rd(vd)); +} + + +void Assembler::mov(const VRegister& vd, const VRegister& vn, int vn_index) { + VIXL_ASSERT(CPUHas(CPUFeatures::kNEON)); + VIXL_ASSERT(vd.IsScalar()); + dup(vd, vn, vn_index); +} + + +void Assembler::dup(const VRegister& vd, const Register& rn) { + VIXL_ASSERT(CPUHas(CPUFeatures::kNEON)); + VIXL_ASSERT(!vd.Is1D()); + VIXL_ASSERT(vd.Is2D() == rn.IsX()); + int q = vd.IsD() ? 0 : NEON_Q; + Emit(q | NEON_DUP_GENERAL | ImmNEON5(VFormat(vd), 0) | Rn(rn) | Rd(vd)); +} + + +void Assembler::ins(const VRegister& vd, + int vd_index, + const VRegister& vn, + int vn_index) { + VIXL_ASSERT(CPUHas(CPUFeatures::kNEON)); + VIXL_ASSERT(AreSameFormat(vd, vn)); + // We support vd arguments of the form vd.VxT() or vd.T(), where x is the + // number of lanes, and T is b, h, s or d. 
+ int lane_size = vd.GetLaneSizeInBytes(); + NEONFormatField format; + switch (lane_size) { + case 1: + format = NEON_16B; + break; + case 2: + format = NEON_8H; + break; + case 4: + format = NEON_4S; + break; + default: + VIXL_ASSERT(lane_size == 8); + format = NEON_2D; + break; + } + + VIXL_ASSERT( + (0 <= vd_index) && + (vd_index < LaneCountFromFormat(static_cast(format)))); + VIXL_ASSERT( + (0 <= vn_index) && + (vn_index < LaneCountFromFormat(static_cast(format)))); + Emit(NEON_INS_ELEMENT | ImmNEON5(format, vd_index) | + ImmNEON4(format, vn_index) | Rn(vn) | Rd(vd)); +} + + +void Assembler::mov(const VRegister& vd, + int vd_index, + const VRegister& vn, + int vn_index) { + VIXL_ASSERT(CPUHas(CPUFeatures::kNEON)); + ins(vd, vd_index, vn, vn_index); +} + + +void Assembler::ins(const VRegister& vd, int vd_index, const Register& rn) { + VIXL_ASSERT(CPUHas(CPUFeatures::kNEON)); + // We support vd arguments of the form vd.VxT() or vd.T(), where x is the + // number of lanes, and T is b, h, s or d. + int lane_size = vd.GetLaneSizeInBytes(); + NEONFormatField format; + switch (lane_size) { + case 1: + format = NEON_16B; + VIXL_ASSERT(rn.IsW()); + break; + case 2: + format = NEON_8H; + VIXL_ASSERT(rn.IsW()); + break; + case 4: + format = NEON_4S; + VIXL_ASSERT(rn.IsW()); + break; + default: + VIXL_ASSERT(lane_size == 8); + VIXL_ASSERT(rn.IsX()); + format = NEON_2D; + break; + } + + VIXL_ASSERT( + (0 <= vd_index) && + (vd_index < LaneCountFromFormat(static_cast(format)))); + Emit(NEON_INS_GENERAL | ImmNEON5(format, vd_index) | Rn(rn) | Rd(vd)); +} + + +void Assembler::mov(const VRegister& vd, int vd_index, const Register& rn) { + VIXL_ASSERT(CPUHas(CPUFeatures::kNEON)); + ins(vd, vd_index, rn); +} + + +void Assembler::umov(const Register& rd, const VRegister& vn, int vn_index) { + VIXL_ASSERT(CPUHas(CPUFeatures::kNEON)); + // We support vn arguments of the form vn.VxT() or vn.T(), where x is the + // number of lanes, and T is b, h, s or d. + int lane_size = vn.GetLaneSizeInBytes(); + NEONFormatField format; + Instr q = 0; + switch (lane_size) { + case 1: + format = NEON_16B; + VIXL_ASSERT(rd.IsW()); + break; + case 2: + format = NEON_8H; + VIXL_ASSERT(rd.IsW()); + break; + case 4: + format = NEON_4S; + VIXL_ASSERT(rd.IsW()); + break; + default: + VIXL_ASSERT(lane_size == 8); + VIXL_ASSERT(rd.IsX()); + format = NEON_2D; + q = NEON_Q; + break; + } + + VIXL_ASSERT( + (0 <= vn_index) && + (vn_index < LaneCountFromFormat(static_cast(format)))); + Emit(q | NEON_UMOV | ImmNEON5(format, vn_index) | Rn(vn) | Rd(rd)); +} + + +void Assembler::mov(const Register& rd, const VRegister& vn, int vn_index) { + VIXL_ASSERT(CPUHas(CPUFeatures::kNEON)); + VIXL_ASSERT(vn.GetSizeInBytes() >= 4); + umov(rd, vn, vn_index); +} + + +void Assembler::smov(const Register& rd, const VRegister& vn, int vn_index) { + VIXL_ASSERT(CPUHas(CPUFeatures::kNEON)); + // We support vn arguments of the form vn.VxT() or vn.T(), where x is the + // number of lanes, and T is b, h, s. + int lane_size = vn.GetLaneSizeInBytes(); + NEONFormatField format; + Instr q = 0; + VIXL_ASSERT(lane_size != 8); + switch (lane_size) { + case 1: + format = NEON_16B; + break; + case 2: + format = NEON_8H; + break; + default: + VIXL_ASSERT(lane_size == 4); + VIXL_ASSERT(rd.IsX()); + format = NEON_4S; + break; + } + q = rd.IsW() ? 
0 : NEON_Q; + VIXL_ASSERT( + (0 <= vn_index) && + (vn_index < LaneCountFromFormat(static_cast(format)))); + Emit(q | NEON_SMOV | ImmNEON5(format, vn_index) | Rn(vn) | Rd(rd)); +} + + +void Assembler::cls(const VRegister& vd, const VRegister& vn) { + VIXL_ASSERT(CPUHas(CPUFeatures::kNEON)); + VIXL_ASSERT(AreSameFormat(vd, vn)); + VIXL_ASSERT(!vd.Is1D() && !vd.Is2D()); + Emit(VFormat(vn) | NEON_CLS | Rn(vn) | Rd(vd)); +} + + +void Assembler::clz(const VRegister& vd, const VRegister& vn) { + VIXL_ASSERT(CPUHas(CPUFeatures::kNEON)); + VIXL_ASSERT(AreSameFormat(vd, vn)); + VIXL_ASSERT(!vd.Is1D() && !vd.Is2D()); + Emit(VFormat(vn) | NEON_CLZ | Rn(vn) | Rd(vd)); +} + + +void Assembler::cnt(const VRegister& vd, const VRegister& vn) { + VIXL_ASSERT(CPUHas(CPUFeatures::kNEON)); + VIXL_ASSERT(AreSameFormat(vd, vn)); + VIXL_ASSERT(vd.Is8B() || vd.Is16B()); + Emit(VFormat(vn) | NEON_CNT | Rn(vn) | Rd(vd)); +} + + +void Assembler::rev16(const VRegister& vd, const VRegister& vn) { + VIXL_ASSERT(CPUHas(CPUFeatures::kNEON)); + VIXL_ASSERT(AreSameFormat(vd, vn)); + VIXL_ASSERT(vd.Is8B() || vd.Is16B()); + Emit(VFormat(vn) | NEON_REV16 | Rn(vn) | Rd(vd)); +} + + +void Assembler::rev32(const VRegister& vd, const VRegister& vn) { + VIXL_ASSERT(CPUHas(CPUFeatures::kNEON)); + VIXL_ASSERT(AreSameFormat(vd, vn)); + VIXL_ASSERT(vd.Is8B() || vd.Is16B() || vd.Is4H() || vd.Is8H()); + Emit(VFormat(vn) | NEON_REV32 | Rn(vn) | Rd(vd)); +} + + +void Assembler::rev64(const VRegister& vd, const VRegister& vn) { + VIXL_ASSERT(CPUHas(CPUFeatures::kNEON)); + VIXL_ASSERT(AreSameFormat(vd, vn)); + VIXL_ASSERT(!vd.Is1D() && !vd.Is2D()); + Emit(VFormat(vn) | NEON_REV64 | Rn(vn) | Rd(vd)); +} + + +void Assembler::ursqrte(const VRegister& vd, const VRegister& vn) { + VIXL_ASSERT(CPUHas(CPUFeatures::kNEON)); + VIXL_ASSERT(AreSameFormat(vd, vn)); + VIXL_ASSERT(vd.Is2S() || vd.Is4S()); + Emit(VFormat(vn) | NEON_URSQRTE | Rn(vn) | Rd(vd)); +} + + +void Assembler::urecpe(const VRegister& vd, const VRegister& vn) { + VIXL_ASSERT(CPUHas(CPUFeatures::kNEON)); + VIXL_ASSERT(AreSameFormat(vd, vn)); + VIXL_ASSERT(vd.Is2S() || vd.Is4S()); + Emit(VFormat(vn) | NEON_URECPE | Rn(vn) | Rd(vd)); +} + + +void Assembler::NEONAddlp(const VRegister& vd, + const VRegister& vn, + NEON2RegMiscOp op) { + VIXL_ASSERT(CPUHas(CPUFeatures::kNEON)); + VIXL_ASSERT((op == NEON_SADDLP) || (op == NEON_UADDLP) || + (op == NEON_SADALP) || (op == NEON_UADALP)); + + VIXL_ASSERT((vn.Is8B() && vd.Is4H()) || (vn.Is4H() && vd.Is2S()) || + (vn.Is2S() && vd.Is1D()) || (vn.Is16B() && vd.Is8H()) || + (vn.Is8H() && vd.Is4S()) || (vn.Is4S() && vd.Is2D())); + Emit(VFormat(vn) | op | Rn(vn) | Rd(vd)); +} + + +void Assembler::saddlp(const VRegister& vd, const VRegister& vn) { + VIXL_ASSERT(CPUHas(CPUFeatures::kNEON)); + NEONAddlp(vd, vn, NEON_SADDLP); +} + + +void Assembler::uaddlp(const VRegister& vd, const VRegister& vn) { + VIXL_ASSERT(CPUHas(CPUFeatures::kNEON)); + NEONAddlp(vd, vn, NEON_UADDLP); +} + + +void Assembler::sadalp(const VRegister& vd, const VRegister& vn) { + VIXL_ASSERT(CPUHas(CPUFeatures::kNEON)); + NEONAddlp(vd, vn, NEON_SADALP); +} + + +void Assembler::uadalp(const VRegister& vd, const VRegister& vn) { + VIXL_ASSERT(CPUHas(CPUFeatures::kNEON)); + NEONAddlp(vd, vn, NEON_UADALP); +} + + +void Assembler::NEONAcrossLanesL(const VRegister& vd, + const VRegister& vn, + NEONAcrossLanesOp op) { + VIXL_ASSERT((vn.Is8B() && vd.Is1H()) || (vn.Is16B() && vd.Is1H()) || + (vn.Is4H() && vd.Is1S()) || (vn.Is8H() && vd.Is1S()) || + (vn.Is4S() && vd.Is1D())); + Emit(VFormat(vn) | 
op | Rn(vn) | Rd(vd)); +} + + +void Assembler::saddlv(const VRegister& vd, const VRegister& vn) { + VIXL_ASSERT(CPUHas(CPUFeatures::kNEON)); + NEONAcrossLanesL(vd, vn, NEON_SADDLV); +} + + +void Assembler::uaddlv(const VRegister& vd, const VRegister& vn) { + VIXL_ASSERT(CPUHas(CPUFeatures::kNEON)); + NEONAcrossLanesL(vd, vn, NEON_UADDLV); +} + + +void Assembler::NEONAcrossLanes(const VRegister& vd, + const VRegister& vn, + NEONAcrossLanesOp op, + Instr op_half) { + VIXL_ASSERT((vn.Is8B() && vd.Is1B()) || (vn.Is16B() && vd.Is1B()) || + (vn.Is4H() && vd.Is1H()) || (vn.Is8H() && vd.Is1H()) || + (vn.Is4S() && vd.Is1S())); + if ((op & NEONAcrossLanesFPFMask) == NEONAcrossLanesFPFixed) { + if (vd.Is1H()) { + VIXL_ASSERT(op_half != 0); + Instr vop = op_half; + if (vn.Is8H()) { + vop |= NEON_Q; + } + Emit(vop | Rn(vn) | Rd(vd)); + } else { + Emit(FPFormat(vn) | op | Rn(vn) | Rd(vd)); + } + } else { + Emit(VFormat(vn) | op | Rn(vn) | Rd(vd)); + } +} + +// clang-format off +#define NEON_ACROSSLANES_LIST(V) \ + V(addv, NEON_ADDV) \ + V(smaxv, NEON_SMAXV) \ + V(sminv, NEON_SMINV) \ + V(umaxv, NEON_UMAXV) \ + V(uminv, NEON_UMINV) +// clang-format on + +#define VIXL_DEFINE_ASM_FUNC(FN, OP) \ + void Assembler::FN(const VRegister& vd, const VRegister& vn) { \ + VIXL_ASSERT(CPUHas(CPUFeatures::kNEON)); \ + NEONAcrossLanes(vd, vn, OP, 0); \ + } +NEON_ACROSSLANES_LIST(VIXL_DEFINE_ASM_FUNC) +#undef VIXL_DEFINE_ASM_FUNC + + +// clang-format off +#define NEON_ACROSSLANES_FP_LIST(V) \ + V(fmaxv, NEON_FMAXV, NEON_FMAXV_H) \ + V(fminv, NEON_FMINV, NEON_FMINV_H) \ + V(fmaxnmv, NEON_FMAXNMV, NEON_FMAXNMV_H) \ + V(fminnmv, NEON_FMINNMV, NEON_FMINNMV_H) \ +// clang-format on + +#define VIXL_DEFINE_ASM_FUNC(FN, OP, OP_H) \ + void Assembler::FN(const VRegister& vd, const VRegister& vn) { \ + VIXL_ASSERT(CPUHas(CPUFeatures::kFP, CPUFeatures::kNEON)); \ + if (vd.Is1H()) VIXL_ASSERT(CPUHas(CPUFeatures::kNEONHalf)); \ + VIXL_ASSERT(vd.Is1S() || vd.Is1H()); \ + NEONAcrossLanes(vd, vn, OP, OP_H); \ + } +NEON_ACROSSLANES_FP_LIST(VIXL_DEFINE_ASM_FUNC) +#undef VIXL_DEFINE_ASM_FUNC + + +void Assembler::NEONPerm(const VRegister& vd, + const VRegister& vn, + const VRegister& vm, + NEONPermOp op) { + VIXL_ASSERT(AreSameFormat(vd, vn, vm)); + VIXL_ASSERT(!vd.Is1D()); + Emit(VFormat(vd) | op | Rm(vm) | Rn(vn) | Rd(vd)); +} + + +void Assembler::trn1(const VRegister& vd, + const VRegister& vn, + const VRegister& vm) { + VIXL_ASSERT(CPUHas(CPUFeatures::kNEON)); + NEONPerm(vd, vn, vm, NEON_TRN1); +} + + +void Assembler::trn2(const VRegister& vd, + const VRegister& vn, + const VRegister& vm) { + VIXL_ASSERT(CPUHas(CPUFeatures::kNEON)); + NEONPerm(vd, vn, vm, NEON_TRN2); +} + + +void Assembler::uzp1(const VRegister& vd, + const VRegister& vn, + const VRegister& vm) { + VIXL_ASSERT(CPUHas(CPUFeatures::kNEON)); + NEONPerm(vd, vn, vm, NEON_UZP1); +} + + +void Assembler::uzp2(const VRegister& vd, + const VRegister& vn, + const VRegister& vm) { + VIXL_ASSERT(CPUHas(CPUFeatures::kNEON)); + NEONPerm(vd, vn, vm, NEON_UZP2); +} + + +void Assembler::zip1(const VRegister& vd, + const VRegister& vn, + const VRegister& vm) { + VIXL_ASSERT(CPUHas(CPUFeatures::kNEON)); + NEONPerm(vd, vn, vm, NEON_ZIP1); +} + + +void Assembler::zip2(const VRegister& vd, + const VRegister& vn, + const VRegister& vm) { + VIXL_ASSERT(CPUHas(CPUFeatures::kNEON)); + NEONPerm(vd, vn, vm, NEON_ZIP2); +} + + +void Assembler::NEONShiftImmediate(const VRegister& vd, + const VRegister& vn, + NEONShiftImmediateOp op, + int immh_immb) { + VIXL_ASSERT(AreSameFormat(vd, vn)); + Instr q, 
scalar; + if (vn.IsScalar()) { + q = NEON_Q; + scalar = NEONScalar; + } else { + q = vd.IsD() ? 0 : NEON_Q; + scalar = 0; + } + Emit(q | op | scalar | immh_immb | Rn(vn) | Rd(vd)); +} + + +void Assembler::NEONShiftLeftImmediate(const VRegister& vd, + const VRegister& vn, + int shift, + NEONShiftImmediateOp op) { + int lane_size_in_bits = vn.GetLaneSizeInBits(); + VIXL_ASSERT((shift >= 0) && (shift < lane_size_in_bits)); + NEONShiftImmediate(vd, vn, op, (lane_size_in_bits + shift) << 16); +} + + +void Assembler::NEONShiftRightImmediate(const VRegister& vd, + const VRegister& vn, + int shift, + NEONShiftImmediateOp op) { + int lane_size_in_bits = vn.GetLaneSizeInBits(); + VIXL_ASSERT((shift >= 1) && (shift <= lane_size_in_bits)); + NEONShiftImmediate(vd, vn, op, ((2 * lane_size_in_bits) - shift) << 16); +} + + +void Assembler::NEONShiftImmediateL(const VRegister& vd, + const VRegister& vn, + int shift, + NEONShiftImmediateOp op) { + int lane_size_in_bits = vn.GetLaneSizeInBits(); + VIXL_ASSERT((shift >= 0) && (shift < lane_size_in_bits)); + int immh_immb = (lane_size_in_bits + shift) << 16; + + VIXL_ASSERT((vn.Is8B() && vd.Is8H()) || (vn.Is4H() && vd.Is4S()) || + (vn.Is2S() && vd.Is2D()) || (vn.Is16B() && vd.Is8H()) || + (vn.Is8H() && vd.Is4S()) || (vn.Is4S() && vd.Is2D())); + Instr q; + q = vn.IsD() ? 0 : NEON_Q; + Emit(q | op | immh_immb | Rn(vn) | Rd(vd)); +} + + +void Assembler::NEONShiftImmediateN(const VRegister& vd, + const VRegister& vn, + int shift, + NEONShiftImmediateOp op) { + Instr q, scalar; + int lane_size_in_bits = vd.GetLaneSizeInBits(); + VIXL_ASSERT((shift >= 1) && (shift <= lane_size_in_bits)); + int immh_immb = (2 * lane_size_in_bits - shift) << 16; + + if (vn.IsScalar()) { + VIXL_ASSERT((vd.Is1B() && vn.Is1H()) || (vd.Is1H() && vn.Is1S()) || + (vd.Is1S() && vn.Is1D())); + q = NEON_Q; + scalar = NEONScalar; + } else { + VIXL_ASSERT((vd.Is8B() && vn.Is8H()) || (vd.Is4H() && vn.Is4S()) || + (vd.Is2S() && vn.Is2D()) || (vd.Is16B() && vn.Is8H()) || + (vd.Is8H() && vn.Is4S()) || (vd.Is4S() && vn.Is2D())); + scalar = 0; + q = vd.IsD() ? 
0 : NEON_Q; + } + Emit(q | op | scalar | immh_immb | Rn(vn) | Rd(vd)); +} + + +void Assembler::shl(const VRegister& vd, const VRegister& vn, int shift) { + VIXL_ASSERT(CPUHas(CPUFeatures::kNEON)); + VIXL_ASSERT(vd.IsVector() || vd.Is1D()); + NEONShiftLeftImmediate(vd, vn, shift, NEON_SHL); +} + + +void Assembler::sli(const VRegister& vd, const VRegister& vn, int shift) { + VIXL_ASSERT(CPUHas(CPUFeatures::kNEON)); + VIXL_ASSERT(vd.IsVector() || vd.Is1D()); + NEONShiftLeftImmediate(vd, vn, shift, NEON_SLI); +} + + +void Assembler::sqshl(const VRegister& vd, const VRegister& vn, int shift) { + VIXL_ASSERT(CPUHas(CPUFeatures::kNEON)); + NEONShiftLeftImmediate(vd, vn, shift, NEON_SQSHL_imm); +} + + +void Assembler::sqshlu(const VRegister& vd, const VRegister& vn, int shift) { + VIXL_ASSERT(CPUHas(CPUFeatures::kNEON)); + NEONShiftLeftImmediate(vd, vn, shift, NEON_SQSHLU); +} + + +void Assembler::uqshl(const VRegister& vd, const VRegister& vn, int shift) { + VIXL_ASSERT(CPUHas(CPUFeatures::kNEON)); + NEONShiftLeftImmediate(vd, vn, shift, NEON_UQSHL_imm); +} + + +void Assembler::sshll(const VRegister& vd, const VRegister& vn, int shift) { + VIXL_ASSERT(CPUHas(CPUFeatures::kNEON)); + VIXL_ASSERT(vn.IsD()); + NEONShiftImmediateL(vd, vn, shift, NEON_SSHLL); +} + + +void Assembler::sshll2(const VRegister& vd, const VRegister& vn, int shift) { + VIXL_ASSERT(CPUHas(CPUFeatures::kNEON)); + VIXL_ASSERT(vn.IsQ()); + NEONShiftImmediateL(vd, vn, shift, NEON_SSHLL); +} + + +void Assembler::sxtl(const VRegister& vd, const VRegister& vn) { + VIXL_ASSERT(CPUHas(CPUFeatures::kNEON)); + sshll(vd, vn, 0); +} + + +void Assembler::sxtl2(const VRegister& vd, const VRegister& vn) { + VIXL_ASSERT(CPUHas(CPUFeatures::kNEON)); + sshll2(vd, vn, 0); +} + + +void Assembler::ushll(const VRegister& vd, const VRegister& vn, int shift) { + VIXL_ASSERT(CPUHas(CPUFeatures::kNEON)); + VIXL_ASSERT(vn.IsD()); + NEONShiftImmediateL(vd, vn, shift, NEON_USHLL); +} + + +void Assembler::ushll2(const VRegister& vd, const VRegister& vn, int shift) { + VIXL_ASSERT(CPUHas(CPUFeatures::kNEON)); + VIXL_ASSERT(vn.IsQ()); + NEONShiftImmediateL(vd, vn, shift, NEON_USHLL); +} + + +void Assembler::uxtl(const VRegister& vd, const VRegister& vn) { + VIXL_ASSERT(CPUHas(CPUFeatures::kNEON)); + ushll(vd, vn, 0); +} + + +void Assembler::uxtl2(const VRegister& vd, const VRegister& vn) { + VIXL_ASSERT(CPUHas(CPUFeatures::kNEON)); + ushll2(vd, vn, 0); +} + + +void Assembler::sri(const VRegister& vd, const VRegister& vn, int shift) { + VIXL_ASSERT(CPUHas(CPUFeatures::kNEON)); + VIXL_ASSERT(vd.IsVector() || vd.Is1D()); + NEONShiftRightImmediate(vd, vn, shift, NEON_SRI); +} + + +void Assembler::sshr(const VRegister& vd, const VRegister& vn, int shift) { + VIXL_ASSERT(CPUHas(CPUFeatures::kNEON)); + VIXL_ASSERT(vd.IsVector() || vd.Is1D()); + NEONShiftRightImmediate(vd, vn, shift, NEON_SSHR); +} + + +void Assembler::ushr(const VRegister& vd, const VRegister& vn, int shift) { + VIXL_ASSERT(CPUHas(CPUFeatures::kNEON)); + VIXL_ASSERT(vd.IsVector() || vd.Is1D()); + NEONShiftRightImmediate(vd, vn, shift, NEON_USHR); +} + + +void Assembler::srshr(const VRegister& vd, const VRegister& vn, int shift) { + VIXL_ASSERT(CPUHas(CPUFeatures::kNEON)); + VIXL_ASSERT(vd.IsVector() || vd.Is1D()); + NEONShiftRightImmediate(vd, vn, shift, NEON_SRSHR); +} + + +void Assembler::urshr(const VRegister& vd, const VRegister& vn, int shift) { + VIXL_ASSERT(CPUHas(CPUFeatures::kNEON)); + VIXL_ASSERT(vd.IsVector() || vd.Is1D()); + NEONShiftRightImmediate(vd, vn, shift, NEON_URSHR); +} + + 
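// The two shift helpers above pack the shift amount and the element size into
// the single 7-bit immh:immb field at bits [22:16]: left shifts encode
// lane_size + shift and right shifts encode 2 * lane_size - shift, so the
// position of the leading set bit recovers the element size and the remaining
// bits recover the shift. A minimal standalone sketch of that packing follows
// (illustrative names only, not part of the patch):

#include <cassert>
#include <cstdint>

static uint32_t LeftShiftImmField(int lane_size_in_bits, int shift) {
  // Mirrors NEONShiftLeftImmediate: immh:immb = lane_size + shift.
  assert((shift >= 0) && (shift < lane_size_in_bits));
  return static_cast<uint32_t>(lane_size_in_bits + shift) << 16;
}

static uint32_t RightShiftImmField(int lane_size_in_bits, int shift) {
  // Mirrors NEONShiftRightImmediate: immh:immb = 2 * lane_size - shift.
  assert((shift >= 1) && (shift <= lane_size_in_bits));
  return static_cast<uint32_t>((2 * lane_size_in_bits) - shift) << 16;
}

int main() {
  // shl v0.8h, v1.8h, #3 : immh:immb = 16 + 3 = 0b0010011.
  assert(LeftShiftImmField(16, 3) == (19u << 16));
  // sshr v0.8h, v1.8h, #3 : immh:immb = 32 - 3 = 0b0011101.
  assert(RightShiftImmField(16, 3) == (29u << 16));
  return 0;
}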
+void Assembler::ssra(const VRegister& vd, const VRegister& vn, int shift) { + VIXL_ASSERT(CPUHas(CPUFeatures::kNEON)); + VIXL_ASSERT(vd.IsVector() || vd.Is1D()); + NEONShiftRightImmediate(vd, vn, shift, NEON_SSRA); +} + + +void Assembler::usra(const VRegister& vd, const VRegister& vn, int shift) { + VIXL_ASSERT(CPUHas(CPUFeatures::kNEON)); + VIXL_ASSERT(vd.IsVector() || vd.Is1D()); + NEONShiftRightImmediate(vd, vn, shift, NEON_USRA); +} + + +void Assembler::srsra(const VRegister& vd, const VRegister& vn, int shift) { + VIXL_ASSERT(CPUHas(CPUFeatures::kNEON)); + VIXL_ASSERT(vd.IsVector() || vd.Is1D()); + NEONShiftRightImmediate(vd, vn, shift, NEON_SRSRA); +} + + +void Assembler::ursra(const VRegister& vd, const VRegister& vn, int shift) { + VIXL_ASSERT(CPUHas(CPUFeatures::kNEON)); + VIXL_ASSERT(vd.IsVector() || vd.Is1D()); + NEONShiftRightImmediate(vd, vn, shift, NEON_URSRA); +} + + +void Assembler::shrn(const VRegister& vd, const VRegister& vn, int shift) { + VIXL_ASSERT(CPUHas(CPUFeatures::kNEON)); + VIXL_ASSERT(vn.IsVector() && vd.IsD()); + NEONShiftImmediateN(vd, vn, shift, NEON_SHRN); +} + + +void Assembler::shrn2(const VRegister& vd, const VRegister& vn, int shift) { + VIXL_ASSERT(CPUHas(CPUFeatures::kNEON)); + VIXL_ASSERT(vn.IsVector() && vd.IsQ()); + NEONShiftImmediateN(vd, vn, shift, NEON_SHRN); +} + + +void Assembler::rshrn(const VRegister& vd, const VRegister& vn, int shift) { + VIXL_ASSERT(CPUHas(CPUFeatures::kNEON)); + VIXL_ASSERT(vn.IsVector() && vd.IsD()); + NEONShiftImmediateN(vd, vn, shift, NEON_RSHRN); +} + + +void Assembler::rshrn2(const VRegister& vd, const VRegister& vn, int shift) { + VIXL_ASSERT(CPUHas(CPUFeatures::kNEON)); + VIXL_ASSERT(vn.IsVector() && vd.IsQ()); + NEONShiftImmediateN(vd, vn, shift, NEON_RSHRN); +} + + +void Assembler::sqshrn(const VRegister& vd, const VRegister& vn, int shift) { + VIXL_ASSERT(CPUHas(CPUFeatures::kNEON)); + VIXL_ASSERT(vd.IsD() || (vn.IsScalar() && vd.IsScalar())); + NEONShiftImmediateN(vd, vn, shift, NEON_SQSHRN); +} + + +void Assembler::sqshrn2(const VRegister& vd, const VRegister& vn, int shift) { + VIXL_ASSERT(CPUHas(CPUFeatures::kNEON)); + VIXL_ASSERT(vn.IsVector() && vd.IsQ()); + NEONShiftImmediateN(vd, vn, shift, NEON_SQSHRN); +} + + +void Assembler::sqrshrn(const VRegister& vd, const VRegister& vn, int shift) { + VIXL_ASSERT(CPUHas(CPUFeatures::kNEON)); + VIXL_ASSERT(vd.IsD() || (vn.IsScalar() && vd.IsScalar())); + NEONShiftImmediateN(vd, vn, shift, NEON_SQRSHRN); +} + + +void Assembler::sqrshrn2(const VRegister& vd, const VRegister& vn, int shift) { + VIXL_ASSERT(CPUHas(CPUFeatures::kNEON)); + VIXL_ASSERT(vn.IsVector() && vd.IsQ()); + NEONShiftImmediateN(vd, vn, shift, NEON_SQRSHRN); +} + + +void Assembler::sqshrun(const VRegister& vd, const VRegister& vn, int shift) { + VIXL_ASSERT(CPUHas(CPUFeatures::kNEON)); + VIXL_ASSERT(vd.IsD() || (vn.IsScalar() && vd.IsScalar())); + NEONShiftImmediateN(vd, vn, shift, NEON_SQSHRUN); +} + + +void Assembler::sqshrun2(const VRegister& vd, const VRegister& vn, int shift) { + VIXL_ASSERT(CPUHas(CPUFeatures::kNEON)); + VIXL_ASSERT(vn.IsVector() && vd.IsQ()); + NEONShiftImmediateN(vd, vn, shift, NEON_SQSHRUN); +} + + +void Assembler::sqrshrun(const VRegister& vd, const VRegister& vn, int shift) { + VIXL_ASSERT(CPUHas(CPUFeatures::kNEON)); + VIXL_ASSERT(vd.IsD() || (vn.IsScalar() && vd.IsScalar())); + NEONShiftImmediateN(vd, vn, shift, NEON_SQRSHRUN); +} + + +void Assembler::sqrshrun2(const VRegister& vd, const VRegister& vn, int shift) { + VIXL_ASSERT(CPUHas(CPUFeatures::kNEON)); + 
VIXL_ASSERT(vn.IsVector() && vd.IsQ()); + NEONShiftImmediateN(vd, vn, shift, NEON_SQRSHRUN); +} + + +void Assembler::uqshrn(const VRegister& vd, const VRegister& vn, int shift) { + VIXL_ASSERT(CPUHas(CPUFeatures::kNEON)); + VIXL_ASSERT(vd.IsD() || (vn.IsScalar() && vd.IsScalar())); + NEONShiftImmediateN(vd, vn, shift, NEON_UQSHRN); +} + + +void Assembler::uqshrn2(const VRegister& vd, const VRegister& vn, int shift) { + VIXL_ASSERT(CPUHas(CPUFeatures::kNEON)); + VIXL_ASSERT(vn.IsVector() && vd.IsQ()); + NEONShiftImmediateN(vd, vn, shift, NEON_UQSHRN); +} + + +void Assembler::uqrshrn(const VRegister& vd, const VRegister& vn, int shift) { + VIXL_ASSERT(CPUHas(CPUFeatures::kNEON)); + VIXL_ASSERT(vd.IsD() || (vn.IsScalar() && vd.IsScalar())); + NEONShiftImmediateN(vd, vn, shift, NEON_UQRSHRN); +} + + +void Assembler::uqrshrn2(const VRegister& vd, const VRegister& vn, int shift) { + VIXL_ASSERT(CPUHas(CPUFeatures::kNEON)); + VIXL_ASSERT(vn.IsVector() && vd.IsQ()); + NEONShiftImmediateN(vd, vn, shift, NEON_UQRSHRN); +} + +void Assembler::smmla(const VRegister& vd, const VRegister& vn, const VRegister& vm) { + VIXL_ASSERT(CPUHas(CPUFeatures::kNEON)); + VIXL_ASSERT(CPUHas(CPUFeatures::kI8MM)); + VIXL_ASSERT(vd.IsLaneSizeS()); + VIXL_ASSERT(vn.IsLaneSizeB() && vm.IsLaneSizeB()); + + Emit(0x4e80a400 | Rd(vd) | Rn(vn) | Rm(vm)); +} + +void Assembler::usmmla(const VRegister& vd, const VRegister& vn, const VRegister& vm) { + VIXL_ASSERT(CPUHas(CPUFeatures::kNEON)); + VIXL_ASSERT(CPUHas(CPUFeatures::kI8MM)); + VIXL_ASSERT(vd.IsLaneSizeS()); + VIXL_ASSERT(vn.IsLaneSizeB() && vm.IsLaneSizeB()); + + Emit(0x4e80ac00 | Rd(vd) | Rn(vn) | Rm(vm)); +} + +void Assembler::ummla(const VRegister& vd, const VRegister& vn, const VRegister& vm) { + VIXL_ASSERT(CPUHas(CPUFeatures::kNEON)); + VIXL_ASSERT(CPUHas(CPUFeatures::kI8MM)); + VIXL_ASSERT(vd.IsLaneSizeS()); + VIXL_ASSERT(vn.IsLaneSizeB() && vm.IsLaneSizeB()); + + Emit(0x6e80a400 | Rd(vd) | Rn(vn) | Rm(vm)); +} + +// Note: +// For all ToImm instructions below, a difference in case +// for the same letter indicates a negated bit. +// If b is 1, then B is 0. 
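// As a worked illustration of the abcdefgh immediate format that the note
// above and the FP*ToImm8 helpers below describe, here is a minimal standalone
// sketch of the single-precision case, assuming the input has already passed
// an IsImmFP32-style validity check. Names are illustrative only and the
// snippet is not part of the patch:

#include <cassert>
#include <cstdint>
#include <cstring>

static uint32_t RawBitsOf(float f) {
  uint32_t bits;
  std::memcpy(&bits, &f, sizeof(bits));
  return bits;
}

static uint32_t Fp32ToImm8Sketch(float f) {
  // Input layout: aBbb.bbbc.defg.h000.0000.0000.0000.0000
  uint32_t bits = RawBitsOf(f);
  uint32_t bit7 = ((bits >> 31) & 0x1) << 7;  // a (sign)
  uint32_t bit6 = ((bits >> 29) & 0x1) << 6;  // b (one repeated exponent bit)
  uint32_t bit5_to_0 = (bits >> 19) & 0x3f;   // cdefgh
  return bit7 | bit6 | bit5_to_0;
}

int main() {
  assert(Fp32ToImm8Sketch(1.0f) == 0x70);   //  1.0 -> abcdefgh = 0111 0000
  assert(Fp32ToImm8Sketch(2.0f) == 0x00);   //  2.0 -> abcdefgh = 0000 0000
  assert(Fp32ToImm8Sketch(-1.5f) == 0xf8);  // -1.5 -> abcdefgh = 1111 1000
  return 0;
}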
+uint32_t Assembler::FP16ToImm8(Float16 imm) { + VIXL_ASSERT(IsImmFP16(imm)); + // Half: aBbb.cdef.gh00.0000 (16 bits) + uint16_t bits = Float16ToRawbits(imm); + // bit7: a000.0000 + uint16_t bit7 = ((bits >> 15) & 0x1) << 7; + // bit6: 0b00.0000 + uint16_t bit6 = ((bits >> 13) & 0x1) << 6; + // bit5_to_0: 00cd.efgh + uint16_t bit5_to_0 = (bits >> 6) & 0x3f; + uint32_t result = static_cast(bit7 | bit6 | bit5_to_0); + return result; +} + + +Instr Assembler::ImmFP16(Float16 imm) { + return FP16ToImm8(imm) << ImmFP_offset; +} + + +uint32_t Assembler::FP32ToImm8(float imm) { + // bits: aBbb.bbbc.defg.h000.0000.0000.0000.0000 + uint32_t bits = FloatToRawbits(imm); + VIXL_ASSERT(IsImmFP32(bits)); + // bit7: a000.0000 + uint32_t bit7 = ((bits >> 31) & 0x1) << 7; + // bit6: 0b00.0000 + uint32_t bit6 = ((bits >> 29) & 0x1) << 6; + // bit5_to_0: 00cd.efgh + uint32_t bit5_to_0 = (bits >> 19) & 0x3f; + + return bit7 | bit6 | bit5_to_0; +} + + +Instr Assembler::ImmFP32(float imm) { return FP32ToImm8(imm) << ImmFP_offset; } + + +uint32_t Assembler::FP64ToImm8(double imm) { + // bits: aBbb.bbbb.bbcd.efgh.0000.0000.0000.0000 + // 0000.0000.0000.0000.0000.0000.0000.0000 + uint64_t bits = DoubleToRawbits(imm); + VIXL_ASSERT(IsImmFP64(bits)); + // bit7: a000.0000 + uint64_t bit7 = ((bits >> 63) & 0x1) << 7; + // bit6: 0b00.0000 + uint64_t bit6 = ((bits >> 61) & 0x1) << 6; + // bit5_to_0: 00cd.efgh + uint64_t bit5_to_0 = (bits >> 48) & 0x3f; + + return static_cast(bit7 | bit6 | bit5_to_0); +} + + +Instr Assembler::ImmFP64(double imm) { return FP64ToImm8(imm) << ImmFP_offset; } + + +// Code generation helpers. +bool Assembler::OneInstrMoveImmediateHelper(Assembler* assm, + const Register& dst, + uint64_t imm) { + bool emit_code = assm != NULL; + unsigned n, imm_s, imm_r; + int reg_size = dst.GetSizeInBits(); + + if (IsImmMovz(imm, reg_size) && !dst.IsSP()) { + // Immediate can be represented in a move zero instruction. Movz can't write + // to the stack pointer. + if (emit_code) { + assm->movz(dst, imm); + } + return true; + } else if (IsImmMovn(imm, reg_size) && !dst.IsSP()) { + // Immediate can be represented in a move negative instruction. Movn can't + // write to the stack pointer. + if (emit_code) { + assm->movn(dst, dst.Is64Bits() ? ~imm : (~imm & kWRegMask)); + } + return true; + } else if (IsImmLogical(imm, reg_size, &n, &imm_s, &imm_r)) { + // Immediate can be represented in a logical orr instruction. + VIXL_ASSERT(!dst.IsZero()); + if (emit_code) { + assm->LogicalImmediate(dst, + AppropriateZeroRegFor(dst), + n, + imm_s, + imm_r, + ORR); + } + return true; + } + return false; +} + + +void Assembler::MoveWide(const Register& rd, + uint64_t imm, + int shift, + MoveWideImmediateOp mov_op) { + // Ignore the top 32 bits of an immediate if we're moving to a W register. + if (rd.Is32Bits()) { + // Check that the top 32 bits are zero (a positive 32-bit number) or top + // 33 bits are one (a negative 32-bit number, sign extended to 64 bits). + VIXL_ASSERT(((imm >> kWRegSize) == 0) || + ((imm >> (kWRegSize - 1)) == 0x1ffffffff)); + imm &= kWRegMask; + } + + if (shift >= 0) { + // Explicit shift specified. + VIXL_ASSERT((shift == 0) || (shift == 16) || (shift == 32) || + (shift == 48)); + VIXL_ASSERT(rd.Is64Bits() || (shift == 0) || (shift == 16)); + shift /= 16; + } else { + // Calculate a new immediate and shift combination to encode the immediate + // argument. + VIXL_ASSERT(shift == -1); + shift = 0; + if ((imm & 0xffffffffffff0000) == 0) { + // Nothing to do. 
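      // For example, imm = 0x1234 is encoded directly with shift (hw) = 0.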
+ } else if ((imm & 0xffffffff0000ffff) == 0) { + imm >>= 16; + shift = 1; + } else if ((imm & 0xffff0000ffffffff) == 0) { + VIXL_ASSERT(rd.Is64Bits()); + imm >>= 32; + shift = 2; + } else if ((imm & 0x0000ffffffffffff) == 0) { + VIXL_ASSERT(rd.Is64Bits()); + imm >>= 48; + shift = 3; + } + } + + VIXL_ASSERT(IsUint16(imm)); + + Emit(SF(rd) | MoveWideImmediateFixed | mov_op | Rd(rd) | ImmMoveWide(imm) | + ShiftMoveWide(shift)); +} + + +void Assembler::AddSub(const Register& rd, + const Register& rn, + const Operand& operand, + FlagsUpdate S, + AddSubOp op) { + VIXL_ASSERT(rd.GetSizeInBits() == rn.GetSizeInBits()); + if (operand.IsImmediate()) { + int64_t immediate = operand.GetImmediate(); + VIXL_ASSERT(IsImmAddSub(immediate)); + Instr dest_reg = (S == SetFlags) ? Rd(rd) : RdSP(rd); + Emit(SF(rd) | AddSubImmediateFixed | op | Flags(S) | + ImmAddSub(static_cast(immediate)) | dest_reg | RnSP(rn)); + } else if (operand.IsShiftedRegister()) { + VIXL_ASSERT(operand.GetRegister().GetSizeInBits() == rd.GetSizeInBits()); + VIXL_ASSERT(operand.GetShift() != ROR); + + // For instructions of the form: + // add/sub wsp, , [, LSL #0-3 ] + // add/sub , wsp, [, LSL #0-3 ] + // add/sub wsp, wsp, [, LSL #0-3 ] + // adds/subs , wsp, [, LSL #0-3 ] + // or their 64-bit register equivalents, convert the operand from shifted to + // extended register mode, and emit an add/sub extended instruction. + if (rn.IsSP() || rd.IsSP()) { + VIXL_ASSERT(!(rd.IsSP() && (S == SetFlags))); + DataProcExtendedRegister(rd, + rn, + operand.ToExtendedRegister(), + S, + static_cast(AddSubExtendedFixed) | static_cast(op)); + } else { + DataProcShiftedRegister(rd, rn, operand, S, + static_cast(AddSubShiftedFixed) | static_cast(op)); + } + } else { + VIXL_ASSERT(operand.IsExtendedRegister()); + DataProcExtendedRegister(rd, rn, operand, S, + static_cast(AddSubExtendedFixed) | static_cast(op)); + } +} + + +void Assembler::AddSubWithCarry(const Register& rd, + const Register& rn, + const Operand& operand, + FlagsUpdate S, + AddSubWithCarryOp op) { + VIXL_ASSERT(rd.GetSizeInBits() == rn.GetSizeInBits()); + VIXL_ASSERT(rd.GetSizeInBits() == operand.GetRegister().GetSizeInBits()); + VIXL_ASSERT(operand.IsShiftedRegister() && (operand.GetShiftAmount() == 0)); + Emit(SF(rd) | op | Flags(S) | Rm(operand.GetRegister()) | Rn(rn) | Rd(rd)); +} + + +void Assembler::hlt(int code) { + VIXL_ASSERT(IsUint16(code)); + Emit(HLT | ImmException(code)); +} + + +void Assembler::brk(int code) { + VIXL_ASSERT(IsUint16(code)); + Emit(BRK | ImmException(code)); +} + + +void Assembler::svc(int code) { Emit(SVC | ImmException(code)); } + +void Assembler::udf(int code) { Emit(UDF | ImmUdf(code)); } + + +// TODO(all): The third parameter should be passed by reference but gcc 4.8.2 +// reports a bogus uninitialised warning then. +void Assembler::Logical(const Register& rd, + const Register& rn, + const Operand operand, + LogicalOp op) { + VIXL_ASSERT(rd.GetSizeInBits() == rn.GetSizeInBits()); + if (operand.IsImmediate()) { + int64_t immediate = operand.GetImmediate(); + unsigned reg_size = rd.GetSizeInBits(); + + VIXL_ASSERT(immediate != 0); + VIXL_ASSERT(immediate != -1); + VIXL_ASSERT(rd.Is64Bits() || IsUint32(immediate)); + + // If the operation is NOT, invert the operation and immediate. + if ((op & NOT) == NOT) { + op = static_cast(op & ~NOT); + immediate = rd.Is64Bits() ? ~immediate : (~immediate & kWRegMask); + } + + unsigned n, imm_s, imm_r; + if (IsImmLogical(immediate, reg_size, &n, &imm_s, &imm_r)) { + // Immediate can be encoded in the instruction. 
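      // For example, 0x0000ffff0000ffff (16 set bits repeated every 32 bits)
      // reaches this path; a value like 0x12345678 is not a valid bitmask
      // immediate and is dealt with by the macro assembler before this point.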
+ LogicalImmediate(rd, rn, n, imm_s, imm_r, op); + } else { + // This case is handled in the macro assembler. + VIXL_UNREACHABLE(); + } + } else { + VIXL_ASSERT(operand.IsShiftedRegister()); + VIXL_ASSERT(operand.GetRegister().GetSizeInBits() == rd.GetSizeInBits()); + Instr dp_op = static_cast(op) | static_cast(LogicalShiftedFixed); + DataProcShiftedRegister(rd, rn, operand, LeaveFlags, dp_op); + } +} + + +void Assembler::LogicalImmediate(const Register& rd, + const Register& rn, + unsigned n, + unsigned imm_s, + unsigned imm_r, + LogicalOp op) { + unsigned reg_size = rd.GetSizeInBits(); + Instr dest_reg = (op == ANDS) ? Rd(rd) : RdSP(rd); + Emit(SF(rd) | LogicalImmediateFixed | op | BitN(n, reg_size) | + ImmSetBits(imm_s, reg_size) | ImmRotate(imm_r, reg_size) | dest_reg | + Rn(rn)); +} + + +void Assembler::ConditionalCompare(const Register& rn, + const Operand& operand, + StatusFlags nzcv, + Condition cond, + ConditionalCompareOp op) { + Instr ccmpop; + if (operand.IsImmediate()) { + int64_t immediate = operand.GetImmediate(); + VIXL_ASSERT(IsImmConditionalCompare(immediate)); + ccmpop = static_cast(ConditionalCompareImmediateFixed) | + static_cast(op) | + ImmCondCmp(static_cast(immediate)); + } else { + VIXL_ASSERT(operand.IsShiftedRegister() && (operand.GetShiftAmount() == 0)); + ccmpop = static_cast(ConditionalCompareRegisterFixed) | + static_cast(op) | + Rm(operand.GetRegister()); + } + Emit(SF(rn) | ccmpop | Cond(cond) | Rn(rn) | Nzcv(nzcv)); +} + + +void Assembler::DataProcessing1Source(const Register& rd, + const Register& rn, + DataProcessing1SourceOp op) { + VIXL_ASSERT(rd.GetSizeInBits() == rn.GetSizeInBits()); + Emit(SF(rn) | op | Rn(rn) | Rd(rd)); +} + + +void Assembler::FPDataProcessing1Source(const VRegister& vd, + const VRegister& vn, + FPDataProcessing1SourceOp op) { + VIXL_ASSERT(vd.Is1H() || vd.Is1S() || vd.Is1D()); + Emit(FPType(vn) | op | Rn(vn) | Rd(vd)); +} + + +void Assembler::FPDataProcessing3Source(const VRegister& vd, + const VRegister& vn, + const VRegister& vm, + const VRegister& va, + FPDataProcessing3SourceOp op) { + VIXL_ASSERT(vd.Is1H() || vd.Is1S() || vd.Is1D()); + VIXL_ASSERT(AreSameSizeAndType(vd, vn, vm, va)); + Emit(FPType(vd) | op | Rm(vm) | Rn(vn) | Rd(vd) | Ra(va)); +} + + +void Assembler::NEONModifiedImmShiftLsl(const VRegister& vd, + const int imm8, + const int left_shift, + NEONModifiedImmediateOp op) { + VIXL_ASSERT(vd.Is8B() || vd.Is16B() || vd.Is4H() || vd.Is8H() || vd.Is2S() || + vd.Is4S()); + VIXL_ASSERT((left_shift == 0) || (left_shift == 8) || (left_shift == 16) || + (left_shift == 24)); + VIXL_ASSERT(IsUint8(imm8)); + + int cmode_1, cmode_2, cmode_3; + if (vd.Is8B() || vd.Is16B()) { + VIXL_ASSERT(op == NEONModifiedImmediate_MOVI); + cmode_1 = 1; + cmode_2 = 1; + cmode_3 = 1; + } else { + cmode_1 = (left_shift >> 3) & 1; + cmode_2 = left_shift >> 4; + cmode_3 = 0; + if (vd.Is4H() || vd.Is8H()) { + VIXL_ASSERT((left_shift == 0) || (left_shift == 8)); + cmode_3 = 1; + } + } + int cmode = (cmode_3 << 3) | (cmode_2 << 2) | (cmode_1 << 1); + + int q = vd.IsQ() ? NEON_Q : 0; + + Emit(q | op | ImmNEONabcdefgh(imm8) | NEONCmode(cmode) | Rd(vd)); +} + + +void Assembler::NEONModifiedImmShiftMsl(const VRegister& vd, + const int imm8, + const int shift_amount, + NEONModifiedImmediateOp op) { + VIXL_ASSERT(vd.Is2S() || vd.Is4S()); + VIXL_ASSERT((shift_amount == 8) || (shift_amount == 16)); + VIXL_ASSERT(IsUint8(imm8)); + + int cmode_0 = (shift_amount >> 4) & 1; + int cmode = 0xc | cmode_0; + + int q = vd.IsQ() ? 
NEON_Q : 0; + + Emit(q | op | ImmNEONabcdefgh(imm8) | NEONCmode(cmode) | Rd(vd)); +} + + +void Assembler::EmitShift(const Register& rd, + const Register& rn, + Shift shift, + unsigned shift_amount) { + switch (shift) { + case LSL: + lsl(rd, rn, shift_amount); + break; + case LSR: + lsr(rd, rn, shift_amount); + break; + case ASR: + asr(rd, rn, shift_amount); + break; + case ROR: + ror(rd, rn, shift_amount); + break; + default: + VIXL_UNREACHABLE(); + } +} + + +void Assembler::EmitExtendShift(const Register& rd, + const Register& rn, + Extend extend, + unsigned left_shift) { + VIXL_ASSERT(rd.GetSizeInBits() >= rn.GetSizeInBits()); + unsigned reg_size = rd.GetSizeInBits(); + // Use the correct size of register. + Register rn_ = Register(rn.GetCode(), rd.GetSizeInBits()); + // Bits extracted are high_bit:0. + unsigned high_bit = (8 << (extend & 0x3)) - 1; + // Number of bits left in the result that are not introduced by the shift. + unsigned non_shift_bits = (reg_size - left_shift) & (reg_size - 1); + + if ((non_shift_bits > high_bit) || (non_shift_bits == 0)) { + switch (extend) { + case UXTB: + case UXTH: + case UXTW: + ubfm(rd, rn_, non_shift_bits, high_bit); + break; + case SXTB: + case SXTH: + case SXTW: + sbfm(rd, rn_, non_shift_bits, high_bit); + break; + case UXTX: + case SXTX: { + VIXL_ASSERT(rn.GetSizeInBits() == kXRegSize); + // Nothing to extend. Just shift. + lsl(rd, rn_, left_shift); + break; + } + default: + VIXL_UNREACHABLE(); + } + } else { + // No need to extend as the extended bits would be shifted away. + lsl(rd, rn_, left_shift); + } +} + + +void Assembler::DataProcShiftedRegister(const Register& rd, + const Register& rn, + const Operand& operand, + FlagsUpdate S, + Instr op) { + VIXL_ASSERT(operand.IsShiftedRegister()); + VIXL_ASSERT(rn.Is64Bits() || + (rn.Is32Bits() && IsUint5(operand.GetShiftAmount()))); + Emit(SF(rd) | op | Flags(S) | ShiftDP(operand.GetShift()) | + ImmDPShift(operand.GetShiftAmount()) | Rm(operand.GetRegister()) | + Rn(rn) | Rd(rd)); +} + + +void Assembler::DataProcExtendedRegister(const Register& rd, + const Register& rn, + const Operand& operand, + FlagsUpdate S, + Instr op) { + Instr dest_reg = (S == SetFlags) ? Rd(rd) : RdSP(rd); + Emit(SF(rd) | op | Flags(S) | Rm(operand.GetRegister()) | + ExtendMode(operand.GetExtend()) | + ImmExtendShift(operand.GetShiftAmount()) | dest_reg | RnSP(rn)); +} + + +Instr Assembler::LoadStoreMemOperand(const MemOperand& addr, + unsigned access_size_in_bytes_log2, + LoadStoreScalingOption option) { + Instr base = RnSP(addr.GetBaseRegister()); + int64_t offset = addr.GetOffset(); + + if (addr.IsImmediateOffset()) { + bool prefer_unscaled = + (option == PreferUnscaledOffset) || (option == RequireUnscaledOffset); + if (prefer_unscaled && IsImmLSUnscaled(offset)) { + // Use the unscaled addressing mode. + return base | LoadStoreUnscaledOffsetFixed | ImmLS(offset); + } + + if ((option != RequireUnscaledOffset) && + IsImmLSScaled(offset, access_size_in_bytes_log2)) { + // We need `offset` to be positive for the shift to be well-defined. + // IsImmLSScaled should check this. + VIXL_ASSERT(offset >= 0); + // Use the scaled addressing mode. + return base | LoadStoreUnsignedOffsetFixed | + ImmLSUnsigned(offset >> access_size_in_bytes_log2); + } + + if ((option != RequireScaledOffset) && IsImmLSUnscaled(offset)) { + // Use the unscaled addressing mode. + return base | LoadStoreUnscaledOffsetFixed | ImmLS(offset); + } + } + + // All remaining addressing modes are register-offset, pre-indexed or + // post-indexed modes. 
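  // A caller that required a scaled or unscaled immediate offset cannot be
  // satisfied by any of these modes, which is what the assertion below checks.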
+ VIXL_ASSERT((option != RequireUnscaledOffset) && + (option != RequireScaledOffset)); + + if (addr.IsRegisterOffset()) { + Extend ext = addr.GetExtend(); + Shift shift = addr.GetShift(); + unsigned shift_amount = addr.GetShiftAmount(); + + // LSL is encoded in the option field as UXTX. + if (shift == LSL) { + ext = UXTX; + } + + // Shifts are encoded in one bit, indicating a left shift by the memory + // access size. + VIXL_ASSERT((shift_amount == 0) || (shift_amount == access_size_in_bytes_log2)); + return base | LoadStoreRegisterOffsetFixed | Rm(addr.GetRegisterOffset()) | + ExtendMode(ext) | ImmShiftLS((shift_amount > 0) ? 1 : 0); + } + + if (addr.IsImmediatePreIndex() && IsImmLSUnscaled(offset)) { + return base | LoadStorePreIndexFixed | ImmLS(offset); + } + + if (addr.IsImmediatePostIndex() && IsImmLSUnscaled(offset)) { + return base | LoadStorePostIndexFixed | ImmLS(offset); + } + + // If this point is reached, the MemOperand (addr) cannot be encoded. + VIXL_UNREACHABLE(); + return 0; +} + + +void Assembler::LoadStore(const CPURegister& rt, + const MemOperand& addr, + LoadStoreOp op, + LoadStoreScalingOption option) { + VIXL_ASSERT(CPUHas(rt)); + Emit(op | Rt(rt) | LoadStoreMemOperand(addr, CalcLSDataSize(op), option)); +} + +void Assembler::LoadStorePAC(const Register& xt, + const MemOperand& addr, + LoadStorePACOp op) { + VIXL_ASSERT(xt.Is64Bits()); + VIXL_ASSERT(addr.IsImmediateOffset() || addr.IsImmediatePreIndex()); + + Instr pac_op = op; + if (addr.IsImmediatePreIndex()) { + pac_op |= LoadStorePACPreBit; + } + + Instr base = RnSP(addr.GetBaseRegister()); + int64_t offset = addr.GetOffset(); + + Emit(pac_op | Rt(xt) | base | ImmLSPAC(static_cast(offset))); +} + + +void Assembler::Prefetch(int op, + const MemOperand& addr, + LoadStoreScalingOption option) { + VIXL_ASSERT(addr.IsRegisterOffset() || addr.IsImmediateOffset()); + + Instr prfop = ImmPrefetchOperation(op); + Emit(PRFM | prfop | LoadStoreMemOperand(addr, kXRegSizeInBytesLog2, option)); +} + +void Assembler::Prefetch(PrefetchOperation op, + const MemOperand& addr, + LoadStoreScalingOption option) { + // Passing unnamed values in 'op' is undefined behaviour in C++. + VIXL_ASSERT(IsNamedPrefetchOperation(op)); + Prefetch(static_cast(op), addr, option); +} + + +bool Assembler::IsImmAddSub(int64_t immediate) { + return IsUint12(immediate) || + (IsUint12(immediate >> 12) && ((immediate & 0xfff) == 0)); +} + + +bool Assembler::IsImmConditionalCompare(int64_t immediate) { + return IsUint5(immediate); +} + + +bool Assembler::IsImmFP16(Float16 imm) { + // Valid values will have the form: + // aBbb.cdef.gh00.000 + uint16_t bits = Float16ToRawbits(imm); + // bits[6..0] are cleared. + if ((bits & 0x3f) != 0) { + return false; + } + + // bits[13..12] are all set or all cleared. + uint16_t b_pattern = (bits >> 12) & 0x03; + if (b_pattern != 0 && b_pattern != 0x03) { + return false; + } + + // bit[15] and bit[14] are opposite. + if (((bits ^ (bits << 1)) & 0x4000) == 0) { + return false; + } + + return true; +} + + +bool Assembler::IsImmFP32(uint32_t bits) { + // Valid values will have the form: + // aBbb.bbbc.defg.h000.0000.0000.0000.0000 + // bits[19..0] are cleared. + if ((bits & 0x7ffff) != 0) { + return false; + } + + // bits[29..25] are all set or all cleared. + uint32_t b_pattern = (bits >> 16) & 0x3e00; + if (b_pattern != 0 && b_pattern != 0x3e00) { + return false; + } + + // bit[30] and bit[29] are opposite. 
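  // In (bits ^ (bits << 1)), bit 30 is bit[30] ^ bit[29], so masking with
  // 0x40000000 tests whether the two bits differ.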
+ if (((bits ^ (bits << 1)) & 0x40000000) == 0) { + return false; + } + + return true; +} + + +bool Assembler::IsImmFP64(uint64_t bits) { + // Valid values will have the form: + // aBbb.bbbb.bbcd.efgh.0000.0000.0000.0000 + // 0000.0000.0000.0000.0000.0000.0000.0000 + // bits[47..0] are cleared. + if ((bits & 0x0000ffffffffffff) != 0) { + return false; + } + + // bits[61..54] are all set or all cleared. + uint32_t b_pattern = (bits >> 48) & 0x3fc0; + if ((b_pattern != 0) && (b_pattern != 0x3fc0)) { + return false; + } + + // bit[62] and bit[61] are opposite. + if (((bits ^ (bits << 1)) & (UINT64_C(1) << 62)) == 0) { + return false; + } + + return true; +} + + +bool Assembler::IsImmLSPair(int64_t offset, unsigned access_size_in_bytes_log2) { + const auto access_size_in_bytes = 1U << access_size_in_bytes_log2; + VIXL_ASSERT(access_size_in_bytes_log2 <= kQRegSizeInBytesLog2); + return IsMultiple(offset, access_size_in_bytes) && + IsInt7(offset / access_size_in_bytes); +} + + +bool Assembler::IsImmLSScaled(int64_t offset, unsigned access_size_in_bytes_log2) { + const auto access_size_in_bytes = 1U << access_size_in_bytes_log2; + VIXL_ASSERT(access_size_in_bytes_log2 <= kQRegSizeInBytesLog2); + return IsMultiple(offset, access_size_in_bytes) && + IsUint12(offset / access_size_in_bytes); +} + + +bool Assembler::IsImmLSUnscaled(int64_t offset) { return IsInt9(offset); } + + +// The movn instruction can generate immediates containing an arbitrary 16-bit +// value, with remaining bits set, eg. 0xffff1234, 0xffff1234ffffffff. +bool Assembler::IsImmMovn(uint64_t imm, unsigned reg_size) { + return IsImmMovz(~imm, reg_size); +} + + +// The movz instruction can generate immediates containing an arbitrary 16-bit +// value, with remaining bits clear, eg. 0x00001234, 0x0000123400000000. +bool Assembler::IsImmMovz(uint64_t imm, unsigned reg_size) { + VIXL_ASSERT((reg_size == kXRegSize) || (reg_size == kWRegSize)); + return CountClearHalfWords(imm, reg_size) >= ((reg_size / 16) - 1); +} + + +// Test if a given value can be encoded in the immediate field of a logical +// instruction. +// If it can be encoded, the function returns true, and values pointed to by n, +// imm_s and imm_r are updated with immediates encoded in the format required +// by the corresponding fields in the logical instruction. +// If it can not be encoded, the function returns false, and the values pointed +// to by n, imm_s and imm_r are undefined. +bool Assembler::IsImmLogical(uint64_t value, + unsigned width, + unsigned* n, + unsigned* imm_s, + unsigned* imm_r) { + VIXL_ASSERT((width == kBRegSize) || (width == kHRegSize) || + (width == kSRegSize) || (width == kDRegSize)); + + bool negate = false; + + // Logical immediates are encoded using parameters n, imm_s and imm_r using + // the following table: + // + // N imms immr size S R + // 1 ssssss rrrrrr 64 UInt(ssssss) UInt(rrrrrr) + // 0 0sssss xrrrrr 32 UInt(sssss) UInt(rrrrr) + // 0 10ssss xxrrrr 16 UInt(ssss) UInt(rrrr) + // 0 110sss xxxrrr 8 UInt(sss) UInt(rrr) + // 0 1110ss xxxxrr 4 UInt(ss) UInt(rr) + // 0 11110s xxxxxr 2 UInt(s) UInt(r) + // (s bits must not be all set) + // + // A pattern is constructed of size bits, where the least significant S+1 bits + // are set. The pattern is rotated right by R, and repeated across a 32 or + // 64-bit value, depending on destination register width. + // + // Put another way: the basic format of a logical immediate is a single + // contiguous stretch of 1 bits, repeated across the whole word at intervals + // given by a power of 2. 
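  // For example, 0x00ff00ff00ff00ff (eight set bits repeated every sixteen
  // bits) is a valid logical immediate, whereas 0x00ff00ff00ff00fe is not.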
To identify them quickly, we first locate the + // lowest stretch of 1 bits, then the next 1 bit above that; that combination + // is different for every logical immediate, so it gives us all the + // information we need to identify the only logical immediate that our input + // could be, and then we simply check if that's the value we actually have. + // + // (The rotation parameter does give the possibility of the stretch of 1 bits + // going 'round the end' of the word. To deal with that, we observe that in + // any situation where that happens the bitwise NOT of the value is also a + // valid logical immediate. So we simply invert the input whenever its low bit + // is set, and then we know that the rotated case can't arise.) + + if (value & 1) { + // If the low bit is 1, negate the value, and set a flag to remember that we + // did (so that we can adjust the return values appropriately). + negate = true; + value = ~value; + } + + if (width <= kWRegSize) { + // To handle 8/16/32-bit logical immediates, the very easiest thing is to repeat + // the input value to fill a 64-bit word. The correct encoding of that as a + // logical immediate will also be the correct encoding of the value. + + // Avoid making the assumption that the most-significant 56/48/32 bits are zero by + // shifting the value left and duplicating it. + for (unsigned bits = width; bits <= kWRegSize; bits *= 2) { + value <<= bits; + uint64_t mask = (UINT64_C(1) << bits) - 1; + value |= ((value >> bits) & mask); + } + } + + // The basic analysis idea: imagine our input word looks like this. + // + // 0011111000111110001111100011111000111110001111100011111000111110 + // c b a + // |<--d-->| + // + // We find the lowest set bit (as an actual power-of-2 value, not its index) + // and call it a. Then we add a to our original number, which wipes out the + // bottommost stretch of set bits and replaces it with a 1 carried into the + // next zero bit. Then we look for the new lowest set bit, which is in + // position b, and subtract it, so now our number is just like the original + // but with the lowest stretch of set bits completely gone. Now we find the + // lowest set bit again, which is position c in the diagram above. Then we'll + // measure the distance d between bit positions a and c (using CLZ), and that + // tells us that the only valid logical immediate that could possibly be equal + // to this number is the one in which a stretch of bits running from a to just + // below b is replicated every d bits. + uint64_t a = LowestSetBit(value); + uint64_t value_plus_a = value + a; + uint64_t b = LowestSetBit(value_plus_a); + uint64_t value_plus_a_minus_b = value_plus_a - b; + uint64_t c = LowestSetBit(value_plus_a_minus_b); + + int d, clz_a, out_n; + uint64_t mask; + + if (c != 0) { + // The general case, in which there is more than one stretch of set bits. + // Compute the repeat distance d, and set up a bitmask covering the basic + // unit of repetition (i.e. a word with the bottom d bits set). Also, in all + // of these cases the N bit of the output will be zero. + clz_a = CountLeadingZeros(a, kXRegSize); + int clz_c = CountLeadingZeros(c, kXRegSize); + d = clz_a - clz_c; + mask = ((UINT64_C(1) << d) - 1); + out_n = 0; + } else { + // Handle degenerate cases. + // + // If any of those 'find lowest set bit' operations didn't find a set bit at + // all, then the word will have been zero thereafter, so in particular the + // last lowest_set_bit operation will have returned zero. 
So we can test for + // all the special case conditions in one go by seeing if c is zero. + if (a == 0) { + // The input was zero (or all 1 bits, which will come to here too after we + // inverted it at the start of the function), for which we just return + // false. + return false; + } else { + // Otherwise, if c was zero but a was not, then there's just one stretch + // of set bits in our word, meaning that we have the trivial case of + // d == 64 and only one 'repetition'. Set up all the same variables as in + // the general case above, and set the N bit in the output. + clz_a = CountLeadingZeros(a, kXRegSize); + d = 64; + mask = ~UINT64_C(0); + out_n = 1; + } + } + + // If the repeat period d is not a power of two, it can't be encoded. + if (!IsPowerOf2(d)) { + return false; + } + + if (((b - a) & ~mask) != 0) { + // If the bit stretch (b - a) does not fit within the mask derived from the + // repeat period, then fail. + return false; + } + + // The only possible option is b - a repeated every d bits. Now we're going to + // actually construct the valid logical immediate derived from that + // specification, and see if it equals our original input. + // + // To repeat a value every d bits, we multiply it by a number of the form + // (1 + 2^d + 2^(2d) + ...), i.e. 0x0001000100010001 or similar. These can + // be derived using a table lookup on CLZ(d). + static const uint64_t multipliers[] = { + 0x0000000000000001UL, + 0x0000000100000001UL, + 0x0001000100010001UL, + 0x0101010101010101UL, + 0x1111111111111111UL, + 0x5555555555555555UL, + }; + uint64_t multiplier = multipliers[CountLeadingZeros(d, kXRegSize) - 57]; + uint64_t candidate = (b - a) * multiplier; + + if (value != candidate) { + // The candidate pattern doesn't match our input value, so fail. + return false; + } + + // We have a match! This is a valid logical immediate, so now we have to + // construct the bits and pieces of the instruction encoding that generates + // it. + + // Count the set bits in our basic stretch. The special case of clz(0) == -1 + // makes the answer come out right for stretches that reach the very top of + // the word (e.g. numbers like 0xffffc00000000000). + int clz_b = (b == 0) ? -1 : CountLeadingZeros(b, kXRegSize); + int s = clz_a - clz_b; + + // Decide how many bits to rotate right by, to put the low bit of that basic + // stretch in position a. + int r; + if (negate) { + // If we inverted the input right at the start of this function, here's + // where we compensate: the number of set bits becomes the number of clear + // bits, and the rotation count is based on position b rather than position + // a (since b is the location of the 'lowest' 1 bit after inversion). + s = d - s; + r = (clz_b + 1) & (d - 1); + } else { + r = (clz_a + 1) & (d - 1); + } + + // Now we're done, except for having to encode the S output in such a way that + // it gives both the number of set bits and the length of the repeated + // segment. The s field is encoded like this: + // + // imms size S + // ssssss 64 UInt(ssssss) + // 0sssss 32 UInt(sssss) + // 10ssss 16 UInt(ssss) + // 110sss 8 UInt(sss) + // 1110ss 4 UInt(ss) + // 11110s 2 UInt(s) + // + // So we 'or' (2 * -d) with our computed s to form imms. + if ((n != NULL) || (imm_s != NULL) || (imm_r != NULL)) { + *n = out_n; + *imm_s = ((2 * -d) | (s - 1)) & 0x3f; + *imm_r = r; + } + + return true; +} + + +LoadStoreOp Assembler::LoadOpFor(const CPURegister& rt) { + VIXL_ASSERT(rt.IsValid()); + if (rt.IsRegister()) { + return rt.Is64Bits() ? 
LDR_x : LDR_w; + } else { + VIXL_ASSERT(rt.IsVRegister()); + switch (rt.GetSizeInBits()) { + case kBRegSize: + return LDR_b; + case kHRegSize: + return LDR_h; + case kSRegSize: + return LDR_s; + case kDRegSize: + return LDR_d; + default: + VIXL_ASSERT(rt.IsQ()); + return LDR_q; + } + } +} + + +LoadStoreOp Assembler::StoreOpFor(const CPURegister& rt) { + VIXL_ASSERT(rt.IsValid()); + if (rt.IsRegister()) { + return rt.Is64Bits() ? STR_x : STR_w; + } else { + VIXL_ASSERT(rt.IsVRegister()); + switch (rt.GetSizeInBits()) { + case kBRegSize: + return STR_b; + case kHRegSize: + return STR_h; + case kSRegSize: + return STR_s; + case kDRegSize: + return STR_d; + default: + VIXL_ASSERT(rt.IsQ()); + return STR_q; + } + } +} + + +LoadStorePairOp Assembler::StorePairOpFor(const CPURegister& rt, + const CPURegister& rt2) { + VIXL_ASSERT(AreSameSizeAndType(rt, rt2)); + USE(rt2); + if (rt.IsRegister()) { + return rt.Is64Bits() ? STP_x : STP_w; + } else { + VIXL_ASSERT(rt.IsVRegister()); + switch (rt.GetSizeInBytes()) { + case kSRegSizeInBytes: + return STP_s; + case kDRegSizeInBytes: + return STP_d; + default: + VIXL_ASSERT(rt.IsQ()); + return STP_q; + } + } +} + + +LoadStorePairOp Assembler::LoadPairOpFor(const CPURegister& rt, + const CPURegister& rt2) { + VIXL_ASSERT((STP_w | LoadStorePairLBit) == LDP_w); + return static_cast(StorePairOpFor(rt, rt2) | + LoadStorePairLBit); +} + + +LoadStorePairNonTemporalOp Assembler::StorePairNonTemporalOpFor( + const CPURegister& rt, const CPURegister& rt2) { + VIXL_ASSERT(AreSameSizeAndType(rt, rt2)); + USE(rt2); + if (rt.IsRegister()) { + return rt.Is64Bits() ? STNP_x : STNP_w; + } else { + VIXL_ASSERT(rt.IsVRegister()); + switch (rt.GetSizeInBytes()) { + case kSRegSizeInBytes: + return STNP_s; + case kDRegSizeInBytes: + return STNP_d; + default: + VIXL_ASSERT(rt.IsQ()); + return STNP_q; + } + } +} + + +LoadStorePairNonTemporalOp Assembler::LoadPairNonTemporalOpFor( + const CPURegister& rt, const CPURegister& rt2) { + VIXL_ASSERT((STNP_w | LoadStorePairNonTemporalLBit) == LDNP_w); + return static_cast( + StorePairNonTemporalOpFor(rt, rt2) | LoadStorePairNonTemporalLBit); +} + + +LoadLiteralOp Assembler::LoadLiteralOpFor(const CPURegister& rt) { + if (rt.IsRegister()) { + return rt.IsX() ? LDR_x_lit : LDR_w_lit; + } else { + VIXL_ASSERT(rt.IsVRegister()); + switch (rt.GetSizeInBytes()) { + case kSRegSizeInBytes: + return LDR_s_lit; + case kDRegSizeInBytes: + return LDR_d_lit; + default: + VIXL_ASSERT(rt.IsQ()); + return LDR_q_lit; + } + } +} + + +bool Assembler::CPUHas(const CPURegister& rt) const { + // Core registers are available without any particular CPU features. + if (rt.IsRegister()) return true; + VIXL_ASSERT(rt.IsVRegister()); + // The architecture does not allow FP and NEON to be implemented separately, + // but we can crudely categorise them based on register size, since FP only + // uses D, S and (occasionally) H registers. + if (rt.IsH() || rt.IsS() || rt.IsD()) { + return CPUHas(CPUFeatures::kFP) || CPUHas(CPUFeatures::kNEON); + } + VIXL_ASSERT(rt.IsB() || rt.IsQ()); + return CPUHas(CPUFeatures::kNEON); +} + + +bool Assembler::CPUHas(const CPURegister& rt, const CPURegister& rt2) const { + // This is currently only used for loads and stores, where rt and rt2 must + // have the same size and type. We could extend this to cover other cases if + // necessary, but for now we can avoid checking both registers. 
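  // In practice this means a pair access such as ldp(q0, q1, ...) is gated on
  // the features required by its first register only.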
+ VIXL_ASSERT(AreSameSizeAndType(rt, rt2)); + USE(rt2); + return CPUHas(rt); +} + + +bool Assembler::CPUHas(SystemRegister sysreg) const { + switch (sysreg) { + case RNDR: + case RNDRRS: + return CPUHas(CPUFeatures::kRNG); + case FPCR: + case NZCV: + break; + } + return true; +} + + +} // namespace aarch64 +} // namespace vixl diff --git a/3rdparty/vixl/src/aarch64/assembler-sve-aarch64.cc b/3rdparty/vixl/src/aarch64/assembler-sve-aarch64.cc new file mode 100644 index 0000000000..e99cfdcdff --- /dev/null +++ b/3rdparty/vixl/src/aarch64/assembler-sve-aarch64.cc @@ -0,0 +1,9899 @@ +// Copyright 2019, VIXL authors +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// * Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// * Neither the name of ARM Limited nor the names of its contributors may be +// used to endorse or promote products derived from this software without +// specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS CONTRIBUTORS "AS IS" AND +// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE +// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +#include "assembler-aarch64.h" + +namespace vixl { +namespace aarch64 { + +void Assembler::ResolveSVEImm8Shift(int* imm8, int* shift) { + if (*shift < 0) { + VIXL_ASSERT(*shift == -1); + // Derive the shift amount from the immediate. + if (IsInt8(*imm8)) { + *shift = 0; + } else if ((*imm8 % 256) == 0) { + *imm8 /= 256; + *shift = 8; + } + } + + VIXL_ASSERT(IsInt8(*imm8)); + VIXL_ASSERT((*shift == 0) || (*shift == 8)); +} + +// SVEAddressGeneration. + +void Assembler::adr(const ZRegister& zd, const SVEMemOperand& addr) { + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(addr.IsVectorPlusVector()); + VIXL_ASSERT( + AreSameLaneSize(zd, addr.GetVectorBase(), addr.GetVectorOffset())); + + int lane_size = zd.GetLaneSizeInBits(); + VIXL_ASSERT((lane_size == kSRegSize) || (lane_size == kDRegSize)); + + int shift_amount = addr.GetShiftAmount(); + VIXL_ASSERT((shift_amount >= 0) && (shift_amount <= 3)); + + Instr op = 0xffffffff; + Instr msz = shift_amount << 10; + SVEOffsetModifier mod = addr.GetOffsetModifier(); + switch (mod) { + case SVE_UXTW: + VIXL_ASSERT(lane_size == kDRegSize); + op = ADR_z_az_d_u32_scaled; + break; + case SVE_SXTW: + VIXL_ASSERT(lane_size == kDRegSize); + op = ADR_z_az_d_s32_scaled; + break; + case SVE_LSL: + case NO_SVE_OFFSET_MODIFIER: + op = (lane_size == kSRegSize) ? 
ADR_z_az_s_same_scaled + : ADR_z_az_d_same_scaled; + break; + default: + VIXL_UNIMPLEMENTED(); + } + Emit(op | msz | Rd(zd) | Rn(addr.GetVectorBase()) | + Rm(addr.GetVectorOffset())); +} + +void Assembler::SVELogicalImmediate(const ZRegister& zdn, + uint64_t imm, + Instr op) { + unsigned bit_n, imm_s, imm_r; + unsigned lane_size = zdn.GetLaneSizeInBits(); + // Check that the immediate can be encoded in the instruction. + if (IsImmLogical(imm, lane_size, &bit_n, &imm_s, &imm_r)) { + Emit(op | Rd(zdn) | SVEBitN(bit_n) | SVEImmRotate(imm_r, lane_size) | + SVEImmSetBits(imm_s, lane_size)); + } else { + VIXL_UNREACHABLE(); + } +} + +void Assembler::and_(const ZRegister& zd, const ZRegister& zn, uint64_t imm) { + USE(zn); + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(zd.Is(zn)); + SVELogicalImmediate(zd, imm, AND_z_zi); +} + +void Assembler::dupm(const ZRegister& zd, uint64_t imm) { + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + // DUPM_z_i is an SVEBroadcastBitmaskImmOp, but its encoding and constraints + // are similar enough to SVEBitwiseLogicalWithImm_UnpredicatedOp, that we can + // use the logical immediate encoder to get the correct behaviour. + SVELogicalImmediate(zd, imm, DUPM_z_i); +} + +void Assembler::eor(const ZRegister& zd, const ZRegister& zn, uint64_t imm) { + USE(zn); + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(zd.Is(zn)); + SVELogicalImmediate(zd, imm, EOR_z_zi); +} + +void Assembler::orr(const ZRegister& zd, const ZRegister& zn, uint64_t imm) { + USE(zn); + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(zd.Is(zn)); + SVELogicalImmediate(zd, imm, ORR_z_zi); +} + +// SVEBitwiseLogicalUnpredicated. +void Assembler::and_(const ZRegister& zd, + const ZRegister& zn, + const ZRegister& zm) { + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(zd.IsLaneSizeD()); + VIXL_ASSERT(AreSameFormat(zd, zn, zm)); + Emit(AND_z_zz | Rd(zd) | Rn(zn) | Rm(zm)); +} + +void Assembler::bic(const ZRegister& zd, + const ZRegister& zn, + const ZRegister& zm) { + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(zd.IsLaneSizeD()); + VIXL_ASSERT(AreSameFormat(zd, zn, zm)); + Emit(BIC_z_zz | Rd(zd) | Rn(zn) | Rm(zm)); +} + +void Assembler::eor(const ZRegister& zd, + const ZRegister& zn, + const ZRegister& zm) { + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(zd.IsLaneSizeD()); + VIXL_ASSERT(AreSameFormat(zd, zn, zm)); + Emit(EOR_z_zz | Rd(zd) | Rn(zn) | Rm(zm)); +} + +void Assembler::orr(const ZRegister& zd, + const ZRegister& zn, + const ZRegister& zm) { + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(zd.IsLaneSizeD()); + VIXL_ASSERT(AreSameFormat(zd, zn, zm)); + Emit(ORR_z_zz | Rd(zd) | Rn(zn) | Rm(zm)); +} + +// SVEBitwiseShiftPredicated. + +void Assembler::SVEBitwiseShiftImmediatePred(const ZRegister& zdn, + const PRegisterM& pg, + Instr encoded_imm_and_tsz, + Instr op) { + Instr tszl_and_imm = ExtractUnsignedBitfield32(4, 0, encoded_imm_and_tsz) + << 5; + Instr tszh = ExtractUnsignedBitfield32(6, 5, encoded_imm_and_tsz) << 22; + Emit(op | tszh | tszl_and_imm | PgLow8(pg) | Rd(zdn)); +} + +void Assembler::asr(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + int shift) { + // ASR ., /M, ., # + // 0000 0100 ..00 0000 100. .... .... .... 
+ // tszh<23:22> | opc<19:18> = 00 | L<17> = 0 | U<16> = 0 | Pg<12:10> | + // tszl<9:8> | imm3<7:5> | Zdn<4:0> + + USE(zn); + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(zd.Is(zn)); + Instr encoded_imm = + EncodeSVEShiftRightImmediate(shift, zd.GetLaneSizeInBits()); + SVEBitwiseShiftImmediatePred(zd, pg, encoded_imm, ASR_z_p_zi); +} + +void Assembler::asr(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm) { + // ASR ., /M, ., .D + // 0000 0100 ..01 1000 100. .... .... .... + // size<23:22> | R<18> = 0 | L<17> = 0 | U<16> = 0 | Pg<12:10> | Zm<9:5> | + // Zdn<4:0> + + USE(zn); + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(zd.Is(zn)); + VIXL_ASSERT(AreSameLaneSize(zd, zn, zm) || + ((zm.GetLaneSizeInBytes() == kDRegSizeInBytes) && + (zd.GetLaneSizeInBytes() != kDRegSizeInBytes))); + Instr op = ASR_z_p_zw; + if (AreSameLaneSize(zd, zn, zm)) { + op = ASR_z_p_zz; + } + Emit(op | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zm)); +} + +void Assembler::asrd(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + int shift) { + // ASRD ., /M, ., # + // 0000 0100 ..00 0100 100. .... .... .... + // tszh<23:22> | opc<19:18> = 01 | L<17> = 0 | U<16> = 0 | Pg<12:10> | + // tszl<9:8> | imm3<7:5> | Zdn<4:0> + + USE(zn); + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(zd.Is(zn)); + + Instr encoded_imm = + EncodeSVEShiftRightImmediate(shift, zd.GetLaneSizeInBits()); + SVEBitwiseShiftImmediatePred(zd, pg, encoded_imm, ASRD_z_p_zi); +} + +void Assembler::asrr(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm) { + // ASRR ., /M, ., . + // 0000 0100 ..01 0100 100. .... .... .... + // size<23:22> | R<18> = 1 | L<17> = 0 | U<16> = 0 | Pg<12:10> | Zm<9:5> | + // Zdn<4:0> + + USE(zn); + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(zd.Is(zn)); + VIXL_ASSERT(AreSameLaneSize(zd, zn, zm)); + + Emit(ASRR_z_p_zz | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zm)); +} + +void Assembler::lsl(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + int shift) { + // LSL ., /M, ., # + // 0000 0100 ..00 0011 100. .... .... .... + // tszh<23:22> | opc<19:18> = 00 | L<17> = 1 | U<16> = 1 | Pg<12:10> | + // tszl<9:8> | imm3<7:5> | Zdn<4:0> + + USE(zn); + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(zd.Is(zn)); + + Instr encoded_imm = + EncodeSVEShiftLeftImmediate(shift, zd.GetLaneSizeInBits()); + SVEBitwiseShiftImmediatePred(zd, pg, encoded_imm, LSL_z_p_zi); +} + +void Assembler::lsl(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm) { + // LSL ., /M, ., .D + // 0000 0100 ..01 1011 100. .... .... .... + // size<23:22> | R<18> = 0 | L<17> = 1 | U<16> = 1 | Pg<12:10> | Zm<9:5> | + // Zdn<4:0> + + USE(zn); + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(zd.Is(zn)); + VIXL_ASSERT(AreSameLaneSize(zd, zn, zm) || + ((zm.GetLaneSizeInBytes() == kDRegSizeInBytes) && + (zd.GetLaneSizeInBytes() != kDRegSizeInBytes))); + Instr op = LSL_z_p_zw; + if (AreSameLaneSize(zd, zn, zm)) { + op = LSL_z_p_zz; + } + Emit(op | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zm)); +} + +void Assembler::lslr(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm) { + // LSLR ., /M, ., . + // 0000 0100 ..01 0111 100. .... .... .... 
+ // size<23:22> | R<18> = 1 | L<17> = 1 | U<16> = 1 | Pg<12:10> | Zm<9:5> | + // Zdn<4:0> + + USE(zn); + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(zd.Is(zn)); + VIXL_ASSERT(AreSameLaneSize(zd, zn, zm)); + + Emit(LSLR_z_p_zz | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zm)); +} + +void Assembler::lsr(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + int shift) { + // LSR ., /M, ., # + // 0000 0100 ..00 0001 100. .... .... .... + // tszh<23:22> | opc<19:18> = 00 | L<17> = 0 | U<16> = 1 | Pg<12:10> | + // tszl<9:8> | imm3<7:5> | Zdn<4:0> + + USE(zn); + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(zd.Is(zn)); + + Instr encoded_imm = + EncodeSVEShiftRightImmediate(shift, zd.GetLaneSizeInBits()); + SVEBitwiseShiftImmediatePred(zd, pg, encoded_imm, LSR_z_p_zi); +} + +void Assembler::lsr(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm) { + // LSR ., /M, ., .D + // 0000 0100 ..01 1001 100. .... .... .... + // size<23:22> | R<18> = 0 | L<17> = 0 | U<16> = 1 | Pg<12:10> | Zm<9:5> | + // Zdn<4:0> + + USE(zn); + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(zd.Is(zn)); + VIXL_ASSERT(AreSameLaneSize(zd, zn, zm) || + ((zm.GetLaneSizeInBytes() == kDRegSizeInBytes) && + (zd.GetLaneSizeInBytes() != kDRegSizeInBytes))); + Instr op = LSR_z_p_zw; + if (AreSameLaneSize(zd, zn, zm)) { + op = LSR_z_p_zz; + } + Emit(op | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zm)); +} + +void Assembler::lsrr(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm) { + // LSRR ., /M, ., . + // 0000 0100 ..01 0101 100. .... .... .... + // size<23:22> | R<18> = 1 | L<17> = 0 | U<16> = 1 | Pg<12:10> | Zm<9:5> | + // Zdn<4:0> + + USE(zn); + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(zd.Is(zn)); + VIXL_ASSERT(AreSameLaneSize(zd, zn, zm)); + + Emit(LSRR_z_p_zz | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zm)); +} + +// SVEBitwiseShiftUnpredicated. 
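// For illustration only (not part of the VIXL patch): a small standalone
// sketch of how the shift-immediate encoders below pack a shift amount. The
// encoded value is lane_size + shift for left shifts and 2 * lane_size - shift
// for right shifts; its bits <6:5> become tszh and bits <4:0> become
// tszl:imm3. The helper name below is hypothetical.

#include <cstdint>
#include <cstdio>

static uint32_t EncodedRightShiftSketch(int lane_size_in_bits, int shift) {
  // Mirrors EncodeSVEShiftRightImmediate below: tsz:imm3 = 2 * esize - shift.
  return static_cast<uint32_t>(2 * lane_size_in_bits - shift);
}

int main() {
  // LSR z0.s, z1.s, #3: S lanes are 32 bits, so the field is 64 - 3 = 61.
  uint32_t encoded = EncodedRightShiftSketch(32, 3);
  std::printf("encoded=%u tszh=0x%x tszl:imm3=0x%x\n",
              encoded, (encoded >> 5) & 0x3, encoded & 0x1f);  // 61, 0x1, 0x1d
  return 0;
}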
+ +Instr Assembler::EncodeSVEShiftLeftImmediate(int shift, int lane_size_in_bits) { + VIXL_ASSERT((shift >= 0) && (shift < lane_size_in_bits)); + return lane_size_in_bits + shift; +} + +Instr Assembler::EncodeSVEShiftRightImmediate(int shift, + int lane_size_in_bits) { + VIXL_ASSERT((shift > 0) && (shift <= lane_size_in_bits)); + return (2 * lane_size_in_bits) - shift; +} + +void Assembler::SVEBitwiseShiftImmediate(const ZRegister& zd, + const ZRegister& zn, + Instr encoded_imm_and_tsz, + Instr op) { + Instr tszl_and_imm = ExtractUnsignedBitfield32(4, 0, encoded_imm_and_tsz) + << 16; + Instr tszh = ExtractUnsignedBitfield32(6, 5, encoded_imm_and_tsz) << 22; + Emit(op | tszh | tszl_and_imm | Rd(zd) | Rn(zn)); +} + +void Assembler::asr(const ZRegister& zd, const ZRegister& zn, int shift) { + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(AreSameLaneSize(zd, zn)); + Instr encoded_imm = + EncodeSVEShiftRightImmediate(shift, zd.GetLaneSizeInBits()); + SVEBitwiseShiftImmediate(zd, zn, encoded_imm, ASR_z_zi); +} + +void Assembler::asr(const ZRegister& zd, + const ZRegister& zn, + const ZRegister& zm) { + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(AreSameLaneSize(zd, zn)); + VIXL_ASSERT(zd.GetLaneSizeInBytes() != kDRegSizeInBytes); + + Emit(ASR_z_zw | SVESize(zd) | Rd(zd) | Rn(zn) | Rm(zm)); +} + +void Assembler::lsl(const ZRegister& zd, const ZRegister& zn, int shift) { + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + Instr encoded_imm = + EncodeSVEShiftLeftImmediate(shift, zd.GetLaneSizeInBits()); + SVEBitwiseShiftImmediate(zd, zn, encoded_imm, LSL_z_zi); +} + +void Assembler::lsl(const ZRegister& zd, + const ZRegister& zn, + const ZRegister& zm) { + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(AreSameLaneSize(zd, zn)); + VIXL_ASSERT(zd.GetLaneSizeInBytes() != kDRegSizeInBytes); + + Emit(LSL_z_zw | SVESize(zd) | Rd(zd) | Rn(zn) | Rm(zm)); +} + +void Assembler::lsr(const ZRegister& zd, const ZRegister& zn, int shift) { + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + Instr encoded_imm = + EncodeSVEShiftRightImmediate(shift, zd.GetLaneSizeInBits()); + SVEBitwiseShiftImmediate(zd, zn, encoded_imm, LSR_z_zi); +} + +void Assembler::lsr(const ZRegister& zd, + const ZRegister& zn, + const ZRegister& zm) { + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(AreSameLaneSize(zd, zn)); + VIXL_ASSERT(zd.GetLaneSizeInBytes() != kDRegSizeInBytes); + + Emit(LSR_z_zw | SVESize(zd) | Rd(zd) | Rn(zn) | Rm(zm)); +} + +// SVEElementCount. + +#define VIXL_SVE_INC_DEC_LIST(V) \ + V(cntb, CNTB_r_s) \ + V(cnth, CNTH_r_s) \ + V(cntw, CNTW_r_s) \ + V(cntd, CNTD_r_s) \ + V(decb, DECB_r_rs) \ + V(dech, DECH_r_rs) \ + V(decw, DECW_r_rs) \ + V(decd, DECD_r_rs) \ + V(incb, INCB_r_rs) \ + V(inch, INCH_r_rs) \ + V(incw, INCW_r_rs) \ + V(incd, INCD_r_rs) \ + V(sqdecb, SQDECB_r_rs_x) \ + V(sqdech, SQDECH_r_rs_x) \ + V(sqdecw, SQDECW_r_rs_x) \ + V(sqdecd, SQDECD_r_rs_x) \ + V(sqincb, SQINCB_r_rs_x) \ + V(sqinch, SQINCH_r_rs_x) \ + V(sqincw, SQINCW_r_rs_x) \ + V(sqincd, SQINCD_r_rs_x) + +#define VIXL_DEFINE_ASM_FUNC(FN, OP) \ + void Assembler::FN(const Register& rdn, int pattern, int multiplier) { \ + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); \ + VIXL_ASSERT(rdn.IsX()); \ + Emit(OP | Rd(rdn) | ImmSVEPredicateConstraint(pattern) | \ + ImmUnsignedField<19, 16>(multiplier - 1)); \ + } +VIXL_SVE_INC_DEC_LIST(VIXL_DEFINE_ASM_FUNC) +#undef VIXL_DEFINE_ASM_FUNC + +#define VIXL_SVE_UQINC_UQDEC_LIST(V) \ + V(uqdecb, (rdn.IsX() ? UQDECB_r_rs_x : UQDECB_r_rs_uw)) \ + V(uqdech, (rdn.IsX() ? 
UQDECH_r_rs_x : UQDECH_r_rs_uw)) \ + V(uqdecw, (rdn.IsX() ? UQDECW_r_rs_x : UQDECW_r_rs_uw)) \ + V(uqdecd, (rdn.IsX() ? UQDECD_r_rs_x : UQDECD_r_rs_uw)) \ + V(uqincb, (rdn.IsX() ? UQINCB_r_rs_x : UQINCB_r_rs_uw)) \ + V(uqinch, (rdn.IsX() ? UQINCH_r_rs_x : UQINCH_r_rs_uw)) \ + V(uqincw, (rdn.IsX() ? UQINCW_r_rs_x : UQINCW_r_rs_uw)) \ + V(uqincd, (rdn.IsX() ? UQINCD_r_rs_x : UQINCD_r_rs_uw)) + +#define VIXL_DEFINE_ASM_FUNC(FN, OP) \ + void Assembler::FN(const Register& rdn, int pattern, int multiplier) { \ + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); \ + Emit(OP | Rd(rdn) | ImmSVEPredicateConstraint(pattern) | \ + ImmUnsignedField<19, 16>(multiplier - 1)); \ + } +VIXL_SVE_UQINC_UQDEC_LIST(VIXL_DEFINE_ASM_FUNC) +#undef VIXL_DEFINE_ASM_FUNC + +#define VIXL_SVE_SQX_INC_DEC_LIST(V) \ + V(sqdecb, SQDECB) \ + V(sqdech, SQDECH) \ + V(sqdecw, SQDECW) \ + V(sqdecd, SQDECD) \ + V(sqincb, SQINCB) \ + V(sqinch, SQINCH) \ + V(sqincw, SQINCW) \ + V(sqincd, SQINCD) + +#define VIXL_DEFINE_ASM_FUNC(FN, OP) \ + void Assembler::FN(const Register& xd, \ + const Register& wn, \ + int pattern, \ + int multiplier) { \ + USE(wn); \ + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); \ + VIXL_ASSERT(wn.IsW() && xd.Is(wn.X())); \ + Emit(OP##_r_rs_sx | Rd(xd) | ImmSVEPredicateConstraint(pattern) | \ + ImmUnsignedField<19, 16>(multiplier - 1)); \ + } +VIXL_SVE_SQX_INC_DEC_LIST(VIXL_DEFINE_ASM_FUNC) +#undef VIXL_DEFINE_ASM_FUNC + +#define VIXL_SVE_INC_DEC_VEC_LIST(V) \ + V(dech, DEC, H) \ + V(decw, DEC, W) \ + V(decd, DEC, D) \ + V(inch, INC, H) \ + V(incw, INC, W) \ + V(incd, INC, D) \ + V(sqdech, SQDEC, H) \ + V(sqdecw, SQDEC, W) \ + V(sqdecd, SQDEC, D) \ + V(sqinch, SQINC, H) \ + V(sqincw, SQINC, W) \ + V(sqincd, SQINC, D) \ + V(uqdech, UQDEC, H) \ + V(uqdecw, UQDEC, W) \ + V(uqdecd, UQDEC, D) \ + V(uqinch, UQINC, H) \ + V(uqincw, UQINC, W) \ + V(uqincd, UQINC, D) + +#define VIXL_DEFINE_ASM_FUNC(FN, OP, T) \ + void Assembler::FN(const ZRegister& zdn, int pattern, int multiplier) { \ + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); \ + VIXL_ASSERT(zdn.GetLaneSizeInBytes() == k##T##RegSizeInBytes); \ + Emit(OP##T##_z_zs | Rd(zdn) | ImmSVEPredicateConstraint(pattern) | \ + ImmUnsignedField<19, 16>(multiplier - 1)); \ + } +VIXL_SVE_INC_DEC_VEC_LIST(VIXL_DEFINE_ASM_FUNC) +#undef VIXL_DEFINE_ASM_FUNC + +// SVEFPAccumulatingReduction. + +void Assembler::fadda(const VRegister& vd, + const PRegister& pg, + const VRegister& vn, + const ZRegister& zm) { + // FADDA , , , . + // 0110 0101 ..01 1000 001. .... .... .... + // size<23:22> | opc<18:16> = 000 | Pg<12:10> | Zm<9:5> | Vdn<4:0> + + USE(vn); + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(vd.Is(vn)); + VIXL_ASSERT(vd.IsScalar()); + VIXL_ASSERT(zm.GetLaneSizeInBytes() != kBRegSizeInBytes); + VIXL_ASSERT(AreSameLaneSize(zm, vd)); + + Emit(FADDA_v_p_z | SVESize(zm) | Rd(vd) | PgLow8(pg) | Rn(zm)); +} + +// SVEFPArithmetic_Predicated. + +void Assembler::fabd(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm) { + // FABD ., /M, ., . + // 0110 0101 ..00 1000 100. .... .... .... 
+ // size<23:22> | opc<19:16> = 1000 | Pg<12:10> | Zm<9:5> | Zdn<4:0> + + USE(zn); + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(zd.Is(zn)); + VIXL_ASSERT(AreSameLaneSize(zd, zn, zm)); + VIXL_ASSERT(zd.GetLaneSizeInBytes() != kBRegSizeInBytes); + + Emit(FABD_z_p_zz | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zm)); +} + +void Assembler::fadd(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + double imm) { + // FADD ., /M, ., + // 0110 0101 ..01 1000 100. ..00 00.. .... + // size<23:22> | opc<18:16> = 000 | Pg<12:10> | i1<5> | Zdn<4:0> + + USE(zn); + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(zd.Is(zn)); + VIXL_ASSERT(AreSameLaneSize(zd, zn)); + VIXL_ASSERT(zd.GetLaneSizeInBytes() != kBRegSizeInBytes); + VIXL_ASSERT((imm == 0.5) || (imm == 1.0)); + + Instr i1 = (imm == 1.0) ? (1 << 5) : 0; + Emit(FADD_z_p_zs | SVESize(zd) | Rd(zd) | PgLow8(pg) | i1); +} + +void Assembler::fadd(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm) { + // FADD ., /M, ., . + // 0110 0101 ..00 0000 100. .... .... .... + // size<23:22> | opc<19:16> = 0000 | Pg<12:10> | Zm<9:5> | Zdn<4:0> + + USE(zn); + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(zd.Is(zn)); + VIXL_ASSERT(AreSameLaneSize(zd, zn, zm)); + VIXL_ASSERT(zd.GetLaneSizeInBytes() != kBRegSizeInBytes); + + Emit(FADD_z_p_zz | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zm)); +} + +void Assembler::fdiv(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm) { + // FDIV ., /M, ., . + // 0110 0101 ..00 1101 100. .... .... .... + // size<23:22> | opc<19:16> = 1101 | Pg<12:10> | Zm<9:5> | Zdn<4:0> + + USE(zn); + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(zd.Is(zn)); + VIXL_ASSERT(AreSameLaneSize(zd, zn, zm)); + VIXL_ASSERT(zd.GetLaneSizeInBytes() != kBRegSizeInBytes); + + Emit(FDIV_z_p_zz | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zm)); +} + +void Assembler::fdivr(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm) { + // FDIVR ., /M, ., . + // 0110 0101 ..00 1100 100. .... .... .... + // size<23:22> | opc<19:16> = 1100 | Pg<12:10> | Zm<9:5> | Zdn<4:0> + + USE(zn); + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(zd.Is(zn)); + VIXL_ASSERT(AreSameLaneSize(zd, zn, zm)); + VIXL_ASSERT(zd.GetLaneSizeInBytes() != kBRegSizeInBytes); + + Emit(FDIVR_z_p_zz | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zm)); +} + +void Assembler::fmax(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + double imm) { + // FMAX ., /M, ., + // 0110 0101 ..01 1110 100. ..00 00.. .... + // size<23:22> | opc<18:16> = 110 | Pg<12:10> | i1<5> | Zdn<4:0> + + USE(zn); + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(zd.Is(zn)); + VIXL_ASSERT(AreSameLaneSize(zd, zn)); + VIXL_ASSERT(zd.GetLaneSizeInBytes() != kBRegSizeInBytes); + VIXL_ASSERT(((imm == 0.0) && (copysign(1.0, imm) == 1.0)) || (imm == 1.0)); + + Instr i1 = (imm == 1.0) ? (1 << 5) : 0; + Emit(FMAX_z_p_zs | SVESize(zd) | Rd(zd) | PgLow8(pg) | i1); +} + +void Assembler::fmax(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm) { + // FMAX ., /M, ., . + // 0110 0101 ..00 0110 100. .... .... .... 
+ // size<23:22> | opc<19:16> = 0110 | Pg<12:10> | Zm<9:5> | Zdn<4:0> + + USE(zn); + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(zd.Is(zn)); + VIXL_ASSERT(AreSameLaneSize(zd, zn, zm)); + VIXL_ASSERT(zd.GetLaneSizeInBytes() != kBRegSizeInBytes); + + Emit(FMAX_z_p_zz | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zm)); +} + +void Assembler::fmaxnm(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + double imm) { + // FMAXNM ., /M, ., + // 0110 0101 ..01 1100 100. ..00 00.. .... + // size<23:22> | opc<18:16> = 100 | Pg<12:10> | i1<5> | Zdn<4:0> + + USE(zn); + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(zd.Is(zn)); + VIXL_ASSERT(AreSameLaneSize(zd, zn)); + VIXL_ASSERT(zd.GetLaneSizeInBytes() != kBRegSizeInBytes); + VIXL_ASSERT(((imm == 0.0) && (copysign(1.0, imm) == 1.0)) || (imm == 1.0)); + + Instr i1 = (imm == 1.0) ? (1 << 5) : 0; + Emit(FMAXNM_z_p_zs | SVESize(zd) | Rd(zd) | PgLow8(pg) | i1); +} + +void Assembler::fmaxnm(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm) { + // FMAXNM ., /M, ., . + // 0110 0101 ..00 0100 100. .... .... .... + // size<23:22> | opc<19:16> = 0100 | Pg<12:10> | Zm<9:5> | Zdn<4:0> + + USE(zn); + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(zd.Is(zn)); + VIXL_ASSERT(AreSameLaneSize(zd, zn, zm)); + VIXL_ASSERT(zd.GetLaneSizeInBytes() != kBRegSizeInBytes); + + Emit(FMAXNM_z_p_zz | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zm)); +} + +void Assembler::fmin(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + double imm) { + // FMIN ., /M, ., + // 0110 0101 ..01 1111 100. ..00 00.. .... + // size<23:22> | opc<18:16> = 111 | Pg<12:10> | i1<5> | Zdn<4:0> + + USE(zn); + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(zd.Is(zn)); + VIXL_ASSERT(AreSameLaneSize(zd, zn)); + VIXL_ASSERT(zd.GetLaneSizeInBytes() != kBRegSizeInBytes); + VIXL_ASSERT(((imm == 0.0) && (copysign(1.0, imm) == 1.0)) || (imm == 1.0)); + + Instr i1 = (imm == 1.0) ? (1 << 5) : 0; + Emit(FMIN_z_p_zs | SVESize(zd) | Rd(zd) | PgLow8(pg) | i1); +} + +void Assembler::fmin(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm) { + // FMIN ., /M, ., . + // 0110 0101 ..00 0111 100. .... .... .... + // size<23:22> | opc<19:16> = 0111 | Pg<12:10> | Zm<9:5> | Zdn<4:0> + + USE(zn); + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(zd.Is(zn)); + VIXL_ASSERT(AreSameLaneSize(zd, zn, zm)); + VIXL_ASSERT(zd.GetLaneSizeInBytes() != kBRegSizeInBytes); + + Emit(FMIN_z_p_zz | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zm)); +} + +void Assembler::fminnm(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + double imm) { + // FMINNM ., /M, ., + // 0110 0101 ..01 1101 100. ..00 00.. .... + // size<23:22> | opc<18:16> = 101 | Pg<12:10> | i1<5> | Zdn<4:0> + + USE(zn); + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(zd.Is(zn)); + VIXL_ASSERT(AreSameLaneSize(zd, zn)); + VIXL_ASSERT(zd.GetLaneSizeInBytes() != kBRegSizeInBytes); + VIXL_ASSERT(((imm == 0.0) && (copysign(1.0, imm) == 1.0)) || (imm == 1.0)); + + Instr i1 = (imm == 1.0) ? (1 << 5) : 0; + Emit(FMINNM_z_p_zs | SVESize(zd) | Rd(zd) | PgLow8(pg) | i1); +} + +void Assembler::fminnm(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm) { + // FMINNM ., /M, ., . + // 0110 0101 ..00 0101 100. .... .... .... 
+ // size<23:22> | opc<19:16> = 0101 | Pg<12:10> | Zm<9:5> | Zdn<4:0> + + USE(zn); + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(zd.Is(zn)); + VIXL_ASSERT(AreSameLaneSize(zd, zn, zm)); + VIXL_ASSERT(zd.GetLaneSizeInBytes() != kBRegSizeInBytes); + + Emit(FMINNM_z_p_zz | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zm)); +} + +void Assembler::fmul(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + double imm) { + // FMUL ., /M, ., + // 0110 0101 ..01 1010 100. ..00 00.. .... + // size<23:22> | opc<18:16> = 010 | Pg<12:10> | i1<5> | Zdn<4:0> + + USE(zn); + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(zd.Is(zn)); + VIXL_ASSERT(AreSameLaneSize(zd, zn)); + VIXL_ASSERT(zd.GetLaneSizeInBytes() != kBRegSizeInBytes); + VIXL_ASSERT((imm == 0.5) || (imm == 2.0)); + + Instr i1 = (imm == 2.0) ? (1 << 5) : 0; + Emit(FMUL_z_p_zs | SVESize(zd) | Rd(zd) | PgLow8(pg) | i1); +} + +void Assembler::fmul(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm) { + // FMUL ., /M, ., . + // 0110 0101 ..00 0010 100. .... .... .... + // size<23:22> | opc<19:16> = 0010 | Pg<12:10> | Zm<9:5> | Zdn<4:0> + + USE(zn); + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(zd.Is(zn)); + VIXL_ASSERT(AreSameLaneSize(zd, zn, zm)); + VIXL_ASSERT(zd.GetLaneSizeInBytes() != kBRegSizeInBytes); + + Emit(FMUL_z_p_zz | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zm)); +} + +void Assembler::fmulx(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm) { + // FMULX ., /M, ., . + // 0110 0101 ..00 1010 100. .... .... .... + // size<23:22> | opc<19:16> = 1010 | Pg<12:10> | Zm<9:5> | Zdn<4:0> + + USE(zn); + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(zd.Is(zn)); + VIXL_ASSERT(AreSameLaneSize(zd, zn, zm)); + VIXL_ASSERT(zd.GetLaneSizeInBytes() != kBRegSizeInBytes); + + Emit(FMULX_z_p_zz | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zm)); +} + +void Assembler::fscale(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm) { + // FSCALE ., /M, ., . + // 0110 0101 ..00 1001 100. .... .... .... + // size<23:22> | opc<19:16> = 1001 | Pg<12:10> | Zm<9:5> | Zdn<4:0> + + USE(zn); + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(zd.Is(zn)); + VIXL_ASSERT(AreSameLaneSize(zd, zn, zm)); + VIXL_ASSERT(zd.GetLaneSizeInBytes() != kBRegSizeInBytes); + + Emit(FSCALE_z_p_zz | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zm)); +} + +void Assembler::fsub(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + double imm) { + // FSUB ., /M, ., + // 0110 0101 ..01 1001 100. ..00 00.. .... + // size<23:22> | opc<18:16> = 001 | Pg<12:10> | i1<5> | Zdn<4:0> + + USE(zn); + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(zd.Is(zn)); + VIXL_ASSERT(AreSameLaneSize(zd, zn)); + VIXL_ASSERT(zd.GetLaneSizeInBytes() != kBRegSizeInBytes); + VIXL_ASSERT((imm == 0.5) || (imm == 1.0)); + + Instr i1 = (imm == 1.0) ? (1 << 5) : 0; + Emit(FSUB_z_p_zs | SVESize(zd) | Rd(zd) | PgLow8(pg) | i1); +} + +void Assembler::fsub(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm) { + // FSUB ., /M, ., . + // 0110 0101 ..00 0001 100. .... .... .... 
+ // size<23:22> | opc<19:16> = 0001 | Pg<12:10> | Zm<9:5> | Zdn<4:0> + + USE(zn); + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(zd.Is(zn)); + VIXL_ASSERT(AreSameLaneSize(zd, zn, zm)); + VIXL_ASSERT(zd.GetLaneSizeInBytes() != kBRegSizeInBytes); + + Emit(FSUB_z_p_zz | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zm)); +} + +void Assembler::fsubr(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + double imm) { + // FSUBR ., /M, ., + // 0110 0101 ..01 1011 100. ..00 00.. .... + // size<23:22> | opc<18:16> = 011 | Pg<12:10> | i1<5> | Zdn<4:0> + + USE(zn); + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(zd.Is(zn)); + VIXL_ASSERT(AreSameLaneSize(zd, zn)); + VIXL_ASSERT(zd.GetLaneSizeInBytes() != kBRegSizeInBytes); + VIXL_ASSERT((imm == 0.5) || (imm == 1.0)); + + Instr i1 = (imm == 1.0) ? (1 << 5) : 0; + Emit(FSUBR_z_p_zs | SVESize(zd) | Rd(zd) | PgLow8(pg) | i1); +} + +void Assembler::fsubr(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm) { + // FSUBR ., /M, ., . + // 0110 0101 ..00 0011 100. .... .... .... + // size<23:22> | opc<19:16> = 0011 | Pg<12:10> | Zm<9:5> | Zdn<4:0> + + USE(zn); + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(zd.Is(zn)); + VIXL_ASSERT(AreSameLaneSize(zd, zn, zm)); + VIXL_ASSERT(zd.GetLaneSizeInBytes() != kBRegSizeInBytes); + + Emit(FSUBR_z_p_zz | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zm)); +} + +void Assembler::ftmad(const ZRegister& zd, + const ZRegister& zn, + const ZRegister& zm, + int imm3) { + // FTMAD ., ., ., # + // 0110 0101 ..01 0... 1000 00.. .... .... + // size<23:22> | imm3<18:16> | Zm<9:5> | Zdn<4:0> + + USE(zn); + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(zd.Is(zn)); + VIXL_ASSERT(AreSameLaneSize(zd, zn, zm)); + VIXL_ASSERT(zd.GetLaneSizeInBytes() != kBRegSizeInBytes); + + Emit(FTMAD_z_zzi | SVESize(zd) | Rd(zd) | Rn(zm) | + ImmUnsignedField<18, 16>(imm3)); +} + +// SVEFPArithmeticUnpredicated. + +void Assembler::fadd(const ZRegister& zd, + const ZRegister& zn, + const ZRegister& zm) { + // FADD ., ., . + // 0110 0101 ..0. .... 0000 00.. .... .... + // size<23:22> | Zm<20:16> | opc<12:10> = 000 | Zn<9:5> | Zd<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(AreSameLaneSize(zd, zn, zm)); + VIXL_ASSERT(zd.GetLaneSizeInBytes() != kBRegSizeInBytes); + + Emit(FADD_z_zz | SVESize(zd) | Rd(zd) | Rn(zn) | Rm(zm)); +} + +void Assembler::fmul(const ZRegister& zd, + const ZRegister& zn, + const ZRegister& zm) { + // FMUL ., ., . + // 0110 0101 ..0. .... 0000 10.. .... .... + // size<23:22> | Zm<20:16> | opc<12:10> = 010 | Zn<9:5> | Zd<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(AreSameLaneSize(zd, zn, zm)); + VIXL_ASSERT(zd.GetLaneSizeInBytes() != kBRegSizeInBytes); + + Emit(FMUL_z_zz | SVESize(zd) | Rd(zd) | Rn(zn) | Rm(zm)); +} + +void Assembler::frecps(const ZRegister& zd, + const ZRegister& zn, + const ZRegister& zm) { + // FRECPS ., ., . + // 0110 0101 ..0. .... 0001 10.. .... .... + // size<23:22> | Zm<20:16> | opc<12:10> = 110 | Zn<9:5> | Zd<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(AreSameLaneSize(zd, zn, zm)); + VIXL_ASSERT(zd.GetLaneSizeInBytes() != kBRegSizeInBytes); + + Emit(FRECPS_z_zz | SVESize(zd) | Rd(zd) | Rn(zn) | Rm(zm)); +} + +void Assembler::frsqrts(const ZRegister& zd, + const ZRegister& zn, + const ZRegister& zm) { + // FRSQRTS ., ., . + // 0110 0101 ..0. .... 0001 11.. .... .... 
+ // size<23:22> | Zm<20:16> | opc<12:10> = 111 | Zn<9:5> | Zd<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(AreSameLaneSize(zd, zn, zm)); + VIXL_ASSERT(zd.GetLaneSizeInBytes() != kBRegSizeInBytes); + + Emit(FRSQRTS_z_zz | SVESize(zd) | Rd(zd) | Rn(zn) | Rm(zm)); +} + +void Assembler::fsub(const ZRegister& zd, + const ZRegister& zn, + const ZRegister& zm) { + // FSUB ., ., . + // 0110 0101 ..0. .... 0000 01.. .... .... + // size<23:22> | Zm<20:16> | opc<12:10> = 001 | Zn<9:5> | Zd<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(AreSameLaneSize(zd, zn, zm)); + VIXL_ASSERT(zd.GetLaneSizeInBytes() != kBRegSizeInBytes); + + Emit(FSUB_z_zz | SVESize(zd) | Rd(zd) | Rn(zn) | Rm(zm)); +} + +void Assembler::ftsmul(const ZRegister& zd, + const ZRegister& zn, + const ZRegister& zm) { + // FTSMUL ., ., . + // 0110 0101 ..0. .... 0000 11.. .... .... + // size<23:22> | Zm<20:16> | opc<12:10> = 011 | Zn<9:5> | Zd<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(AreSameLaneSize(zd, zn, zm)); + VIXL_ASSERT(zd.GetLaneSizeInBytes() != kBRegSizeInBytes); + + Emit(FTSMUL_z_zz | SVESize(zd) | Rd(zd) | Rn(zn) | Rm(zm)); +} + +// SVEFPCompareVectors. + +void Assembler::facge(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const ZRegister& zn, + const ZRegister& zm) { + // FACGE ., /Z, ., . + // 0110 0101 ..0. .... 110. .... ...1 .... + // size<23:22> | Zm<20:16> | op<15> = 1 | o2<13> = 0 | Pg<12:10> | Zn<9:5> | + // o3<4> = 1 | Pd<3:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(AreSameLaneSize(zn, zm)); + VIXL_ASSERT(zn.GetLaneSizeInBytes() != kBRegSizeInBytes); + + Emit(FACGE_p_p_zz | SVESize(zn) | Pd(pd) | PgLow8(pg) | Rn(zn) | Rm(zm)); +} + +void Assembler::facgt(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const ZRegister& zn, + const ZRegister& zm) { + // FACGT ., /Z, ., . + // 0110 0101 ..0. .... 111. .... ...1 .... + // size<23:22> | Zm<20:16> | op<15> = 1 | o2<13> = 1 | Pg<12:10> | Zn<9:5> | + // o3<4> = 1 | Pd<3:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(AreSameLaneSize(zn, zm)); + VIXL_ASSERT(zn.GetLaneSizeInBytes() != kBRegSizeInBytes); + + Emit(FACGT_p_p_zz | SVESize(zn) | Pd(pd) | PgLow8(pg) | Rn(zn) | Rm(zm)); +} + +void Assembler::fcmeq(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const ZRegister& zn, + const ZRegister& zm) { + // FCMEQ ., /Z, ., . + // 0110 0101 ..0. .... 011. .... ...0 .... + // size<23:22> | Zm<20:16> | op<15> = 0 | o2<13> = 1 | Pg<12:10> | Zn<9:5> | + // o3<4> = 0 | Pd<3:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(AreSameLaneSize(zn, zm)); + VIXL_ASSERT(zn.GetLaneSizeInBytes() != kBRegSizeInBytes); + + Emit(FCMEQ_p_p_zz | SVESize(zn) | Pd(pd) | PgLow8(pg) | Rn(zn) | Rm(zm)); +} + +void Assembler::fcmge(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const ZRegister& zn, + const ZRegister& zm) { + // FCMGE ., /Z, ., . + // 0110 0101 ..0. .... 010. .... ...0 .... + // size<23:22> | Zm<20:16> | op<15> = 0 | o2<13> = 0 | Pg<12:10> | Zn<9:5> | + // o3<4> = 0 | Pd<3:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(AreSameLaneSize(zn, zm)); + VIXL_ASSERT(zn.GetLaneSizeInBytes() != kBRegSizeInBytes); + + Emit(FCMGE_p_p_zz | SVESize(zn) | Pd(pd) | PgLow8(pg) | Rn(zn) | Rm(zm)); +} + +void Assembler::fcmgt(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const ZRegister& zn, + const ZRegister& zm) { + // FCMGT ., /Z, ., . + // 0110 0101 ..0. .... 010. .... ...1 .... 
+ // size<23:22> | Zm<20:16> | op<15> = 0 | o2<13> = 0 | Pg<12:10> | Zn<9:5> | + // o3<4> = 1 | Pd<3:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(AreSameLaneSize(zn, zm)); + VIXL_ASSERT(zn.GetLaneSizeInBytes() != kBRegSizeInBytes); + + Emit(FCMGT_p_p_zz | SVESize(zn) | Pd(pd) | PgLow8(pg) | Rn(zn) | Rm(zm)); +} + +void Assembler::fcmne(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const ZRegister& zn, + const ZRegister& zm) { + // FCMNE ., /Z, ., . + // 0110 0101 ..0. .... 011. .... ...1 .... + // size<23:22> | Zm<20:16> | op<15> = 0 | o2<13> = 1 | Pg<12:10> | Zn<9:5> | + // o3<4> = 1 | Pd<3:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(AreSameLaneSize(zn, zm)); + VIXL_ASSERT(zn.GetLaneSizeInBytes() != kBRegSizeInBytes); + + Emit(FCMNE_p_p_zz | SVESize(zn) | Pd(pd) | PgLow8(pg) | Rn(zn) | Rm(zm)); +} + +void Assembler::fcmuo(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const ZRegister& zn, + const ZRegister& zm) { + // FCMUO ., /Z, ., . + // 0110 0101 ..0. .... 110. .... ...0 .... + // size<23:22> | Zm<20:16> | op<15> = 1 | o2<13> = 0 | Pg<12:10> | Zn<9:5> | + // o3<4> = 0 | Pd<3:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(AreSameLaneSize(zn, zm)); + VIXL_ASSERT(zn.GetLaneSizeInBytes() != kBRegSizeInBytes); + + Emit(FCMUO_p_p_zz | SVESize(zn) | Pd(pd) | PgLow8(pg) | Rn(zn) | Rm(zm)); +} + +// SVEFPCompareWithZero. + +void Assembler::fcmeq(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const ZRegister& zn, + double zero) { + // FCMEQ ., /Z, ., #0.0 + // 0110 0101 ..01 0010 001. .... ...0 .... + // size<23:22> | eq<17> = 1 | lt<16> = 0 | Pg<12:10> | Zn<9:5> | ne<4> = 0 | + // Pd<3:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(zn.GetLaneSizeInBytes() != kBRegSizeInBytes); + VIXL_ASSERT(zero == 0.0); + USE(zero); + + Emit(FCMEQ_p_p_z0 | SVESize(zn) | Pd(pd) | PgLow8(pg) | Rn(zn)); +} + +void Assembler::fcmge(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const ZRegister& zn, + double zero) { + // FCMGE ., /Z, ., #0.0 + // 0110 0101 ..01 0000 001. .... ...0 .... + // size<23:22> | eq<17> = 0 | lt<16> = 0 | Pg<12:10> | Zn<9:5> | ne<4> = 0 | + // Pd<3:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(zn.GetLaneSizeInBytes() != kBRegSizeInBytes); + VIXL_ASSERT(zero == 0.0); + USE(zero); + + Emit(FCMGE_p_p_z0 | SVESize(zn) | Pd(pd) | PgLow8(pg) | Rn(zn)); +} + +void Assembler::fcmgt(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const ZRegister& zn, + double zero) { + // FCMGT ., /Z, ., #0.0 + // 0110 0101 ..01 0000 001. .... ...1 .... + // size<23:22> | eq<17> = 0 | lt<16> = 0 | Pg<12:10> | Zn<9:5> | ne<4> = 1 | + // Pd<3:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(zn.GetLaneSizeInBytes() != kBRegSizeInBytes); + VIXL_ASSERT(zero == 0.0); + USE(zero); + + Emit(FCMGT_p_p_z0 | SVESize(zn) | Pd(pd) | PgLow8(pg) | Rn(zn)); +} + +void Assembler::fcmle(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const ZRegister& zn, + double zero) { + // FCMLE ., /Z, ., #0.0 + // 0110 0101 ..01 0001 001. .... ...1 .... 
+ // size<23:22> | eq<17> = 0 | lt<16> = 1 | Pg<12:10> | Zn<9:5> | ne<4> = 1 | + // Pd<3:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(zn.GetLaneSizeInBytes() != kBRegSizeInBytes); + VIXL_ASSERT(zero == 0.0); + USE(zero); + + Emit(FCMLE_p_p_z0 | SVESize(zn) | Pd(pd) | PgLow8(pg) | Rn(zn)); +} + +void Assembler::fcmlt(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const ZRegister& zn, + double zero) { + // FCMLT ., /Z, ., #0.0 + // 0110 0101 ..01 0001 001. .... ...0 .... + // size<23:22> | eq<17> = 0 | lt<16> = 1 | Pg<12:10> | Zn<9:5> | ne<4> = 0 | + // Pd<3:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(zn.GetLaneSizeInBytes() != kBRegSizeInBytes); + VIXL_ASSERT(zero == 0.0); + USE(zero); + + Emit(FCMLT_p_p_z0 | SVESize(zn) | Pd(pd) | PgLow8(pg) | Rn(zn)); +} + +void Assembler::fcmne(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const ZRegister& zn, + double zero) { + // FCMNE ., /Z, ., #0.0 + // 0110 0101 ..01 0011 001. .... ...0 .... + // size<23:22> | eq<17> = 1 | lt<16> = 1 | Pg<12:10> | Zn<9:5> | ne<4> = 0 | + // Pd<3:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(zn.GetLaneSizeInBytes() != kBRegSizeInBytes); + VIXL_ASSERT(zero == 0.0); + USE(zero); + + Emit(FCMNE_p_p_z0 | SVESize(zn) | Pd(pd) | PgLow8(pg) | Rn(zn)); +} + +// SVEFPComplexAddition. + +void Assembler::fcadd(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm, + int rot) { + // FCADD ., /M, ., ., + // 0110 0100 ..00 000. 100. .... .... .... + // size<23:22> | rot<16> | Pg<12:10> | Zm<9:5> | Zdn<4:0> + + USE(zn); + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(zd.Is(zn)); + VIXL_ASSERT(AreSameLaneSize(zd, zn, zm)); + VIXL_ASSERT(zd.GetLaneSizeInBytes() != kBRegSizeInBytes); + VIXL_ASSERT((rot == 90) || (rot == 270)); + + Instr rotate_bit = (rot == 90) ? 0 : (1 << 16); + Emit(FCADD_z_p_zz | rotate_bit | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zm)); +} + +// SVEFPComplexMulAdd. + +void Assembler::fcmla(const ZRegister& zda, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm, + int rot) { + // FCMLA ., /M, ., ., + // 0110 0100 ..0. .... 0... .... .... .... + // size<23:22> | Zm<20:16> | rot<14:13> | Pg<12:10> | Zn<9:5> | Zda<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(AreSameLaneSize(zda, zn, zm)); + VIXL_ASSERT(zda.GetLaneSizeInBytes() != kBRegSizeInBytes); + VIXL_ASSERT((rot == 0) || (rot == 90) || (rot == 180) || (rot == 270)); + + Instr rotate_bit = (rot / 90) << 13; + Emit(FCMLA_z_p_zzz | rotate_bit | SVESize(zda) | Rd(zda) | PgLow8(pg) | + Rn(zn) | Rm(zm)); +} + +// SVEFPComplexMulAddIndex. + +void Assembler::fcmla(const ZRegister& zda, + const ZRegister& zn, + const ZRegister& zm, + int index, + int rot) { + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(AreSameLaneSize(zda, zn, zm)); + VIXL_ASSERT((rot == 0) || (rot == 90) || (rot == 180) || (rot == 270)); + + Instr rotate_bit = (rot / 90) << 10; + Emit(FCMLA_z_zzzi_h | SVEMulComplexIndexHelper(zm, index) | rotate_bit | + Rd(zda) | Rn(zn)); +} + +// SVEFPFastReduction. + +void Assembler::faddv(const VRegister& vd, + const PRegister& pg, + const ZRegister& zn) { + // FADDV , , . + // 0110 0101 ..00 0000 001. .... .... .... 
+ // size<23:22> | opc<18:16> = 000 | Pg<12:10> | Zn<9:5> | Vd<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(vd.IsScalar()); + VIXL_ASSERT(zn.GetLaneSizeInBytes() != kBRegSizeInBytes); + VIXL_ASSERT(AreSameLaneSize(zn, vd)); + + Emit(FADDV_v_p_z | SVESize(zn) | Rd(vd) | PgLow8(pg) | Rn(zn)); +} + +void Assembler::fmaxnmv(const VRegister& vd, + const PRegister& pg, + const ZRegister& zn) { + // FMAXNMV , , . + // 0110 0101 ..00 0100 001. .... .... .... + // size<23:22> | opc<18:16> = 100 | Pg<12:10> | Zn<9:5> | Vd<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(vd.IsScalar()); + VIXL_ASSERT(zn.GetLaneSizeInBytes() != kBRegSizeInBytes); + VIXL_ASSERT(AreSameLaneSize(zn, vd)); + + Emit(FMAXNMV_v_p_z | SVESize(zn) | Rd(vd) | PgLow8(pg) | Rn(zn)); +} + +void Assembler::fmaxv(const VRegister& vd, + const PRegister& pg, + const ZRegister& zn) { + // FMAXV , , . + // 0110 0101 ..00 0110 001. .... .... .... + // size<23:22> | opc<18:16> = 110 | Pg<12:10> | Zn<9:5> | Vd<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(vd.IsScalar()); + VIXL_ASSERT(zn.GetLaneSizeInBytes() != kBRegSizeInBytes); + VIXL_ASSERT(AreSameLaneSize(zn, vd)); + + Emit(FMAXV_v_p_z | SVESize(zn) | Rd(vd) | PgLow8(pg) | Rn(zn)); +} + +void Assembler::fminnmv(const VRegister& vd, + const PRegister& pg, + const ZRegister& zn) { + // FMINNMV , , . + // 0110 0101 ..00 0101 001. .... .... .... + // size<23:22> | opc<18:16> = 101 | Pg<12:10> | Zn<9:5> | Vd<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(vd.IsScalar()); + VIXL_ASSERT(zn.GetLaneSizeInBytes() != kBRegSizeInBytes); + VIXL_ASSERT(AreSameLaneSize(zn, vd)); + + Emit(FMINNMV_v_p_z | SVESize(zn) | Rd(vd) | PgLow8(pg) | Rn(zn)); +} + +void Assembler::fminv(const VRegister& vd, + const PRegister& pg, + const ZRegister& zn) { + // FMINV , , . + // 0110 0101 ..00 0111 001. .... .... .... + // size<23:22> | opc<18:16> = 111 | Pg<12:10> | Zn<9:5> | Vd<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(vd.IsScalar()); + VIXL_ASSERT(zn.GetLaneSizeInBytes() != kBRegSizeInBytes); + VIXL_ASSERT(AreSameLaneSize(zn, vd)); + + Emit(FMINV_v_p_z | SVESize(zn) | Rd(vd) | PgLow8(pg) | Rn(zn)); +} + +// SVEFPMulAdd. + +void Assembler::fmad(const ZRegister& zdn, + const PRegisterM& pg, + const ZRegister& zm, + const ZRegister& za) { + // FMAD ., /M, ., . + // 0110 0101 ..1. .... 100. .... .... .... + // size<23:22> | Za<20:16> | opc<14:13> = 00 | Pg<12:10> | Zm<9:5> | Zdn<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(AreSameLaneSize(zdn, zm, za)); + VIXL_ASSERT(zdn.GetLaneSizeInBytes() != kBRegSizeInBytes); + + Emit(FMAD_z_p_zzz | SVESize(zdn) | Rd(zdn) | PgLow8(pg) | Rn(zm) | Rm(za)); +} + +void Assembler::fmla(const ZRegister& zda, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm) { + // FMLA ., /M, ., . + // 0110 0101 ..1. .... 000. .... .... .... + // size<23:22> | Zm<20:16> | opc<14:13> = 00 | Pg<12:10> | Zn<9:5> | Zda<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(AreSameLaneSize(zda, zn, zm)); + VIXL_ASSERT(zda.GetLaneSizeInBytes() != kBRegSizeInBytes); + + Emit(FMLA_z_p_zzz | SVESize(zda) | Rd(zda) | PgLow8(pg) | Rn(zn) | Rm(zm)); +} + +void Assembler::fmls(const ZRegister& zda, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm) { + // FMLS ., /M, ., . + // 0110 0101 ..1. .... 001. .... .... .... 
+ // size<23:22> | Zm<20:16> | opc<14:13> = 01 | Pg<12:10> | Zn<9:5> | Zda<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(AreSameLaneSize(zda, zn, zm)); + VIXL_ASSERT(zda.GetLaneSizeInBytes() != kBRegSizeInBytes); + + Emit(FMLS_z_p_zzz | SVESize(zda) | Rd(zda) | PgLow8(pg) | Rn(zn) | Rm(zm)); +} + +void Assembler::fmsb(const ZRegister& zdn, + const PRegisterM& pg, + const ZRegister& zm, + const ZRegister& za) { + // FMSB ., /M, ., . + // 0110 0101 ..1. .... 101. .... .... .... + // size<23:22> | Za<20:16> | opc<14:13> = 01 | Pg<12:10> | Zm<9:5> | Zdn<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(AreSameLaneSize(zdn, zm, za)); + VIXL_ASSERT(zdn.GetLaneSizeInBytes() != kBRegSizeInBytes); + + Emit(FMSB_z_p_zzz | SVESize(zdn) | Rd(zdn) | PgLow8(pg) | Rn(zm) | Rm(za)); +} + +void Assembler::fnmad(const ZRegister& zdn, + const PRegisterM& pg, + const ZRegister& zm, + const ZRegister& za) { + // FNMAD ., /M, ., . + // 0110 0101 ..1. .... 110. .... .... .... + // size<23:22> | Za<20:16> | opc<14:13> = 10 | Pg<12:10> | Zm<9:5> | Zdn<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(AreSameLaneSize(zdn, zm, za)); + VIXL_ASSERT(zdn.GetLaneSizeInBytes() != kBRegSizeInBytes); + + Emit(FNMAD_z_p_zzz | SVESize(zdn) | Rd(zdn) | PgLow8(pg) | Rn(zm) | Rm(za)); +} + +void Assembler::fnmla(const ZRegister& zda, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm) { + // FNMLA ., /M, ., . + // 0110 0101 ..1. .... 010. .... .... .... + // size<23:22> | Zm<20:16> | opc<14:13> = 10 | Pg<12:10> | Zn<9:5> | Zda<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(AreSameLaneSize(zda, zn, zm)); + VIXL_ASSERT(zda.GetLaneSizeInBytes() != kBRegSizeInBytes); + + Emit(FNMLA_z_p_zzz | SVESize(zda) | Rd(zda) | PgLow8(pg) | Rn(zn) | Rm(zm)); +} + +void Assembler::fnmls(const ZRegister& zda, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm) { + // FNMLS ., /M, ., . + // 0110 0101 ..1. .... 011. .... .... .... + // size<23:22> | Zm<20:16> | opc<14:13> = 11 | Pg<12:10> | Zn<9:5> | Zda<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(AreSameLaneSize(zda, zn, zm)); + VIXL_ASSERT(zda.GetLaneSizeInBytes() != kBRegSizeInBytes); + + Emit(FNMLS_z_p_zzz | SVESize(zda) | Rd(zda) | PgLow8(pg) | Rn(zn) | Rm(zm)); +} + +void Assembler::fnmsb(const ZRegister& zdn, + const PRegisterM& pg, + const ZRegister& zm, + const ZRegister& za) { + // FNMSB ., /M, ., . + // 0110 0101 ..1. .... 111. .... .... .... + // size<23:22> | Za<20:16> | opc<14:13> = 11 | Pg<12:10> | Zm<9:5> | Zdn<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(AreSameLaneSize(zdn, zm, za)); + VIXL_ASSERT(zdn.GetLaneSizeInBytes() != kBRegSizeInBytes); + + Emit(FNMSB_z_p_zzz | SVESize(zdn) | Rd(zdn) | PgLow8(pg) | Rn(zm) | Rm(za)); +} + +Instr Assembler::SVEMulIndexHelper(unsigned lane_size_in_bytes_log2, + const ZRegister& zm, + int index, + Instr op_h, + Instr op_s, + Instr op_d) { + Instr size = lane_size_in_bytes_log2 << SVESize_offset; + Instr zm_with_index = Rm(zm); + Instr op = 0xffffffff; + // Allowable register number and lane index depends on the lane size. + switch (lane_size_in_bytes_log2) { + case kHRegSizeInBytesLog2: + VIXL_ASSERT(zm.GetCode() <= 7); + VIXL_ASSERT(IsUint3(index)); + // For H-sized lanes, size is encoded as 0b0x, where x is used as the top + // bit of the index. So, if index is less than four, the top bit of index + // is zero, and therefore size is 0b00. Otherwise, it's 0b01, the usual + // encoding for H-sized lanes. 
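+ // For example, with H-sized lanes and index 5 (0b101): the top bit of the
+ // index is 1, so the size field keeps its 0b01 value, and the low two bits
+ // (0b01) are placed in the top two bits of the Zm field, above the
+ // three-bit register number.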
+ if (index < 4) size = 0; + // Top two bits of "zm" encode the index. + zm_with_index |= (index & 3) << (Rm_offset + 3); + op = op_h; + break; + case kSRegSizeInBytesLog2: + VIXL_CHECK(zm.GetCode() <= 7); + VIXL_CHECK(IsUint2(index)); + // Top two bits of "zm" encode the index. + zm_with_index |= (index & 3) << (Rm_offset + 3); + op = op_s; + break; + case kDRegSizeInBytesLog2: + VIXL_CHECK(zm.GetCode() <= 15); + VIXL_CHECK(IsUint1(index)); + // Top bit of "zm" encodes the index. + zm_with_index |= (index & 1) << (Rm_offset + 4); + op = op_d; + break; + default: + VIXL_UNIMPLEMENTED(); + } + return op | zm_with_index | size; +} + +Instr Assembler::SVEMulLongIndexHelper(const ZRegister& zm, int index) { + Instr imm_field; + Instr zm_id; + if (zm.IsLaneSizeH()) { + VIXL_CHECK(zm.GetCode() <= 7); + VIXL_CHECK(IsUint3(index)); + imm_field = ExtractUnsignedBitfield32(2, 1, index) << 19; + zm_id = Rx<18, 16>(zm); + } else { + VIXL_ASSERT(zm.IsLaneSizeS()); + VIXL_CHECK(zm.GetCode() <= 15); + VIXL_CHECK(IsUint2(index)); + imm_field = ExtractBit(index, 1) << 20; + zm_id = Rx<19, 16>(zm); + } + + // Synthesize the low part of immediate encoding. + imm_field |= ExtractBit(index, 0) << 11; + + return zm_id | imm_field; +} + +Instr Assembler::SVEMulComplexIndexHelper(const ZRegister& zm, int index) { + Instr zm_idx_size; + if (zm.IsLaneSizeH()) { + // Zm<18:16> | i2<20:19> + VIXL_CHECK(zm.GetCode() <= 7); + VIXL_CHECK(IsUint2(index)); + zm_idx_size = (index << 19) | Rx<18, 16>(zm) | 0; + } else { + VIXL_ASSERT(zm.IsLaneSizeS()); + // Zm<19:16> | i1<20> + VIXL_CHECK(zm.GetCode() <= 15); + VIXL_CHECK(IsUint1(index)); + zm_idx_size = (index << 20) | Rx<19, 16>(zm) | (1 << 22); + } + return zm_idx_size; +} + +// SVEFPMulAddIndex. + +void Assembler::fmla(const ZRegister& zda, + const ZRegister& zn, + const ZRegister& zm, + int index) { + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(AreSameLaneSize(zda, zn, zm)); + + // The encoding of opcode, index, Zm, and size are synthesized in this + // variable. + Instr synthesized_op = SVEMulIndexHelper(zda.GetLaneSizeInBytesLog2(), + zm, + index, + FMLA_z_zzzi_h, + FMLA_z_zzzi_s, + FMLA_z_zzzi_d); + + Emit(synthesized_op | Rd(zda) | Rn(zn)); +} + +void Assembler::fmls(const ZRegister& zda, + const ZRegister& zn, + const ZRegister& zm, + int index) { + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(AreSameLaneSize(zda, zn, zm)); + + // The encoding of opcode, index, Zm, and size are synthesized in this + // variable. + Instr synthesized_op = SVEMulIndexHelper(zda.GetLaneSizeInBytesLog2(), + zm, + index, + FMLS_z_zzzi_h, + FMLS_z_zzzi_s, + FMLS_z_zzzi_d); + + Emit(synthesized_op | Rd(zda) | Rn(zn)); +} + +// SVEFPMulIndex. + +// This prototype maps to 3 instruction encodings: +void Assembler::fmul(const ZRegister& zd, + const ZRegister& zn, + const ZRegister& zm, + unsigned index) { + // FMUL ., ., .[] + // 0110 0100 ..1. .... 0010 00.. .... .... + // size<23:22> | opc<20:16> | Zn<9:5> | Zd<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(AreSameLaneSize(zd, zn, zm)); + + // The encoding of opcode, index, Zm, and size are synthesized in this + // variable. + Instr synthesized_op = SVEMulIndexHelper(zd.GetLaneSizeInBytesLog2(), + zm, + index, + FMUL_z_zzi_h, + FMUL_z_zzi_s, + FMUL_z_zzi_d); + + Emit(synthesized_op | Rd(zd) | Rn(zn)); +} + +// SVEFPUnaryOpPredicated. 
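+// As a rough usage sketch (not part of the upstream sources): the predicated
+// conversions in this group pick their opcode from the source and destination
+// lane sizes. Assuming the usual register literals (z0, z1, z2, p0) and the
+// `#define __ masm->` convention used in the VIXL examples, one might write:
+//
+//   __ fcvt(z0.VnS(), p0.Merging(), z1.VnH());    // widen FP16 to FP32
+//   __ fcvtzs(z2.VnD(), p0.Merging(), z0.VnS());  // FP32 to signed 64-bit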
+ +void Assembler::fcvt(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn) { + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + + Instr op = 0xffffffff; + switch (zn.GetLaneSizeInBytes()) { + case kHRegSizeInBytes: + switch (zd.GetLaneSizeInBytes()) { + case kSRegSizeInBytes: + op = FCVT_z_p_z_h2s; + break; + case kDRegSizeInBytes: + op = FCVT_z_p_z_h2d; + break; + } + break; + case kSRegSizeInBytes: + switch (zd.GetLaneSizeInBytes()) { + case kHRegSizeInBytes: + op = FCVT_z_p_z_s2h; + break; + case kDRegSizeInBytes: + op = FCVT_z_p_z_s2d; + break; + } + break; + case kDRegSizeInBytes: + switch (zd.GetLaneSizeInBytes()) { + case kHRegSizeInBytes: + op = FCVT_z_p_z_d2h; + break; + case kSRegSizeInBytes: + op = FCVT_z_p_z_d2s; + break; + } + break; + } + VIXL_ASSERT(op != 0xffffffff); + + Emit(op | Rd(zd) | PgLow8(pg) | Rn(zn)); +} + +void Assembler::fcvtzs(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn) { + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + Instr op = 0xffffffff; + switch (zn.GetLaneSizeInBytes()) { + case kHRegSizeInBytes: + switch (zd.GetLaneSizeInBytes()) { + case kHRegSizeInBytes: + op = FCVTZS_z_p_z_fp162h; + break; + case kSRegSizeInBytes: + op = FCVTZS_z_p_z_fp162w; + break; + case kDRegSizeInBytes: + op = FCVTZS_z_p_z_fp162x; + break; + } + break; + case kSRegSizeInBytes: + switch (zd.GetLaneSizeInBytes()) { + case kSRegSizeInBytes: + op = FCVTZS_z_p_z_s2w; + break; + case kDRegSizeInBytes: + op = FCVTZS_z_p_z_s2x; + break; + } + break; + case kDRegSizeInBytes: + switch (zd.GetLaneSizeInBytes()) { + case kSRegSizeInBytes: + op = FCVTZS_z_p_z_d2w; + break; + case kDRegSizeInBytes: + op = FCVTZS_z_p_z_d2x; + break; + } + break; + } + VIXL_ASSERT(op != 0xffffffff); + + Emit(op | Rd(zd) | PgLow8(pg) | Rn(zn)); +} + +void Assembler::fcvtzu(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn) { + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + Instr op = 0xffffffff; + switch (zn.GetLaneSizeInBytes()) { + case kHRegSizeInBytes: + switch (zd.GetLaneSizeInBytes()) { + case kHRegSizeInBytes: + op = FCVTZU_z_p_z_fp162h; + break; + case kSRegSizeInBytes: + op = FCVTZU_z_p_z_fp162w; + break; + case kDRegSizeInBytes: + op = FCVTZU_z_p_z_fp162x; + break; + } + break; + case kSRegSizeInBytes: + switch (zd.GetLaneSizeInBytes()) { + case kSRegSizeInBytes: + op = FCVTZU_z_p_z_s2w; + break; + case kDRegSizeInBytes: + op = FCVTZU_z_p_z_s2x; + break; + } + break; + case kDRegSizeInBytes: + switch (zd.GetLaneSizeInBytes()) { + case kSRegSizeInBytes: + op = FCVTZU_z_p_z_d2w; + break; + case kDRegSizeInBytes: + op = FCVTZU_z_p_z_d2x; + break; + } + break; + } + VIXL_ASSERT(op != 0xffffffff); + + Emit(op | Rd(zd) | PgLow8(pg) | Rn(zn)); +} + +void Assembler::frecpx(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn) { + // FRECPX ., /M, . + // 0110 0101 ..00 1100 101. .... .... .... 
+ // size<23:22> | opc<17:16> = 00 | Pg<12:10> | Zn<9:5> | Zd<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(AreSameLaneSize(zd, zn)); + VIXL_ASSERT(zd.GetLaneSizeInBytes() != kBRegSizeInBytes); + + Emit(FRECPX_z_p_z | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zn)); +} + +void Assembler::frinta(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn) { + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(AreSameLaneSize(zd, zn)); + VIXL_ASSERT(zd.GetLaneSizeInBytes() != kBRegSizeInBytes); + + Emit(FRINTA_z_p_z | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zn)); +} + +void Assembler::frinti(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn) { + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(AreSameLaneSize(zd, zn)); + VIXL_ASSERT(zd.GetLaneSizeInBytes() != kBRegSizeInBytes); + + Emit(FRINTI_z_p_z | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zn)); +} + +void Assembler::frintm(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn) { + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(AreSameLaneSize(zd, zn)); + VIXL_ASSERT(zd.GetLaneSizeInBytes() != kBRegSizeInBytes); + + Emit(FRINTM_z_p_z | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zn)); +} + +void Assembler::frintn(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn) { + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(AreSameLaneSize(zd, zn)); + VIXL_ASSERT(zd.GetLaneSizeInBytes() != kBRegSizeInBytes); + + Emit(FRINTN_z_p_z | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zn)); +} + +void Assembler::frintp(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn) { + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(AreSameLaneSize(zd, zn)); + VIXL_ASSERT(zd.GetLaneSizeInBytes() != kBRegSizeInBytes); + + Emit(FRINTP_z_p_z | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zn)); +} + +void Assembler::frintx(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn) { + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(AreSameLaneSize(zd, zn)); + VIXL_ASSERT(zd.GetLaneSizeInBytes() != kBRegSizeInBytes); + + Emit(FRINTX_z_p_z | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zn)); +} + +void Assembler::frintz(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn) { + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(AreSameLaneSize(zd, zn)); + VIXL_ASSERT(zd.GetLaneSizeInBytes() != kBRegSizeInBytes); + + Emit(FRINTZ_z_p_z | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zn)); +} + +void Assembler::fsqrt(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn) { + // FSQRT ., /M, . + // 0110 0101 ..00 1101 101. .... .... .... 
+ // size<23:22> | opc<17:16> = 01 | Pg<12:10> | Zn<9:5> | Zd<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(AreSameLaneSize(zd, zn)); + VIXL_ASSERT(zd.GetLaneSizeInBytes() != kBRegSizeInBytes); + + Emit(FSQRT_z_p_z | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zn)); +} + +void Assembler::scvtf(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn) { + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + Instr op = 0xffffffff; + switch (zn.GetLaneSizeInBytes()) { + case kHRegSizeInBytes: + switch (zd.GetLaneSizeInBytes()) { + case kHRegSizeInBytes: + op = SCVTF_z_p_z_h2fp16; + break; + } + break; + case kSRegSizeInBytes: + switch (zd.GetLaneSizeInBytes()) { + case kHRegSizeInBytes: + op = SCVTF_z_p_z_w2fp16; + break; + case kSRegSizeInBytes: + op = SCVTF_z_p_z_w2s; + break; + case kDRegSizeInBytes: + op = SCVTF_z_p_z_w2d; + break; + } + break; + case kDRegSizeInBytes: + switch (zd.GetLaneSizeInBytes()) { + case kHRegSizeInBytes: + op = SCVTF_z_p_z_x2fp16; + break; + case kSRegSizeInBytes: + op = SCVTF_z_p_z_x2s; + break; + case kDRegSizeInBytes: + op = SCVTF_z_p_z_x2d; + break; + } + break; + } + VIXL_ASSERT(op != 0xffffffff); + + Emit(op | Rd(zd) | PgLow8(pg) | Rn(zn)); +} + +void Assembler::ucvtf(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn) { + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + Instr op = 0xffffffff; + switch (zn.GetLaneSizeInBytes()) { + case kHRegSizeInBytes: + switch (zd.GetLaneSizeInBytes()) { + case kHRegSizeInBytes: + op = UCVTF_z_p_z_h2fp16; + break; + } + break; + case kSRegSizeInBytes: + switch (zd.GetLaneSizeInBytes()) { + case kHRegSizeInBytes: + op = UCVTF_z_p_z_w2fp16; + break; + case kSRegSizeInBytes: + op = UCVTF_z_p_z_w2s; + break; + case kDRegSizeInBytes: + op = UCVTF_z_p_z_w2d; + break; + } + break; + case kDRegSizeInBytes: + switch (zd.GetLaneSizeInBytes()) { + case kHRegSizeInBytes: + op = UCVTF_z_p_z_x2fp16; + break; + case kSRegSizeInBytes: + op = UCVTF_z_p_z_x2s; + break; + case kDRegSizeInBytes: + op = UCVTF_z_p_z_x2d; + break; + } + break; + } + VIXL_ASSERT(op != 0xffffffff); + + Emit(op | Rd(zd) | PgLow8(pg) | Rn(zn)); +} + +// SVEFPUnaryOpUnpredicated. + +void Assembler::frecpe(const ZRegister& zd, const ZRegister& zn) { + // FRECPE ., . + // 0110 0101 ..00 1110 0011 00.. .... .... + // size<23:22> | opc<18:16> = 110 | Zn<9:5> | Zd<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(AreSameLaneSize(zd, zn)); + VIXL_ASSERT(zd.GetLaneSizeInBytes() != kBRegSizeInBytes); + + Emit(FRECPE_z_z | SVESize(zd) | Rd(zd) | Rn(zn)); +} + +void Assembler::frsqrte(const ZRegister& zd, const ZRegister& zn) { + // FRSQRTE ., . + // 0110 0101 ..00 1111 0011 00.. .... .... + // size<23:22> | opc<18:16> = 111 | Zn<9:5> | Zd<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(AreSameLaneSize(zd, zn)); + VIXL_ASSERT(zd.GetLaneSizeInBytes() != kBRegSizeInBytes); + + Emit(FRSQRTE_z_z | SVESize(zd) | Rd(zd) | Rn(zn)); +} + +// SVEIncDecByPredicateCount. + +void Assembler::decp(const Register& rdn, const PRegisterWithLaneSize& pg) { + // DECP , . + // 0010 0101 ..10 1101 1000 100. .... .... + // size<23:22> | op<17> = 0 | D<16> = 1 | opc2<10:9> = 00 | Pg<8:5> | + // Rdn<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(rdn.IsX()); + + Emit(DECP_r_p_r | SVESize(pg) | Rd(rdn) | Rx<8, 5>(pg)); +} + +void Assembler::decp(const ZRegister& zdn, const PRegister& pg) { + // DECP ., + // 0010 0101 ..10 1101 1000 000. .... .... 
+ // size<23:22> | op<17> = 0 | D<16> = 1 | opc2<10:9> = 00 | Pg<8:5> | + // Zdn<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(zdn.GetLaneSizeInBytes() != kBRegSizeInBytes); + VIXL_ASSERT(pg.IsUnqualified()); + + Emit(DECP_z_p_z | SVESize(zdn) | Rd(zdn) | Pg<8, 5>(pg)); +} + +void Assembler::incp(const Register& rdn, const PRegisterWithLaneSize& pg) { + // INCP , . + // 0010 0101 ..10 1100 1000 100. .... .... + // size<23:22> | op<17> = 0 | D<16> = 0 | opc2<10:9> = 00 | Pg<8:5> | + // Rdn<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(rdn.IsX()); + + Emit(INCP_r_p_r | SVESize(pg) | Rd(rdn) | Rx<8, 5>(pg)); +} + +void Assembler::incp(const ZRegister& zdn, const PRegister& pg) { + // INCP ., + // 0010 0101 ..10 1100 1000 000. .... .... + // size<23:22> | op<17> = 0 | D<16> = 0 | opc2<10:9> = 00 | Pg<8:5> | + // Zdn<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(zdn.GetLaneSizeInBytes() != kBRegSizeInBytes); + VIXL_ASSERT(pg.IsUnqualified()); + + Emit(INCP_z_p_z | SVESize(zdn) | Rd(zdn) | Pg<8, 5>(pg)); +} + +void Assembler::sqdecp(const Register& xd, + const PRegisterWithLaneSize& pg, + const Register& wn) { + // SQDECP , ., + // 0010 0101 ..10 1010 1000 100. .... .... + // size<23:22> | D<17> = 1 | U<16> = 0 | sf<10> = 0 | op<9> = 0 | Pg<8:5> | + // Rdn<4:0> + + USE(wn); + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(xd.IsX() && wn.IsW() && xd.Aliases(wn)); + + Emit(SQDECP_r_p_r_sx | SVESize(pg) | Rd(xd) | Rx<8, 5>(pg)); +} + +void Assembler::sqdecp(const Register& xdn, const PRegisterWithLaneSize& pg) { + // SQDECP , . + // 0010 0101 ..10 1010 1000 110. .... .... + // size<23:22> | D<17> = 1 | U<16> = 0 | sf<10> = 1 | op<9> = 0 | Pg<8:5> | + // Rdn<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(xdn.IsX()); + + Emit(SQDECP_r_p_r_x | SVESize(pg) | Rd(xdn) | Rx<8, 5>(pg)); +} + +void Assembler::sqdecp(const ZRegister& zdn, const PRegister& pg) { + // SQDECP ., + // 0010 0101 ..10 1010 1000 000. .... .... + // size<23:22> | D<17> = 1 | U<16> = 0 | opc<10:9> = 00 | Pg<8:5> | Zdn<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(zdn.GetLaneSizeInBytes() != kBRegSizeInBytes); + VIXL_ASSERT(pg.IsUnqualified()); + + Emit(SQDECP_z_p_z | SVESize(zdn) | Rd(zdn) | Pg<8, 5>(pg)); +} + +void Assembler::sqincp(const Register& xd, + const PRegisterWithLaneSize& pg, + const Register& wn) { + // SQINCP , ., + // 0010 0101 ..10 1000 1000 100. .... .... + // size<23:22> | D<17> = 0 | U<16> = 0 | sf<10> = 0 | op<9> = 0 | Pg<8:5> | + // Rdn<4:0> + + USE(wn); + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(xd.IsX() && wn.IsW() && xd.Aliases(wn)); + + Emit(SQINCP_r_p_r_sx | SVESize(pg) | Rd(xd) | Rx<8, 5>(pg)); +} + +void Assembler::sqincp(const Register& xdn, const PRegisterWithLaneSize& pg) { + // SQINCP , . + // 0010 0101 ..10 1000 1000 110. .... .... + // size<23:22> | D<17> = 0 | U<16> = 0 | sf<10> = 1 | op<9> = 0 | Pg<8:5> | + // Rdn<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(xdn.IsX()); + + Emit(SQINCP_r_p_r_x | SVESize(pg) | Rd(xdn) | Rx<8, 5>(pg)); +} + +void Assembler::sqincp(const ZRegister& zdn, const PRegister& pg) { + // SQINCP ., + // 0010 0101 ..10 1000 1000 000. .... .... 
+ // size<23:22> | D<17> = 0 | U<16> = 0 | opc<10:9> = 00 | Pg<8:5> | Zdn<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(zdn.GetLaneSizeInBytes() != kBRegSizeInBytes); + VIXL_ASSERT(pg.IsUnqualified()); + + Emit(SQINCP_z_p_z | SVESize(zdn) | Rd(zdn) | Pg<8, 5>(pg)); +} + +void Assembler::uqdecp(const Register& rdn, const PRegisterWithLaneSize& pg) { + // UQDECP , . + // UQDECP , . + // 0010 0101 ..10 1011 1000 10.. .... .... + // size<23:22> | D<17> = 1 | U<16> = 1 | sf<10> | op<9> = 0 | Pg<8:5> | + // Rdn<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + + Instr op = rdn.IsX() ? UQDECP_r_p_r_x : UQDECP_r_p_r_uw; + Emit(op | SVESize(pg) | Rd(rdn) | Rx<8, 5>(pg)); +} + +void Assembler::uqdecp(const ZRegister& zdn, const PRegister& pg) { + // UQDECP ., + // 0010 0101 ..10 1011 1000 000. .... .... + // size<23:22> | D<17> = 1 | U<16> = 1 | opc<10:9> = 00 | Pg<8:5> | Zdn<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(zdn.GetLaneSizeInBytes() != kBRegSizeInBytes); + VIXL_ASSERT(pg.IsUnqualified()); + + Emit(UQDECP_z_p_z | SVESize(zdn) | Rd(zdn) | Pg<8, 5>(pg)); +} + +void Assembler::uqincp(const Register& rdn, const PRegisterWithLaneSize& pg) { + // UQINCP , . + // 0010 0101 ..10 1001 1000 100. .... .... + // size<23:22> | D<17> = 0 | U<16> = 1 | sf<10> = 0 | op<9> = 0 | Pg<8:5> | + // Rdn<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + + Instr op = rdn.IsX() ? UQINCP_r_p_r_x : UQINCP_r_p_r_uw; + Emit(op | SVESize(pg) | Rd(rdn) | Rx<8, 5>(pg)); +} + +void Assembler::uqincp(const ZRegister& zdn, const PRegister& pg) { + // UQINCP ., + // 0010 0101 ..10 1001 1000 000. .... .... + // size<23:22> | D<17> = 0 | U<16> = 1 | opc<10:9> = 00 | Pg<8:5> | Zdn<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(zdn.GetLaneSizeInBytes() != kBRegSizeInBytes); + VIXL_ASSERT(pg.IsUnqualified()); + + Emit(UQINCP_z_p_z | SVESize(zdn) | Rd(zdn) | Pg<8, 5>(pg)); +} + +// SVEIndexGeneration. + +void Assembler::index(const ZRegister& zd, int start, int step) { + // INDEX ., #, # + // 0000 0100 ..1. .... 0100 00.. .... .... + // size<23:22> | step<20:16> | start<9:5> | Zd<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + + Emit(INDEX_z_ii | SVESize(zd) | ImmField<20, 16>(step) | + ImmField<9, 5>(start) | Rd(zd)); +} + +void Assembler::index(const ZRegister& zd, + const Register& rn, + const Register& rm) { + // INDEX ., , + // 0000 0100 ..1. .... 0100 11.. .... .... + // size<23:22> | Rm<20:16> | Rn<9:5> | Zd<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(static_cast(rn.GetSizeInBits()) >= + zd.GetLaneSizeInBits()); + VIXL_ASSERT(static_cast(rm.GetSizeInBits()) >= + zd.GetLaneSizeInBits()); + + Emit(INDEX_z_rr | SVESize(zd) | Rd(zd) | Rn(rn) | Rm(rm)); +} + +void Assembler::index(const ZRegister& zd, const Register& rn, int imm5) { + // INDEX ., , # + // 0000 0100 ..1. .... 0100 01.. .... .... + // size<23:22> | imm5<20:16> | Rn<9:5> | Zd<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(static_cast(rn.GetSizeInBits()) >= + zd.GetLaneSizeInBits()); + + Emit(INDEX_z_ri | SVESize(zd) | Rd(zd) | Rn(rn) | ImmField<20, 16>(imm5)); +} + +void Assembler::index(const ZRegister& zd, int imm5, const Register& rm) { + // INDEX ., #, + // 0000 0100 ..1. .... 0100 10.. .... .... 
+ // size<23:22> | Rm<20:16> | imm5<9:5> | Zd<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(static_cast(rm.GetSizeInBits()) >= + zd.GetLaneSizeInBits()); + + Emit(INDEX_z_ir | SVESize(zd) | Rd(zd) | ImmField<9, 5>(imm5) | Rm(rm)); +} + +// SVEIntArithmeticUnpredicated. + +void Assembler::add(const ZRegister& zd, + const ZRegister& zn, + const ZRegister& zm) { + // ADD ., ., . + // 0000 0100 ..1. .... 0000 00.. .... .... + // size<23:22> | Zm<20:16> | opc<12:10> = 000 | Zn<9:5> | Zd<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(AreSameLaneSize(zd, zn, zm)); + + Emit(ADD_z_zz | SVESize(zd) | Rd(zd) | Rn(zn) | Rm(zm)); +} + +void Assembler::sqadd(const ZRegister& zd, + const ZRegister& zn, + const ZRegister& zm) { + // SQADD ., ., . + // 0000 0100 ..1. .... 0001 00.. .... .... + // size<23:22> | Zm<20:16> | opc<12:10> = 100 | Zn<9:5> | Zd<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(AreSameLaneSize(zd, zn, zm)); + + Emit(SQADD_z_zz | SVESize(zd) | Rd(zd) | Rn(zn) | Rm(zm)); +} + +void Assembler::sqsub(const ZRegister& zd, + const ZRegister& zn, + const ZRegister& zm) { + // SQSUB ., ., . + // 0000 0100 ..1. .... 0001 10.. .... .... + // size<23:22> | Zm<20:16> | opc<12:10> = 110 | Zn<9:5> | Zd<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(AreSameLaneSize(zd, zn, zm)); + + Emit(SQSUB_z_zz | SVESize(zd) | Rd(zd) | Rn(zn) | Rm(zm)); +} + +void Assembler::sub(const ZRegister& zd, + const ZRegister& zn, + const ZRegister& zm) { + // SUB ., ., . + // 0000 0100 ..1. .... 0000 01.. .... .... + // size<23:22> | Zm<20:16> | opc<12:10> = 001 | Zn<9:5> | Zd<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(AreSameLaneSize(zd, zn, zm)); + + Emit(SUB_z_zz | SVESize(zd) | Rd(zd) | Rn(zn) | Rm(zm)); +} + +void Assembler::uqadd(const ZRegister& zd, + const ZRegister& zn, + const ZRegister& zm) { + // UQADD ., ., . + // 0000 0100 ..1. .... 0001 01.. .... .... + // size<23:22> | Zm<20:16> | opc<12:10> = 101 | Zn<9:5> | Zd<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(AreSameLaneSize(zd, zn, zm)); + + Emit(UQADD_z_zz | SVESize(zd) | Rd(zd) | Rn(zn) | Rm(zm)); +} + +void Assembler::uqsub(const ZRegister& zd, + const ZRegister& zn, + const ZRegister& zm) { + // UQSUB ., ., . + // 0000 0100 ..1. .... 0001 11.. .... .... + // size<23:22> | Zm<20:16> | opc<12:10> = 111 | Zn<9:5> | Zd<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(AreSameLaneSize(zd, zn, zm)); + + Emit(UQSUB_z_zz | SVESize(zd) | Rd(zd) | Rn(zn) | Rm(zm)); +} + +// SVEIntBinaryArithmeticPredicated. + +void Assembler::add(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm) { + // ADD ., /M, ., . + // 0000 0100 ..00 0000 000. .... .... .... + // size<23:22> | opc<18:16> = 000 | Pg<12:10> | Zm<9:5> | Zdn<4:0> + + USE(zn); + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(zd.Is(zn)); + VIXL_ASSERT(AreSameFormat(zd, zn, zm)); + + Emit(ADD_z_p_zz | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zm)); +} + +void Assembler::and_(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm) { + // AND ., /M, ., . + // 0000 0100 ..01 1010 000. .... .... .... 
+ // size<23:22> | opc<18:16> = 010 | Pg<12:10> | Zm<9:5> | Zdn<4:0> + + USE(zn); + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(zd.Is(zn)); + VIXL_ASSERT(AreSameFormat(zd, zn, zm)); + + Emit(AND_z_p_zz | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zm)); +} + +void Assembler::bic(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm) { + // BIC ., /M, ., . + // 0000 0100 ..01 1011 000. .... .... .... + // size<23:22> | opc<18:16> = 011 | Pg<12:10> | Zm<9:5> | Zdn<4:0> + + USE(zn); + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(zd.Is(zn)); + VIXL_ASSERT(AreSameFormat(zd, zn, zm)); + + Emit(BIC_z_p_zz | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zm)); +} + +void Assembler::eor(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm) { + // EOR ., /M, ., . + // 0000 0100 ..01 1001 000. .... .... .... + // size<23:22> | opc<18:16> = 001 | Pg<12:10> | Zm<9:5> | Zdn<4:0> + + USE(zn); + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(zd.Is(zn)); + VIXL_ASSERT(AreSameFormat(zd, zn, zm)); + + Emit(EOR_z_p_zz | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zm)); +} + +void Assembler::mul(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm) { + // MUL ., /M, ., . + // 0000 0100 ..01 0000 000. .... .... .... + // size<23:22> | H<17> = 0 | U<16> = 0 | Pg<12:10> | Zm<9:5> | Zdn<4:0> + + USE(zn); + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(zd.Is(zn)); + VIXL_ASSERT(AreSameFormat(zd, zn, zm)); + + Emit(MUL_z_p_zz | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zm)); +} + +void Assembler::orr(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm) { + // ORR ., /M, ., . + // 0000 0100 ..01 1000 000. .... .... .... + // size<23:22> | opc<18:16> = 000 | Pg<12:10> | Zm<9:5> | Zdn<4:0> + + USE(zn); + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(zd.Is(zn)); + VIXL_ASSERT(AreSameFormat(zd, zn, zm)); + + Emit(ORR_z_p_zz | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zm)); +} + +void Assembler::sabd(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm) { + // SABD ., /M, ., . + // 0000 0100 ..00 1100 000. .... .... .... + // size<23:22> | opc<18:17> = 10 | U<16> = 0 | Pg<12:10> | Zm<9:5> | Zdn<4:0> + + USE(zn); + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(zd.Is(zn)); + VIXL_ASSERT(AreSameFormat(zd, zn, zm)); + + Emit(SABD_z_p_zz | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zm)); +} + +void Assembler::sdiv(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm) { + // SDIV ., /M, ., . + // 0000 0100 ..01 0100 000. .... .... .... + // size<23:22> | R<17> = 0 | U<16> = 0 | Pg<12:10> | Zm<9:5> | Zdn<4:0> + + USE(zn); + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(zd.Is(zn)); + VIXL_ASSERT(AreSameFormat(zd, zn, zm)); + VIXL_ASSERT(zd.IsLaneSizeS() || zd.IsLaneSizeD()); + + Emit(SDIV_z_p_zz | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zm)); +} + +void Assembler::sdivr(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm) { + // SDIVR ., /M, ., . + // 0000 0100 ..01 0110 000. .... .... .... 
+ // size<23:22> | R<17> = 1 | U<16> = 0 | Pg<12:10> | Zm<9:5> | Zdn<4:0> + + USE(zn); + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(zd.Is(zn)); + VIXL_ASSERT(AreSameFormat(zd, zn, zm)); + VIXL_ASSERT(zd.IsLaneSizeS() || zd.IsLaneSizeD()); + + Emit(SDIVR_z_p_zz | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zm)); +} + +void Assembler::smax(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm) { + // SMAX ., /M, ., . + // 0000 0100 ..00 1000 000. .... .... .... + // size<23:22> | opc<18:17> = 00 | U<16> = 0 | Pg<12:10> | Zm<9:5> | Zdn<4:0> + + USE(zn); + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(zd.Is(zn)); + VIXL_ASSERT(AreSameFormat(zd, zn, zm)); + + Emit(SMAX_z_p_zz | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zm)); +} + +void Assembler::smin(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm) { + // SMIN ., /M, ., . + // 0000 0100 ..00 1010 000. .... .... .... + // size<23:22> | opc<18:17> = 01 | U<16> = 0 | Pg<12:10> | Zm<9:5> | Zdn<4:0> + + USE(zn); + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(zd.Is(zn)); + VIXL_ASSERT(AreSameFormat(zd, zn, zm)); + + Emit(SMIN_z_p_zz | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zm)); +} + +void Assembler::smulh(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm) { + // SMULH ., /M, ., . + // 0000 0100 ..01 0010 000. .... .... .... + // size<23:22> | H<17> = 1 | U<16> = 0 | Pg<12:10> | Zm<9:5> | Zdn<4:0> + + USE(zn); + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(zd.Is(zn)); + VIXL_ASSERT(AreSameFormat(zd, zn, zm)); + + Emit(SMULH_z_p_zz | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zm)); +} + +void Assembler::sub(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm) { + // SUB ., /M, ., . + // 0000 0100 ..00 0001 000. .... .... .... + // size<23:22> | opc<18:16> = 001 | Pg<12:10> | Zm<9:5> | Zdn<4:0> + + USE(zn); + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(zd.Is(zn)); + VIXL_ASSERT(AreSameFormat(zd, zn, zm)); + + Emit(SUB_z_p_zz | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zm)); +} + +void Assembler::subr(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm) { + // SUBR ., /M, ., . + // 0000 0100 ..00 0011 000. .... .... .... + // size<23:22> | opc<18:16> = 011 | Pg<12:10> | Zm<9:5> | Zdn<4:0> + + USE(zn); + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(zd.Is(zn)); + VIXL_ASSERT(AreSameFormat(zd, zn, zm)); + + Emit(SUBR_z_p_zz | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zm)); +} + +void Assembler::uabd(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm) { + // UABD ., /M, ., . + // 0000 0100 ..00 1101 000. .... .... .... + // size<23:22> | opc<18:17> = 10 | U<16> = 1 | Pg<12:10> | Zm<9:5> | Zdn<4:0> + + USE(zn); + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(zd.Is(zn)); + VIXL_ASSERT(AreSameFormat(zd, zn, zm)); + + Emit(UABD_z_p_zz | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zm)); +} + +void Assembler::udiv(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm) { + // UDIV ., /M, ., . + // 0000 0100 ..01 0101 000. .... .... .... 
+ // size<23:22> | R<17> = 0 | U<16> = 1 | Pg<12:10> | Zm<9:5> | Zdn<4:0> + + USE(zn); + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(zd.Is(zn)); + VIXL_ASSERT(AreSameFormat(zd, zn, zm)); + VIXL_ASSERT(zd.IsLaneSizeS() || zd.IsLaneSizeD()); + + Emit(UDIV_z_p_zz | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zm)); +} + +void Assembler::udivr(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm) { + // UDIVR ., /M, ., . + // 0000 0100 ..01 0111 000. .... .... .... + // size<23:22> | R<17> = 1 | U<16> = 1 | Pg<12:10> | Zm<9:5> | Zdn<4:0> + + USE(zn); + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(zd.Is(zn)); + VIXL_ASSERT(AreSameFormat(zd, zn, zm)); + VIXL_ASSERT(zd.IsLaneSizeS() || zd.IsLaneSizeD()); + + Emit(UDIVR_z_p_zz | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zm)); +} + +void Assembler::umax(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm) { + // UMAX ., /M, ., . + // 0000 0100 ..00 1001 000. .... .... .... + // size<23:22> | opc<18:17> = 00 | U<16> = 1 | Pg<12:10> | Zm<9:5> | Zdn<4:0> + + USE(zn); + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(zd.Is(zn)); + VIXL_ASSERT(AreSameFormat(zd, zn, zm)); + + Emit(UMAX_z_p_zz | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zm)); +} + +void Assembler::umin(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm) { + // UMIN ., /M, ., . + // 0000 0100 ..00 1011 000. .... .... .... + // size<23:22> | opc<18:17> = 01 | U<16> = 1 | Pg<12:10> | Zm<9:5> | Zdn<4:0> + + USE(zn); + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(zd.Is(zn)); + VIXL_ASSERT(AreSameFormat(zd, zn, zm)); + + Emit(UMIN_z_p_zz | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zm)); +} + +void Assembler::umulh(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm) { + // UMULH ., /M, ., . + // 0000 0100 ..01 0011 000. .... .... .... + // size<23:22> | H<17> = 1 | U<16> = 1 | Pg<12:10> | Zm<9:5> | Zdn<4:0> + + USE(zn); + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(zd.Is(zn)); + VIXL_ASSERT(AreSameFormat(zd, zn, zm)); + + Emit(UMULH_z_p_zz | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zm)); +} + +// SVEIntCompareScalars. + +void Assembler::ctermeq(const Register& rn, const Register& rm) { + // CTERMEQ , + // 0010 0101 1.1. .... 0010 00.. ...0 0000 + // op<23> = 1 | sz<22> | Rm<20:16> | Rn<9:5> | ne<4> = 0 + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(AreSameSizeAndType(rn, rm)); + const Instr sz = rn.Is64Bits() ? 0x00400000 : 0x00000000; + + Emit(CTERMEQ_rr | sz | Rn(rn) | Rm(rm)); +} + +void Assembler::ctermne(const Register& rn, const Register& rm) { + // CTERMNE , + // 0010 0101 1.1. .... 0010 00.. ...1 0000 + // op<23> = 1 | sz<22> | Rm<20:16> | Rn<9:5> | ne<4> = 1 + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(AreSameSizeAndType(rn, rm)); + const Instr sz = rn.Is64Bits() ? 0x00400000 : 0x00000000; + + Emit(CTERMNE_rr | sz | Rn(rn) | Rm(rm)); +} + +void Assembler::whilele(const PRegisterWithLaneSize& pd, + const Register& rn, + const Register& rm) { + // WHILELE ., , + // 0010 0101 ..1. .... 000. 01.. ...1 .... + // size<23:22> | Rm<20:16> | sf<12> | U<11> = 0 | lt<10> = 1 | Rn<9:5> | + // eq<4> = 1 | Pd<3:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(AreSameSizeAndType(rn, rm)); + const Instr sf = rn.Is64Bits() ? 
0x00001000 : 0x00000000; + + Emit(WHILELE_p_p_rr | SVESize(pd) | sf | Pd(pd) | Rn(rn) | Rm(rm)); +} + +void Assembler::whilelo(const PRegisterWithLaneSize& pd, + const Register& rn, + const Register& rm) { + // WHILELO ., , + // 0010 0101 ..1. .... 000. 11.. ...0 .... + // size<23:22> | Rm<20:16> | sf<12> | U<11> = 1 | lt<10> = 1 | Rn<9:5> | + // eq<4> = 0 | Pd<3:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(AreSameSizeAndType(rn, rm)); + const Instr sf = rn.Is64Bits() ? 0x00001000 : 0x00000000; + + Emit(WHILELO_p_p_rr | SVESize(pd) | sf | Pd(pd) | Rn(rn) | Rm(rm)); +} + +void Assembler::whilels(const PRegisterWithLaneSize& pd, + const Register& rn, + const Register& rm) { + // WHILELS ., , + // 0010 0101 ..1. .... 000. 11.. ...1 .... + // size<23:22> | Rm<20:16> | sf<12> | U<11> = 1 | lt<10> = 1 | Rn<9:5> | + // eq<4> = 1 | Pd<3:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(AreSameSizeAndType(rn, rm)); + const Instr sf = rn.Is64Bits() ? 0x00001000 : 0x00000000; + + Emit(WHILELS_p_p_rr | SVESize(pd) | sf | Pd(pd) | Rn(rn) | Rm(rm)); +} + +void Assembler::whilelt(const PRegisterWithLaneSize& pd, + const Register& rn, + const Register& rm) { + // WHILELT ., , + // 0010 0101 ..1. .... 000. 01.. ...0 .... + // size<23:22> | Rm<20:16> | sf<12> | U<11> = 0 | lt<10> = 1 | Rn<9:5> | + // eq<4> = 0 | Pd<3:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(AreSameSizeAndType(rn, rm)); + const Instr sf = rn.Is64Bits() ? 0x00001000 : 0x00000000; + + Emit(WHILELT_p_p_rr | SVESize(pd) | sf | Pd(pd) | Rn(rn) | Rm(rm)); +} + +void Assembler::CompareVectors(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const ZRegister& zn, + const ZRegister& zm, + SVEIntCompareVectorsOp op) { + Emit(op | SVESize(zn) | Pd(pd) | PgLow8(pg) | Rn(zn) | Rm(zm)); +} + +void Assembler::CompareVectors(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const ZRegister& zn, + int imm, + SVEIntCompareSignedImmOp op) { + Emit(op | SVESize(zn) | Pd(pd) | PgLow8(pg) | Rn(zn) | ImmField<20, 16>(imm)); +} + +void Assembler::CompareVectors(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const ZRegister& zn, + unsigned imm, + SVEIntCompareUnsignedImmOp op) { + Emit(op | SVESize(zn) | Pd(pd) | PgLow8(pg) | Rn(zn) | + ImmUnsignedField<20, 14>(imm)); +} + +void Assembler::cmp(Condition cond, + const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const ZRegister& zn, + const ZRegister& zm) { + switch (cond) { + case eq: + cmpeq(pd, pg, zn, zm); + break; + case ge: + cmpge(pd, pg, zn, zm); + break; + case gt: + cmpgt(pd, pg, zn, zm); + break; + case le: + cmple(pd, pg, zn, zm); + break; + case lt: + cmplt(pd, pg, zn, zm); + break; + case ne: + cmpne(pd, pg, zn, zm); + break; + case hi: + cmphi(pd, pg, zn, zm); + break; + case hs: + cmphs(pd, pg, zn, zm); + break; + case lo: + cmplo(pd, pg, zn, zm); + break; + case ls: + cmpls(pd, pg, zn, zm); + break; + default: + VIXL_UNREACHABLE(); + } +} + +// SVEIntCompareSignedImm. + +void Assembler::cmpeq(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const ZRegister& zn, + int imm5) { + // CMPEQ ., /Z, ., # + // 0010 0101 ..0. .... 100. .... ...0 .... 
+ // size<23:22> | imm5<20:16> | op<15> = 1 | o2<13> = 0 | Pg<12:10> | Zn<9:5> + // | ne<4> = 0 | Pd<3:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(AreSameLaneSize(pd, zn)); + + CompareVectors(pd, pg, zn, imm5, CMPEQ_p_p_zi); +} + +void Assembler::cmpge(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const ZRegister& zn, + int imm5) { + // CMPGE ., /Z, ., # + // 0010 0101 ..0. .... 000. .... ...0 .... + // size<23:22> | imm5<20:16> | op<15> = 0 | o2<13> = 0 | Pg<12:10> | Zn<9:5> + // | ne<4> = 0 | Pd<3:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(AreSameLaneSize(pd, zn)); + + CompareVectors(pd, pg, zn, imm5, CMPGE_p_p_zi); +} + +void Assembler::cmpgt(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const ZRegister& zn, + int imm5) { + // CMPGT ., /Z, ., # + // 0010 0101 ..0. .... 000. .... ...1 .... + // size<23:22> | imm5<20:16> | op<15> = 0 | o2<13> = 0 | Pg<12:10> | Zn<9:5> + // | ne<4> = 1 | Pd<3:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(AreSameLaneSize(pd, zn)); + + CompareVectors(pd, pg, zn, imm5, CMPGT_p_p_zi); +} + +void Assembler::cmple(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const ZRegister& zn, + int imm5) { + // CMPLE ., /Z, ., # + // 0010 0101 ..0. .... 001. .... ...1 .... + // size<23:22> | imm5<20:16> | op<15> = 0 | o2<13> = 1 | Pg<12:10> | Zn<9:5> + // | ne<4> = 1 | Pd<3:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(AreSameLaneSize(pd, zn)); + + CompareVectors(pd, pg, zn, imm5, CMPLE_p_p_zi); +} + +void Assembler::cmplt(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const ZRegister& zn, + int imm5) { + // CMPLT ., /Z, ., # + // 0010 0101 ..0. .... 001. .... ...0 .... + // size<23:22> | imm5<20:16> | op<15> = 0 | o2<13> = 1 | Pg<12:10> | Zn<9:5> + // | ne<4> = 0 | Pd<3:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(AreSameLaneSize(pd, zn)); + + CompareVectors(pd, pg, zn, imm5, CMPLT_p_p_zi); +} + +void Assembler::cmpne(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const ZRegister& zn, + int imm5) { + // CMPNE ., /Z, ., # + // 0010 0101 ..0. .... 100. .... ...1 .... + // size<23:22> | imm5<20:16> | op<15> = 1 | o2<13> = 0 | Pg<12:10> | Zn<9:5> + // | ne<4> = 1 | Pd<3:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(AreSameLaneSize(pd, zn)); + + CompareVectors(pd, pg, zn, imm5, CMPNE_p_p_zi); +} + +// SVEIntCompareUnsignedImm. + +void Assembler::cmphi(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const ZRegister& zn, + unsigned imm7) { + // CMPHI ., /Z, ., # + // 0010 0100 ..1. .... ..0. .... ...1 .... + // size<23:22> | imm7<20:14> | lt<13> = 0 | Pg<12:10> | Zn<9:5> | ne<4> = 1 | + // Pd<3:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(AreSameLaneSize(pd, zn)); + + CompareVectors(pd, pg, zn, imm7, CMPHI_p_p_zi); +} + +void Assembler::cmphs(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const ZRegister& zn, + unsigned imm7) { + // CMPHS ., /Z, ., # + // 0010 0100 ..1. .... ..0. .... ...0 .... + // size<23:22> | imm7<20:14> | lt<13> = 0 | Pg<12:10> | Zn<9:5> | ne<4> = 0 | + // Pd<3:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(AreSameLaneSize(pd, zn)); + + CompareVectors(pd, pg, zn, imm7, CMPHS_p_p_zi); +} + +void Assembler::cmplo(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const ZRegister& zn, + unsigned imm7) { + // CMPLO ., /Z, ., # + // 0010 0100 ..1. .... ..1. .... ...0 .... 
+ // size<23:22> | imm7<20:14> | lt<13> = 1 | Pg<12:10> | Zn<9:5> | ne<4> = 0 | + // Pd<3:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(AreSameLaneSize(pd, zn)); + + CompareVectors(pd, pg, zn, imm7, CMPLO_p_p_zi); +} + +void Assembler::cmpls(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const ZRegister& zn, + unsigned imm7) { + // CMPLS ., /Z, ., # + // 0010 0100 ..1. .... ..1. .... ...1 .... + // size<23:22> | imm7<20:14> | lt<13> = 1 | Pg<12:10> | Zn<9:5> | ne<4> = 1 | + // Pd<3:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(AreSameLaneSize(pd, zn)); + + CompareVectors(pd, pg, zn, imm7, CMPLS_p_p_zi); +} + +// SVEIntCompareVectors. + +// This prototype maps to 2 instruction encodings: +// CMPEQ_p_p_zw +// CMPEQ_p_p_zz +void Assembler::cmpeq(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const ZRegister& zn, + const ZRegister& zm) { + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(AreSameLaneSize(pd, zn)); + SVEIntCompareVectorsOp op = CMPEQ_p_p_zz; + if (!AreSameLaneSize(zn, zm)) { + VIXL_ASSERT(zm.IsLaneSizeD()); + op = CMPEQ_p_p_zw; + } + CompareVectors(pd, pg, zn, zm, op); +} + +// This prototype maps to 2 instruction encodings: +// CMPGE_p_p_zw +// CMPGE_p_p_zz +void Assembler::cmpge(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const ZRegister& zn, + const ZRegister& zm) { + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(AreSameLaneSize(pd, zn)); + SVEIntCompareVectorsOp op = CMPGE_p_p_zz; + if (!AreSameLaneSize(zn, zm)) { + VIXL_ASSERT(zm.IsLaneSizeD()); + op = CMPGE_p_p_zw; + } + CompareVectors(pd, pg, zn, zm, op); +} + +// This prototype maps to 2 instruction encodings: +// CMPGT_p_p_zw +// CMPGT_p_p_zz +void Assembler::cmpgt(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const ZRegister& zn, + const ZRegister& zm) { + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(AreSameLaneSize(pd, zn)); + SVEIntCompareVectorsOp op = CMPGT_p_p_zz; + if (!AreSameLaneSize(zn, zm)) { + VIXL_ASSERT(zm.IsLaneSizeD()); + op = CMPGT_p_p_zw; + } + CompareVectors(pd, pg, zn, zm, op); +} + +// This prototype maps to 2 instruction encodings: +// CMPHI_p_p_zw +// CMPHI_p_p_zz +void Assembler::cmphi(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const ZRegister& zn, + const ZRegister& zm) { + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(AreSameLaneSize(pd, zn)); + SVEIntCompareVectorsOp op = CMPHI_p_p_zz; + if (!AreSameLaneSize(zn, zm)) { + VIXL_ASSERT(zm.IsLaneSizeD()); + op = CMPHI_p_p_zw; + } + CompareVectors(pd, pg, zn, zm, op); +} + +// This prototype maps to 2 instruction encodings: +// CMPHS_p_p_zw +// CMPHS_p_p_zz +void Assembler::cmphs(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const ZRegister& zn, + const ZRegister& zm) { + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(AreSameLaneSize(pd, zn)); + SVEIntCompareVectorsOp op = CMPHS_p_p_zz; + if (!AreSameLaneSize(zn, zm)) { + VIXL_ASSERT(zm.IsLaneSizeD()); + op = CMPHS_p_p_zw; + } + CompareVectors(pd, pg, zn, zm, op); +} + +void Assembler::cmple(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const ZRegister& zn, + const ZRegister& zm) { + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(AreSameLaneSize(pd, zn)); + if (AreSameLaneSize(zn, zm)) { + cmpge(pd, pg, zm, zn); + return; + } + VIXL_ASSERT(zm.IsLaneSizeD()); + VIXL_ASSERT(!zn.IsLaneSizeD()); + + CompareVectors(pd, pg, zn, zm, CMPLE_p_p_zw); +} + +void Assembler::cmplo(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const 
ZRegister& zn, + const ZRegister& zm) { + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(AreSameLaneSize(pd, zn)); + if (AreSameLaneSize(zn, zm)) { + cmphi(pd, pg, zm, zn); + return; + } + VIXL_ASSERT(zm.IsLaneSizeD()); + VIXL_ASSERT(!zn.IsLaneSizeD()); + + CompareVectors(pd, pg, zn, zm, CMPLO_p_p_zw); +} + +void Assembler::cmpls(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const ZRegister& zn, + const ZRegister& zm) { + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(AreSameLaneSize(pd, zn)); + if (AreSameLaneSize(zn, zm)) { + cmphs(pd, pg, zm, zn); + return; + } + VIXL_ASSERT(zm.IsLaneSizeD()); + VIXL_ASSERT(!zn.IsLaneSizeD()); + + CompareVectors(pd, pg, zn, zm, CMPLS_p_p_zw); +} + +void Assembler::cmplt(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const ZRegister& zn, + const ZRegister& zm) { + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(AreSameLaneSize(pd, zn)); + if (AreSameLaneSize(zn, zm)) { + cmpgt(pd, pg, zm, zn); + return; + } + VIXL_ASSERT(zm.IsLaneSizeD()); + VIXL_ASSERT(!zn.IsLaneSizeD()); + + CompareVectors(pd, pg, zn, zm, CMPLT_p_p_zw); +} + +// This prototype maps to 2 instruction encodings: +// CMPNE_p_p_zw +// CMPNE_p_p_zz +void Assembler::cmpne(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const ZRegister& zn, + const ZRegister& zm) { + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(AreSameLaneSize(pd, zn)); + SVEIntCompareVectorsOp op = CMPNE_p_p_zz; + if (!AreSameLaneSize(zn, zm)) { + VIXL_ASSERT(zm.IsLaneSizeD()); + op = CMPNE_p_p_zw; + } + CompareVectors(pd, pg, zn, zm, op); +} + +// SVEIntMiscUnpredicated. + +void Assembler::fexpa(const ZRegister& zd, const ZRegister& zn) { + // FEXPA ., . + // 0000 0100 ..10 0000 1011 10.. .... .... + // size<23:22> | opc<20:16> = 00000 | Zn<9:5> | Zd<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(AreSameLaneSize(zd, zn)); + VIXL_ASSERT(zd.GetLaneSizeInBytes() != kBRegSizeInBytes); + + Emit(FEXPA_z_z | SVESize(zd) | Rd(zd) | Rn(zn)); +} + +void Assembler::ftssel(const ZRegister& zd, + const ZRegister& zn, + const ZRegister& zm) { + // FTSSEL ., ., . + // 0000 0100 ..1. .... 1011 00.. .... .... + // size<23:22> | Zm<20:16> | op<10> = 0 | Zn<9:5> | Zd<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(AreSameLaneSize(zd, zn, zm)); + VIXL_ASSERT(zd.GetLaneSizeInBytes() != kBRegSizeInBytes); + + Emit(FTSSEL_z_zz | SVESize(zd) | Rd(zd) | Rn(zn) | Rm(zm)); +} + +void Assembler::movprfx(const ZRegister& zd, const ZRegister& zn) { + // MOVPRFX , + // 0000 0100 0010 0000 1011 11.. .... .... + // opc<23:22> = 00 | opc2<20:16> = 00000 | Zn<9:5> | Zd<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(AreSameLaneSize(zd, zn)); + + Emit(MOVPRFX_z_z | Rd(zd) | Rn(zn)); +} + +// SVEIntMulAddPredicated. + +void Assembler::mad(const ZRegister& zdn, + const PRegisterM& pg, + const ZRegister& zm, + const ZRegister& za) { + // MAD ., /M, ., . + // 0000 0100 ..0. .... 110. .... .... .... + // size<23:22> | Zm<20:16> | op<13> = 0 | Pg<12:10> | Za<9:5> | Zdn<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(AreSameLaneSize(zdn, zm, za)); + + Emit(MAD_z_p_zzz | SVESize(zdn) | Rd(zdn) | PgLow8(pg) | Rm(zm) | Rn(za)); +} + +void Assembler::mla(const ZRegister& zda, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm) { + // MLA ., /M, ., . + // 0000 0100 ..0. .... 010. .... .... .... 
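+  // For example (illustrative): mla(z0.VnH(), p0.Merging(), z1.VnH(), z2.VnH())
+  // accumulates z1 * z2 into the addend register z0, whereas mad (above) is the
+  // destructive form that overwrites the multiplicand zdn and reads the addend
+  // from za.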
+ // size<23:22> | Zm<20:16> | op<13> = 0 | Pg<12:10> | Zn<9:5> | Zda<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(AreSameLaneSize(zda, zn, zm)); + + Emit(MLA_z_p_zzz | SVESize(zda) | Rd(zda) | PgLow8(pg) | Rn(zn) | Rm(zm)); +} + +void Assembler::mls(const ZRegister& zda, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm) { + // MLS ., /M, ., . + // 0000 0100 ..0. .... 011. .... .... .... + // size<23:22> | Zm<20:16> | op<13> = 1 | Pg<12:10> | Zn<9:5> | Zda<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(AreSameLaneSize(zda, zn, zm)); + + Emit(MLS_z_p_zzz | SVESize(zda) | Rd(zda) | PgLow8(pg) | Rn(zn) | Rm(zm)); +} + +void Assembler::msb(const ZRegister& zdn, + const PRegisterM& pg, + const ZRegister& zm, + const ZRegister& za) { + // MSB ., /M, ., . + // 0000 0100 ..0. .... 111. .... .... .... + // size<23:22> | Zm<20:16> | op<13> = 1 | Pg<12:10> | Za<9:5> | Zdn<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(AreSameLaneSize(zdn, zm, za)); + + Emit(MSB_z_p_zzz | SVESize(zdn) | Rd(zdn) | PgLow8(pg) | Rm(zm) | Rn(za)); +} + +// SVEIntMulAddUnpredicated. + +void Assembler::sdot(const ZRegister& zda, + const ZRegister& zn, + const ZRegister& zm) { + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(zda.IsLaneSizeS() || zda.IsLaneSizeD()); + VIXL_ASSERT(zda.GetLaneSizeInBytes() == (zn.GetLaneSizeInBytes() * 4)); + VIXL_ASSERT(AreSameLaneSize(zm, zn)); + + Emit(SDOT_z_zzz | SVESize(zda) | Rd(zda) | Rn(zn) | Rm(zm)); +} + +void Assembler::udot(const ZRegister& zda, + const ZRegister& zn, + const ZRegister& zm) { + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(zda.IsLaneSizeS() || zda.IsLaneSizeD()); + VIXL_ASSERT(zda.GetLaneSizeInBytes() == (zn.GetLaneSizeInBytes() * 4)); + VIXL_ASSERT(AreSameLaneSize(zm, zn)); + + Emit(UDOT_z_zzz | SVESize(zda) | Rd(zda) | Rn(zn) | Rm(zm)); +} + +// SVEIntReduction. + +void Assembler::andv(const VRegister& vd, + const PRegister& pg, + const ZRegister& zn) { + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(vd.IsScalar()); + + Emit(ANDV_r_p_z | SVESize(zn) | Rd(vd) | PgLow8(pg) | Rn(zn)); +} + +void Assembler::eorv(const VRegister& vd, + const PRegister& pg, + const ZRegister& zn) { + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(vd.IsScalar()); + + Emit(EORV_r_p_z | SVESize(zn) | Rd(vd) | PgLow8(pg) | Rn(zn)); +} + +void Assembler::movprfx(const ZRegister& zd, + const PRegister& pg, + const ZRegister& zn) { + // MOVPRFX ., /, . + // 0000 0100 ..01 000. 001. .... .... .... + // size<23:22> | opc<18:17> = 00 | M<16> | Pg<12:10> | Zn<9:5> | Zd<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(AreSameLaneSize(zd, zn)); + VIXL_ASSERT(pg.IsMerging() || pg.IsZeroing()); + VIXL_ASSERT(!pg.HasLaneSize()); + + Instr m = pg.IsMerging() ? 
0x00010000 : 0x00000000; + Emit(MOVPRFX_z_p_z | SVESize(zd) | m | Rd(zd) | PgLow8(pg) | Rn(zn)); +} + +void Assembler::orv(const VRegister& vd, + const PRegister& pg, + const ZRegister& zn) { + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(vd.IsScalar()); + + Emit(ORV_r_p_z | SVESize(zn) | Rd(vd) | PgLow8(pg) | Rn(zn)); +} + +void Assembler::saddv(const VRegister& dd, + const PRegister& pg, + const ZRegister& zn) { + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(zn.GetLaneSizeInBytes() != kDRegSizeInBytes); + + Emit(SADDV_r_p_z | SVESize(zn) | Rd(dd) | PgLow8(pg) | Rn(zn)); +} + +void Assembler::smaxv(const VRegister& vd, + const PRegister& pg, + const ZRegister& zn) { + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(vd.IsScalar()); + + Emit(SMAXV_r_p_z | SVESize(zn) | Rd(vd) | PgLow8(pg) | Rn(zn)); +} + +void Assembler::sminv(const VRegister& vd, + const PRegister& pg, + const ZRegister& zn) { + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(vd.IsScalar()); + + Emit(SMINV_r_p_z | SVESize(zn) | Rd(vd) | PgLow8(pg) | Rn(zn)); +} + +void Assembler::uaddv(const VRegister& dd, + const PRegister& pg, + const ZRegister& zn) { + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + + Emit(UADDV_r_p_z | SVESize(zn) | Rd(dd) | PgLow8(pg) | Rn(zn)); +} + +void Assembler::umaxv(const VRegister& vd, + const PRegister& pg, + const ZRegister& zn) { + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(vd.IsScalar()); + + Emit(UMAXV_r_p_z | SVESize(zn) | Rd(vd) | PgLow8(pg) | Rn(zn)); +} + +void Assembler::uminv(const VRegister& vd, + const PRegister& pg, + const ZRegister& zn) { + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(vd.IsScalar()); + + Emit(UMINV_r_p_z | SVESize(zn) | Rd(vd) | PgLow8(pg) | Rn(zn)); +} + +// SVEIntUnaryArithmeticPredicated. + +void Assembler::abs(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn) { + // ABS ., /M, . + // 0000 0100 ..01 0110 101. .... .... .... + // size<23:22> | opc<18:16> = 110 | Pg<12:10> | Zn<9:5> | Zd<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(AreSameLaneSize(zd, zn)); + + Emit(ABS_z_p_z | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zn)); +} + +void Assembler::cls(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn) { + // CLS ., /M, . + // 0000 0100 ..01 1000 101. .... .... .... + // size<23:22> | opc<18:16> = 000 | Pg<12:10> | Zn<9:5> | Zd<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(AreSameLaneSize(zd, zn)); + + Emit(CLS_z_p_z | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zn)); +} + +void Assembler::clz(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn) { + // CLZ ., /M, . + // 0000 0100 ..01 1001 101. .... .... .... + // size<23:22> | opc<18:16> = 001 | Pg<12:10> | Zn<9:5> | Zd<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(AreSameLaneSize(zd, zn)); + + Emit(CLZ_z_p_z | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zn)); +} + +void Assembler::cnot(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn) { + // CNOT ., /M, . + // 0000 0100 ..01 1011 101. .... .... .... + // size<23:22> | opc<18:16> = 011 | Pg<12:10> | Zn<9:5> | Zd<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(AreSameLaneSize(zd, zn)); + + Emit(CNOT_z_p_z | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zn)); +} + +void Assembler::cnt(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn) { + // CNT ., /M, . + // 0000 0100 ..01 1010 101. .... .... .... 
+ // size<23:22> | opc<18:16> = 010 | Pg<12:10> | Zn<9:5> | Zd<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(AreSameLaneSize(zd, zn)); + + Emit(CNT_z_p_z | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zn)); +} + +void Assembler::fabs(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn) { + // FABS ., /M, . + // 0000 0100 ..01 1100 101. .... .... .... + // size<23:22> | opc<18:16> = 100 | Pg<12:10> | Zn<9:5> | Zd<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(AreSameLaneSize(zd, zn)); + VIXL_ASSERT(zd.GetLaneSizeInBytes() != kBRegSizeInBytes); + + Emit(FABS_z_p_z | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zn)); +} + +void Assembler::fneg(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn) { + // FNEG ., /M, . + // 0000 0100 ..01 1101 101. .... .... .... + // size<23:22> | opc<18:16> = 101 | Pg<12:10> | Zn<9:5> | Zd<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(AreSameLaneSize(zd, zn)); + VIXL_ASSERT(zd.GetLaneSizeInBytes() != kBRegSizeInBytes); + + Emit(FNEG_z_p_z | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zn)); +} + +void Assembler::neg(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn) { + // NEG ., /M, . + // 0000 0100 ..01 0111 101. .... .... .... + // size<23:22> | opc<18:16> = 111 | Pg<12:10> | Zn<9:5> | Zd<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(AreSameLaneSize(zd, zn)); + + Emit(NEG_z_p_z | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zn)); +} + +void Assembler::not_(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn) { + // NOT ., /M, . + // 0000 0100 ..01 1110 101. .... .... .... + // size<23:22> | opc<18:16> = 110 | Pg<12:10> | Zn<9:5> | Zd<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(AreSameLaneSize(zd, zn)); + + Emit(NOT_z_p_z | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zn)); +} + +void Assembler::sxtb(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn) { + // SXTB ., /M, . + // 0000 0100 ..01 0000 101. .... .... .... + // size<23:22> | opc<18:16> = 000 | Pg<12:10> | Zn<9:5> | Zd<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(AreSameLaneSize(zd, zn)); + VIXL_ASSERT(zd.GetLaneSizeInBytes() > kBRegSizeInBytes); + + Emit(SXTB_z_p_z | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zn)); +} + +void Assembler::sxth(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn) { + // SXTH ., /M, . + // 0000 0100 ..01 0010 101. .... .... .... + // size<23:22> | opc<18:16> = 010 | Pg<12:10> | Zn<9:5> | Zd<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(AreSameLaneSize(zd, zn)); + VIXL_ASSERT(zd.GetLaneSizeInBytes() > kHRegSizeInBytes); + + Emit(SXTH_z_p_z | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zn)); +} + +void Assembler::sxtw(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn) { + // SXTW .D, /M, .D + // 0000 0100 ..01 0100 101. .... .... .... + // size<23:22> | opc<18:16> = 100 | Pg<12:10> | Zn<9:5> | Zd<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(AreSameLaneSize(zd, zn)); + VIXL_ASSERT(zd.GetLaneSizeInBytes() > kSRegSizeInBytes); + + Emit(SXTW_z_p_z | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zn)); +} + +void Assembler::uxtb(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn) { + // UXTB ., /M, . + // 0000 0100 ..01 0001 101. .... .... .... 
+ // size<23:22> | opc<18:16> = 001 | Pg<12:10> | Zn<9:5> | Zd<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(AreSameLaneSize(zd, zn)); + VIXL_ASSERT(zd.GetLaneSizeInBytes() > kBRegSizeInBytes); + + Emit(UXTB_z_p_z | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zn)); +} + +void Assembler::uxth(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn) { + // UXTH ., /M, . + // 0000 0100 ..01 0011 101. .... .... .... + // size<23:22> | opc<18:16> = 011 | Pg<12:10> | Zn<9:5> | Zd<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(AreSameLaneSize(zd, zn)); + VIXL_ASSERT(zd.GetLaneSizeInBytes() > kHRegSizeInBytes); + + Emit(UXTH_z_p_z | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zn)); +} + +void Assembler::uxtw(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn) { + // UXTW .D, /M, .D + // 0000 0100 ..01 0101 101. .... .... .... + // size<23:22> | opc<18:16> = 101 | Pg<12:10> | Zn<9:5> | Zd<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(AreSameLaneSize(zd, zn)); + VIXL_ASSERT(zd.GetLaneSizeInBytes() > kSRegSizeInBytes); + + Emit(UXTW_z_p_z | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zn)); +} + +// SVEIntWideImmPredicated. + +void Assembler::cpy(const ZRegister& zd, + const PRegister& pg, + int imm8, + int shift) { + // CPY ., /, #{, } + // 0000 0101 ..01 .... 0... .... .... .... + // size<23:22> | Pg<19:16> | M<14> | sh<13> | imm8<12:5> | Zd<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(pg.IsMerging() || pg.IsZeroing()); + + ResolveSVEImm8Shift(&imm8, &shift); + + Instr sh = (shift > 0) ? (1 << 13) : 0; + Instr m = pg.IsMerging() ? (1 << 14) : 0; + Emit(CPY_z_p_i | m | sh | SVESize(zd) | Rd(zd) | Pg<19, 16>(pg) | + ImmField<12, 5>(imm8)); +} + +void Assembler::fcpy(const ZRegister& zd, const PRegisterM& pg, double imm) { + // FCPY ., /M, # + // 0000 0101 ..01 .... 110. .... .... .... + // size<23:22> | Pg<19:16> | imm8<12:5> | Zd<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(zd.GetLaneSizeInBytes() != kBRegSizeInBytes); + + Instr imm_field = ImmUnsignedField<12, 5>(FP64ToImm8(imm)); + Emit(FCPY_z_p_i | SVESize(zd) | Rd(zd) | Pg<19, 16>(pg) | imm_field); +} + +// SVEIntAddSubtractImmUnpredicated. + +void Assembler::SVEIntAddSubtractImmUnpredicatedHelper( + SVEIntAddSubtractImm_UnpredicatedOp op, + const ZRegister& zd, + int imm8, + int shift) { + if (shift < 0) { + VIXL_ASSERT(shift == -1); + // Derive the shift amount from the immediate. + if (IsUint8(imm8)) { + shift = 0; + } else if (IsUint16(imm8) && ((imm8 % 256) == 0)) { + imm8 /= 256; + shift = 8; + } + } + + VIXL_ASSERT(IsUint8(imm8)); + VIXL_ASSERT((shift == 0) || (shift == 8)); + + Instr shift_bit = (shift > 0) ? (1 << 13) : 0; + Emit(op | SVESize(zd) | Rd(zd) | shift_bit | ImmUnsignedField<12, 5>(imm8)); +} + +void Assembler::add(const ZRegister& zd, + const ZRegister& zn, + int imm8, + int shift) { + // ADD ., ., #{, } + // 0010 0101 ..10 0000 11.. .... .... .... + // size<23:22> | opc<18:16> = 000 | sh<13> | imm8<12:5> | Zdn<4:0> + + USE(zn); + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(zd.Is(zn)); + VIXL_ASSERT(AreSameLaneSize(zd, zn)); + + SVEIntAddSubtractImmUnpredicatedHelper(ADD_z_zi, zd, imm8, shift); +} + +void Assembler::dup(const ZRegister& zd, int imm8, int shift) { + // DUP ., #{, } + // 0010 0101 ..11 1000 11.. .... .... .... 
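+  // For example (illustrative): dup(z0.VnH(), 69, 8) replicates 69 << 8
+  // (0x4500) into every halfword lane; the shifted form is not available for
+  // B-sized lanes, as the assertion below enforces.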
+ // size<23:22> | opc<18:17> = 00 | sh<13> | imm8<12:5> | Zd<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + + ResolveSVEImm8Shift(&imm8, &shift); + VIXL_ASSERT((shift < 8) || !zd.IsLaneSizeB()); + + Instr shift_bit = (shift > 0) ? (1 << 13) : 0; + Emit(DUP_z_i | SVESize(zd) | Rd(zd) | shift_bit | ImmField<12, 5>(imm8)); +} + +void Assembler::fdup(const ZRegister& zd, double imm) { + // FDUP ., # + // 0010 0101 ..11 1001 110. .... .... .... + // size<23:22> | opc<18:17> = 00 | o2<13> = 0 | imm8<12:5> | Zd<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(zd.GetLaneSizeInBytes() != kBRegSizeInBytes); + + Instr encoded_imm = FP64ToImm8(imm) << 5; + Emit(FDUP_z_i | SVESize(zd) | encoded_imm | Rd(zd)); +} + +void Assembler::mul(const ZRegister& zd, const ZRegister& zn, int imm8) { + // MUL ., ., # + // 0010 0101 ..11 0000 110. .... .... .... + // size<23:22> | opc<18:16> = 000 | o2<13> = 0 | imm8<12:5> | Zdn<4:0> + + USE(zn); + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(zd.Is(zn)); + VIXL_ASSERT(AreSameLaneSize(zd, zn)); + + Emit(MUL_z_zi | SVESize(zd) | Rd(zd) | ImmField<12, 5>(imm8)); +} + +void Assembler::smax(const ZRegister& zd, const ZRegister& zn, int imm8) { + // SMAX ., ., # + // 0010 0101 ..10 1000 110. .... .... .... + // size<23:22> | opc<18:16> = 000 | o2<13> = 0 | imm8<12:5> | Zdn<4:0> + + USE(zn); + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(zd.Is(zn)); + VIXL_ASSERT(AreSameLaneSize(zd, zn)); + + Emit(SMAX_z_zi | SVESize(zd) | Rd(zd) | ImmField<12, 5>(imm8)); +} + +void Assembler::smin(const ZRegister& zd, const ZRegister& zn, int imm8) { + // SMIN ., ., # + // 0010 0101 ..10 1010 110. .... .... .... + // size<23:22> | opc<18:16> = 010 | o2<13> = 0 | imm8<12:5> | Zdn<4:0> + + USE(zn); + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(zd.Is(zn)); + VIXL_ASSERT(AreSameLaneSize(zd, zn)); + + Emit(SMIN_z_zi | SVESize(zd) | Rd(zd) | ImmField<12, 5>(imm8)); +} + +void Assembler::sqadd(const ZRegister& zd, + const ZRegister& zn, + int imm8, + int shift) { + // SQADD ., ., #{, } + // 0010 0101 ..10 0100 11.. .... .... .... + // size<23:22> | opc<18:16> = 100 | sh<13> | imm8<12:5> | Zdn<4:0> + + USE(zn); + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(zd.Is(zn)); + VIXL_ASSERT(AreSameLaneSize(zd, zn)); + + SVEIntAddSubtractImmUnpredicatedHelper(SQADD_z_zi, zd, imm8, shift); +} + +void Assembler::sqsub(const ZRegister& zd, + const ZRegister& zn, + int imm8, + int shift) { + // SQSUB ., ., #{, } + // 0010 0101 ..10 0110 11.. .... .... .... + // size<23:22> | opc<18:16> = 110 | sh<13> | imm8<12:5> | Zdn<4:0> + + USE(zn); + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(zd.Is(zn)); + VIXL_ASSERT(AreSameLaneSize(zd, zn)); + + SVEIntAddSubtractImmUnpredicatedHelper(SQSUB_z_zi, zd, imm8, shift); +} + +void Assembler::sub(const ZRegister& zd, + const ZRegister& zn, + int imm8, + int shift) { + // SUB ., ., #{, } + // 0010 0101 ..10 0001 11.. .... .... .... + // size<23:22> | opc<18:16> = 001 | sh<13> | imm8<12:5> | Zdn<4:0> + + USE(zn); + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(zd.Is(zn)); + VIXL_ASSERT(AreSameLaneSize(zd, zn)); + + SVEIntAddSubtractImmUnpredicatedHelper(SUB_z_zi, zd, imm8, shift); +} + +void Assembler::subr(const ZRegister& zd, + const ZRegister& zn, + int imm8, + int shift) { + // SUBR ., ., #{, } + // 0010 0101 ..10 0011 11.. .... .... .... 
+ // size<23:22> | opc<18:16> = 011 | sh<13> | imm8<12:5> | Zdn<4:0> + + USE(zn); + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(zd.Is(zn)); + VIXL_ASSERT(AreSameLaneSize(zd, zn)); + + SVEIntAddSubtractImmUnpredicatedHelper(SUBR_z_zi, zd, imm8, shift); +} + +void Assembler::umax(const ZRegister& zd, const ZRegister& zn, int imm8) { + // UMAX ., ., # + // 0010 0101 ..10 1001 110. .... .... .... + // size<23:22> | opc<18:16> = 001 | o2<13> = 0 | imm8<12:5> | Zdn<4:0> + + USE(zn); + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(zd.Is(zn)); + VIXL_ASSERT(AreSameLaneSize(zd, zn)); + + Emit(UMAX_z_zi | SVESize(zd) | Rd(zd) | ImmUnsignedField<12, 5>(imm8)); +} + +void Assembler::umin(const ZRegister& zd, const ZRegister& zn, int imm8) { + // UMIN ., ., # + // 0010 0101 ..10 1011 110. .... .... .... + // size<23:22> | opc<18:16> = 011 | o2<13> = 0 | imm8<12:5> | Zdn<4:0> + + USE(zn); + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(zd.Is(zn)); + VIXL_ASSERT(AreSameLaneSize(zd, zn)); + + Emit(UMIN_z_zi | SVESize(zd) | Rd(zd) | ImmUnsignedField<12, 5>(imm8)); +} + +void Assembler::uqadd(const ZRegister& zd, + const ZRegister& zn, + int imm8, + int shift) { + // UQADD ., ., #{, } + // 0010 0101 ..10 0101 11.. .... .... .... + // size<23:22> | opc<18:16> = 101 | sh<13> | imm8<12:5> | Zdn<4:0> + + USE(zn); + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(zd.Is(zn)); + VIXL_ASSERT(AreSameLaneSize(zd, zn)); + + SVEIntAddSubtractImmUnpredicatedHelper(UQADD_z_zi, zd, imm8, shift); +} + +void Assembler::uqsub(const ZRegister& zd, + const ZRegister& zn, + int imm8, + int shift) { + // UQSUB ., ., #{, } + // 0010 0101 ..10 0111 11.. .... .... .... + // size<23:22> | opc<18:16> = 111 | sh<13> | imm8<12:5> | Zdn<4:0> + + USE(zn); + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(zd.Is(zn)); + VIXL_ASSERT(AreSameLaneSize(zd, zn)); + + SVEIntAddSubtractImmUnpredicatedHelper(UQSUB_z_zi, zd, imm8, shift); +} + +// SVEMemLoad. + +void Assembler::SVELdSt1Helper(unsigned msize_in_bytes_log2, + const ZRegister& zt, + const PRegister& pg, + const SVEMemOperand& addr, + bool is_signed, + Instr op) { + VIXL_ASSERT(addr.IsContiguous()); + + Instr mem_op = SVEMemOperandHelper(msize_in_bytes_log2, 1, addr); + Instr dtype = + SVEDtype(msize_in_bytes_log2, zt.GetLaneSizeInBytesLog2(), is_signed); + Emit(op | mem_op | dtype | Rt(zt) | PgLow8(pg)); +} + +void Assembler::SVELdSt234Helper(int num_regs, + const ZRegister& zt1, + const PRegister& pg, + const SVEMemOperand& addr, + Instr op) { + VIXL_ASSERT((num_regs >= 2) && (num_regs <= 4)); + + unsigned msize_in_bytes_log2 = zt1.GetLaneSizeInBytesLog2(); + Instr num = (num_regs - 1) << 21; + Instr msz = msize_in_bytes_log2 << 23; + Instr mem_op = SVEMemOperandHelper(msize_in_bytes_log2, num_regs, addr); + Emit(op | mem_op | msz | num | Rt(zt1) | PgLow8(pg)); +} + +void Assembler::SVELd1Helper(unsigned msize_in_bytes_log2, + const ZRegister& zt, + const PRegisterZ& pg, + const SVEMemOperand& addr, + bool is_signed) { + VIXL_ASSERT(zt.GetLaneSizeInBytesLog2() >= msize_in_bytes_log2); + if (is_signed) { + // Sign-extension is only possible when the vector elements are larger than + // the elements in memory. 
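+    // For example (illustrative): ld1sb(z0.VnH(), p0.Zeroing(), SVEMemOperand(x0))
+    // sign-extends byte elements into halfword lanes, while an access whose
+    // element size equals the lane size must use the unsigned form (ld1b).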
+ VIXL_ASSERT(zt.GetLaneSizeInBytesLog2() != msize_in_bytes_log2); + } + + if (addr.IsScatterGather()) { + bool is_load = true; + bool is_ff = false; + SVEScatterGatherHelper(msize_in_bytes_log2, + zt, + pg, + addr, + is_load, + is_signed, + is_ff); + return; + } + + Instr op = 0xffffffff; + if (addr.IsScalarPlusImmediate()) { + op = SVEContiguousLoad_ScalarPlusImmFixed; + } else if (addr.IsScalarPlusScalar()) { + // Rm must not be xzr. + VIXL_ASSERT(!addr.GetScalarOffset().IsZero()); + op = SVEContiguousLoad_ScalarPlusScalarFixed; + } else { + VIXL_UNIMPLEMENTED(); + } + SVELdSt1Helper(msize_in_bytes_log2, zt, pg, addr, is_signed, op); +} + +void Assembler::SVELdff1Helper(unsigned msize_in_bytes_log2, + const ZRegister& zt, + const PRegisterZ& pg, + const SVEMemOperand& addr, + bool is_signed) { + VIXL_ASSERT(zt.GetLaneSizeInBytesLog2() >= msize_in_bytes_log2); + if (is_signed) { + // Sign-extension is only possible when the vector elements are larger than + // the elements in memory. + VIXL_ASSERT(zt.GetLaneSizeInBytesLog2() != msize_in_bytes_log2); + } + + if (addr.IsScatterGather()) { + bool is_load = true; + bool is_ff = true; + SVEScatterGatherHelper(msize_in_bytes_log2, + zt, + pg, + addr, + is_load, + is_signed, + is_ff); + return; + } + + if (addr.IsPlainScalar()) { + // SVEMemOperand(x0) is treated as a scalar-plus-immediate form ([x0, #0]). + // In these instructions, we want to treat it as [x0, xzr]. + SVEMemOperand addr_scalar_plus_scalar(addr.GetScalarBase(), xzr); + // Guard against infinite recursion. + VIXL_ASSERT(!addr_scalar_plus_scalar.IsPlainScalar()); + SVELdff1Helper(msize_in_bytes_log2, + zt, + pg, + addr_scalar_plus_scalar, + is_signed); + return; + } + + Instr op = 0xffffffff; + if (addr.IsScalarPlusScalar()) { + op = SVEContiguousFirstFaultLoad_ScalarPlusScalarFixed; + } else { + VIXL_UNIMPLEMENTED(); + } + SVELdSt1Helper(msize_in_bytes_log2, zt, pg, addr, is_signed, op); +} + +void Assembler::SVEScatterGatherHelper(unsigned msize_in_bytes_log2, + const ZRegister& zt, + const PRegister& pg, + const SVEMemOperand& addr, + bool is_load, + bool is_signed, + bool is_first_fault) { + VIXL_ASSERT(addr.IsScatterGather()); + VIXL_ASSERT(zt.IsLaneSizeS() || zt.IsLaneSizeD()); + VIXL_ASSERT(is_load || !is_first_fault); + VIXL_ASSERT(is_load || !is_signed); + + Instr op = 0xffffffff; + if (addr.IsVectorPlusImmediate()) { + VIXL_ASSERT(AreSameLaneSize(zt, addr.GetVectorBase())); + if (is_load) { + if (zt.IsLaneSizeS()) { + op = SVE32BitGatherLoad_VectorPlusImmFixed; + } else { + op = SVE64BitGatherLoad_VectorPlusImmFixed; + } + } else { + if (zt.IsLaneSizeS()) { + op = SVE32BitScatterStore_VectorPlusImmFixed; + } else { + op = SVE64BitScatterStore_VectorPlusImmFixed; + } + } + } else { + VIXL_ASSERT(addr.IsScalarPlusVector()); + VIXL_ASSERT(AreSameLaneSize(zt, addr.GetVectorOffset())); + SVEOffsetModifier mod = addr.GetOffsetModifier(); + if (zt.IsLaneSizeS()) { + VIXL_ASSERT((mod == SVE_UXTW) || (mod == SVE_SXTW)); + unsigned shift_amount = addr.GetShiftAmount(); + if (shift_amount == 0) { + if (is_load) { + op = SVE32BitGatherLoad_ScalarPlus32BitUnscaledOffsetsFixed; + } else { + op = SVE32BitScatterStore_ScalarPlus32BitUnscaledOffsetsFixed; + } + } else if (shift_amount == 1) { + VIXL_ASSERT(msize_in_bytes_log2 == kHRegSizeInBytesLog2); + if (is_load) { + op = SVE32BitGatherLoadHalfwords_ScalarPlus32BitScaledOffsetsFixed; + } else { + op = SVE32BitScatterStore_ScalarPlus32BitScaledOffsetsFixed; + } + } else { + VIXL_ASSERT(shift_amount == 2); + 
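+            // For example (illustrative), a gather of words with a scaled,
+            // zero-extended 32-bit offset could be written roughly as
+            //   ld1w(z0.VnS(), p0.Zeroing(), SVEMemOperand(x0, z1.VnS(), SVE_UXTW, 2));
+            // assuming SVEMemOperand accepts a scalar base, vector offset,
+            // extend modifier and shift amount in that order.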
VIXL_ASSERT(msize_in_bytes_log2 == kSRegSizeInBytesLog2); + if (is_load) { + op = SVE32BitGatherLoadWords_ScalarPlus32BitScaledOffsetsFixed; + } else { + op = SVE32BitScatterStore_ScalarPlus32BitScaledOffsetsFixed; + } + } + } else if (zt.IsLaneSizeD()) { + switch (mod) { + case NO_SVE_OFFSET_MODIFIER: + if (is_load) { + op = SVE64BitGatherLoad_ScalarPlus64BitUnscaledOffsetsFixed; + } else { + op = SVE64BitScatterStore_ScalarPlus64BitUnscaledOffsetsFixed; + } + break; + case SVE_LSL: + if (is_load) { + op = SVE64BitGatherLoad_ScalarPlus64BitScaledOffsetsFixed; + } else { + op = SVE64BitScatterStore_ScalarPlus64BitScaledOffsetsFixed; + } + break; + case SVE_UXTW: + case SVE_SXTW: { + unsigned shift_amount = addr.GetShiftAmount(); + if (shift_amount == 0) { + if (is_load) { + op = + SVE64BitGatherLoad_ScalarPlusUnpacked32BitUnscaledOffsetsFixed; + } else { + op = + SVE64BitScatterStore_ScalarPlusUnpacked32BitUnscaledOffsetsFixed; + } + } else { + VIXL_ASSERT(shift_amount == msize_in_bytes_log2); + if (is_load) { + op = SVE64BitGatherLoad_ScalarPlus32BitUnpackedScaledOffsetsFixed; + } else { + op = + SVE64BitScatterStore_ScalarPlusUnpacked32BitScaledOffsetsFixed; + } + } + break; + } + default: + VIXL_UNIMPLEMENTED(); + } + } + } + + Instr mem_op = SVEMemOperandHelper(msize_in_bytes_log2, 1, addr, is_load); + Instr msz = ImmUnsignedField<24, 23>(msize_in_bytes_log2); + Instr u = (!is_load || is_signed) ? 0 : (1 << 14); + Instr ff = is_first_fault ? (1 << 13) : 0; + Emit(op | mem_op | msz | u | ff | Rt(zt) | PgLow8(pg)); +} + +void Assembler::SVELd234Helper(int num_regs, + const ZRegister& zt1, + const PRegisterZ& pg, + const SVEMemOperand& addr) { + if (addr.IsScalarPlusScalar()) { + // Rm must not be xzr. + VIXL_ASSERT(!addr.GetScalarOffset().IsZero()); + } + + Instr op; + if (addr.IsScalarPlusImmediate()) { + op = SVELoadMultipleStructures_ScalarPlusImmFixed; + } else if (addr.IsScalarPlusScalar()) { + op = SVELoadMultipleStructures_ScalarPlusScalarFixed; + } else { + // These instructions don't support any other addressing modes. + VIXL_ABORT(); + } + SVELdSt234Helper(num_regs, zt1, pg, addr, op); +} + +// SVEMemContiguousLoad. 
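+// The macros below expand to one ld1*, ld2*, ld3* and ld4* method per element
+// size (b, h, w and d). As an illustrative sketch (the registers and operand
+// are arbitrary examples, and SVEMemOperand's scalar-plus-immediate form with
+// SVE_MUL_VL is assumed), a two-register structured load might look like:
+//   ld2d(z0.VnD(), z1.VnD(), p0.Zeroing(), SVEMemOperand(x0, 2, SVE_MUL_VL));
+// where the transfer registers must be architecturally consecutive, as the
+// AreConsecutive assertions require.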
+ +#define VIXL_DEFINE_LD1(MSZ, LANE_SIZE) \ + void Assembler::ld1##MSZ(const ZRegister& zt, \ + const PRegisterZ& pg, \ + const SVEMemOperand& addr) { \ + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); \ + SVELd1Helper(k##LANE_SIZE##RegSizeInBytesLog2, zt, pg, addr, false); \ + } +#define VIXL_DEFINE_LD2(MSZ, LANE_SIZE) \ + void Assembler::ld2##MSZ(const ZRegister& zt1, \ + const ZRegister& zt2, \ + const PRegisterZ& pg, \ + const SVEMemOperand& addr) { \ + USE(zt2); \ + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); \ + VIXL_ASSERT(AreConsecutive(zt1, zt2)); \ + VIXL_ASSERT(AreSameFormat(zt1, zt2)); \ + VIXL_ASSERT(zt1.IsLaneSize##LANE_SIZE()); \ + SVELd234Helper(2, zt1, pg, addr); \ + } +#define VIXL_DEFINE_LD3(MSZ, LANE_SIZE) \ + void Assembler::ld3##MSZ(const ZRegister& zt1, \ + const ZRegister& zt2, \ + const ZRegister& zt3, \ + const PRegisterZ& pg, \ + const SVEMemOperand& addr) { \ + USE(zt2, zt3); \ + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); \ + VIXL_ASSERT(AreConsecutive(zt1, zt2, zt3)); \ + VIXL_ASSERT(AreSameFormat(zt1, zt2, zt3)); \ + VIXL_ASSERT(zt1.IsLaneSize##LANE_SIZE()); \ + SVELd234Helper(3, zt1, pg, addr); \ + } +#define VIXL_DEFINE_LD4(MSZ, LANE_SIZE) \ + void Assembler::ld4##MSZ(const ZRegister& zt1, \ + const ZRegister& zt2, \ + const ZRegister& zt3, \ + const ZRegister& zt4, \ + const PRegisterZ& pg, \ + const SVEMemOperand& addr) { \ + USE(zt2, zt3, zt4); \ + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); \ + VIXL_ASSERT(AreConsecutive(zt1, zt2, zt3, zt4)); \ + VIXL_ASSERT(AreSameFormat(zt1, zt2, zt3, zt4)); \ + VIXL_ASSERT(zt1.IsLaneSize##LANE_SIZE()); \ + SVELd234Helper(4, zt1, pg, addr); \ + } + +VIXL_SVE_LOAD_STORE_VARIANT_LIST(VIXL_DEFINE_LD1) +VIXL_SVE_LOAD_STORE_VARIANT_LIST(VIXL_DEFINE_LD2) +VIXL_SVE_LOAD_STORE_VARIANT_LIST(VIXL_DEFINE_LD3) +VIXL_SVE_LOAD_STORE_VARIANT_LIST(VIXL_DEFINE_LD4) + +#define VIXL_DEFINE_LD1S(MSZ, LANE_SIZE) \ + void Assembler::ld1s##MSZ(const ZRegister& zt, \ + const PRegisterZ& pg, \ + const SVEMemOperand& addr) { \ + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); \ + SVELd1Helper(k##LANE_SIZE##RegSizeInBytesLog2, zt, pg, addr, true); \ + } +VIXL_SVE_LOAD_STORE_SIGNED_VARIANT_LIST(VIXL_DEFINE_LD1S) + +// SVEMem32BitGatherAndUnsizedContiguous. + +void Assembler::SVELd1BroadcastHelper(unsigned msize_in_bytes_log2, + const ZRegister& zt, + const PRegisterZ& pg, + const SVEMemOperand& addr, + bool is_signed) { + VIXL_ASSERT(addr.IsScalarPlusImmediate()); + VIXL_ASSERT(zt.GetLaneSizeInBytesLog2() >= msize_in_bytes_log2); + if (is_signed) { + // Sign-extension is only possible when the vector elements are larger than + // the elements in memory. 
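+    // For example (illustrative): ld1rsh(z0.VnS(), p0.Zeroing(), SVEMemOperand(x0, 2))
+    // broadcasts one sign-extended halfword to every S lane; the byte offset is
+    // encoded below in units of the memory element size (here 2 / 2 = 1).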
+ VIXL_ASSERT(zt.GetLaneSizeInBytesLog2() != msize_in_bytes_log2); + } + + int64_t imm = addr.GetImmediateOffset(); + int divisor = 1 << msize_in_bytes_log2; + VIXL_ASSERT(imm % divisor == 0); + Instr dtype = SVEDtypeSplit(msize_in_bytes_log2, + zt.GetLaneSizeInBytesLog2(), + is_signed); + + Emit(SVELoadAndBroadcastElementFixed | dtype | RnSP(addr.GetScalarBase()) | + ImmUnsignedField<21, 16>(imm / divisor) | Rt(zt) | PgLow8(pg)); +} + +// This prototype maps to 4 instruction encodings: +// LD1RB_z_p_bi_u16 +// LD1RB_z_p_bi_u32 +// LD1RB_z_p_bi_u64 +// LD1RB_z_p_bi_u8 +void Assembler::ld1rb(const ZRegister& zt, + const PRegisterZ& pg, + const SVEMemOperand& addr) { + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + + SVELd1BroadcastHelper(kBRegSizeInBytesLog2, zt, pg, addr, false); +} + +// This prototype maps to 3 instruction encodings: +// LD1RH_z_p_bi_u16 +// LD1RH_z_p_bi_u32 +// LD1RH_z_p_bi_u64 +void Assembler::ld1rh(const ZRegister& zt, + const PRegisterZ& pg, + const SVEMemOperand& addr) { + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + + SVELd1BroadcastHelper(kHRegSizeInBytesLog2, zt, pg, addr, false); +} + +// This prototype maps to 2 instruction encodings: +// LD1RW_z_p_bi_u32 +// LD1RW_z_p_bi_u64 +void Assembler::ld1rw(const ZRegister& zt, + const PRegisterZ& pg, + const SVEMemOperand& addr) { + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + + SVELd1BroadcastHelper(kSRegSizeInBytesLog2, zt, pg, addr, false); +} + +void Assembler::ld1rd(const ZRegister& zt, + const PRegisterZ& pg, + const SVEMemOperand& addr) { + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + + SVELd1BroadcastHelper(kDRegSizeInBytesLog2, zt, pg, addr, false); +} + +// This prototype maps to 3 instruction encodings: +// LD1RSB_z_p_bi_s16 +// LD1RSB_z_p_bi_s32 +// LD1RSB_z_p_bi_s64 +void Assembler::ld1rsb(const ZRegister& zt, + const PRegisterZ& pg, + const SVEMemOperand& addr) { + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + + SVELd1BroadcastHelper(kBRegSizeInBytesLog2, zt, pg, addr, true); +} + +// This prototype maps to 2 instruction encodings: +// LD1RSH_z_p_bi_s32 +// LD1RSH_z_p_bi_s64 +void Assembler::ld1rsh(const ZRegister& zt, + const PRegisterZ& pg, + const SVEMemOperand& addr) { + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + + SVELd1BroadcastHelper(kHRegSizeInBytesLog2, zt, pg, addr, true); +} + +void Assembler::ld1rsw(const ZRegister& zt, + const PRegisterZ& pg, + const SVEMemOperand& addr) { + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + + SVELd1BroadcastHelper(kWRegSizeInBytesLog2, zt, pg, addr, true); +} + +void Assembler::ldr(const CPURegister& rt, const SVEMemOperand& addr) { + // LDR , [{, #, MUL VL}] + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(rt.IsPRegister() || rt.IsZRegister()); + VIXL_ASSERT(addr.IsPlainScalar() || + (addr.IsScalarPlusImmediate() && + (addr.GetOffsetModifier() == SVE_MUL_VL))); + int64_t imm9 = addr.GetImmediateOffset(); + VIXL_ASSERT(IsInt9(imm9)); + Instr imm9l = ExtractUnsignedBitfield32(2, 0, imm9) << 10; + Instr imm9h = ExtractUnsignedBitfield32(8, 3, imm9) << 16; + + Instr op = LDR_z_bi; + if (rt.IsPRegister()) { + op = LDR_p_bi; + } + Emit(op | Rt(rt) | RnSP(addr.GetScalarBase()) | imm9h | imm9l); +} + +// SVEMem64BitGather. + +// This prototype maps to 3 instruction encodings: +// LDFF1B_z_p_bz_d_64_unscaled +// LDFF1B_z_p_bz_d_x32_unscaled +void Assembler::ldff1b(const ZRegister& zt, + const PRegisterZ& pg, + const Register& xn, + const ZRegister& zm) { + // LDFF1B { .D }, /Z, [, .D] + // 1100 0100 010. .... 111. .... .... .... 
+ // msz<24:23> = 00 | Zm<20:16> | U<14> = 1 | ff<13> = 1 | Pg<12:10> | Rn<9:5> + // | Zt<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + + Emit(LDFF1B_z_p_bz_d_64_unscaled | Rt(zt) | PgLow8(pg) | RnSP(xn) | Rm(zm)); +} + +// This prototype maps to 2 instruction encodings: +// LDFF1B_z_p_ai_d +// LDFF1B_z_p_ai_s +void Assembler::ldff1b(const ZRegister& zt, + const PRegisterZ& pg, + const ZRegister& zn, + int imm5) { + // LDFF1B { .D }, /Z, [.D{, #}] + // 1100 0100 001. .... 111. .... .... .... + // msz<24:23> = 00 | imm5<20:16> | U<14> = 1 | ff<13> = 1 | Pg<12:10> | + // Zn<9:5> | Zt<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + + Emit(LDFF1B_z_p_ai_d | Rt(zt) | PgLow8(pg) | Rn(zn) | ImmField<20, 16>(imm5)); +} + +// This prototype maps to 4 instruction encodings: +// LDFF1D_z_p_bz_d_64_scaled +// LDFF1D_z_p_bz_d_64_unscaled +// LDFF1D_z_p_bz_d_x32_scaled +// LDFF1D_z_p_bz_d_x32_unscaled +void Assembler::ldff1d(const ZRegister& zt, + const PRegisterZ& pg, + const Register& xn, + const ZRegister& zm) { + // LDFF1D { .D }, /Z, [, .D, LSL #3] + // 1100 0101 111. .... 111. .... .... .... + // msz<24:23> = 11 | Zm<20:16> | U<14> = 1 | ff<13> = 1 | Pg<12:10> | Rn<9:5> + // | Zt<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + + Emit(LDFF1D_z_p_bz_d_64_scaled | Rt(zt) | PgLow8(pg) | RnSP(xn) | Rm(zm)); +} + +void Assembler::ldff1d(const ZRegister& zt, + const PRegisterZ& pg, + const ZRegister& zn, + int imm5) { + // LDFF1D { .D }, /Z, [.D{, #}] + // 1100 0101 101. .... 111. .... .... .... + // msz<24:23> = 11 | imm5<20:16> | U<14> = 1 | ff<13> = 1 | Pg<12:10> | + // Zn<9:5> | Zt<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + + Emit(LDFF1D_z_p_ai_d | Rt(zt) | PgLow8(pg) | Rn(zn) | ImmField<20, 16>(imm5)); +} + +// This prototype maps to 6 instruction encodings: +// LDFF1H_z_p_bz_d_64_scaled +// LDFF1H_z_p_bz_d_64_unscaled +// LDFF1H_z_p_bz_d_x32_scaled +// LDFF1H_z_p_bz_d_x32_unscaled +void Assembler::ldff1h(const ZRegister& zt, + const PRegisterZ& pg, + const Register& xn, + const ZRegister& zm) { + // LDFF1H { .D }, /Z, [, .D, LSL #1] + // 1100 0100 111. .... 111. .... .... .... + // msz<24:23> = 01 | Zm<20:16> | U<14> = 1 | ff<13> = 1 | Pg<12:10> | Rn<9:5> + // | Zt<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + + Emit(LDFF1H_z_p_bz_d_64_scaled | Rt(zt) | PgLow8(pg) | RnSP(xn) | Rm(zm)); +} + +// This prototype maps to 2 instruction encodings: +// LDFF1H_z_p_ai_d +// LDFF1H_z_p_ai_s +void Assembler::ldff1h(const ZRegister& zt, + const PRegisterZ& pg, + const ZRegister& zn, + int imm5) { + // LDFF1H { .D }, /Z, [.D{, #}] + // 1100 0100 101. .... 111. .... .... .... + // msz<24:23> = 01 | imm5<20:16> | U<14> = 1 | ff<13> = 1 | Pg<12:10> | + // Zn<9:5> | Zt<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + + Emit(LDFF1H_z_p_ai_d | Rt(zt) | PgLow8(pg) | Rn(zn) | ImmField<20, 16>(imm5)); +} + +// This prototype maps to 3 instruction encodings: +// LDFF1SB_z_p_bz_d_64_unscaled +// LDFF1SB_z_p_bz_d_x32_unscaled +void Assembler::ldff1sb(const ZRegister& zt, + const PRegisterZ& pg, + const Register& xn, + const ZRegister& zm) { + // LDFF1SB { .D }, /Z, [, .D] + // 1100 0100 010. .... 101. .... .... .... 
+ // msz<24:23> = 00 | Zm<20:16> | U<14> = 0 | ff<13> = 1 | Pg<12:10> | Rn<9:5> + // | Zt<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + + Emit(LDFF1SB_z_p_bz_d_64_unscaled | Rt(zt) | PgLow8(pg) | RnSP(xn) | Rm(zm)); +} + +// This prototype maps to 2 instruction encodings: +// LDFF1SB_z_p_ai_d +// LDFF1SB_z_p_ai_s +void Assembler::ldff1sb(const ZRegister& zt, + const PRegisterZ& pg, + const ZRegister& zn, + int imm5) { + // LDFF1SB { .D }, /Z, [.D{, #}] + // 1100 0100 001. .... 101. .... .... .... + // msz<24:23> = 00 | imm5<20:16> | U<14> = 0 | ff<13> = 1 | Pg<12:10> | + // Zn<9:5> | Zt<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + + Emit(LDFF1SB_z_p_ai_d | Rt(zt) | PgLow8(pg) | Rn(zn) | + ImmField<20, 16>(imm5)); +} + +// This prototype maps to 6 instruction encodings: +// LDFF1SH_z_p_bz_d_64_scaled +// LDFF1SH_z_p_bz_d_64_unscaled +// LDFF1SH_z_p_bz_d_x32_scaled +// LDFF1SH_z_p_bz_d_x32_unscaled +void Assembler::ldff1sh(const ZRegister& zt, + const PRegisterZ& pg, + const Register& xn, + const ZRegister& zm) { + // LDFF1SH { .D }, /Z, [, .D, LSL #1] + // 1100 0100 111. .... 101. .... .... .... + // msz<24:23> = 01 | Zm<20:16> | U<14> = 0 | ff<13> = 1 | Pg<12:10> | Rn<9:5> + // | Zt<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + + Emit(LDFF1SH_z_p_bz_d_64_scaled | Rt(zt) | PgLow8(pg) | RnSP(xn) | Rm(zm)); +} + +// This prototype maps to 2 instruction encodings: +// LDFF1SH_z_p_ai_d +// LDFF1SH_z_p_ai_s +void Assembler::ldff1sh(const ZRegister& zt, + const PRegisterZ& pg, + const ZRegister& zn, + int imm5) { + // LDFF1SH { .D }, /Z, [.D{, #}] + // 1100 0100 101. .... 101. .... .... .... + // msz<24:23> = 01 | imm5<20:16> | U<14> = 0 | ff<13> = 1 | Pg<12:10> | + // Zn<9:5> | Zt<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + + Emit(LDFF1SH_z_p_ai_d | Rt(zt) | PgLow8(pg) | Rn(zn) | + ImmField<20, 16>(imm5)); +} + +// This prototype maps to 4 instruction encodings: +// LDFF1SW_z_p_bz_d_64_scaled +// LDFF1SW_z_p_bz_d_64_unscaled +// LDFF1SW_z_p_bz_d_x32_scaled +// LDFF1SW_z_p_bz_d_x32_unscaled +void Assembler::ldff1sw(const ZRegister& zt, + const PRegisterZ& pg, + const Register& xn, + const ZRegister& zm) { + // LDFF1SW { .D }, /Z, [, .D, LSL #2] + // 1100 0101 011. .... 101. .... .... .... + // msz<24:23> = 10 | Zm<20:16> | U<14> = 0 | ff<13> = 1 | Pg<12:10> | Rn<9:5> + // | Zt<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + + Emit(LDFF1SW_z_p_bz_d_64_scaled | Rt(zt) | PgLow8(pg) | RnSP(xn) | Rm(zm)); +} + +void Assembler::ldff1sw(const ZRegister& zt, + const PRegisterZ& pg, + const ZRegister& zn, + int imm5) { + // LDFF1SW { .D }, /Z, [.D{, #}] + // 1100 0101 001. .... 101. .... .... .... + // msz<24:23> = 10 | imm5<20:16> | U<14> = 0 | ff<13> = 1 | Pg<12:10> | + // Zn<9:5> | Zt<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + + Emit(LDFF1SW_z_p_ai_d | Rt(zt) | PgLow8(pg) | Rn(zn) | + ImmField<20, 16>(imm5)); +} + +// This prototype maps to 6 instruction encodings: +// LDFF1W_z_p_bz_d_64_scaled +// LDFF1W_z_p_bz_d_64_unscaled +// LDFF1W_z_p_bz_d_x32_scaled +// LDFF1W_z_p_bz_d_x32_unscaled +void Assembler::ldff1w(const ZRegister& zt, + const PRegisterZ& pg, + const Register& xn, + const ZRegister& zm) { + // LDFF1W { .D }, /Z, [, .D, LSL #2] + // 1100 0101 011. .... 111. .... .... .... 
+ // msz<24:23> = 10 | Zm<20:16> | U<14> = 1 | ff<13> = 1 | Pg<12:10> | Rn<9:5> + // | Zt<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + + Emit(LDFF1W_z_p_bz_d_64_scaled | Rt(zt) | PgLow8(pg) | RnSP(xn) | Rm(zm)); +} + +// This prototype maps to 2 instruction encodings: +// LDFF1W_z_p_ai_d +// LDFF1W_z_p_ai_s +void Assembler::ldff1w(const ZRegister& zt, + const PRegisterZ& pg, + const ZRegister& zn, + int imm5) { + // LDFF1W { .D }, /Z, [.D{, #}] + // 1100 0101 001. .... 111. .... .... .... + // msz<24:23> = 10 | imm5<20:16> | U<14> = 1 | ff<13> = 1 | Pg<12:10> | + // Zn<9:5> | Zt<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + + Emit(LDFF1W_z_p_ai_d | Rt(zt) | PgLow8(pg) | Rn(zn) | ImmField<20, 16>(imm5)); +} + +void Assembler::SVEGatherPrefetchVectorPlusImmediateHelper( + PrefetchOperation prfop, + const PRegister& pg, + const SVEMemOperand& addr, + int prefetch_size) { + VIXL_ASSERT(addr.IsVectorPlusImmediate()); + ZRegister zn = addr.GetVectorBase(); + VIXL_ASSERT(zn.IsLaneSizeS() || zn.IsLaneSizeD()); + + Instr op = 0xffffffff; + switch (prefetch_size) { + case kBRegSize: + op = zn.IsLaneSizeS() ? static_cast(PRFB_i_p_ai_s) + : static_cast(PRFB_i_p_ai_d); + break; + case kHRegSize: + op = zn.IsLaneSizeS() ? static_cast(PRFH_i_p_ai_s) + : static_cast(PRFH_i_p_ai_d); + break; + case kSRegSize: + op = zn.IsLaneSizeS() ? static_cast(PRFW_i_p_ai_s) + : static_cast(PRFW_i_p_ai_d); + break; + case kDRegSize: + op = zn.IsLaneSizeS() ? static_cast(PRFD_i_p_ai_s) + : static_cast(PRFD_i_p_ai_d); + break; + default: + VIXL_UNIMPLEMENTED(); + break; + } + + int64_t imm5 = addr.GetImmediateOffset(); + Emit(op | SVEImmPrefetchOperation(prfop) | PgLow8(pg) | Rn(zn) | + ImmUnsignedField<20, 16>(imm5)); +} + +void Assembler::SVEGatherPrefetchScalarPlusImmediateHelper( + PrefetchOperation prfop, + const PRegister& pg, + const SVEMemOperand& addr, + int prefetch_size) { + VIXL_ASSERT(addr.IsScalarPlusImmediate()); + int64_t imm6 = addr.GetImmediateOffset(); + + Instr op = 0xffffffff; + switch (prefetch_size) { + case kBRegSize: + op = PRFB_i_p_bi_s; + break; + case kHRegSize: + op = PRFH_i_p_bi_s; + break; + case kSRegSize: + op = PRFW_i_p_bi_s; + break; + case kDRegSize: + op = PRFD_i_p_bi_s; + break; + default: + VIXL_UNIMPLEMENTED(); + break; + } + + Emit(op | SVEImmPrefetchOperation(prfop) | PgLow8(pg) | + RnSP(addr.GetScalarBase()) | ImmField<21, 16>(imm6)); +} + +void Assembler::SVEContiguousPrefetchScalarPlusScalarHelper( + PrefetchOperation prfop, + const PRegister& pg, + const SVEMemOperand& addr, + int prefetch_size) { + VIXL_ASSERT(addr.IsScalarPlusScalar()); + Instr op = 0xffffffff; + + switch (prefetch_size) { + case kBRegSize: + VIXL_ASSERT(addr.GetOffsetModifier() == NO_SVE_OFFSET_MODIFIER); + op = PRFB_i_p_br_s; + break; + case kHRegSize: + VIXL_ASSERT(addr.GetOffsetModifier() == SVE_LSL); + VIXL_ASSERT(addr.GetShiftAmount() == kHRegSizeInBytesLog2); + op = PRFH_i_p_br_s; + break; + case kSRegSize: + VIXL_ASSERT(addr.GetOffsetModifier() == SVE_LSL); + VIXL_ASSERT(addr.GetShiftAmount() == kSRegSizeInBytesLog2); + op = PRFW_i_p_br_s; + break; + case kDRegSize: + VIXL_ASSERT(addr.GetOffsetModifier() == SVE_LSL); + VIXL_ASSERT(addr.GetShiftAmount() == kDRegSizeInBytesLog2); + op = PRFD_i_p_br_s; + break; + default: + VIXL_UNIMPLEMENTED(); + break; + } + + VIXL_ASSERT(!addr.GetScalarOffset().IsZero()); + Emit(op | SVEImmPrefetchOperation(prfop) | PgLow8(pg) | + RnSP(addr.GetScalarBase()) | Rm(addr.GetScalarOffset())); +} + +void Assembler::SVEContiguousPrefetchScalarPlusVectorHelper( + 
PrefetchOperation prfop, + const PRegister& pg, + const SVEMemOperand& addr, + int prefetch_size) { + VIXL_ASSERT(addr.IsScalarPlusVector()); + ZRegister zm = addr.GetVectorOffset(); + SVEOffsetModifier mod = addr.GetOffsetModifier(); + + // All prefetch scalar-plus-vector addressing modes use a shift corresponding + // to the element size. + switch (prefetch_size) { + case kBRegSize: + VIXL_ASSERT(addr.GetShiftAmount() == kBRegSizeInBytesLog2); + break; + case kHRegSize: + VIXL_ASSERT(addr.GetShiftAmount() == kHRegSizeInBytesLog2); + break; + case kSRegSize: + VIXL_ASSERT(addr.GetShiftAmount() == kSRegSizeInBytesLog2); + break; + case kDRegSize: + VIXL_ASSERT(addr.GetShiftAmount() == kDRegSizeInBytesLog2); + break; + default: + VIXL_UNIMPLEMENTED(); + break; + } + + Instr sx = 0; + Instr op = 0xffffffff; + if ((mod == NO_SVE_OFFSET_MODIFIER) || (mod == SVE_LSL)) { + VIXL_ASSERT(zm.IsLaneSizeD()); + + switch (prefetch_size) { + case kBRegSize: + VIXL_ASSERT(mod == NO_SVE_OFFSET_MODIFIER); + op = PRFB_i_p_bz_d_64_scaled; + break; + case kHRegSize: + VIXL_ASSERT(mod == SVE_LSL); + op = PRFH_i_p_bz_d_64_scaled; + break; + case kSRegSize: + VIXL_ASSERT(mod == SVE_LSL); + op = PRFW_i_p_bz_d_64_scaled; + break; + case kDRegSize: + VIXL_ASSERT(mod == SVE_LSL); + op = PRFD_i_p_bz_d_64_scaled; + break; + default: + VIXL_UNIMPLEMENTED(); + break; + } + } else { + VIXL_ASSERT((mod == SVE_SXTW) || (mod == SVE_UXTW)); + VIXL_ASSERT(zm.IsLaneSizeS() || zm.IsLaneSizeD()); + + switch (prefetch_size) { + case kBRegSize: + op = zm.IsLaneSizeS() ? static_cast(PRFB_i_p_bz_s_x32_scaled) + : static_cast(PRFB_i_p_bz_d_x32_scaled); + break; + case kHRegSize: + op = zm.IsLaneSizeS() ? static_cast(PRFH_i_p_bz_s_x32_scaled) + : static_cast(PRFH_i_p_bz_d_x32_scaled); + break; + case kSRegSize: + op = zm.IsLaneSizeS() ? static_cast(PRFW_i_p_bz_s_x32_scaled) + : static_cast(PRFW_i_p_bz_d_x32_scaled); + break; + case kDRegSize: + op = zm.IsLaneSizeS() ? 
static_cast(PRFD_i_p_bz_s_x32_scaled) + : static_cast(PRFD_i_p_bz_d_x32_scaled); + break; + default: + VIXL_UNIMPLEMENTED(); + break; + } + + if (mod == SVE_SXTW) { + sx = 1 << 22; + } + } + + Emit(op | SVEImmPrefetchOperation(prfop) | PgLow8(pg) | sx | + RnSP(addr.GetScalarBase()) | Rm(zm)); +} + +void Assembler::SVEPrefetchHelper(PrefetchOperation prfop, + const PRegister& pg, + const SVEMemOperand& addr, + int prefetch_size) { + if (addr.IsVectorPlusImmediate()) { + // For example: + // [z0.s, #0] + SVEGatherPrefetchVectorPlusImmediateHelper(prfop, pg, addr, prefetch_size); + + } else if (addr.IsScalarPlusImmediate()) { + // For example: + // [x0, #42, mul vl] + SVEGatherPrefetchScalarPlusImmediateHelper(prfop, pg, addr, prefetch_size); + + } else if (addr.IsScalarPlusVector()) { + // For example: + // [x0, z0.s, sxtw] + SVEContiguousPrefetchScalarPlusVectorHelper(prfop, pg, addr, prefetch_size); + + } else if (addr.IsScalarPlusScalar()) { + // For example: + // [x0, x1] + SVEContiguousPrefetchScalarPlusScalarHelper(prfop, pg, addr, prefetch_size); + + } else { + VIXL_UNIMPLEMENTED(); + } +} + +void Assembler::prfb(PrefetchOperation prfop, + const PRegister& pg, + const SVEMemOperand& addr) { + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + SVEPrefetchHelper(prfop, pg, addr, kBRegSize); +} + +void Assembler::prfd(PrefetchOperation prfop, + const PRegister& pg, + const SVEMemOperand& addr) { + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + SVEPrefetchHelper(prfop, pg, addr, kDRegSize); +} + +void Assembler::prfh(PrefetchOperation prfop, + const PRegister& pg, + const SVEMemOperand& addr) { + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + SVEPrefetchHelper(prfop, pg, addr, kHRegSize); +} + +void Assembler::prfw(PrefetchOperation prfop, + const PRegister& pg, + const SVEMemOperand& addr) { + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + SVEPrefetchHelper(prfop, pg, addr, kSRegSize); +} + +void Assembler::SVELd1St1ScaImmHelper(const ZRegister& zt, + const PRegister& pg, + const SVEMemOperand& addr, + Instr regoffset_op, + Instr immoffset_op, + int imm_divisor) { + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(addr.IsScalarPlusScalar() || addr.IsScalarPlusImmediate()); + + Instr op; + if (addr.IsScalarPlusScalar()) { + op = regoffset_op | Rm(addr.GetScalarOffset()); + } else { + int64_t imm = addr.GetImmediateOffset(); + VIXL_ASSERT(((imm % imm_divisor) == 0) && IsInt4(imm / imm_divisor)); + op = immoffset_op | ImmField<19, 16>(imm / imm_divisor); + } + Emit(op | Rt(zt) | PgLow8(pg) | RnSP(addr.GetScalarBase())); +} + +void Assembler::SVELd1VecScaHelper(const ZRegister& zt, + const PRegister& pg, + const SVEMemOperand& addr, + uint32_t msize_bytes_log2, + bool is_signed) { + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); + VIXL_ASSERT(addr.IsVectorPlusScalar()); + ZRegister zn = addr.GetVectorBase(); + VIXL_ASSERT(zn.IsLaneSizeS() || zn.IsLaneSizeD()); + VIXL_ASSERT(AreSameLaneSize(zn, zt)); + + uint32_t esize = zn.GetLaneSizeInBytesLog2(); + uint32_t b14_13 = 0; + if (!is_signed) b14_13 = zn.IsLaneSizeS() ? 0x1 : 0x2; + + Instr op = 0x04008000; // LDNT1 with vector plus scalar addressing mode. 
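+  // For example, for ldnt1sh with D lanes: esize is 3, msize_bytes_log2 is 1
+  // and b14_13 stays 0 (signed), so op becomes 0xc4808000 before the transfer,
+  // predicate and address fields are merged in below.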
+ op |= (esize << 30) | (msize_bytes_log2 << 23) | (b14_13 << 13); + Emit(op | Rt(zt) | PgLow8(pg) | + SVEMemOperandHelper(msize_bytes_log2, 1, addr, true)); +} + +void Assembler::SVESt1VecScaHelper(const ZRegister& zt, + const PRegister& pg, + const SVEMemOperand& addr, + uint32_t msize_bytes_log2) { + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); + VIXL_ASSERT(addr.IsVectorPlusScalar()); + ZRegister zn = addr.GetVectorBase(); + VIXL_ASSERT(zn.IsLaneSizeS() || zn.IsLaneSizeD()); + VIXL_ASSERT(AreSameLaneSize(zn, zt)); + + uint32_t bit22 = zn.IsLaneSizeS() ? (1 << 22) : 0; + Instr op = 0xe4002000; // STNT1 with vector plus scalar addressing mode. + op |= bit22 | (msize_bytes_log2 << 23); + Emit(op | Rt(zt) | PgLow8(pg) | + SVEMemOperandHelper(msize_bytes_log2, 1, addr, true)); +} + +#define VIXL_SVE_LD1R_LIST(V) \ + V(qb, 0, B, LD1RQB_z_p_br_contiguous, LD1RQB_z_p_bi_u8, 16) \ + V(qh, 1, H, LD1RQH_z_p_br_contiguous, LD1RQH_z_p_bi_u16, 16) \ + V(qw, 2, S, LD1RQW_z_p_br_contiguous, LD1RQW_z_p_bi_u32, 16) \ + V(qd, 3, D, LD1RQD_z_p_br_contiguous, LD1RQD_z_p_bi_u64, 16) \ + V(ob, 0, B, 0xa4200000, 0xa4202000, 32) \ + V(oh, 1, H, 0xa4a00000, 0xa4a02000, 32) \ + V(ow, 2, S, 0xa5200000, 0xa5202000, 32) \ + V(od, 3, D, 0xa5a00000, 0xa5a02000, 32) + +#define VIXL_DEFINE_ASM_FUNC(FN, SH, SZ, SCA, IMM, BYTES) \ + void Assembler::ld1r##FN(const ZRegister& zt, \ + const PRegisterZ& pg, \ + const SVEMemOperand& addr) { \ + VIXL_ASSERT((BYTES == 16) || \ + ((BYTES == 32) && (CPUHas(CPUFeatures::kSVEF64MM)))); \ + VIXL_ASSERT(addr.IsScalarPlusImmediate() || addr.IsEquivalentToLSL(SH)); \ + VIXL_ASSERT(zt.IsLaneSize##SZ()); \ + SVELd1St1ScaImmHelper(zt, pg, addr, SCA, IMM, BYTES); \ + } +VIXL_SVE_LD1R_LIST(VIXL_DEFINE_ASM_FUNC) +#undef VIXL_DEFINE_ASM_FUNC +#undef VIXL_SVE_LD1R_LIST + +#define VIXL_DEFINE_LDFF1(MSZ, LANE_SIZE) \ + void Assembler::ldff1##MSZ(const ZRegister& zt, \ + const PRegisterZ& pg, \ + const SVEMemOperand& addr) { \ + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); \ + SVELdff1Helper(k##LANE_SIZE##RegSizeInBytesLog2, zt, pg, addr, false); \ + } +VIXL_SVE_LOAD_STORE_VARIANT_LIST(VIXL_DEFINE_LDFF1) + +#define VIXL_DEFINE_LDFF1S(MSZ, LANE_SIZE) \ + void Assembler::ldff1s##MSZ(const ZRegister& zt, \ + const PRegisterZ& pg, \ + const SVEMemOperand& addr) { \ + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); \ + SVELdff1Helper(k##LANE_SIZE##RegSizeInBytesLog2, zt, pg, addr, true); \ + } +VIXL_SVE_LOAD_STORE_SIGNED_VARIANT_LIST(VIXL_DEFINE_LDFF1S) + +void Assembler::ldnf1b(const ZRegister& zt, + const PRegisterZ& pg, + const SVEMemOperand& addr) { + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(addr.IsPlainRegister() || + (addr.IsScalarPlusImmediate() && + (addr.GetOffsetModifier() == SVE_MUL_VL))); + + SVELdSt1Helper(0, + zt, + pg, + addr, + /* is_signed = */ false, + SVEContiguousNonFaultLoad_ScalarPlusImmFixed); +} + +void Assembler::ldnf1d(const ZRegister& zt, + const PRegisterZ& pg, + const SVEMemOperand& addr) { + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(addr.IsPlainRegister() || + (addr.IsScalarPlusImmediate() && + (addr.GetOffsetModifier() == SVE_MUL_VL))); + + SVELdSt1Helper(3, + zt, + pg, + addr, + /* is_signed = */ false, + SVEContiguousNonFaultLoad_ScalarPlusImmFixed); +} + +void Assembler::ldnf1h(const ZRegister& zt, + const PRegisterZ& pg, + const SVEMemOperand& addr) { + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(addr.IsPlainRegister() || + (addr.IsScalarPlusImmediate() && + (addr.GetOffsetModifier() == SVE_MUL_VL))); + + SVELdSt1Helper(1, + zt, + pg, + addr, + /* 
is_signed = */ false, + SVEContiguousNonFaultLoad_ScalarPlusImmFixed); +} + +void Assembler::ldnf1sb(const ZRegister& zt, + const PRegisterZ& pg, + const SVEMemOperand& addr) { + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(addr.IsPlainRegister() || + (addr.IsScalarPlusImmediate() && + (addr.GetOffsetModifier() == SVE_MUL_VL))); + + SVELdSt1Helper(0, + zt, + pg, + addr, + /* is_signed = */ true, + SVEContiguousNonFaultLoad_ScalarPlusImmFixed); +} + +void Assembler::ldnf1sh(const ZRegister& zt, + const PRegisterZ& pg, + const SVEMemOperand& addr) { + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(addr.IsPlainRegister() || + (addr.IsScalarPlusImmediate() && + (addr.GetOffsetModifier() == SVE_MUL_VL))); + + SVELdSt1Helper(1, + zt, + pg, + addr, + /* is_signed = */ true, + SVEContiguousNonFaultLoad_ScalarPlusImmFixed); +} + +void Assembler::ldnf1sw(const ZRegister& zt, + const PRegisterZ& pg, + const SVEMemOperand& addr) { + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(addr.IsPlainRegister() || + (addr.IsScalarPlusImmediate() && + (addr.GetOffsetModifier() == SVE_MUL_VL))); + + SVELdSt1Helper(2, + zt, + pg, + addr, + /* is_signed = */ true, + SVEContiguousNonFaultLoad_ScalarPlusImmFixed); +} + +void Assembler::ldnf1w(const ZRegister& zt, + const PRegisterZ& pg, + const SVEMemOperand& addr) { + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(addr.IsPlainRegister() || + (addr.IsScalarPlusImmediate() && + (addr.GetOffsetModifier() == SVE_MUL_VL))); + + SVELdSt1Helper(2, + zt, + pg, + addr, + /* is_signed = */ false, + SVEContiguousNonFaultLoad_ScalarPlusImmFixed); +} + +void Assembler::ldnt1b(const ZRegister& zt, + const PRegisterZ& pg, + const SVEMemOperand& addr) { + VIXL_ASSERT(addr.IsPlainScalar() || + (addr.IsScalarPlusImmediate() && addr.IsMulVl()) || + (addr.IsScalarPlusScalar() && addr.IsEquivalentToLSL(0)) || + (addr.IsVectorPlusScalar() && CPUHas(CPUFeatures::kSVE2))); + if (addr.IsVectorPlusScalar()) { + SVELd1VecScaHelper(zt, pg, addr, 0, /* is_signed = */ false); + } else { + SVELd1St1ScaImmHelper(zt, + pg, + addr, + LDNT1B_z_p_br_contiguous, + LDNT1B_z_p_bi_contiguous); + } +} + +void Assembler::ldnt1d(const ZRegister& zt, + const PRegisterZ& pg, + const SVEMemOperand& addr) { + VIXL_ASSERT(addr.IsPlainScalar() || + (addr.IsScalarPlusImmediate() && addr.IsMulVl()) || + (addr.IsScalarPlusScalar() && addr.IsEquivalentToLSL(3)) || + (addr.IsVectorPlusScalar() && CPUHas(CPUFeatures::kSVE2))); + if (addr.IsVectorPlusScalar()) { + SVELd1VecScaHelper(zt, pg, addr, 3, /* is_signed = */ false); + } else { + SVELd1St1ScaImmHelper(zt, + pg, + addr, + LDNT1D_z_p_br_contiguous, + LDNT1D_z_p_bi_contiguous); + } +} + +void Assembler::ldnt1h(const ZRegister& zt, + const PRegisterZ& pg, + const SVEMemOperand& addr) { + VIXL_ASSERT(addr.IsPlainScalar() || + (addr.IsScalarPlusImmediate() && addr.IsMulVl()) || + (addr.IsScalarPlusScalar() && addr.IsEquivalentToLSL(1)) || + (addr.IsVectorPlusScalar() && CPUHas(CPUFeatures::kSVE2))); + if (addr.IsVectorPlusScalar()) { + SVELd1VecScaHelper(zt, pg, addr, 1, /* is_signed = */ false); + } else { + SVELd1St1ScaImmHelper(zt, + pg, + addr, + LDNT1H_z_p_br_contiguous, + LDNT1H_z_p_bi_contiguous); + } +} + +void Assembler::ldnt1w(const ZRegister& zt, + const PRegisterZ& pg, + const SVEMemOperand& addr) { + VIXL_ASSERT(addr.IsPlainScalar() || + (addr.IsScalarPlusImmediate() && addr.IsMulVl()) || + (addr.IsScalarPlusScalar() && addr.IsEquivalentToLSL(2)) || + (addr.IsVectorPlusScalar() && CPUHas(CPUFeatures::kSVE2))); + if 
(addr.IsVectorPlusScalar()) { + SVELd1VecScaHelper(zt, pg, addr, 2, /* is_signed = */ false); + } else { + SVELd1St1ScaImmHelper(zt, + pg, + addr, + LDNT1W_z_p_br_contiguous, + LDNT1W_z_p_bi_contiguous); + } +} + +void Assembler::ldnt1sb(const ZRegister& zt, + const PRegisterZ& pg, + const SVEMemOperand& addr) { + VIXL_ASSERT(addr.IsVectorPlusScalar() && CPUHas(CPUFeatures::kSVE2)); + SVELd1VecScaHelper(zt, pg, addr, 0, /* is_signed = */ true); +} + +void Assembler::ldnt1sh(const ZRegister& zt, + const PRegisterZ& pg, + const SVEMemOperand& addr) { + VIXL_ASSERT(addr.IsVectorPlusScalar() && CPUHas(CPUFeatures::kSVE2)); + SVELd1VecScaHelper(zt, pg, addr, 1, /* is_signed = */ true); +} + +void Assembler::ldnt1sw(const ZRegister& zt, + const PRegisterZ& pg, + const SVEMemOperand& addr) { + VIXL_ASSERT(addr.IsVectorPlusScalar() && CPUHas(CPUFeatures::kSVE2)); + SVELd1VecScaHelper(zt, pg, addr, 2, /* is_signed = */ true); +} + +Instr Assembler::SVEMemOperandHelper(unsigned msize_in_bytes_log2, + int num_regs, + const SVEMemOperand& addr, + bool is_load) { + VIXL_ASSERT((num_regs >= 1) && (num_regs <= 4)); + + Instr op = 0xfffffff; + if (addr.IsScalarPlusImmediate()) { + VIXL_ASSERT((addr.GetImmediateOffset() == 0) || addr.IsMulVl()); + int64_t imm = addr.GetImmediateOffset(); + VIXL_ASSERT((imm % num_regs) == 0); + op = RnSP(addr.GetScalarBase()) | ImmField<19, 16>(imm / num_regs); + + } else if (addr.IsScalarPlusScalar()) { + VIXL_ASSERT(addr.GetScalarOffset().IsZero() || + addr.IsEquivalentToLSL(msize_in_bytes_log2)); + op = RnSP(addr.GetScalarBase()) | Rm(addr.GetScalarOffset()); + + } else if (addr.IsVectorPlusImmediate()) { + ZRegister zn = addr.GetVectorBase(); + uint64_t imm = addr.GetImmediateOffset(); + VIXL_ASSERT(num_regs == 1); + VIXL_ASSERT(zn.IsLaneSizeS() || zn.IsLaneSizeD()); + VIXL_ASSERT(IsMultiple(imm, (1 << msize_in_bytes_log2))); + op = Rn(zn) | ImmUnsignedField<20, 16>(imm >> msize_in_bytes_log2); + } else if (addr.IsVectorPlusScalar()) { + VIXL_ASSERT(addr.GetOffsetModifier() == NO_SVE_OFFSET_MODIFIER); + VIXL_ASSERT(addr.GetShiftAmount() == 0); + ZRegister zn = addr.GetVectorBase(); + VIXL_ASSERT(zn.IsLaneSizeS() || zn.IsLaneSizeD()); + Register xm = addr.GetScalarOffset(); + op = Rn(zn) | Rm(xm); + } else if (addr.IsScalarPlusVector()) { + // We have to support several different addressing modes. Some instructions + // support a subset of these, but the SVEMemOperand encoding is consistent. + Register xn = addr.GetScalarBase(); + ZRegister zm = addr.GetVectorOffset(); + SVEOffsetModifier mod = addr.GetOffsetModifier(); + Instr modifier_bit = 1 << (is_load ? 22 : 14); + Instr xs = (mod == SVE_SXTW) ? modifier_bit : 0; + VIXL_ASSERT(num_regs == 1); + + if (mod == SVE_LSL) { + // 64-bit scaled offset: [, .D, LSL #] + VIXL_ASSERT(zm.IsLaneSizeD()); + VIXL_ASSERT(addr.GetShiftAmount() == msize_in_bytes_log2); + } else if (mod == NO_SVE_OFFSET_MODIFIER) { + // 64-bit unscaled offset: [, .D] + VIXL_ASSERT(zm.IsLaneSizeD()); + VIXL_ASSERT(addr.GetShiftAmount() == 0); + } else { + // 32-bit scaled offset: [, .S, #] + // 32-bit unscaled offset: [, .S, ] + // 32-bit unpacked scaled offset: [, .D, #] + // 32-bit unpacked unscaled offset: [, .D, ] + VIXL_ASSERT(zm.IsLaneSizeS() || zm.IsLaneSizeD()); + VIXL_ASSERT((mod == SVE_SXTW) || (mod == SVE_UXTW)); + VIXL_ASSERT((addr.GetShiftAmount() == 0) || + (addr.GetShiftAmount() == msize_in_bytes_log2)); + } + + // The form itself is encoded in the instruction opcode. 
+ op = RnSP(xn) | Rm(zm) | xs; + } else { + VIXL_UNIMPLEMENTED(); + } + + return op; +} + +// SVEMemStore. + +void Assembler::SVESt1Helper(unsigned msize_in_bytes_log2, + const ZRegister& zt, + const PRegister& pg, + const SVEMemOperand& addr) { + if (addr.IsScalarPlusScalar()) { + // Rm must not be xzr. + VIXL_ASSERT(!addr.GetScalarOffset().IsZero()); + } + + if (addr.IsScatterGather()) { + bool is_load = false; + bool is_signed = false; + bool is_ff = false; + SVEScatterGatherHelper(msize_in_bytes_log2, + zt, + pg, + addr, + is_load, + is_signed, + is_ff); + return; + } + + Instr op; + if (addr.IsScalarPlusImmediate()) { + op = SVEContiguousStore_ScalarPlusImmFixed; + } else if (addr.IsScalarPlusScalar()) { + op = SVEContiguousStore_ScalarPlusScalarFixed; + } else { + VIXL_UNIMPLEMENTED(); + op = 0xffffffff; + } + SVELdSt1Helper(msize_in_bytes_log2, zt, pg, addr, false, op); +} + +void Assembler::SVESt234Helper(int num_regs, + const ZRegister& zt1, + const PRegister& pg, + const SVEMemOperand& addr) { + if (addr.IsScalarPlusScalar()) { + // Rm must not be xzr. + VIXL_ASSERT(!addr.GetScalarOffset().IsZero()); + } + + Instr op; + if (addr.IsScalarPlusImmediate()) { + op = SVEStoreMultipleStructures_ScalarPlusImmFixed; + } else if (addr.IsScalarPlusScalar()) { + op = SVEStoreMultipleStructures_ScalarPlusScalarFixed; + } else { + // These instructions don't support any other addressing modes. + VIXL_ABORT(); + } + SVELdSt234Helper(num_regs, zt1, pg, addr, op); +} + +#define VIXL_DEFINE_ST1(MSZ, LANE_SIZE) \ + void Assembler::st1##MSZ(const ZRegister& zt, \ + const PRegister& pg, \ + const SVEMemOperand& addr) { \ + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); \ + SVESt1Helper(k##LANE_SIZE##RegSizeInBytesLog2, zt, pg, addr); \ + } +#define VIXL_DEFINE_ST2(MSZ, LANE_SIZE) \ + void Assembler::st2##MSZ(const ZRegister& zt1, \ + const ZRegister& zt2, \ + const PRegister& pg, \ + const SVEMemOperand& addr) { \ + USE(zt2); \ + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); \ + VIXL_ASSERT(AreConsecutive(zt1, zt2)); \ + VIXL_ASSERT(AreSameFormat(zt1, zt2)); \ + VIXL_ASSERT(zt1.IsLaneSize##LANE_SIZE()); \ + SVESt234Helper(2, zt1, pg, addr); \ + } +#define VIXL_DEFINE_ST3(MSZ, LANE_SIZE) \ + void Assembler::st3##MSZ(const ZRegister& zt1, \ + const ZRegister& zt2, \ + const ZRegister& zt3, \ + const PRegister& pg, \ + const SVEMemOperand& addr) { \ + USE(zt2, zt3); \ + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); \ + VIXL_ASSERT(AreConsecutive(zt1, zt2, zt3)); \ + VIXL_ASSERT(AreSameFormat(zt1, zt2, zt3)); \ + VIXL_ASSERT(zt1.IsLaneSize##LANE_SIZE()); \ + SVESt234Helper(3, zt1, pg, addr); \ + } +#define VIXL_DEFINE_ST4(MSZ, LANE_SIZE) \ + void Assembler::st4##MSZ(const ZRegister& zt1, \ + const ZRegister& zt2, \ + const ZRegister& zt3, \ + const ZRegister& zt4, \ + const PRegister& pg, \ + const SVEMemOperand& addr) { \ + USE(zt2, zt3, zt4); \ + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); \ + VIXL_ASSERT(AreConsecutive(zt1, zt2, zt3, zt4)); \ + VIXL_ASSERT(AreSameFormat(zt1, zt2, zt3, zt4)); \ + VIXL_ASSERT(zt1.IsLaneSize##LANE_SIZE()); \ + SVESt234Helper(4, zt1, pg, addr); \ + } + +VIXL_SVE_LOAD_STORE_VARIANT_LIST(VIXL_DEFINE_ST1) +VIXL_SVE_LOAD_STORE_VARIANT_LIST(VIXL_DEFINE_ST2) +VIXL_SVE_LOAD_STORE_VARIANT_LIST(VIXL_DEFINE_ST3) +VIXL_SVE_LOAD_STORE_VARIANT_LIST(VIXL_DEFINE_ST4) + +void Assembler::stnt1b(const ZRegister& zt, + const PRegister& pg, + const SVEMemOperand& addr) { + VIXL_ASSERT(addr.IsPlainScalar() || + (addr.IsScalarPlusImmediate() && addr.IsMulVl()) || + (addr.IsScalarPlusScalar() && 
addr.IsEquivalentToLSL(0)) ||
+              (addr.IsVectorPlusScalar() && CPUHas(CPUFeatures::kSVE2)));
+  if (addr.IsVectorPlusScalar()) {
+    SVESt1VecScaHelper(zt, pg, addr, 0);
+  } else {
+    SVELd1St1ScaImmHelper(zt,
+                          pg,
+                          addr,
+                          STNT1B_z_p_br_contiguous,
+                          STNT1B_z_p_bi_contiguous);
+  }
+}
+
+void Assembler::stnt1d(const ZRegister& zt,
+                       const PRegister& pg,
+                       const SVEMemOperand& addr) {
+  VIXL_ASSERT(addr.IsPlainScalar() ||
+              (addr.IsScalarPlusImmediate() && addr.IsMulVl()) ||
+              (addr.IsScalarPlusScalar() && addr.IsEquivalentToLSL(3)) ||
+              (addr.IsVectorPlusScalar() && CPUHas(CPUFeatures::kSVE2)));
+  if (addr.IsVectorPlusScalar()) {
+    SVESt1VecScaHelper(zt, pg, addr, 3);
+  } else {
+    SVELd1St1ScaImmHelper(zt,
+                          pg,
+                          addr,
+                          STNT1D_z_p_br_contiguous,
+                          STNT1D_z_p_bi_contiguous);
+  }
+}
+
+void Assembler::stnt1h(const ZRegister& zt,
+                       const PRegister& pg,
+                       const SVEMemOperand& addr) {
+  VIXL_ASSERT(addr.IsPlainScalar() ||
+              (addr.IsScalarPlusImmediate() && addr.IsMulVl()) ||
+              (addr.IsScalarPlusScalar() && addr.IsEquivalentToLSL(1)) ||
+              (addr.IsVectorPlusScalar() && CPUHas(CPUFeatures::kSVE2)));
+  if (addr.IsVectorPlusScalar()) {
+    SVESt1VecScaHelper(zt, pg, addr, 1);
+  } else {
+    SVELd1St1ScaImmHelper(zt,
+                          pg,
+                          addr,
+                          STNT1H_z_p_br_contiguous,
+                          STNT1H_z_p_bi_contiguous);
+  }
+}
+
+void Assembler::stnt1w(const ZRegister& zt,
+                       const PRegister& pg,
+                       const SVEMemOperand& addr) {
+  VIXL_ASSERT(addr.IsPlainScalar() ||
+              (addr.IsScalarPlusImmediate() && addr.IsMulVl()) ||
+              (addr.IsScalarPlusScalar() && addr.IsEquivalentToLSL(2)) ||
+              (addr.IsVectorPlusScalar() && CPUHas(CPUFeatures::kSVE2)));
+  if (addr.IsVectorPlusScalar()) {
+    SVESt1VecScaHelper(zt, pg, addr, 2);
+  } else {
+    SVELd1St1ScaImmHelper(zt,
+                          pg,
+                          addr,
+                          STNT1W_z_p_br_contiguous,
+                          STNT1W_z_p_bi_contiguous);
+  }
+}
+
+void Assembler::str(const CPURegister& rt, const SVEMemOperand& addr) {
+  // STR <Pt/Zt>, [<Xn|SP>{, #<imm>, MUL VL}]
+
+  VIXL_ASSERT(CPUHas(CPUFeatures::kSVE));
+  VIXL_ASSERT(rt.IsPRegister() || rt.IsZRegister());
+  VIXL_ASSERT(addr.IsPlainScalar() ||
+              (addr.IsScalarPlusImmediate() &&
+               (addr.GetOffsetModifier() == SVE_MUL_VL)));
+  int64_t imm9 = addr.GetImmediateOffset();
+  VIXL_ASSERT(IsInt9(imm9));
+  Instr imm9l = ExtractUnsignedBitfield32(2, 0, imm9) << 10;
+  Instr imm9h = ExtractUnsignedBitfield32(8, 3, imm9) << 16;
+
+  Instr op = STR_z_bi;
+  if (rt.IsPRegister()) {
+    op = STR_p_bi;
+  }
+  Emit(op | Rt(rt) | RnSP(addr.GetScalarBase()) | imm9h | imm9l);
+}
+
+// SVEMulIndex.
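+//
+// A minimal usage sketch for the indexed dot-product forms defined below
+// (illustration only, not part of the upstream sources; `masm` is an assumed
+// Assembler with SVE enabled, and z0-z5 are caller-chosen registers):
+//
+//   masm.sdot(z0.VnS(), z1.VnB(), z2.VnB(), 3);  // .S accumulator, 2-bit index
+//   masm.udot(z3.VnD(), z4.VnH(), z5.VnH(), 1);  // .D accumulator, 1-bit index
+//
+// As the asserts below require, the accumulator lane must be four times the
+// source lane width, and the index width depends on the accumulator size.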
+ +void Assembler::sdot(const ZRegister& zda, + const ZRegister& zn, + const ZRegister& zm, + int index) { + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(zda.GetLaneSizeInBytes() == (zn.GetLaneSizeInBytes() * 4)); + VIXL_ASSERT(AreSameLaneSize(zn, zm)); + + Instr op = 0xffffffff; + switch (zda.GetLaneSizeInBits()) { + case kSRegSize: + VIXL_ASSERT(IsUint2(index)); + op = SDOT_z_zzzi_s | Rx<18, 16>(zm) | (index << 19) | Rd(zda) | Rn(zn); + break; + case kDRegSize: + VIXL_ASSERT(IsUint1(index)); + op = SDOT_z_zzzi_d | Rx<19, 16>(zm) | (index << 20) | Rd(zda) | Rn(zn); + break; + default: + VIXL_UNIMPLEMENTED(); + break; + } + + Emit(op); +} + +void Assembler::udot(const ZRegister& zda, + const ZRegister& zn, + const ZRegister& zm, + int index) { + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(zda.GetLaneSizeInBytes() == (zn.GetLaneSizeInBytes() * 4)); + VIXL_ASSERT(AreSameLaneSize(zn, zm)); + + Instr op = 0xffffffff; + switch (zda.GetLaneSizeInBits()) { + case kSRegSize: + VIXL_ASSERT(IsUint2(index)); + op = UDOT_z_zzzi_s | Rx<18, 16>(zm) | (index << 19) | Rd(zda) | Rn(zn); + break; + case kDRegSize: + VIXL_ASSERT(IsUint1(index)); + op = UDOT_z_zzzi_d | Rx<19, 16>(zm) | (index << 20) | Rd(zda) | Rn(zn); + break; + default: + VIXL_UNIMPLEMENTED(); + break; + } + + Emit(op); +} + +// SVEPartitionBreak. + +void Assembler::brka(const PRegisterWithLaneSize& pd, + const PRegister& pg, + const PRegisterWithLaneSize& pn) { + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(pg.IsMerging() || pg.IsZeroing()); + VIXL_ASSERT(pd.IsLaneSizeB() && pn.IsLaneSizeB()); + + Instr m = pg.IsMerging() ? 0x00000010 : 0x00000000; + Emit(BRKA_p_p_p | Pd(pd) | Pg<13, 10>(pg) | m | Pn(pn)); +} + +void Assembler::brkas(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const PRegisterWithLaneSize& pn) { + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(pd.IsLaneSizeB() && pn.IsLaneSizeB()); + + Emit(BRKAS_p_p_p_z | Pd(pd) | Pg<13, 10>(pg) | Pn(pn)); +} + +void Assembler::brkb(const PRegisterWithLaneSize& pd, + const PRegister& pg, + const PRegisterWithLaneSize& pn) { + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(pg.IsMerging() || pg.IsZeroing()); + VIXL_ASSERT(pd.IsLaneSizeB() && pn.IsLaneSizeB()); + + Instr m = pg.IsMerging() ? 0x00000010 : 0x00000000; + Emit(BRKB_p_p_p | Pd(pd) | Pg<13, 10>(pg) | m | Pn(pn)); +} + +void Assembler::brkbs(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const PRegisterWithLaneSize& pn) { + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(pd.IsLaneSizeB() && pn.IsLaneSizeB()); + + Emit(BRKBS_p_p_p_z | Pd(pd) | Pg<13, 10>(pg) | Pn(pn)); +} + +void Assembler::brkn(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const PRegisterWithLaneSize& pn, + const PRegisterWithLaneSize& pm) { + USE(pm); + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(pd.IsLaneSizeB() && pn.IsLaneSizeB()); + VIXL_ASSERT(pd.Is(pm)); + + Emit(BRKN_p_p_pp | Pd(pd) | Pg<13, 10>(pg) | Pn(pn)); +} + +void Assembler::brkns(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const PRegisterWithLaneSize& pn, + const PRegisterWithLaneSize& pm) { + USE(pm); + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(pd.IsLaneSizeB() && pn.IsLaneSizeB()); + VIXL_ASSERT(pd.Is(pm)); + + Emit(BRKNS_p_p_pp | Pd(pd) | Pg<13, 10>(pg) | Pn(pn)); +} + +// SVEPermutePredicate. + +void Assembler::punpkhi(const PRegisterWithLaneSize& pd, + const PRegisterWithLaneSize& pn) { + // PUNPKHI .H, .B + // 0000 0101 0011 0001 0100 000. ...0 .... 
+ // H<16> = 1 | Pn<8:5> | Pd<3:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(pd.IsLaneSizeH()); + VIXL_ASSERT(pn.IsLaneSizeB()); + + Emit(PUNPKHI_p_p | Pd(pd) | Pn(pn)); +} + +void Assembler::punpklo(const PRegisterWithLaneSize& pd, + const PRegisterWithLaneSize& pn) { + // PUNPKLO .H, .B + // 0000 0101 0011 0000 0100 000. ...0 .... + // H<16> = 0 | Pn<8:5> | Pd<3:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(pd.IsLaneSizeH()); + VIXL_ASSERT(pn.IsLaneSizeB()); + + Emit(PUNPKLO_p_p | Pd(pd) | Pn(pn)); +} + +void Assembler::rev(const PRegisterWithLaneSize& pd, + const PRegisterWithLaneSize& pn) { + // REV ., . + // 0000 0101 ..11 0100 0100 000. ...0 .... + // size<23:22> | Pn<8:5> | Pd<3:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(AreSameLaneSize(pd, pn)); + + Emit(REV_p_p | SVESize(pd) | Pd(pd) | Rx<8, 5>(pn)); +} + +void Assembler::trn1(const PRegisterWithLaneSize& pd, + const PRegisterWithLaneSize& pn, + const PRegisterWithLaneSize& pm) { + // TRN1 ., ., . + // 0000 0101 ..10 .... 0101 000. ...0 .... + // size<23:22> | Pm<19:16> | opc<12:11> = 10 | H<10> = 0 | Pn<8:5> | Pd<3:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(AreSameLaneSize(pd, pn, pm)); + + Emit(TRN1_p_pp | SVESize(pd) | Pd(pd) | Pn(pn) | Pm(pm)); +} + +void Assembler::trn2(const PRegisterWithLaneSize& pd, + const PRegisterWithLaneSize& pn, + const PRegisterWithLaneSize& pm) { + // TRN2 ., ., . + // 0000 0101 ..10 .... 0101 010. ...0 .... + // size<23:22> | Pm<19:16> | opc<12:11> = 10 | H<10> = 1 | Pn<8:5> | Pd<3:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(AreSameLaneSize(pd, pn, pm)); + + Emit(TRN2_p_pp | SVESize(pd) | Pd(pd) | Pn(pn) | Pm(pm)); +} + +void Assembler::uzp1(const PRegisterWithLaneSize& pd, + const PRegisterWithLaneSize& pn, + const PRegisterWithLaneSize& pm) { + // UZP1 ., ., . + // 0000 0101 ..10 .... 0100 100. ...0 .... + // size<23:22> | Pm<19:16> | opc<12:11> = 01 | H<10> = 0 | Pn<8:5> | Pd<3:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(AreSameLaneSize(pd, pn, pm)); + + Emit(UZP1_p_pp | SVESize(pd) | Pd(pd) | Pn(pn) | Pm(pm)); +} + +void Assembler::uzp2(const PRegisterWithLaneSize& pd, + const PRegisterWithLaneSize& pn, + const PRegisterWithLaneSize& pm) { + // UZP2 ., ., . + // 0000 0101 ..10 .... 0100 110. ...0 .... + // size<23:22> | Pm<19:16> | opc<12:11> = 01 | H<10> = 1 | Pn<8:5> | Pd<3:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(AreSameLaneSize(pd, pn, pm)); + + Emit(UZP2_p_pp | SVESize(pd) | Pd(pd) | Pn(pn) | Pm(pm)); +} + +void Assembler::zip1(const PRegisterWithLaneSize& pd, + const PRegisterWithLaneSize& pn, + const PRegisterWithLaneSize& pm) { + // ZIP1 ., ., . + // 0000 0101 ..10 .... 0100 000. ...0 .... + // size<23:22> | Pm<19:16> | opc<12:11> = 00 | H<10> = 0 | Pn<8:5> | Pd<3:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(AreSameLaneSize(pd, pn, pm)); + + Emit(ZIP1_p_pp | SVESize(pd) | Pd(pd) | Pn(pn) | Pm(pm)); +} + +void Assembler::zip2(const PRegisterWithLaneSize& pd, + const PRegisterWithLaneSize& pn, + const PRegisterWithLaneSize& pm) { + // ZIP2 ., ., . + // 0000 0101 ..10 .... 0100 010. ...0 .... + // size<23:22> | Pm<19:16> | opc<12:11> = 00 | H<10> = 1 | Pn<8:5> | Pd<3:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(AreSameLaneSize(pd, pn, pm)); + + Emit(ZIP2_p_pp | SVESize(pd) | Pd(pd) | Pn(pn) | Pm(pm)); +} + +// SVEPermuteVectorExtract. 
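+//
+// A minimal usage sketch for ext() below (illustration only, not part of the
+// upstream sources; `masm` is an assumed Assembler with SVE, plus SVE2 for
+// the constructive form):
+//
+//   masm.ext(z0.VnB(), z0.VnB(), z1.VnB(), 16);  // destructive: zd is zn
+//   masm.ext(z2.VnB(), z4.VnB(), z5.VnB(), 3);   // SVE2: zn, zm consecutive
+//
+// The offset is a byte count in the range [0, 255], as asserted below.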
+ +void Assembler::ext(const ZRegister& zd, + const ZRegister& zn, + const ZRegister& zm, + unsigned offset) { + // EXT .B, .B, .B, # + // 0000 0101 001. .... 000. .... .... .... + // imm8h<20:16> | imm8l<12:10> | Zm<9:5> | Zdn<4:0> + + // EXT .B, { .B, .B }, # + // 0000 0101 011. .... 000. .... .... .... + // imm8h<20:16> | imm8l<12:10> | Zn<9:5> | Zd<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(IsUint8(offset)); + + int imm8h = ExtractUnsignedBitfield32(7, 3, offset); + int imm8l = ExtractUnsignedBitfield32(2, 0, offset); + + Instr op; + if (zd.Is(zn)) { + // Destructive form. + op = EXT_z_zi_des | Rn(zm); + } else { + // Constructive form (requires SVE2). + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2) && AreConsecutive(zn, zm)); + op = 0x05600000 | Rn(zn); + } + + Emit(op | Rd(zd) | ImmUnsignedField<20, 16>(imm8h) | + ImmUnsignedField<12, 10>(imm8l)); +} + +// SVEPermuteVectorInterleaving. + +void Assembler::trn1(const ZRegister& zd, + const ZRegister& zn, + const ZRegister& zm) { + // TRN1 ., ., . + // 0000 0101 ..1. .... 0111 00.. .... .... + // size<23:22> | Zm<20:16> | opc<12:10> = 100 | Zn<9:5> | Zd<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(AreSameLaneSize(zd, zn, zm)); + + Emit(TRN1_z_zz | SVESize(zd) | Rd(zd) | Rn(zn) | Rm(zm)); +} + +void Assembler::trn2(const ZRegister& zd, + const ZRegister& zn, + const ZRegister& zm) { + // TRN2 ., ., . + // 0000 0101 ..1. .... 0111 01.. .... .... + // size<23:22> | Zm<20:16> | opc<12:10> = 101 | Zn<9:5> | Zd<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(AreSameLaneSize(zd, zn, zm)); + + Emit(TRN2_z_zz | SVESize(zd) | Rd(zd) | Rn(zn) | Rm(zm)); +} + +void Assembler::uzp1(const ZRegister& zd, + const ZRegister& zn, + const ZRegister& zm) { + // UZP1 ., ., . + // 0000 0101 ..1. .... 0110 10.. .... .... + // size<23:22> | Zm<20:16> | opc<12:10> = 010 | Zn<9:5> | Zd<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(AreSameLaneSize(zd, zn, zm)); + + Emit(UZP1_z_zz | SVESize(zd) | Rd(zd) | Rn(zn) | Rm(zm)); +} + +void Assembler::uzp2(const ZRegister& zd, + const ZRegister& zn, + const ZRegister& zm) { + // UZP2 ., ., . + // 0000 0101 ..1. .... 0110 11.. .... .... + // size<23:22> | Zm<20:16> | opc<12:10> = 011 | Zn<9:5> | Zd<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(AreSameLaneSize(zd, zn, zm)); + + Emit(UZP2_z_zz | SVESize(zd) | Rd(zd) | Rn(zn) | Rm(zm)); +} + +void Assembler::zip1(const ZRegister& zd, + const ZRegister& zn, + const ZRegister& zm) { + // ZIP1 ., ., . + // 0000 0101 ..1. .... 0110 00.. .... .... + // size<23:22> | Zm<20:16> | opc<12:10> = 000 | Zn<9:5> | Zd<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(AreSameLaneSize(zd, zn, zm)); + + Emit(ZIP1_z_zz | SVESize(zd) | Rd(zd) | Rn(zn) | Rm(zm)); +} + +void Assembler::zip2(const ZRegister& zd, + const ZRegister& zn, + const ZRegister& zm) { + // ZIP2 ., ., . + // 0000 0101 ..1. .... 0110 01.. .... .... + // size<23:22> | Zm<20:16> | opc<12:10> = 001 | Zn<9:5> | Zd<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(AreSameLaneSize(zd, zn, zm)); + + Emit(ZIP2_z_zz | SVESize(zd) | Rd(zd) | Rn(zn) | Rm(zm)); +} + +// SVEPermuteVectorPredicated. + +void Assembler::clasta(const Register& rd, + const PRegister& pg, + const Register& rn, + const ZRegister& zm) { + // CLASTA , , , . + // 0000 0101 ..11 0000 101. .... .... .... 
+ // size<23:22> | B<16> = 0 | Pg<12:10> | Zm<9:5> | Rdn<4:0> + + USE(rn); + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(rd.Is(rn)); + + Emit(CLASTA_r_p_z | SVESize(zm) | Rd(rd) | PgLow8(pg) | Rn(zm)); +} + +void Assembler::clasta(const VRegister& vd, + const PRegister& pg, + const VRegister& vn, + const ZRegister& zm) { + // CLASTA , , , . + // 0000 0101 ..10 1010 100. .... .... .... + // size<23:22> | B<16> = 0 | Pg<12:10> | Zm<9:5> | Vdn<4:0> + + USE(vn); + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(vd.Is(vn)); + VIXL_ASSERT(vd.IsScalar()); + VIXL_ASSERT(AreSameLaneSize(vd, zm)); + + Emit(CLASTA_v_p_z | SVESize(zm) | Rd(vd) | PgLow8(pg) | Rn(zm)); +} + +void Assembler::clasta(const ZRegister& zd, + const PRegister& pg, + const ZRegister& zn, + const ZRegister& zm) { + // CLASTA ., , ., . + // 0000 0101 ..10 1000 100. .... .... .... + // size<23:22> | B<16> = 0 | Pg<12:10> | Zm<9:5> | Zdn<4:0> + + USE(zn); + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(zd.Is(zn)); + VIXL_ASSERT(AreSameLaneSize(zd, zn, zm)); + + Emit(CLASTA_z_p_zz | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zm)); +} + +void Assembler::clastb(const Register& rd, + const PRegister& pg, + const Register& rn, + const ZRegister& zm) { + // CLASTB , , , . + // 0000 0101 ..11 0001 101. .... .... .... + // size<23:22> | B<16> = 1 | Pg<12:10> | Zm<9:5> | Rdn<4:0> + + USE(rn); + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(rd.Is(rn)); + + Emit(CLASTB_r_p_z | SVESize(zm) | Rd(rd) | PgLow8(pg) | Rn(zm)); +} + +void Assembler::clastb(const VRegister& vd, + const PRegister& pg, + const VRegister& vn, + const ZRegister& zm) { + // CLASTB , , , . + // 0000 0101 ..10 1011 100. .... .... .... + // size<23:22> | B<16> = 1 | Pg<12:10> | Zm<9:5> | Vdn<4:0> + + USE(vn); + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(vd.Is(vn)); + VIXL_ASSERT(vd.IsScalar()); + VIXL_ASSERT(AreSameLaneSize(vd, zm)); + + Emit(CLASTB_v_p_z | SVESize(zm) | Rd(vd) | PgLow8(pg) | Rn(zm)); +} + +void Assembler::clastb(const ZRegister& zd, + const PRegister& pg, + const ZRegister& zn, + const ZRegister& zm) { + // CLASTB ., , ., . + // 0000 0101 ..10 1001 100. .... .... .... + // size<23:22> | B<16> = 1 | Pg<12:10> | Zm<9:5> | Zdn<4:0> + + USE(zn); + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(zd.Is(zn)); + VIXL_ASSERT(AreSameLaneSize(zd, zn, zm)); + + Emit(CLASTB_z_p_zz | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zm)); +} + +void Assembler::compact(const ZRegister& zd, + const PRegister& pg, + const ZRegister& zn) { + // COMPACT ., , . + // 0000 0101 1.10 0001 100. .... .... .... + // sz<22> | Pg<12:10> | Zn<9:5> | Zd<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(AreSameLaneSize(zd, zn)); + VIXL_ASSERT((zd.GetLaneSizeInBits() == kSRegSize) || + (zd.GetLaneSizeInBits() == kDRegSize)); + + Instr sz = (zd.GetLaneSizeInBits() == kDRegSize) ? (1 << 22) : 0; + Emit(COMPACT_z_p_z | sz | Rd(zd) | PgLow8(pg) | Rn(zn)); +} + +void Assembler::cpy(const ZRegister& zd, + const PRegisterM& pg, + const Register& rn) { + // CPY ., /M, + // 0000 0101 ..10 1000 101. .... .... .... + // size<23:22> | Pg<12:10> | Rn<9:5> | Zd<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(static_cast(rn.GetSizeInBits()) >= + zd.GetLaneSizeInBits()); + + Emit(CPY_z_p_r | SVESize(zd) | Rd(zd) | PgLow8(pg) | RnSP(rn)); +} + +void Assembler::cpy(const ZRegister& zd, + const PRegisterM& pg, + const VRegister& vn) { + // CPY ., /M, + // 0000 0101 ..10 0000 100. .... .... .... 
+ // size<23:22> | Pg<12:10> | Vn<9:5> | Zd<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(vn.IsScalar()); + VIXL_ASSERT(static_cast(vn.GetSizeInBits()) == + zd.GetLaneSizeInBits()); + + Emit(CPY_z_p_v | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(vn)); +} + +void Assembler::lasta(const Register& rd, + const PRegister& pg, + const ZRegister& zn) { + // LASTA , , . + // 0000 0101 ..10 0000 101. .... .... .... + // size<23:22> | B<16> = 0 | Pg<12:10> | Zn<9:5> | Rd<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + + Emit(LASTA_r_p_z | SVESize(zn) | Rd(rd) | PgLow8(pg) | Rn(zn)); +} + +void Assembler::lasta(const VRegister& vd, + const PRegister& pg, + const ZRegister& zn) { + // LASTA , , . + // 0000 0101 ..10 0010 100. .... .... .... + // size<23:22> | B<16> = 0 | Pg<12:10> | Zn<9:5> | Vd<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(vd.IsScalar()); + + Emit(LASTA_v_p_z | SVESize(zn) | Rd(vd) | PgLow8(pg) | Rn(zn)); +} + +void Assembler::lastb(const Register& rd, + const PRegister& pg, + const ZRegister& zn) { + // LASTB , , . + // 0000 0101 ..10 0001 101. .... .... .... + // size<23:22> | B<16> = 1 | Pg<12:10> | Zn<9:5> | Rd<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + + Emit(LASTB_r_p_z | SVESize(zn) | Rd(rd) | PgLow8(pg) | Rn(zn)); +} + +void Assembler::lastb(const VRegister& vd, + const PRegister& pg, + const ZRegister& zn) { + // LASTB , , . + // 0000 0101 ..10 0011 100. .... .... .... + // size<23:22> | B<16> = 1 | Pg<12:10> | Zn<9:5> | Vd<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(vd.IsScalar()); + + Emit(LASTB_v_p_z | SVESize(zn) | Rd(vd) | PgLow8(pg) | Rn(zn)); +} + +void Assembler::rbit(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn) { + // RBIT ., /M, . + // 0000 0101 ..10 0111 100. .... .... .... + // size<23:22> | opc<17:16> = 11 | Pg<12:10> | Zn<9:5> | Zd<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(AreSameLaneSize(zd, zn)); + + Emit(RBIT_z_p_z | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zn)); +} + +void Assembler::revb(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn) { + // REVB ., /M, . + // 0000 0101 ..10 0100 100. .... .... .... + // size<23:22> | opc<17:16> = 00 | Pg<12:10> | Zn<9:5> | Zd<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(AreSameLaneSize(zd, zn)); + VIXL_ASSERT(zd.IsLaneSizeH() || zd.IsLaneSizeS() || zd.IsLaneSizeD()); + + Emit(REVB_z_z | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zn)); +} + +void Assembler::revh(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn) { + // REVH ., /M, . + // 0000 0101 ..10 0101 100. .... .... .... + // size<23:22> | opc<17:16> = 01 | Pg<12:10> | Zn<9:5> | Zd<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(AreSameLaneSize(zd, zn)); + VIXL_ASSERT(zd.IsLaneSizeS() || zd.IsLaneSizeD()); + + Emit(REVH_z_z | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zn)); +} + +void Assembler::revw(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn) { + // REVW .D, /M, .D + // 0000 0101 ..10 0110 100. .... .... .... + // size<23:22> | opc<17:16> = 10 | Pg<12:10> | Zn<9:5> | Zd<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(AreSameLaneSize(zd, zn)); + VIXL_ASSERT(zd.IsLaneSizeD()); + + Emit(REVW_z_z | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zn)); +} + +void Assembler::splice(const ZRegister& zd, + const PRegister& pg, + const ZRegister& zn, + const ZRegister& zm) { + VIXL_ASSERT(AreSameLaneSize(zd, zn, zm)); + + if (zd.Aliases(zn)) { + // SPLICE ., , ., . 
+ // 0000 0101 ..10 1100 100. .... .... .... + // size<23:22> | Pg<12:10> | Zm<9:5> | Zdn<4:0> + + USE(zn); + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(zd.Is(zn)); + + Emit(SPLICE_z_p_zz_des | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zm)); + } else { + splice_con(zd, pg, zn, zm); + } +} + +void Assembler::splice_con(const ZRegister& zd, + const PRegister& pg, + const ZRegister& zn1, + const ZRegister& zn2) { + // SPLICE ., , { ., . } + // 0000 0101 ..10 1101 100. .... .... .... + // size<23:22> | Pg<12:10> | Zn<9:5> | Zd<4:0> + + USE(zn2); + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); + VIXL_ASSERT(AreConsecutive(zn1, zn2)); + VIXL_ASSERT(AreSameLaneSize(zd, zn1, zn2)); + + Emit(0x052d8000 | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zn1)); +} + +// SVEPermuteVectorUnpredicated. + +void Assembler::dup(const ZRegister& zd, const Register& xn) { + // DUP ., + // 0000 0101 ..10 0000 0011 10.. .... .... + // size<23:22> | Rn<9:5> | Zd<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + + Emit(DUP_z_r | SVESize(zd) | Rd(zd) | RnSP(xn)); +} + +void Assembler::dup(const ZRegister& zd, const ZRegister& zn, unsigned index) { + // DUP ., .[] + // 0000 0101 ..1. .... 0010 00.. .... .... + // imm2<23:22> | tsz<20:16> | Zn<9:5> | Zd<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(AreSameFormat(zd, zn)); + VIXL_ASSERT((index * zd.GetLaneSizeInBits()) < 512); + int n = zd.GetLaneSizeInBytesLog2(); + unsigned imm_7 = (index << (n + 1)) | (1 << n); + VIXL_ASSERT(IsUint7(imm_7)); + unsigned imm_2 = ExtractUnsignedBitfield32(6, 5, imm_7); + unsigned tsz_5 = ExtractUnsignedBitfield32(4, 0, imm_7); + + Emit(DUP_z_zi | ImmUnsignedField<23, 22>(imm_2) | + ImmUnsignedField<20, 16>(tsz_5) | Rd(zd) | Rn(zn)); +} + +void Assembler::insr(const ZRegister& zdn, const Register& rm) { + // INSR ., + // 0000 0101 ..10 0100 0011 10.. .... .... + // size<23:22> | Rm<9:5> | Zdn<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + + Emit(INSR_z_r | SVESize(zdn) | Rd(zdn) | Rn(rm)); +} + +void Assembler::insr(const ZRegister& zdn, const VRegister& vm) { + // INSR ., + // 0000 0101 ..11 0100 0011 10.. .... .... + // size<23:22> | Vm<9:5> | Zdn<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(vm.IsScalar()); + + Emit(INSR_z_v | SVESize(zdn) | Rd(zdn) | Rn(vm)); +} + +void Assembler::rev(const ZRegister& zd, const ZRegister& zn) { + // REV ., . + // 0000 0101 ..11 1000 0011 10.. .... .... + // size<23:22> | Zn<9:5> | Zd<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(AreSameFormat(zd, zn)); + + Emit(REV_z_z | SVESize(zd) | Rd(zd) | Rn(zn)); +} + +void Assembler::sunpkhi(const ZRegister& zd, const ZRegister& zn) { + // SUNPKHI ., . + // 0000 0101 ..11 0001 0011 10.. .... .... + // size<23:22> | U<17> = 0 | H<16> = 1 | Zn<9:5> | Zd<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(zd.GetLaneSizeInBytes() == (zn.GetLaneSizeInBytes() * 2)); + VIXL_ASSERT(!zd.IsLaneSizeB()); + + Emit(SUNPKHI_z_z | SVESize(zd) | Rd(zd) | Rn(zn)); +} + +void Assembler::sunpklo(const ZRegister& zd, const ZRegister& zn) { + // SUNPKLO ., . + // 0000 0101 ..11 0000 0011 10.. .... .... + // size<23:22> | U<17> = 0 | H<16> = 0 | Zn<9:5> | Zd<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(zd.GetLaneSizeInBytes() == (zn.GetLaneSizeInBytes() * 2)); + VIXL_ASSERT(!zd.IsLaneSizeB()); + + Emit(SUNPKLO_z_z | SVESize(zd) | Rd(zd) | Rn(zn)); +} + +void Assembler::tbl(const ZRegister& zd, + const ZRegister& zn, + const ZRegister& zm) { + // TBL ., { . }, . + // 0000 0101 ..1. .... 0011 00.. .... 
.... + // size<23:22> | Zm<20:16> | Zn<9:5> | Zd<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(AreSameFormat(zd, zn, zm)); + + Emit(TBL_z_zz_1 | SVESize(zd) | Rd(zd) | Rn(zn) | Rm(zm)); +} + +void Assembler::uunpkhi(const ZRegister& zd, const ZRegister& zn) { + // UUNPKHI ., . + // 0000 0101 ..11 0011 0011 10.. .... .... + // size<23:22> | U<17> = 1 | H<16> = 1 | Zn<9:5> | Zd<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(zd.GetLaneSizeInBytes() == (zn.GetLaneSizeInBytes() * 2)); + VIXL_ASSERT(!zd.IsLaneSizeB()); + + Emit(UUNPKHI_z_z | SVESize(zd) | Rd(zd) | Rn(zn)); +} + +void Assembler::uunpklo(const ZRegister& zd, const ZRegister& zn) { + // UUNPKLO ., . + // 0000 0101 ..11 0010 0011 10.. .... .... + // size<23:22> | U<17> = 1 | H<16> = 0 | Zn<9:5> | Zd<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(zd.GetLaneSizeInBytes() == (zn.GetLaneSizeInBytes() * 2)); + VIXL_ASSERT(!zd.IsLaneSizeB()); + + Emit(UUNPKLO_z_z | SVESize(zd) | Rd(zd) | Rn(zn)); +} + +// SVEPredicateCount. + +void Assembler::cntp(const Register& xd, + const PRegister& pg, + const PRegisterWithLaneSize& pn) { + // CNTP , , . + // 0010 0101 ..10 0000 10.. ..0. .... .... + // size<23:22> | opc<18:16> = 000 | Pg<13:10> | o2<9> = 0 | Pn<8:5> | Rd<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(xd.IsX()); + VIXL_ASSERT(pg.IsUnqualified()); + if (pg.HasLaneSize()) VIXL_ASSERT(AreSameFormat(pg, pn)); + + Emit(CNTP_r_p_p | SVESize(pn) | Rd(xd) | Pg<13, 10>(pg) | Pn(pn)); +} + +// SVEPredicateLogicalOp. +void Assembler::and_(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const PRegisterWithLaneSize& pn, + const PRegisterWithLaneSize& pm) { + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(AreSameFormat(pd, pn, pm)); + VIXL_ASSERT(pd.IsLaneSizeB()); + Emit(AND_p_p_pp_z | Pd(pd) | Pg<13, 10>(pg) | Pn(pn) | Pm(pm)); +} + +void Assembler::ands(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const PRegisterWithLaneSize& pn, + const PRegisterWithLaneSize& pm) { + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(AreSameFormat(pd, pn, pm)); + VIXL_ASSERT(pd.IsLaneSizeB()); + Emit(ANDS_p_p_pp_z | Pd(pd) | Pg<13, 10>(pg) | Pn(pn) | Pm(pm)); +} + +void Assembler::bic(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const PRegisterWithLaneSize& pn, + const PRegisterWithLaneSize& pm) { + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(AreSameFormat(pd, pn, pm)); + VIXL_ASSERT(pd.IsLaneSizeB()); + Emit(BIC_p_p_pp_z | Pd(pd) | Pg<13, 10>(pg) | Pn(pn) | Pm(pm)); +} + +void Assembler::bics(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const PRegisterWithLaneSize& pn, + const PRegisterWithLaneSize& pm) { + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(AreSameFormat(pd, pn, pm)); + VIXL_ASSERT(pd.IsLaneSizeB()); + Emit(BICS_p_p_pp_z | Pd(pd) | Pg<13, 10>(pg) | Pn(pn) | Pm(pm)); +} + +void Assembler::eor(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const PRegisterWithLaneSize& pn, + const PRegisterWithLaneSize& pm) { + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(AreSameFormat(pd, pn, pm)); + VIXL_ASSERT(pd.IsLaneSizeB()); + Emit(EOR_p_p_pp_z | Pd(pd) | Pg<13, 10>(pg) | Pn(pn) | Pm(pm)); +} + +void Assembler::eors(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const PRegisterWithLaneSize& pn, + const PRegisterWithLaneSize& pm) { + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(AreSameFormat(pd, pn, pm)); + VIXL_ASSERT(pd.IsLaneSizeB()); + Emit(EORS_p_p_pp_z | Pd(pd) | Pg<13, 
10>(pg) | Pn(pn) | Pm(pm)); +} + +void Assembler::nand(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const PRegisterWithLaneSize& pn, + const PRegisterWithLaneSize& pm) { + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(AreSameFormat(pd, pn, pm)); + VIXL_ASSERT(pd.IsLaneSizeB()); + Emit(NAND_p_p_pp_z | Pd(pd) | Pg<13, 10>(pg) | Pn(pn) | Pm(pm)); +} + +void Assembler::nands(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const PRegisterWithLaneSize& pn, + const PRegisterWithLaneSize& pm) { + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(AreSameFormat(pd, pn, pm)); + VIXL_ASSERT(pd.IsLaneSizeB()); + Emit(NANDS_p_p_pp_z | Pd(pd) | Pg<13, 10>(pg) | Pn(pn) | Pm(pm)); +} + +void Assembler::nor(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const PRegisterWithLaneSize& pn, + const PRegisterWithLaneSize& pm) { + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(AreSameFormat(pd, pn, pm)); + VIXL_ASSERT(pd.IsLaneSizeB()); + Emit(NOR_p_p_pp_z | Pd(pd) | Pg<13, 10>(pg) | Pn(pn) | Pm(pm)); +} + +void Assembler::nors(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const PRegisterWithLaneSize& pn, + const PRegisterWithLaneSize& pm) { + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(AreSameFormat(pd, pn, pm)); + VIXL_ASSERT(pd.IsLaneSizeB()); + Emit(NORS_p_p_pp_z | Pd(pd) | Pg<13, 10>(pg) | Pn(pn) | Pm(pm)); +} + +void Assembler::orn(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const PRegisterWithLaneSize& pn, + const PRegisterWithLaneSize& pm) { + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(AreSameFormat(pd, pn, pm)); + VIXL_ASSERT(pd.IsLaneSizeB()); + Emit(ORN_p_p_pp_z | Pd(pd) | Pg<13, 10>(pg) | Pn(pn) | Pm(pm)); +} + +void Assembler::orns(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const PRegisterWithLaneSize& pn, + const PRegisterWithLaneSize& pm) { + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(AreSameFormat(pd, pn, pm)); + VIXL_ASSERT(pd.IsLaneSizeB()); + Emit(ORNS_p_p_pp_z | Pd(pd) | Pg<13, 10>(pg) | Pn(pn) | Pm(pm)); +} + +void Assembler::orr(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const PRegisterWithLaneSize& pn, + const PRegisterWithLaneSize& pm) { + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(AreSameFormat(pd, pn, pm)); + VIXL_ASSERT(pd.IsLaneSizeB()); + Emit(ORR_p_p_pp_z | Pd(pd) | Pg<13, 10>(pg) | Pn(pn) | Pm(pm)); +} + +void Assembler::orrs(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const PRegisterWithLaneSize& pn, + const PRegisterWithLaneSize& pm) { + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(AreSameFormat(pd, pn, pm)); + VIXL_ASSERT(pd.IsLaneSizeB()); + Emit(ORRS_p_p_pp_z | Pd(pd) | Pg<13, 10>(pg) | Pn(pn) | Pm(pm)); +} + +void Assembler::sel(const PRegisterWithLaneSize& pd, + const PRegister& pg, + const PRegisterWithLaneSize& pn, + const PRegisterWithLaneSize& pm) { + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + Emit(SEL_p_p_pp | Pd(pd) | Pg<13, 10>(pg) | Pn(pn) | Pm(pm)); +} + +// SVEPredicateMisc. + +void Assembler::pfalse(const PRegisterWithLaneSize& pd) { + // PFALSE .B + // 0010 0101 0001 1000 1110 0100 0000 .... + // op<23> = 0 | S<22> = 0 | Pd<3:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + // Ignore the lane size, since it makes no difference to the operation. + + Emit(PFALSE_p | Pd(pd)); +} + +void Assembler::pfirst(const PRegisterWithLaneSize& pd, + const PRegister& pg, + const PRegisterWithLaneSize& pn) { + // PFIRST .B, , .B + // 0010 0101 0101 1000 1100 000. ...0 .... 
+ // op<23> = 0 | S<22> = 1 | Pg<8:5> | Pdn<3:0> + + USE(pn); + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(pd.Is(pn)); + VIXL_ASSERT(pd.IsLaneSizeB()); + + Emit(PFIRST_p_p_p | Pd(pd) | Pg<8, 5>(pg)); +} + +void Assembler::pnext(const PRegisterWithLaneSize& pd, + const PRegister& pg, + const PRegisterWithLaneSize& pn) { + // PNEXT ., , . + // 0010 0101 ..01 1001 1100 010. ...0 .... + // size<23:22> | Pg<8:5> | Pdn<3:0> + + USE(pn); + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(pd.Is(pn)); + + Emit(PNEXT_p_p_p | SVESize(pd) | Pd(pd) | Pg<8, 5>(pg)); +} + +void Assembler::ptest(const PRegister& pg, const PRegisterWithLaneSize& pn) { + // PTEST , .B + // 0010 0101 0101 0000 11.. ..0. ...0 0000 + // op<23> = 0 | S<22> = 1 | Pg<13:10> | Pn<8:5> | opc2<3:0> = 0000 + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(pn.IsLaneSizeB()); + + Emit(PTEST_p_p | Pg<13, 10>(pg) | Rx<8, 5>(pn)); +} + +void Assembler::ptrue(const PRegisterWithLaneSize& pd, int pattern) { + // PTRUE .{, } + // 0010 0101 ..01 1000 1110 00.. ...0 .... + // size<23:22> | S<16> = 0 | pattern<9:5> | Pd<3:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + + Emit(PTRUE_p_s | SVESize(pd) | Pd(pd) | ImmSVEPredicateConstraint(pattern)); +} + +void Assembler::ptrues(const PRegisterWithLaneSize& pd, int pattern) { + // PTRUES .{, } + // 0010 0101 ..01 1001 1110 00.. ...0 .... + // size<23:22> | S<16> = 1 | pattern<9:5> | Pd<3:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + + Emit(PTRUES_p_s | SVESize(pd) | Pd(pd) | ImmSVEPredicateConstraint(pattern)); +} + +void Assembler::rdffr(const PRegisterWithLaneSize& pd) { + // RDFFR .B + // 0010 0101 0001 1001 1111 0000 0000 .... + // op<23> = 0 | S<22> = 0 | Pd<3:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + + Emit(RDFFR_p_f | Pd(pd)); +} + +void Assembler::rdffr(const PRegisterWithLaneSize& pd, const PRegisterZ& pg) { + // RDFFR .B, /Z + // 0010 0101 0001 1000 1111 000. ...0 .... + // op<23> = 0 | S<22> = 0 | Pg<8:5> | Pd<3:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + + Emit(RDFFR_p_p_f | Pd(pd) | Pg<8, 5>(pg)); +} + +void Assembler::rdffrs(const PRegisterWithLaneSize& pd, const PRegisterZ& pg) { + // RDFFRS .B, /Z + // 0010 0101 0101 1000 1111 000. ...0 .... + // op<23> = 0 | S<22> = 1 | Pg<8:5> | Pd<3:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + + Emit(RDFFRS_p_p_f | Pd(pd) | Pg<8, 5>(pg)); +} + +// SVEPropagateBreak. + +void Assembler::brkpa(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const PRegisterWithLaneSize& pn, + const PRegisterWithLaneSize& pm) { + // BRKPA .B, /Z, .B, .B + // 0010 0101 0000 .... 11.. ..0. ...0 .... + // op<23> = 0 | S<22> = 0 | Pm<19:16> | Pg<13:10> | Pn<8:5> | B<4> = 0 | + // Pd<3:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + + Emit(BRKPA_p_p_pp | Pd(pd) | Pg<13, 10>(pg) | Pn(pn) | Pm(pm)); +} + +void Assembler::brkpas(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const PRegisterWithLaneSize& pn, + const PRegisterWithLaneSize& pm) { + // BRKPAS .B, /Z, .B, .B + // 0010 0101 0100 .... 11.. ..0. ...0 .... + // op<23> = 0 | S<22> = 1 | Pm<19:16> | Pg<13:10> | Pn<8:5> | B<4> = 0 | + // Pd<3:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + + Emit(BRKPAS_p_p_pp | Pd(pd) | Pg<13, 10>(pg) | Pn(pn) | Pm(pm)); +} + +void Assembler::brkpb(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const PRegisterWithLaneSize& pn, + const PRegisterWithLaneSize& pm) { + // BRKPB .B, /Z, .B, .B + // 0010 0101 0000 .... 11.. ..0. ...1 .... 
+ // op<23> = 0 | S<22> = 0 | Pm<19:16> | Pg<13:10> | Pn<8:5> | B<4> = 1 | + // Pd<3:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + + Emit(BRKPB_p_p_pp | Pd(pd) | Pg<13, 10>(pg) | Pn(pn) | Pm(pm)); +} + +void Assembler::brkpbs(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const PRegisterWithLaneSize& pn, + const PRegisterWithLaneSize& pm) { + // BRKPBS .B, /Z, .B, .B + // 0010 0101 0100 .... 11.. ..0. ...1 .... + // op<23> = 0 | S<22> = 1 | Pm<19:16> | Pg<13:10> | Pn<8:5> | B<4> = 1 | + // Pd<3:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + + Emit(BRKPBS_p_p_pp | Pd(pd) | Pg<13, 10>(pg) | Pn(pn) | Pm(pm)); +} + +// SVEStackFrameAdjustment. + +void Assembler::addpl(const Register& xd, const Register& xn, int imm6) { + // ADDPL , , # + // 0000 0100 011. .... 0101 0... .... .... + // op<22> = 1 | Rn<20:16> | imm6<10:5> | Rd<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(xd.IsX()); + VIXL_ASSERT(xn.IsX()); + + Emit(ADDPL_r_ri | RdSP(xd) | RmSP(xn) | ImmField<10, 5>(imm6)); +} + +void Assembler::addvl(const Register& xd, const Register& xn, int imm6) { + // ADDVL , , # + // 0000 0100 001. .... 0101 0... .... .... + // op<22> = 0 | Rn<20:16> | imm6<10:5> | Rd<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(xd.IsX()); + VIXL_ASSERT(xn.IsX()); + + Emit(ADDVL_r_ri | RdSP(xd) | RmSP(xn) | ImmField<10, 5>(imm6)); +} + +// SVEStackFrameSize. + +void Assembler::rdvl(const Register& xd, int imm6) { + // RDVL , # + // 0000 0100 1011 1111 0101 0... .... .... + // op<22> = 0 | opc2<20:16> = 11111 | imm6<10:5> | Rd<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(xd.IsX()); + + Emit(RDVL_r_i | Rd(xd) | ImmField<10, 5>(imm6)); +} + +// SVEVectorSelect. + +void Assembler::sel(const ZRegister& zd, + const PRegister& pg, + const ZRegister& zn, + const ZRegister& zm) { + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(AreSameLaneSize(zd, zn, zm)); + + Emit(SEL_z_p_zz | SVESize(zd) | Rd(zd) | Pg<13, 10>(pg) | Rn(zn) | Rm(zm)); +} + +// SVEWriteFFR. + +void Assembler::setffr() { + // SETFFR + // 0010 0101 0010 1100 1001 0000 0000 0000 + // opc<23:22> = 00 + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + + Emit(SETFFR_f); +} + +void Assembler::wrffr(const PRegisterWithLaneSize& pn) { + // WRFFR .B + // 0010 0101 0010 1000 1001 000. ...0 0000 + // opc<23:22> = 00 | Pn<8:5> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + + Emit(WRFFR_f_p | Rx<8, 5>(pn)); +} + +// Aliases. + +void Assembler::bic(const ZRegister& zd, const ZRegister& zn, uint64_t imm) { + and_(zd, zn, ~imm); +} + +void Assembler::eon(const ZRegister& zd, const ZRegister& zn, uint64_t imm) { + eor(zd, zn, ~imm); +} + +void Assembler::orn(const ZRegister& zd, const ZRegister& zn, uint64_t imm) { + orr(zd, zn, ~imm); +} + + +void Assembler::fmov(const ZRegister& zd, const PRegisterM& pg, double imm) { + if (IsPositiveZero(imm)) { + cpy(zd, pg, 0); + } else { + fcpy(zd, pg, imm); + } +} + +void Assembler::fmov(const ZRegister& zd, double imm) { + if (IsPositiveZero(imm)) { + dup(zd, 0); + } else { + fdup(zd, imm); + } +} + +void Assembler::mov(const PRegister& pd, const PRegister& pn) { + // If the inputs carry a lane size, they must match. 
+ VIXL_ASSERT((!pd.HasLaneSize() && !pn.HasLaneSize()) || + AreSameLaneSize(pd, pn)); + orr(pd.VnB(), pn.Zeroing(), pn.VnB(), pn.VnB()); +} + +void Assembler::mov(const PRegisterWithLaneSize& pd, + const PRegisterM& pg, + const PRegisterWithLaneSize& pn) { + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + sel(pd, pg, pn, pd); +} + +void Assembler::mov(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const PRegisterWithLaneSize& pn) { + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + and_(pd, pg, pn, pn); +} + +void Assembler::mov(const ZRegister& zd, + const PRegister& pg, + int imm8, + int shift) { + VIXL_ASSERT(pg.IsMerging() || pg.IsZeroing()); + cpy(zd, pg, imm8, shift); +} + +void Assembler::mov(const ZRegister& zd, const Register& xn) { dup(zd, xn); } + +void Assembler::mov(const ZRegister& zd, const VRegister& vn) { + VIXL_ASSERT(vn.IsScalar()); + VIXL_ASSERT(AreSameLaneSize(zd, vn)); + dup(zd, vn.Z().WithSameLaneSizeAs(vn), 0); +} + +void Assembler::mov(const ZRegister& zd, const ZRegister& zn) { + VIXL_ASSERT(AreSameLaneSize(zd, zn)); + orr(zd.VnD(), zn.VnD(), zn.VnD()); +} + +void Assembler::mov(const ZRegister& zd, const ZRegister& zn, unsigned index) { + VIXL_ASSERT(AreSameLaneSize(zd, zn)); + dup(zd, zn, index); +} + +void Assembler::mov(const ZRegister& zd, + const PRegisterM& pg, + const Register& rn) { + cpy(zd, pg, rn); +} + +void Assembler::mov(const ZRegister& zd, + const PRegisterM& pg, + const VRegister& vn) { + VIXL_ASSERT(vn.IsScalar()); + VIXL_ASSERT(AreSameLaneSize(zd, vn)); + cpy(zd, pg, vn); +} + +void Assembler::mov(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn) { + VIXL_ASSERT(AreSameLaneSize(zd, zn)); + sel(zd, pg, zn, zd); +} + +void Assembler::mov(const ZRegister& zd, uint64_t imm) { + // Mov is an alias of dupm for certain values of imm. Whilst this matters in + // the disassembler, for the assembler, we don't distinguish between the + // two mnemonics, and simply call dupm. + dupm(zd, imm); +} + +void Assembler::mov(const ZRegister& zd, int imm8, int shift) { + dup(zd, imm8, shift); +} + +void Assembler::movs(const PRegister& pd, const PRegister& pn) { + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + orrs(pd.VnB(), pn.Zeroing(), pn.VnB(), pn.VnB()); +} + +void Assembler::movs(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const PRegisterWithLaneSize& pn) { + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + ands(pd, pg, pn, pn); +} + +void Assembler::not_(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const PRegisterWithLaneSize& pn) { + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + eor(pd, pg, pn, pg.VnB()); +} + +void Assembler::nots(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const PRegisterWithLaneSize& pn) { + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + eors(pd, pg, pn, pg.VnB()); +} + +// SVE2 + +void Assembler::adclb(const ZRegister& zda, + const ZRegister& zn, + const ZRegister& zm) { + // ADCLB ., ., . + // 0100 0101 0.0. .... 1101 00.. .... .... + // size<23:22> | Zm<20:16> | T<10> | Zn<9:5> | Zda<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); + VIXL_ASSERT(AreSameLaneSize(zda, zn, zm)); + VIXL_ASSERT(zda.IsLaneSizeS() || zda.IsLaneSizeD()); + + Instr sz = zda.IsLaneSizeD() ? (1 << 22) : 0; + Emit(0x4500d000 | sz | Rd(zda) | Rn(zn) | Rm(zm)); +} + +void Assembler::adclt(const ZRegister& zda, + const ZRegister& zn, + const ZRegister& zm) { + // ADCLT ., ., . + // 0100 0101 0.0. .... 1101 01.. .... .... 
+ // size<23:22> | Zm<20:16> | T<10> | Zn<9:5> | Zda<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); + VIXL_ASSERT(AreSameLaneSize(zda, zn, zm)); + VIXL_ASSERT(zda.IsLaneSizeS() || zda.IsLaneSizeD()); + + Instr sz = zda.IsLaneSizeD() ? (1 << 22) : 0; + Emit(0x4500d400 | sz | Rd(zda) | Rn(zn) | Rm(zm)); +} + +void Assembler::addhnb(const ZRegister& zd, + const ZRegister& zn, + const ZRegister& zm) { + // ADDHNB ., ., . + // 0100 0101 ..1. .... 0110 00.. .... .... + // size<23:22> | Zm<20:16> | S<12> | R<11> | T<10> | Zn<9:5> | Zd<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); + VIXL_ASSERT(AreSameLaneSize(zn, zm)); + VIXL_ASSERT(zn.GetLaneSizeInBytes() == (zd.GetLaneSizeInBytes() * 2)); + VIXL_ASSERT(zn.GetLaneSizeInBytes() != kBRegSizeInBytes); + + Emit(0x45206000 | SVESize(zn) | Rd(zd) | Rn(zn) | Rm(zm)); +} + +void Assembler::addhnt(const ZRegister& zd, + const ZRegister& zn, + const ZRegister& zm) { + // ADDHNT ., ., . + // 0100 0101 ..1. .... 0110 01.. .... .... + // size<23:22> | Zm<20:16> | S<12> | R<11> | T<10> | Zn<9:5> | Zd<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); + VIXL_ASSERT(AreSameLaneSize(zn, zm)); + VIXL_ASSERT(zn.GetLaneSizeInBytes() == (zd.GetLaneSizeInBytes() * 2)); + VIXL_ASSERT(zn.GetLaneSizeInBytes() != kBRegSizeInBytes); + + Emit(0x45206400 | SVESize(zn) | Rd(zd) | Rn(zn) | Rm(zm)); +} + +void Assembler::addp(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm) { + // ADDP ., /M, ., . + // 0100 0100 ..01 0001 101. .... .... .... + // size<23:22> | opc<18:17> | U<16> | Pg<12:10> | Zm<9:5> | Zdn<4:0> + + USE(zn); + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); + VIXL_ASSERT(zd.Is(zn)); + VIXL_ASSERT(AreSameLaneSize(zd, zn, zm)); + + Emit(0x4411a000 | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zm)); +} + +void Assembler::bcax(const ZRegister& zd, + const ZRegister& zn, + const ZRegister& zm, + const ZRegister& zk) { + // BCAX .D, .D, .D, .D + // 0000 0100 011. .... 0011 10.. .... .... + // opc<23:22> | Zm<20:16> | o2<10> | Zk<9:5> | Zdn<4:0> + + USE(zn); + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); + VIXL_ASSERT(zd.Is(zn)); + VIXL_ASSERT(AreSameLaneSize(zd, zn, zm, zk)); + VIXL_ASSERT(zd.IsLaneSizeD()); + + Emit(0x04603800 | Rd(zd) | Rm(zm) | Rn(zk)); +} + +void Assembler::bdep(const ZRegister& zd, + const ZRegister& zn, + const ZRegister& zm) { + // BDEP ., ., . + // 0100 0101 ..0. .... 1011 01.. .... .... + // size<23:22> | Zm<20:16> | opc<11:10> | Zn<9:5> | Zd<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); + VIXL_ASSERT(CPUHas(CPUFeatures::kSVEBitPerm)); + VIXL_ASSERT(AreSameLaneSize(zd, zn, zm)); + + Emit(0x4500b400 | SVESize(zd) | Rd(zd) | Rn(zn) | Rm(zm)); +} + +void Assembler::bext(const ZRegister& zd, + const ZRegister& zn, + const ZRegister& zm) { + // BEXT ., ., . + // 0100 0101 ..0. .... 1011 00.. .... .... + // size<23:22> | Zm<20:16> | opc<11:10> | Zn<9:5> | Zd<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); + VIXL_ASSERT(CPUHas(CPUFeatures::kSVEBitPerm)); + VIXL_ASSERT(AreSameLaneSize(zd, zn, zm)); + + Emit(0x4500b000 | SVESize(zd) | Rd(zd) | Rn(zn) | Rm(zm)); +} + +void Assembler::bgrp(const ZRegister& zd, + const ZRegister& zn, + const ZRegister& zm) { + // BGRP ., ., . + // 0100 0101 ..0. .... 1011 10.. .... .... 
+ // size<23:22> | Zm<20:16> | opc<11:10> | Zn<9:5> | Zd<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); + VIXL_ASSERT(CPUHas(CPUFeatures::kSVEBitPerm)); + VIXL_ASSERT(AreSameLaneSize(zd, zn, zm)); + + Emit(0x4500b800 | SVESize(zd) | Rd(zd) | Rn(zn) | Rm(zm)); +} + +void Assembler::bsl(const ZRegister& zd, + const ZRegister& zn, + const ZRegister& zm, + const ZRegister& zk) { + // BSL .D, .D, .D, .D + // 0000 0100 001. .... 0011 11.. .... .... + // opc<23:22> | Zm<20:16> | o2<10> | Zk<9:5> | Zdn<4:0> + + USE(zn); + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); + VIXL_ASSERT(zd.Is(zn)); + VIXL_ASSERT(AreSameLaneSize(zd, zn, zm, zk)); + VIXL_ASSERT(zd.IsLaneSizeD()); + + Emit(0x04203c00 | Rd(zd) | Rm(zm) | Rn(zk)); +} + +void Assembler::bsl1n(const ZRegister& zd, + const ZRegister& zn, + const ZRegister& zm, + const ZRegister& zk) { + // BSL1N .D, .D, .D, .D + // 0000 0100 011. .... 0011 11.. .... .... + // opc<23:22> | Zm<20:16> | o2<10> | Zk<9:5> | Zdn<4:0> + + USE(zn); + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); + VIXL_ASSERT(zd.Is(zn)); + VIXL_ASSERT(AreSameLaneSize(zd, zn, zm, zk)); + VIXL_ASSERT(zd.IsLaneSizeD()); + + Emit(0x04603c00 | Rd(zd) | Rm(zm) | Rn(zk)); +} + +void Assembler::bsl2n(const ZRegister& zd, + const ZRegister& zn, + const ZRegister& zm, + const ZRegister& zk) { + // BSL2N .D, .D, .D, .D + // 0000 0100 101. .... 0011 11.. .... .... + // opc<23:22> | Zm<20:16> | o2<10> | Zk<9:5> | Zdn<4:0> + + USE(zn); + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); + VIXL_ASSERT(zd.Is(zn)); + VIXL_ASSERT(AreSameLaneSize(zd, zn, zm, zk)); + VIXL_ASSERT(zd.IsLaneSizeD()); + + Emit(0x04a03c00 | Rd(zd) | Rm(zm) | Rn(zk)); +} + +void Assembler::cadd(const ZRegister& zd, + const ZRegister& zn, + const ZRegister& zm, + int rot) { + // CADD ., ., ., + // 0100 0101 ..00 0000 1101 1... .... .... + // size<23:22> | op<16> | rot<10> | Zm<9:5> | Zdn<4:0> + + USE(zn); + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); + VIXL_ASSERT(zd.Is(zn)); + VIXL_ASSERT(AreSameLaneSize(zd, zn, zm)); + VIXL_ASSERT((rot == 90) || (rot == 270)); + + Instr rotate_bit = (rot == 90) ? 0 : (1 << 10); + Emit(0x4500d800 | rotate_bit | SVESize(zd) | Rd(zd) | Rn(zm)); +} + +void Assembler::cdot(const ZRegister& zda, + const ZRegister& zn, + const ZRegister& zm, + int index, + int rot) { + // CDOT .D, .H, .H[], + // 0100 0100 111. .... 0100 .... .... .... + // size<23:22> | opc<20:16> | rot<11:10> | Zn<9:5> | Zda<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); + VIXL_ASSERT((rot == 0) || (rot == 90) || (rot == 180) || (rot == 270)); + VIXL_ASSERT(AreSameLaneSize(zn, zm)); + VIXL_ASSERT(zda.IsLaneSizeS() || zda.IsLaneSizeD()); + VIXL_ASSERT(zda.GetLaneSizeInBytes() == (zn.GetLaneSizeInBytes() * 4)); + VIXL_ASSERT(index >= 0); + + Instr zm_and_idx = 0; + if (zm.IsLaneSizeB()) { + // Zm<18:16> | i2<20:19> + VIXL_ASSERT((zm.GetCode() <= 7) && (index <= 3)); + zm_and_idx = (index << 19) | Rx<18, 16>(zm); + } else { + // Zm<19:16> | i1<20> + VIXL_ASSERT(zm.IsLaneSizeH()); + VIXL_ASSERT((zm.GetCode() <= 15) && (index <= 1)); + zm_and_idx = (index << 20) | Rx<19, 16>(zm); + } + + Instr rotate_bits = (rot / 90) << 10; + Emit(0x44a04000 | zm_and_idx | rotate_bits | SVESize(zda) | Rd(zda) | Rn(zn)); +} + +void Assembler::cdot(const ZRegister& zda, + const ZRegister& zn, + const ZRegister& zm, + int rot) { + // CDOT ., ., ., + // 0100 0100 ..0. .... 0001 .... .... .... 
+ // size<23:22> | Zm<20:16> | rot<11:10> | Zn<9:5> | Zda<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); + VIXL_ASSERT((rot == 0) || (rot == 90) || (rot == 180) || (rot == 270)); + VIXL_ASSERT(AreSameLaneSize(zn, zm)); + VIXL_ASSERT(zda.IsLaneSizeS() || zda.IsLaneSizeD()); + VIXL_ASSERT(zda.GetLaneSizeInBytes() == (zn.GetLaneSizeInBytes() * 4)); + + Instr rotate_bits = (rot / 90) << 10; + Emit(0x44001000 | rotate_bits | SVESize(zda) | Rd(zda) | Rn(zn) | Rm(zm)); +} + +void Assembler::cmla(const ZRegister& zda, + const ZRegister& zn, + const ZRegister& zm, + int index, + int rot) { + // CMLA .H, .H, .H[], + // 0100 0100 101. .... 0110 .... .... .... + // size<23:22> | opc<20:16> | rot<11:10> | Zn<9:5> | Zda<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); + VIXL_ASSERT(AreSameLaneSize(zda, zn, zm)); + VIXL_ASSERT((rot == 0) || (rot == 90) || (rot == 180) || (rot == 270)); + + Instr rotate_bit = (rot / 90) << 10; + Emit(0x44a06000 | SVEMulComplexIndexHelper(zm, index) | rotate_bit | Rd(zda) | + Rn(zn)); +} + +void Assembler::cmla(const ZRegister& zda, + const ZRegister& zn, + const ZRegister& zm, + int rot) { + // CMLA ., ., ., + // 0100 0100 ..0. .... 0010 .... .... .... + // size<23:22> | Zm<20:16> | op<12> | rot<11:10> | Zn<9:5> | Zda<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); + VIXL_ASSERT(AreSameLaneSize(zda, zn, zm)); + VIXL_ASSERT((rot == 0) || (rot == 90) || (rot == 180) || (rot == 270)); + + Instr rotate_bit = (rot / 90) << 10; + Emit(0x44002000 | rotate_bit | SVESize(zda) | Rd(zda) | Rn(zn) | Rm(zm)); +} + +void Assembler::eor3(const ZRegister& zd, + const ZRegister& zn, + const ZRegister& zm, + const ZRegister& zk) { + // EOR3 .D, .D, .D, .D + // 0000 0100 001. .... 0011 10.. .... .... + // opc<23:22> | Zm<20:16> | o2<10> | Zk<9:5> | Zdn<4:0> + + USE(zn); + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); + VIXL_ASSERT(zd.Is(zn)); + VIXL_ASSERT(AreSameLaneSize(zd, zn, zm, zk)); + VIXL_ASSERT(zd.IsLaneSizeD()); + + Emit(0x04203800 | Rd(zd) | Rm(zm) | Rn(zk)); +} + +void Assembler::eorbt(const ZRegister& zd, + const ZRegister& zn, + const ZRegister& zm) { + // EORBT ., ., . + // 0100 0101 ..0. .... 1001 00.. .... .... + // size<23:22> | Zm<20:16> | tb<10> | Zn<9:5> | Zd<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); + VIXL_ASSERT(AreSameLaneSize(zd, zn, zm)); + + Emit(0x45009000 | SVESize(zd) | Rd(zd) | Rn(zn) | Rm(zm)); +} + +void Assembler::eortb(const ZRegister& zd, + const ZRegister& zn, + const ZRegister& zm) { + // EORTB ., ., . + // 0100 0101 ..0. .... 1001 01.. .... .... + // size<23:22> | Zm<20:16> | tb<10> | Zn<9:5> | Zd<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); + VIXL_ASSERT(AreSameLaneSize(zd, zn, zm)); + + Emit(0x45009400 | SVESize(zd) | Rd(zd) | Rn(zn) | Rm(zm)); +} + +void Assembler::faddp(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm) { + // FADDP ., /M, ., . + // 0110 0100 ..01 0000 100. .... .... .... + // size<23:22> | opc<18:16> | Pg<12:10> | Zm<9:5> | Zdn<4:0> + + USE(zn); + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); + VIXL_ASSERT(zd.Is(zn)); + VIXL_ASSERT(AreSameLaneSize(zd, zn, zm)); + VIXL_ASSERT(zd.GetLaneSizeInBytes() != kBRegSizeInBytes); + + Emit(0x64108000 | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zm)); +} + +void Assembler::fcvtlt(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn) { + // FCVTLT .S, /M, .H + // 0110 0100 1000 1001 101. .... .... .... 
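+ // Two encodings share this mnemonic: half-to-single (0x6489a000) and + // single-to-double (0x64cba000); the operand lane sizes below select one.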
+ // opc<23:22> | opc2<17:16> | Pg<12:10> | Zn<9:5> | Zd<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); + + Instr op; + if (zd.IsLaneSizeD() && zn.IsLaneSizeS()) { + op = 0x64cba000; + } else { + VIXL_ASSERT(zd.IsLaneSizeS() && zn.IsLaneSizeH()); + op = 0x6489a000; + } + + Emit(op | Rd(zd) | PgLow8(pg) | Rn(zn)); +} + +void Assembler::fcvtnt(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn) { + // FCVTNT .S, /M, .D + // 0110 0100 1100 1010 101. .... .... .... + // opc<23:22> | opc2<17:16> | Pg<12:10> | Zn<9:5> | Zd<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); + + Instr op; + if (zd.IsLaneSizeS() && zn.IsLaneSizeD()) { + op = 0x64caa000; + } else { + VIXL_ASSERT(zd.IsLaneSizeH() && zn.IsLaneSizeS()); + op = 0x6488a000; + } + Emit(op | Rd(zd) | PgLow8(pg) | Rn(zn)); +} + +void Assembler::fcvtx(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn) { + // FCVTX .S, /M, .D + // 0110 0101 0000 1010 101. .... .... .... + // opc<23:22> | opc2<17:16> | Pg<12:10> | Zn<9:5> | Zd<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); + VIXL_ASSERT(zd.IsLaneSizeS() && zn.IsLaneSizeD()); + + Emit(0x650aa000 | Rd(zd) | PgLow8(pg) | Rn(zn)); +} + +void Assembler::fcvtxnt(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn) { + // FCVTXNT .S, /M, .D + // 0110 0100 0000 1010 101. .... .... .... + // opc<23:22> | opc2<17:16> | Pg<12:10> | Zn<9:5> | Zd<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); + + Emit(0x640aa000 | Rd(zd) | PgLow8(pg) | Rn(zn)); +} + +void Assembler::flogb(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn) { + // FLOGB ., /M, . + // 0110 0101 0001 1..0 101. .... .... .... + // opc<23:22> | opc2<18:17> | U<16> | Pg<12:10> | Zn<9:5> | Zd<4:0> | size<> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); + VIXL_ASSERT(AreSameLaneSize(zd, zn)); + VIXL_ASSERT(!zd.IsLaneSizeB()); + + // Size field is encoded in bits <18:17> rather than <23:22>. + Instr size = SVESize(zd) >> 5; + Emit(0x6518a000 | size | Rd(zd) | PgLow8(pg) | Rn(zn)); +} + +void Assembler::fmaxnmp(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm) { + // FMAXNMP ., /M, ., . + // 0110 0100 ..01 0100 100. .... .... .... + // size<23:22> | opc<18:16> | Pg<12:10> | Zm<9:5> | Zdn<4:0> + + USE(zn); + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); + VIXL_ASSERT(zd.Is(zn)); + VIXL_ASSERT(AreSameLaneSize(zd, zn, zm)); + VIXL_ASSERT(zd.GetLaneSizeInBytes() != kBRegSizeInBytes); + + Emit(0x64148000 | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zm)); +} + +void Assembler::fmaxp(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm) { + // FMAXP ., /M, ., . + // 0110 0100 ..01 0110 100. .... .... .... + // size<23:22> | opc<18:16> | Pg<12:10> | Zm<9:5> | Zdn<4:0> + + USE(zn); + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); + VIXL_ASSERT(zd.Is(zn)); + VIXL_ASSERT(AreSameLaneSize(zd, zn, zm)); + VIXL_ASSERT(zd.GetLaneSizeInBytes() != kBRegSizeInBytes); + + Emit(0x64168000 | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zm)); +} + +void Assembler::fminnmp(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm) { + // FMINNMP ., /M, ., . + // 0110 0100 ..01 0101 100. .... .... .... 
+ // size<23:22> | opc<18:16> | Pg<12:10> | Zm<9:5> | Zdn<4:0> + + USE(zn); + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); + VIXL_ASSERT(zd.Is(zn)); + VIXL_ASSERT(AreSameLaneSize(zd, zn, zm)); + VIXL_ASSERT(zd.GetLaneSizeInBytes() != kBRegSizeInBytes); + + Emit(0x64158000 | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zm)); +} + +void Assembler::fminp(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm) { + // FMINP ., /M, ., . + // 0110 0100 ..01 0111 100. .... .... .... + // size<23:22> | opc<18:16> | Pg<12:10> | Zm<9:5> | Zdn<4:0> + + USE(zn); + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); + VIXL_ASSERT(zd.Is(zn)); + VIXL_ASSERT(AreSameLaneSize(zd, zn, zm)); + VIXL_ASSERT(zd.GetLaneSizeInBytes() != kBRegSizeInBytes); + + Emit(0x64178000 | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zm)); +} + +void Assembler::fmlalb(const ZRegister& zda, + const ZRegister& zn, + const ZRegister& zm) { + // FMLALB .S, .H, .H + // 0110 0100 101. .... 1000 00.. .... .... + // o2<22> | Zm<20:16> | op<13> | T<10> | Zn<9:5> | Zda<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); + VIXL_ASSERT(zda.IsLaneSizeS()); + VIXL_ASSERT(zn.IsLaneSizeH() && zm.IsLaneSizeH()); + + Emit(0x64a08000 | Rd(zda) | Rn(zn) | Rm(zm)); +} + +void Assembler::fmlalb(const ZRegister& zda, + const ZRegister& zn, + const ZRegister& zm, + int index) { + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); + VIXL_ASSERT(zda.IsLaneSizeS()); + VIXL_ASSERT(zn.IsLaneSizeH() && zm.IsLaneSizeH()); + VIXL_ASSERT((zm.GetCode() <= 7) && (index <= 7)); + Instr zm_and_idx = (ExtractUnsignedBitfield32(2, 1, index) << 19) | + (ExtractBit(index, 0) << 11) | Rx<18, 16>(zm); + + Emit(0x64a04000 | Rd(zda) | Rn(zn) | zm_and_idx); +} + +void Assembler::fmlalt(const ZRegister& zda, + const ZRegister& zn, + const ZRegister& zm) { + // FMLALT .S, .H, .H + // 0110 0100 101. .... 1000 01.. .... .... + // o2<22> | Zm<20:16> | op<13> | T<10> | Zn<9:5> | Zda<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); + VIXL_ASSERT(zda.IsLaneSizeS()); + VIXL_ASSERT(zn.IsLaneSizeH() && zm.IsLaneSizeH()); + + Emit(0x64a08400 | Rd(zda) | Rn(zn) | Rm(zm)); +} + +void Assembler::fmlalt(const ZRegister& zda, + const ZRegister& zn, + const ZRegister& zm, + int index) { + // FMLALT .S, .H, .H + // 0110 0100 101. .... 1000 01.. .... .... + // o2<22> | Zm<20:16> | op<13> | T<10> | Zn<9:5> | Zda<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); + VIXL_ASSERT(zda.IsLaneSizeS()); + VIXL_ASSERT(zn.IsLaneSizeH() && zm.IsLaneSizeH()); + VIXL_ASSERT((zm.GetCode() <= 7) && (index <= 7)); + Instr zm_and_idx = (ExtractUnsignedBitfield32(2, 1, index) << 19) | + (ExtractBit(index, 0) << 11) | Rx<18, 16>(zm); + + Emit(0x64a04400 | Rd(zda) | Rn(zn) | zm_and_idx); +} + +void Assembler::fmlslb(const ZRegister& zda, + const ZRegister& zn, + const ZRegister& zm) { + // FMLSLB .S, .H, .H + // 0110 0100 101. .... 1010 00.. .... .... + // o2<22> | Zm<20:16> | op<13> | T<10> | Zn<9:5> | Zda<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); + VIXL_ASSERT(zda.IsLaneSizeS()); + VIXL_ASSERT(zn.IsLaneSizeH() && zm.IsLaneSizeH()); + + Emit(0x64a0a000 | Rd(zda) | Rn(zn) | Rm(zm)); +} + +void Assembler::fmlslb(const ZRegister& zda, + const ZRegister& zn, + const ZRegister& zm, + int index) { + // FMLSLB .S, .H, .H + // 0110 0100 101. .... 1010 00.. .... .... 
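+ // The lane index (0-7) is split across the encoding: index bits <2:1> go to + // instruction bits <20:19> and index bit <0> to bit <11>; only z0-z7 may be + // named as Zm.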
+ // o2<22> | Zm<20:16> | op<13> | T<10> | Zn<9:5> | Zda<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); + VIXL_ASSERT(zda.IsLaneSizeS()); + VIXL_ASSERT(zn.IsLaneSizeH() && zm.IsLaneSizeH()); + VIXL_ASSERT((zm.GetCode() <= 7) && (index <= 7)); + Instr zm_and_idx = (ExtractUnsignedBitfield32(2, 1, index) << 19) | + (ExtractBit(index, 0) << 11) | Rx<18, 16>(zm); + + Emit(0x64a06000 | Rd(zda) | Rn(zn) | zm_and_idx); +} + +void Assembler::fmlslt(const ZRegister& zda, + const ZRegister& zn, + const ZRegister& zm) { + // FMLSLT .S, .H, .H + // 0110 0100 101. .... 1010 01.. .... .... + // o2<22> | Zm<20:16> | op<13> | T<10> | Zn<9:5> | Zda<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); + VIXL_ASSERT(zda.IsLaneSizeS()); + VIXL_ASSERT(zn.IsLaneSizeH() && zm.IsLaneSizeH()); + + Emit(0x64a0a400 | Rd(zda) | Rn(zn) | Rm(zm)); +} + +void Assembler::fmlslt(const ZRegister& zda, + const ZRegister& zn, + const ZRegister& zm, + int index) { + // FMLSLT .S, .H, .H + // 0110 0100 101. .... 1010 01.. .... .... + // o2<22> | Zm<20:16> | op<13> | T<10> | Zn<9:5> | Zda<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); + VIXL_ASSERT(zda.IsLaneSizeS()); + VIXL_ASSERT(zn.IsLaneSizeH() && zm.IsLaneSizeH()); + VIXL_ASSERT((zm.GetCode() <= 7) && (index <= 7)); + Instr zm_and_idx = (ExtractUnsignedBitfield32(2, 1, index) << 19) | + (ExtractBit(index, 0) << 11) | Rx<18, 16>(zm); + + Emit(0x64a06400 | Rd(zda) | Rn(zn) | zm_and_idx); +} + +void Assembler::histcnt(const ZRegister& zd, + const PRegisterZ& pg, + const ZRegister& zn, + const ZRegister& zm) { + // HISTCNT ., /Z, ., . + // 0100 0101 ..1. .... 110. .... .... .... + // size<23:22> | Zm<20:16> | Pg<12:10> | Zn<9:5> | Zd<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); + VIXL_ASSERT(AreSameLaneSize(zd, zn, zm)); + VIXL_ASSERT(zd.IsLaneSizeS() || zd.IsLaneSizeD()); + + Emit(0x4520c000 | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zn) | Rm(zm)); +} + +void Assembler::histseg(const ZRegister& zd, + const ZRegister& zn, + const ZRegister& zm) { + // HISTSEG .B, .B, .B + // 0100 0101 ..1. .... 1010 00.. .... .... + // size<23:22> | Zm<20:16> | Zn<9:5> | Zd<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); + VIXL_ASSERT(AreSameLaneSize(zd, zn, zm)); + VIXL_ASSERT(zd.IsLaneSizeB()); + + Emit(0x4520a000 | Rd(zd) | Rn(zn) | Rm(zm)); +} + +void Assembler::match(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const ZRegister& zn, + const ZRegister& zm) { + // MATCH ., /Z, ., . + // 0100 0101 ..1. .... 100. .... ...0 .... + // size<23:22> | Zm<20:16> | Pg<12:10> | Zn<9:5> | op<4> | Pd<3:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); + VIXL_ASSERT(AreSameLaneSize(pd, zn, zm)); + VIXL_ASSERT(zm.IsLaneSizeB() || zm.IsLaneSizeH()); + + Emit(0x45208000 | SVESize(zm) | Pd(pd) | PgLow8(pg) | Rn(zn) | Rm(zm)); +} + +void Assembler::mla(const ZRegister& zda, + const ZRegister& zn, + const ZRegister& zm, + int index) { + // MLA .D, .D, .D[] + // 0100 0100 111. .... 0000 10.. .... .... + // size<23:22> | opc<20:16> | S<10> | Zn<9:5> | Zda<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); + VIXL_ASSERT(AreSameLaneSize(zda, zn, zm)); + + Instr synthesised_op = SVEMulIndexHelper(zda.GetLaneSizeInBytesLog2(), + zm, + index, + 0x44200800, + 0x44a00800, + 0x44e00800); + + Emit(synthesised_op | Rd(zda) | Rn(zn)); +} + +void Assembler::mls(const ZRegister& zda, + const ZRegister& zn, + const ZRegister& zm, + int index) { + // MLS .D, .D, .D[] + // 0100 0100 111. .... 0000 11.. .... .... 
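+ // SVEMulIndexHelper merges the lane index and Zm into the encoding and picks + // one of the three base opcodes below according to the destination lane size + // (H, S or D).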
+ // size<23:22> | opc<20:16> | S<10> | Zn<9:5> | Zda<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); + VIXL_ASSERT(AreSameLaneSize(zda, zn, zm)); + + Instr synthesised_op = SVEMulIndexHelper(zda.GetLaneSizeInBytesLog2(), + zm, + index, + 0x44200c00, + 0x44a00c00, + 0x44e00c00); + + Emit(synthesised_op | Rd(zda) | Rn(zn)); +} + +void Assembler::mul(const ZRegister& zd, + const ZRegister& zn, + const ZRegister& zm, + int index) { + // MUL .D, .D, .D[] + // 0100 0100 111. .... 1111 10.. .... .... + // size<23:22> | opc<20:16> | Zn<9:5> | Zd<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); + VIXL_ASSERT(AreSameLaneSize(zd, zn, zm)); + + Instr synthesised_op = SVEMulIndexHelper(zd.GetLaneSizeInBytesLog2(), + zm, + index, + 0x4420f800, + 0x44a0f800, + 0x44e0f800); + + Emit(synthesised_op | Rd(zd) | Rn(zn)); +} + +void Assembler::mul(const ZRegister& zd, + const ZRegister& zn, + const ZRegister& zm) { + // MUL ., ., . + // 0000 0100 ..1. .... 0110 00.. .... .... + // size<23:22> | Zm<20:16> | opc<11:10> | Zn<9:5> | Zd<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); + VIXL_ASSERT(AreSameLaneSize(zd, zn, zm)); + + Emit(0x04206000 | SVESize(zd) | Rd(zd) | Rn(zn) | Rm(zm)); +} + +void Assembler::nbsl(const ZRegister& zd, + const ZRegister& zn, + const ZRegister& zm, + const ZRegister& zk) { + // NBSL .D, .D, .D, .D + // 0000 0100 111. .... 0011 11.. .... .... + // opc<23:22> | Zm<20:16> | o2<10> | Zk<9:5> | Zdn<4:0> + + USE(zn); + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); + VIXL_ASSERT(zd.Is(zn)); + VIXL_ASSERT(AreSameLaneSize(zd, zn, zm, zk)); + VIXL_ASSERT(zd.IsLaneSizeD()); + + Emit(0x04e03c00 | Rd(zd) | Rm(zm) | Rn(zk)); +} + +void Assembler::nmatch(const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const ZRegister& zn, + const ZRegister& zm) { + // NMATCH ., /Z, ., . + // 0100 0101 ..1. .... 100. .... ...1 .... + // size<23:22> | Zm<20:16> | Pg<12:10> | Zn<9:5> | op<4> | Pd<3:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); + VIXL_ASSERT(AreSameLaneSize(pd, zn, zm)); + VIXL_ASSERT(zm.IsLaneSizeB() || zm.IsLaneSizeH()); + + Emit(0x45208010 | SVESize(zm) | Pd(pd) | PgLow8(pg) | Rn(zn) | Rm(zm)); +} + +void Assembler::pmul(const ZRegister& zd, + const ZRegister& zn, + const ZRegister& zm) { + // PMUL .B, .B, .B + // 0000 0100 001. .... 0110 01.. .... .... + // size<23:22> | Zm<20:16> | opc<11:10> | Zn<9:5> | Zd<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); + + Emit(0x04206400 | Rd(zd) | Rn(zn) | Rm(zm)); +} + +void Assembler::pmullb(const ZRegister& zd, + const ZRegister& zn, + const ZRegister& zm) { + // PMULLB ., ., . + // 0100 0101 ..0. .... 0110 10.. .... .... + // size<23:22> | Zm<20:16> | op<12> | U<11> | T<10> | Zn<9:5> | Zd<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); + VIXL_ASSERT(AreSameLaneSize(zn, zm)); + VIXL_ASSERT(!zd.IsLaneSizeB() && !zd.IsLaneSizeS()); + VIXL_ASSERT(zd.GetLaneSizeInBytes() == zn.GetLaneSizeInBytes() * 2); + // SVEPmull128 is not supported + VIXL_ASSERT(!zd.IsLaneSizeQ()); + + Emit(0x45006800 | SVESize(zd) | Rd(zd) | Rn(zn) | Rm(zm)); +} + +void Assembler::pmullt(const ZRegister& zd, + const ZRegister& zn, + const ZRegister& zm) { + // PMULLT ., ., . + // 0100 0101 ..0. .... 0110 11.. .... .... 
+ // size<23:22> | Zm<20:16> | op<12> | U<11> | T<10> | Zn<9:5> | Zd<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); + VIXL_ASSERT(AreSameLaneSize(zn, zm)); + VIXL_ASSERT(!zd.IsLaneSizeB() && !zd.IsLaneSizeS()); + VIXL_ASSERT(zd.GetLaneSizeInBytes() == zn.GetLaneSizeInBytes() * 2); + // SVEPmull128 is not supported + VIXL_ASSERT(!zd.IsLaneSizeQ()); + + Emit(0x45006c00 | SVESize(zd) | Rd(zd) | Rn(zn) | Rm(zm)); +} + +void Assembler::raddhnb(const ZRegister& zd, + const ZRegister& zn, + const ZRegister& zm) { + // RADDHNB ., ., . + // 0100 0101 ..1. .... 0110 10.. .... .... + // size<23:22> | Zm<20:16> | S<12> | R<11> | T<10> | Zn<9:5> | Zd<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); + VIXL_ASSERT(AreSameLaneSize(zn, zm)); + VIXL_ASSERT(zn.GetLaneSizeInBytes() == (zd.GetLaneSizeInBytes() * 2)); + VIXL_ASSERT(zn.GetLaneSizeInBytes() != kBRegSizeInBytes); + + Emit(0x45206800 | SVESize(zn) | Rd(zd) | Rn(zn) | Rm(zm)); +} + +void Assembler::raddhnt(const ZRegister& zd, + const ZRegister& zn, + const ZRegister& zm) { + // RADDHNT ., ., . + // 0100 0101 ..1. .... 0110 11.. .... .... + // size<23:22> | Zm<20:16> | S<12> | R<11> | T<10> | Zn<9:5> | Zd<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); + VIXL_ASSERT(AreSameLaneSize(zn, zm)); + VIXL_ASSERT(zn.GetLaneSizeInBytes() == (zd.GetLaneSizeInBytes() * 2)); + VIXL_ASSERT(zn.GetLaneSizeInBytes() != kBRegSizeInBytes); + + Emit(0x45206c00 | SVESize(zn) | Rd(zd) | Rn(zn) | Rm(zm)); +} + +#define VIXL_SVE_SHR_LIST(V) \ + V(rshrnb, 0x45201800) \ + V(rshrnt, 0x45201c00) \ + V(shrnb, 0x45201000) \ + V(shrnt, 0x45201400) \ + V(sqrshrnb, 0x45202800) \ + V(sqrshrnt, 0x45202c00) \ + V(sqrshrunb, 0x45200800) \ + V(sqrshrunt, 0x45200c00) \ + V(sqshrnb, 0x45202000) \ + V(sqshrnt, 0x45202400) \ + V(sqshrunb, 0x45200000) \ + V(sqshrunt, 0x45200400) \ + V(uqrshrnb, 0x45203800) \ + V(uqrshrnt, 0x45203c00) \ + V(uqshrnb, 0x45203000) \ + V(uqshrnt, 0x45203400) + +#define VIXL_DEFINE_ASM_FUNC(MNE, X) \ + void Assembler::MNE(const ZRegister& zd, const ZRegister& zn, int shift) { \ + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); \ + VIXL_ASSERT(!zd.IsLaneSizeD() && !zd.IsLaneSizeQ()); \ + VIXL_ASSERT(zn.GetLaneSizeInBytes() == (zd.GetLaneSizeInBytes() * 2)); \ + Instr encoded_imm = \ + EncodeSVEShiftRightImmediate(shift, zd.GetLaneSizeInBits()); \ + SVEBitwiseShiftImmediate(zd, zn, encoded_imm, X); \ + } +VIXL_SVE_SHR_LIST(VIXL_DEFINE_ASM_FUNC) +#undef VIXL_DEFINE_ASM_FUNC + +void Assembler::rsubhnb(const ZRegister& zd, + const ZRegister& zn, + const ZRegister& zm) { + // RSUBHNB ., ., . + // 0100 0101 ..1. .... 0111 10.. .... .... + // size<23:22> | Zm<20:16> | S<12> | R<11> | T<10> | Zn<9:5> | Zd<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); + VIXL_ASSERT(AreSameLaneSize(zn, zm)); + VIXL_ASSERT(zn.GetLaneSizeInBytes() == (zd.GetLaneSizeInBytes() * 2)); + VIXL_ASSERT(zn.GetLaneSizeInBytes() != kBRegSizeInBytes); + + Emit(0x45207800 | SVESize(zn) | Rd(zd) | Rn(zn) | Rm(zm)); +} + +void Assembler::rsubhnt(const ZRegister& zd, + const ZRegister& zn, + const ZRegister& zm) { + // RSUBHNT ., ., . + // 0100 0101 ..1. .... 0111 11.. .... .... 
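+ // As with the other narrowing pairs in this file, the 'B' form writes the + // even-numbered (bottom) destination elements and the 'T' form writes the + // odd-numbered (top) ones.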
+ // size<23:22> | Zm<20:16> | S<12> | R<11> | T<10> | Zn<9:5> | Zd<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); + VIXL_ASSERT(AreSameLaneSize(zn, zm)); + VIXL_ASSERT(zn.GetLaneSizeInBytes() == (zd.GetLaneSizeInBytes() * 2)); + VIXL_ASSERT(zn.GetLaneSizeInBytes() != kBRegSizeInBytes); + + Emit(0x45207c00 | SVESize(zn) | Rd(zd) | Rn(zn) | Rm(zm)); +} + +void Assembler::saba(const ZRegister& zda, + const ZRegister& zn, + const ZRegister& zm) { + // SABA ., ., . + // 0100 0101 ..0. .... 1111 10.. .... .... + // size<23:22> | Zm<20:16> | U<10> | Zn<9:5> | Zda<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); + VIXL_ASSERT(AreSameLaneSize(zda, zn, zm)); + + Emit(0x4500f800 | SVESize(zda) | Rd(zda) | Rn(zn) | Rm(zm)); +} + +void Assembler::sabalb(const ZRegister& zda, + const ZRegister& zn, + const ZRegister& zm) { + // SABALB ., ., . + // 0100 0101 ..0. .... 1100 00.. .... .... + // size<23:22> | Zm<20:16> | U<11> | T<10> | Zn<9:5> | Zda<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); + VIXL_ASSERT(AreSameLaneSize(zn, zm)); + VIXL_ASSERT(zda.GetLaneSizeInBytes() == (zn.GetLaneSizeInBytes() * 2)); + VIXL_ASSERT(zda.GetLaneSizeInBytes() != kBRegSizeInBytes); + + Emit(0x4500c000 | SVESize(zda) | Rd(zda) | Rn(zn) | Rm(zm)); +} + +void Assembler::sabalt(const ZRegister& zda, + const ZRegister& zn, + const ZRegister& zm) { + // SABALT ., ., . + // 0100 0101 ..0. .... 1100 01.. .... .... + // size<23:22> | Zm<20:16> | U<11> | T<10> | Zn<9:5> | Zda<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); + VIXL_ASSERT(AreSameLaneSize(zn, zm)); + VIXL_ASSERT(zda.GetLaneSizeInBytes() == (zn.GetLaneSizeInBytes() * 2)); + VIXL_ASSERT(zda.GetLaneSizeInBytes() != kBRegSizeInBytes); + + Emit(0x4500c400 | SVESize(zda) | Rd(zda) | Rn(zn) | Rm(zm)); +} + +void Assembler::sabdlb(const ZRegister& zd, + const ZRegister& zn, + const ZRegister& zm) { + // SABDLB ., ., . + // 0100 0101 ..0. .... 0011 00.. .... .... + // size<23:22> | Zm<20:16> | op<13> | S<12> | U<11> | T<10> | Zn<9:5> | + // Zd<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); + VIXL_ASSERT(AreSameLaneSize(zn, zm)); + VIXL_ASSERT(zd.GetLaneSizeInBytes() == (zn.GetLaneSizeInBytes() * 2)); + VIXL_ASSERT(zd.GetLaneSizeInBytes() != kBRegSizeInBytes); + + Emit(0x45003000 | SVESize(zd) | Rd(zd) | Rn(zn) | Rm(zm)); +} + +void Assembler::sabdlt(const ZRegister& zd, + const ZRegister& zn, + const ZRegister& zm) { + // SABDLT ., ., . + // 0100 0101 ..0. .... 0011 01.. .... .... + // size<23:22> | Zm<20:16> | op<13> | S<12> | U<11> | T<10> | Zn<9:5> | + // Zd<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); + VIXL_ASSERT(AreSameLaneSize(zn, zm)); + VIXL_ASSERT(zd.GetLaneSizeInBytes() == (zn.GetLaneSizeInBytes() * 2)); + VIXL_ASSERT(zd.GetLaneSizeInBytes() != kBRegSizeInBytes); + + Emit(0x45003400 | SVESize(zd) | Rd(zd) | Rn(zn) | Rm(zm)); +} + +void Assembler::sadalp(const ZRegister& zda, + const PRegisterM& pg, + const ZRegister& zn) { + // SADALP ., /M, . + // 0100 0100 ..00 0100 101. .... .... .... + // size<23:22> | U<16> | Pg<12:10> | Zn<9:5> | Zda<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); + VIXL_ASSERT(zda.GetLaneSizeInBytes() == (zn.GetLaneSizeInBytes() * 2)); + VIXL_ASSERT(zda.GetLaneSizeInBytes() != kBRegSizeInBytes); + + Emit(0x4404a000 | SVESize(zda) | Rd(zda) | PgLow8(pg) | Rn(zn)); +} + +void Assembler::saddlb(const ZRegister& zd, + const ZRegister& zn, + const ZRegister& zm) { + // SADDLB ., ., . + // 0100 0101 ..0. .... 0000 00.. .... .... 
+ // size<23:22> | Zm<20:16> | op<13> | S<12> | U<11> | T<10> | Zn<9:5> | + // Zd<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); + VIXL_ASSERT(AreSameLaneSize(zn, zm)); + VIXL_ASSERT(zd.GetLaneSizeInBytes() == (zn.GetLaneSizeInBytes() * 2)); + VIXL_ASSERT(zd.GetLaneSizeInBytes() != kBRegSizeInBytes); + + Emit(0x45000000 | SVESize(zd) | Rd(zd) | Rn(zn) | Rm(zm)); +} + +void Assembler::saddlbt(const ZRegister& zd, + const ZRegister& zn, + const ZRegister& zm) { + // SADDLBT ., ., . + // 0100 0101 ..0. .... 1000 00.. .... .... + // size<23:22> | Zm<20:16> | S<11> | tb<10> | Zn<9:5> | Zd<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); + VIXL_ASSERT(AreSameLaneSize(zn, zm)); + VIXL_ASSERT(zd.GetLaneSizeInBytes() == (zn.GetLaneSizeInBytes() * 2)); + VIXL_ASSERT(zd.IsLaneSizeD() || zd.IsLaneSizeH() || zd.IsLaneSizeS()); + + Emit(0x45008000 | SVESize(zd) | Rd(zd) | Rn(zn) | Rm(zm)); +} + +void Assembler::saddlt(const ZRegister& zd, + const ZRegister& zn, + const ZRegister& zm) { + // SADDLT ., ., . + // 0100 0101 ..0. .... 0000 01.. .... .... + // size<23:22> | Zm<20:16> | op<13> | S<12> | U<11> | T<10> | Zn<9:5> | + // Zd<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); + VIXL_ASSERT(AreSameLaneSize(zn, zm)); + VIXL_ASSERT(zd.GetLaneSizeInBytes() == (zn.GetLaneSizeInBytes() * 2)); + VIXL_ASSERT(zd.GetLaneSizeInBytes() != kBRegSizeInBytes); + + Emit(0x45000400 | SVESize(zd) | Rd(zd) | Rn(zn) | Rm(zm)); +} + +void Assembler::saddwb(const ZRegister& zd, + const ZRegister& zn, + const ZRegister& zm) { + // SADDWB ., ., . + // 0100 0101 ..0. .... 0100 00.. .... .... + // size<23:22> | Zm<20:16> | S<12> | U<11> | T<10> | Zn<9:5> | Zd<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); + VIXL_ASSERT(AreSameLaneSize(zd, zn)); + VIXL_ASSERT(zd.GetLaneSizeInBytes() == (zm.GetLaneSizeInBytes() * 2)); + VIXL_ASSERT(zd.GetLaneSizeInBytes() != kBRegSizeInBytes); + + Emit(0x45004000 | SVESize(zd) | Rd(zd) | Rn(zn) | Rm(zm)); +} + +void Assembler::saddwt(const ZRegister& zd, + const ZRegister& zn, + const ZRegister& zm) { + // SADDWT ., ., . + // 0100 0101 ..0. .... 0100 01.. .... .... + // size<23:22> | Zm<20:16> | S<12> | U<11> | T<10> | Zn<9:5> | Zd<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); + VIXL_ASSERT(AreSameLaneSize(zd, zn)); + VIXL_ASSERT(zd.GetLaneSizeInBytes() == (zm.GetLaneSizeInBytes() * 2)); + VIXL_ASSERT(zd.GetLaneSizeInBytes() != kBRegSizeInBytes); + + Emit(0x45004400 | SVESize(zd) | Rd(zd) | Rn(zn) | Rm(zm)); +} + +void Assembler::sbclb(const ZRegister& zda, + const ZRegister& zn, + const ZRegister& zm) { + // SBCLB ., ., . + // 0100 0101 1.0. .... 1101 00.. .... .... + // size<23:22> | Zm<20:16> | T<10> | Zn<9:5> | Zda<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); + VIXL_ASSERT(AreSameLaneSize(zda, zn, zm)); + VIXL_ASSERT(zda.IsLaneSizeS() || zda.IsLaneSizeD()); + + Instr sz = zda.IsLaneSizeD() ? (1 << 22) : 0; + Emit(0x4580d000 | sz | Rd(zda) | Rn(zn) | Rm(zm)); +} + +void Assembler::sbclt(const ZRegister& zda, + const ZRegister& zn, + const ZRegister& zm) { + // SBCLT ., ., . + // 0100 0101 1.0. .... 1101 01.. .... .... + // size<23:22> | Zm<20:16> | T<10> | Zn<9:5> | Zda<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); + VIXL_ASSERT(AreSameLaneSize(zda, zn, zm)); + VIXL_ASSERT(zda.IsLaneSizeS() || zda.IsLaneSizeD()); + + Instr sz = zda.IsLaneSizeD() ? (1 << 22) : 0; + Emit(0x4580d400 | sz | Rd(zda) | Rn(zn) | Rm(zm)); +} + +void Assembler::shadd(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm) { + // SHADD ., /M, ., . 
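+ // Destructive predicated form: zd must alias zn (asserted below), so the + // encoding only carries pg, zm and the combined zdn register.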
+ // 0100 0100 ..01 0000 100. .... .... .... + // size<23:22> | R<18> | S<17> | U<16> | Pg<12:10> | Zm<9:5> | Zdn<4:0> + + USE(zn); + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); + VIXL_ASSERT(zd.Is(zn)); + VIXL_ASSERT(AreSameLaneSize(zd, zn, zm)); + + Emit(0x44108000 | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zm)); +} + +void Assembler::shsub(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm) { + // SHSUB ., /M, ., . + // 0100 0100 ..01 0010 100. .... .... .... + // size<23:22> | R<18> | S<17> | U<16> | Pg<12:10> | Zm<9:5> | Zdn<4:0> + + USE(zn); + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); + VIXL_ASSERT(zd.Is(zn)); + VIXL_ASSERT(AreSameLaneSize(zd, zn, zm)); + + Emit(0x44128000 | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zm)); +} + +void Assembler::shsubr(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm) { + // SHSUBR ., /M, ., . + // 0100 0100 ..01 0110 100. .... .... .... + // size<23:22> | R<18> | S<17> | U<16> | Pg<12:10> | Zm<9:5> | Zdn<4:0> + + USE(zn); + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); + VIXL_ASSERT(zd.Is(zn)); + VIXL_ASSERT(AreSameLaneSize(zd, zn, zm)); + + Emit(0x44168000 | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zm)); +} + +void Assembler::sli(const ZRegister& zd, const ZRegister& zn, int shift) { + // SLI ., ., # + // 0100 0101 ..0. .... 1111 01.. .... .... + // tszh<23:22> | tszl<20:19> | imm3<18:16> | op<10> | Zn<9:5> | Zd<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); + VIXL_ASSERT(AreSameLaneSize(zd, zn)); + Instr encoded_imm = + EncodeSVEShiftLeftImmediate(shift, zd.GetLaneSizeInBits()); + + SVEBitwiseShiftImmediate(zd, zn, encoded_imm, 0x4500f400); +} + +void Assembler::smaxp(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm) { + // SMAXP ., /M, ., . + // 0100 0100 ..01 0100 101. .... .... .... + // size<23:22> | opc<18:17> | U<16> | Pg<12:10> | Zm<9:5> | Zdn<4:0> + + USE(zn); + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); + VIXL_ASSERT(zd.Is(zn)); + VIXL_ASSERT(AreSameLaneSize(zd, zn, zm)); + + Emit(0x4414a000 | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zm)); +} + +void Assembler::sminp(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm) { + // SMINP ., /M, ., . + // 0100 0100 ..01 0110 101. .... .... .... 
+ // size<23:22> | opc<18:17> | U<16> | Pg<12:10> | Zm<9:5> | Zdn<4:0> + + USE(zn); + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); + VIXL_ASSERT(zd.Is(zn)); + VIXL_ASSERT(AreSameLaneSize(zd, zn, zm)); + + Emit(0x4416a000 | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zm)); +} + +#define VIXL_SVE_MULL_INDEX_LIST(V) \ + V(smullb, 0x44a0c000) \ + V(smullt, 0x44a0c400) \ + V(umullb, 0x44a0d000) \ + V(umullt, 0x44a0d400) \ + V(smlalb, 0x44a08000) \ + V(smlalt, 0x44a08400) \ + V(smlslb, 0x44a0a000) \ + V(smlslt, 0x44a0a400) \ + V(umlalb, 0x44a09000) \ + V(umlalt, 0x44a09400) \ + V(umlslb, 0x44a0b000) \ + V(umlslt, 0x44a0b400) \ + V(sqdmullb, 0x44a0e000) \ + V(sqdmullt, 0x44a0e400) + +#define VIXL_DEFINE_ASM_FUNC(MNE, OP) \ + void Assembler::MNE(const ZRegister& zda, \ + const ZRegister& zn, \ + const ZRegister& zm, \ + int index) { \ + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); \ + VIXL_ASSERT(AreSameLaneSize(zn, zm)); \ + VIXL_ASSERT(zda.IsLaneSizeD() || zda.IsLaneSizeS()); \ + VIXL_ASSERT(zda.GetLaneSizeInBytes() == zn.GetLaneSizeInBytes() * 2); \ + Instr zm_with_index = SVEMulLongIndexHelper(zm, index); \ + Emit(OP | SVESize(zda) | Rd(zda) | Rn(zn) | zm_with_index); \ + } +VIXL_SVE_MULL_INDEX_LIST(VIXL_DEFINE_ASM_FUNC) +#undef VIXL_DEFINE_ASM_FUNC + +void Assembler::smlalb(const ZRegister& zda, + const ZRegister& zn, + const ZRegister& zm) { + // SMLALB ., ., . + // 0100 0100 ..0. .... 0100 00.. .... .... + // size<23:22> | Zm<20:16> | S<12> | U<11> | T<10> | Zn<9:5> | Zda<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); + VIXL_ASSERT(!zda.IsLaneSizeB()); + VIXL_ASSERT(AreSameLaneSize(zn, zm)); + VIXL_ASSERT(zda.GetLaneSizeInBytes() == (zn.GetLaneSizeInBytes() * 2)); + + Emit(0x44004000 | SVESize(zda) | Rd(zda) | Rn(zn) | Rm(zm)); +} + +void Assembler::smlalt(const ZRegister& zda, + const ZRegister& zn, + const ZRegister& zm) { + // SMLALT ., ., . + // 0100 0100 ..0. .... 0100 01.. .... .... + // size<23:22> | Zm<20:16> | S<12> | U<11> | T<10> | Zn<9:5> | Zda<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); + VIXL_ASSERT(!zda.IsLaneSizeB()); + VIXL_ASSERT(AreSameLaneSize(zn, zm)); + VIXL_ASSERT(zda.GetLaneSizeInBytes() == (zn.GetLaneSizeInBytes() * 2)); + + Emit(0x44004400 | SVESize(zda) | Rd(zda) | Rn(zn) | Rm(zm)); +} + +void Assembler::smlslb(const ZRegister& zda, + const ZRegister& zn, + const ZRegister& zm) { + // SMLSLB ., ., . + // 0100 0100 ..0. .... 0101 00.. .... .... + // size<23:22> | Zm<20:16> | S<12> | U<11> | T<10> | Zn<9:5> | Zda<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); + VIXL_ASSERT(!zda.IsLaneSizeB()); + VIXL_ASSERT(AreSameLaneSize(zn, zm)); + VIXL_ASSERT(zda.GetLaneSizeInBytes() == (zn.GetLaneSizeInBytes() * 2)); + + Emit(0x44005000 | SVESize(zda) | Rd(zda) | Rn(zn) | Rm(zm)); +} + +void Assembler::smlslt(const ZRegister& zda, + const ZRegister& zn, + const ZRegister& zm) { + // SMLSLT ., ., . + // 0100 0100 ..0. .... 0101 01.. .... .... + // size<23:22> | Zm<20:16> | S<12> | U<11> | T<10> | Zn<9:5> | Zda<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); + VIXL_ASSERT(!zda.IsLaneSizeB()); + VIXL_ASSERT(AreSameLaneSize(zn, zm)); + VIXL_ASSERT(zda.GetLaneSizeInBytes() == (zn.GetLaneSizeInBytes() * 2)); + + Emit(0x44005400 | SVESize(zda) | Rd(zda) | Rn(zn) | Rm(zm)); +} + +void Assembler::smulh(const ZRegister& zd, + const ZRegister& zn, + const ZRegister& zm) { + // SMULH ., ., . + // 0000 0100 ..1. .... 0110 10.. .... .... 
+ // size<23:22> | Zm<20:16> | opc<11:10> | Zn<9:5> | Zd<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); + VIXL_ASSERT(AreSameLaneSize(zd, zn, zm)); + + Emit(0x04206800 | SVESize(zd) | Rd(zd) | Rn(zn) | Rm(zm)); +} + +void Assembler::smullb(const ZRegister& zd, + const ZRegister& zn, + const ZRegister& zm) { + // SMULLB ., ., . + // 0100 0101 ..0. .... 0111 00.. .... .... + // size<23:22> | Zm<20:16> | op<12> | U<11> | T<10> | Zn<9:5> | Zd<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); + VIXL_ASSERT(AreSameLaneSize(zn, zm)); + VIXL_ASSERT(!zd.IsLaneSizeB() && !zd.IsLaneSizeQ()); + VIXL_ASSERT(zd.GetLaneSizeInBytes() == zn.GetLaneSizeInBytes() * 2); + + Emit(0x45007000 | SVESize(zd) | Rd(zd) | Rn(zn) | Rm(zm)); +} + +void Assembler::smullt(const ZRegister& zd, + const ZRegister& zn, + const ZRegister& zm) { + // SMULLT ., ., . + // 0100 0101 ..0. .... 0111 01.. .... .... + // size<23:22> | Zm<20:16> | op<12> | U<11> | T<10> | Zn<9:5> | Zd<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); + VIXL_ASSERT(AreSameLaneSize(zn, zm)); + VIXL_ASSERT(!zd.IsLaneSizeB() && !zd.IsLaneSizeQ()); + VIXL_ASSERT(zd.GetLaneSizeInBytes() == zn.GetLaneSizeInBytes() * 2); + + Emit(0x45007400 | SVESize(zd) | Rd(zd) | Rn(zn) | Rm(zm)); +} + +void Assembler::sqabs(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn) { + // SQABS ., /M, . + // 0100 0100 ..00 1000 101. .... .... .... + // size<23:22> | Q<19> | opc<17:16> | Pg<12:10> | Zn<9:5> | Zd<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); + VIXL_ASSERT(AreSameLaneSize(zd, zn)); + + Emit(0x4408a000 | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zn)); +} + +void Assembler::sqadd(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm) { + // SQADD ., /M, ., . + // 0100 0100 ..01 1000 100. .... .... .... + // size<23:22> | op<18> | S<17> | U<16> | Pg<12:10> | Zm<9:5> | Zdn<4:0> + + USE(zn); + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); + VIXL_ASSERT(zd.Is(zn)); + VIXL_ASSERT(AreSameLaneSize(zd, zn, zm)); + + Emit(0x44188000 | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zm)); +} + +void Assembler::sqcadd(const ZRegister& zd, + const ZRegister& zn, + const ZRegister& zm, + int rot) { + // SQCADD ., ., ., + // 0100 0101 ..00 0001 1101 1... .... .... + // size<23:22> | op<16> | rot<10> | Zm<9:5> | Zdn<4:0> + + USE(zn); + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); + VIXL_ASSERT(zd.Is(zn)); + VIXL_ASSERT(AreSameLaneSize(zd, zn, zm)); + VIXL_ASSERT((rot == 90) || (rot == 270)); + + Instr rotate_bit = (rot == 90) ? 0 : (1 << 10); + Emit(0x4501d800 | rotate_bit | SVESize(zd) | Rd(zd) | Rn(zm)); +} + +// This prototype maps to 2 instruction encodings: +// sqdmlalb_z_zzzi_d +// sqdmlalb_z_zzzi_s +void Assembler::sqdmlalb(const ZRegister& zda, + const ZRegister& zn, + const ZRegister& zm, + int index) { + // SQDMLALB .D, .S, .S[] + // 0100 0100 111. .... 0010 .0.. .... .... 
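+ // With half-word sources (.S destination) the index has three bits and Zm is + // limited to z0-z7; with word sources (.D destination) the index has two bits + // and Zm is limited to z0-z15.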
+ // size<23:22> | opc<20:16> | S<12> | il<11> | T<10> | Zn<9:5> | Zda<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); + VIXL_ASSERT(AreSameLaneSize(zn, zm)); + VIXL_ASSERT(zda.IsLaneSizeS() || zda.IsLaneSizeD()); + VIXL_ASSERT(index >= 0); + + Instr zm_and_idx = 0; + if (zm.IsLaneSizeH()) { + // Zm<18:16> | i3h<20:19> | i3l<11> + VIXL_ASSERT((zm.GetCode() <= 7) && (index <= 7)); + zm_and_idx = (ExtractUnsignedBitfield32(2, 1, index) << 19) | + (ExtractBit(index, 0) << 11) | Rx<18, 16>(zm); + } else { + // Zm<19:16> | i2h<20> | i2l<11> + VIXL_ASSERT(zm.IsLaneSizeS()); + VIXL_ASSERT((zm.GetCode() <= 15) && (index <= 3)); + zm_and_idx = (ExtractBit(index, 1) << 20) | (ExtractBit(index, 0) << 11) | + Rx<19, 16>(zm); + } + + Emit(0x44202000 | zm_and_idx | SVESize(zda) | Rd(zda) | Rn(zn)); +} + +void Assembler::sqdmlalb(const ZRegister& zda, + const ZRegister& zn, + const ZRegister& zm) { + // SQDMLALB ., ., . + // 0100 0100 ..0. .... 0110 00.. .... .... + // size<23:22> | Zm<20:16> | S<11> | T<10> | Zn<9:5> | Zda<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); + VIXL_ASSERT(AreSameLaneSize(zn, zm)); + VIXL_ASSERT(zda.GetLaneSizeInBytes() == (zn.GetLaneSizeInBytes() * 2)); + VIXL_ASSERT(zda.GetLaneSizeInBytes() != kBRegSizeInBytes); + + Emit(0x44006000 | SVESize(zda) | Rd(zda) | Rn(zn) | Rm(zm)); +} + +void Assembler::sqdmlalbt(const ZRegister& zda, + const ZRegister& zn, + const ZRegister& zm) { + // SQDMLALBT ., ., . + // 0100 0100 ..0. .... 0000 10.. .... .... + // size<23:22> | Zm<20:16> | S<10> | Zn<9:5> | Zda<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); + VIXL_ASSERT(AreSameLaneSize(zn, zm)); + VIXL_ASSERT(zda.GetLaneSizeInBytes() == (zn.GetLaneSizeInBytes() * 2)); + VIXL_ASSERT(zda.GetLaneSizeInBytes() != kBRegSizeInBytes); + + Emit(0x44000800 | SVESize(zda) | Rd(zda) | Rn(zn) | Rm(zm)); +} + +// This prototype maps to 2 instruction encodings: +// sqdmlalt_z_zzzi_d +// sqdmlalt_z_zzzi_s +void Assembler::sqdmlalt(const ZRegister& zda, + const ZRegister& zn, + const ZRegister& zm, + int index) { + // SQDMLALT .D, .S, .S[] + // 0100 0100 111. .... 0010 .1.. .... .... + // size<23:22> | opc<20:16> | S<12> | il<11> | T<10> | Zn<9:5> | Zda<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); + VIXL_ASSERT(AreSameLaneSize(zn, zm)); + VIXL_ASSERT(zda.IsLaneSizeS() || zda.IsLaneSizeD()); + VIXL_ASSERT(index >= 0); + + Instr zm_and_idx = 0; + if (zm.IsLaneSizeH()) { + // Zm<18:16> | i3h<20:19> | i3l<11> + VIXL_ASSERT((zm.GetCode() <= 7) && (index <= 7)); + zm_and_idx = (ExtractUnsignedBitfield32(2, 1, index) << 19) | + (ExtractBit(index, 0) << 11) | Rx<18, 16>(zm); + } else { + // Zm<19:16> | i2h<20> | i2l<11> + VIXL_ASSERT(zm.IsLaneSizeS()); + VIXL_ASSERT((zm.GetCode() <= 15) && (index <= 3)); + zm_and_idx = (ExtractBit(index, 1) << 20) | (ExtractBit(index, 0) << 11) | + Rx<19, 16>(zm); + } + + Emit(0x44202400 | zm_and_idx | SVESize(zda) | Rd(zda) | Rn(zn)); +} + +void Assembler::sqdmlalt(const ZRegister& zda, + const ZRegister& zn, + const ZRegister& zm) { + // SQDMLALT ., ., . + // 0100 0100 ..0. .... 0110 01.. .... .... 
+ // size<23:22> | Zm<20:16> | S<11> | T<10> | Zn<9:5> | Zda<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); + VIXL_ASSERT(AreSameLaneSize(zn, zm)); + VIXL_ASSERT(zda.GetLaneSizeInBytes() == (zn.GetLaneSizeInBytes() * 2)); + VIXL_ASSERT(zda.GetLaneSizeInBytes() != kBRegSizeInBytes); + + Emit(0x44006400 | SVESize(zda) | Rd(zda) | Rn(zn) | Rm(zm)); +} + +// This prototype maps to 2 instruction encodings: +// sqdmlslb_z_zzzi_d +// sqdmlslb_z_zzzi_s +void Assembler::sqdmlslb(const ZRegister& zda, + const ZRegister& zn, + const ZRegister& zm, + int index) { + // SQDMLSLB .D, .S, .S[] + // 0100 0100 111. .... 0011 .0.. .... .... + // size<23:22> | opc<20:16> | S<12> | il<11> | T<10> | Zn<9:5> | Zda<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); + VIXL_ASSERT(AreSameLaneSize(zn, zm)); + VIXL_ASSERT(zda.IsLaneSizeS() || zda.IsLaneSizeD()); + VIXL_ASSERT(index >= 0); + + Instr zm_and_idx = 0; + if (zm.IsLaneSizeH()) { + // Zm<18:16> | i3h<20:19> | i3l<11> + VIXL_ASSERT((zm.GetCode() <= 7) && (index <= 7)); + zm_and_idx = (ExtractUnsignedBitfield32(2, 1, index) << 19) | + (ExtractBit(index, 0) << 11) | Rx<18, 16>(zm); + } else { + // Zm<19:16> | i2h<20> | i2l<11> + VIXL_ASSERT(zm.IsLaneSizeS()); + VIXL_ASSERT((zm.GetCode() <= 15) && (index <= 3)); + zm_and_idx = (ExtractBit(index, 1) << 20) | (ExtractBit(index, 0) << 11) | + Rx<19, 16>(zm); + } + + Emit(0x44203000 | zm_and_idx | SVESize(zda) | Rd(zda) | Rn(zn)); +} + +void Assembler::sqdmlslb(const ZRegister& zda, + const ZRegister& zn, + const ZRegister& zm) { + // SQDMLSLB ., ., . + // 0100 0100 ..0. .... 0110 10.. .... .... + // size<23:22> | Zm<20:16> | S<11> | T<10> | Zn<9:5> | Zda<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); + VIXL_ASSERT(AreSameLaneSize(zn, zm)); + VIXL_ASSERT(zda.GetLaneSizeInBytes() == (zn.GetLaneSizeInBytes() * 2)); + VIXL_ASSERT(zda.GetLaneSizeInBytes() != kBRegSizeInBytes); + + Emit(0x44006800 | SVESize(zda) | Rd(zda) | Rn(zn) | Rm(zm)); +} + +void Assembler::sqdmlslbt(const ZRegister& zda, + const ZRegister& zn, + const ZRegister& zm) { + // SQDMLSLBT ., ., . + // 0100 0100 ..0. .... 0000 11.. .... .... + // size<23:22> | Zm<20:16> | S<10> | Zn<9:5> | Zda<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); + VIXL_ASSERT(AreSameLaneSize(zn, zm)); + VIXL_ASSERT(zda.GetLaneSizeInBytes() == (zn.GetLaneSizeInBytes() * 2)); + VIXL_ASSERT(zda.GetLaneSizeInBytes() != kBRegSizeInBytes); + + Emit(0x44000c00 | SVESize(zda) | Rd(zda) | Rn(zn) | Rm(zm)); +} + +// This prototype maps to 2 instruction encodings: +// sqdmlslt_z_zzzi_d +// sqdmlslt_z_zzzi_s +void Assembler::sqdmlslt(const ZRegister& zda, + const ZRegister& zn, + const ZRegister& zm, + int index) { + // SQDMLSLT .D, .S, .S[] + // 0100 0100 111. .... 0011 .1.. .... .... 
+ // size<23:22> | opc<20:16> | S<12> | il<11> | T<10> | Zn<9:5> | Zda<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); + VIXL_ASSERT(AreSameLaneSize(zn, zm)); + VIXL_ASSERT(zda.IsLaneSizeS() || zda.IsLaneSizeD()); + VIXL_ASSERT(index >= 0); + + Instr zm_and_idx = 0; + if (zm.IsLaneSizeH()) { + // Zm<18:16> | i3h<20:19> | i3l<11> + VIXL_ASSERT((zm.GetCode() <= 7) && (index <= 7)); + zm_and_idx = (ExtractUnsignedBitfield32(2, 1, index) << 19) | + (ExtractBit(index, 0) << 11) | Rx<18, 16>(zm); + } else { + // Zm<19:16> | i2h<20> | i2l<11> + VIXL_ASSERT(zm.IsLaneSizeS()); + VIXL_ASSERT((zm.GetCode() <= 15) && (index <= 3)); + zm_and_idx = (ExtractBit(index, 1) << 20) | (ExtractBit(index, 0) << 11) | + Rx<19, 16>(zm); + } + + Emit(0x44203400 | zm_and_idx | SVESize(zda) | Rd(zda) | Rn(zn)); +} + +void Assembler::sqdmlslt(const ZRegister& zda, + const ZRegister& zn, + const ZRegister& zm) { + // SQDMLSLT ., ., . + // 0100 0100 ..0. .... 0110 11.. .... .... + // size<23:22> | Zm<20:16> | S<11> | T<10> | Zn<9:5> | Zda<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); + VIXL_ASSERT(AreSameLaneSize(zn, zm)); + VIXL_ASSERT(zda.GetLaneSizeInBytes() == (zn.GetLaneSizeInBytes() * 2)); + VIXL_ASSERT(zda.GetLaneSizeInBytes() != kBRegSizeInBytes); + + Emit(0x44006c00 | SVESize(zda) | Rd(zda) | Rn(zn) | Rm(zm)); +} + +void Assembler::sqdmulh(const ZRegister& zd, + const ZRegister& zn, + const ZRegister& zm, + int index) { + // SQDMULH .D, .D, .D[] + // 0100 0100 111. .... 1111 00.. .... .... + // size<23:22> | opc<20:16> | R<10> | Zn<9:5> | Zd<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); + VIXL_ASSERT(AreSameLaneSize(zd, zn, zm)); + + Instr synthesised_op = SVEMulIndexHelper(zd.GetLaneSizeInBytesLog2(), + zm, + index, + 0x4420f000, + 0x44a0f000, + 0x44e0f000); + + Emit(synthesised_op | Rd(zd) | Rn(zn)); +} + +void Assembler::sqdmulh(const ZRegister& zd, + const ZRegister& zn, + const ZRegister& zm) { + // SQDMULH ., ., . + // 0000 0100 ..1. .... 0111 00.. .... .... + // size<23:22> | Zm<20:16> | R<10> | Zn<9:5> | Zd<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); + VIXL_ASSERT(AreSameLaneSize(zd, zn, zm)); + + Emit(0x04207000 | SVESize(zd) | Rd(zd) | Rn(zn) | Rm(zm)); +} + +void Assembler::sqdmullb(const ZRegister& zd, + const ZRegister& zn, + const ZRegister& zm) { + // SQDMULLB ., ., . + // 0100 0101 ..0. .... 0110 00.. .... .... + // size<23:22> | Zm<20:16> | op<12> | U<11> | T<10> | Zn<9:5> | Zd<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); + VIXL_ASSERT(AreSameLaneSize(zn, zm)); + VIXL_ASSERT(!zd.IsLaneSizeB() && !zd.IsLaneSizeQ()); + VIXL_ASSERT(zd.GetLaneSizeInBytes() == zn.GetLaneSizeInBytes() * 2); + + Emit(0x45006000 | SVESize(zd) | Rd(zd) | Rn(zn) | Rm(zm)); +} + +void Assembler::sqdmullt(const ZRegister& zd, + const ZRegister& zn, + const ZRegister& zm) { + // SQDMULLT ., ., . + // 0100 0101 ..0. .... 0110 01.. .... .... + // size<23:22> | Zm<20:16> | op<12> | U<11> | T<10> | Zn<9:5> | Zd<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); + VIXL_ASSERT(AreSameLaneSize(zn, zm)); + VIXL_ASSERT(!zd.IsLaneSizeB() && !zd.IsLaneSizeQ()); + VIXL_ASSERT(zd.GetLaneSizeInBytes() == zn.GetLaneSizeInBytes() * 2); + + Emit(0x45006400 | SVESize(zd) | Rd(zd) | Rn(zn) | Rm(zm)); +} + +void Assembler::sqneg(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn) { + // SQNEG ., /M, . + // 0100 0100 ..00 1001 101. .... .... .... 
+ // size<23:22> | Q<19> | opc<17:16> | Pg<12:10> | Zn<9:5> | Zd<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); + VIXL_ASSERT(AreSameLaneSize(zd, zn)); + + Emit(0x4409a000 | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zn)); +} + +void Assembler::sqrdcmlah(const ZRegister& zda, + const ZRegister& zn, + const ZRegister& zm, + int index, + int rot) { + // SQRDCMLAH .H, .H, .H[], + // 0100 0100 101. .... 0111 .... .... .... + // size<23:22> | opc<20:16> | rot<11:10> | Zn<9:5> | Zda<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); + VIXL_ASSERT(AreSameLaneSize(zda, zn, zm)); + VIXL_ASSERT((rot == 0) || (rot == 90) || (rot == 180) || (rot == 270)); + + Instr rotate_bit = (rot / 90) << 10; + Emit(0x44a07000 | SVEMulComplexIndexHelper(zm, index) | rotate_bit | Rd(zda) | + Rn(zn)); +} + +void Assembler::sqrdcmlah(const ZRegister& zda, + const ZRegister& zn, + const ZRegister& zm, + int rot) { + // SQRDCMLAH ., ., ., + // 0100 0100 ..0. .... 0011 .... .... .... + // size<23:22> | Zm<20:16> | op<12> | rot<11:10> | Zn<9:5> | Zda<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); + VIXL_ASSERT(AreSameLaneSize(zda, zn, zm)); + VIXL_ASSERT((rot == 0) || (rot == 90) || (rot == 180) || (rot == 270)); + + Instr rotate_bit = (rot / 90) << 10; + Emit(0x44003000 | rotate_bit | SVESize(zda) | Rd(zda) | Rn(zn) | Rm(zm)); +} + +// This prototype maps to 3 instruction encodings: +// sqrdmlah_z_zzzi_d +// sqrdmlah_z_zzzi_h +// sqrdmlah_z_zzzi_s +void Assembler::sqrdmlah(const ZRegister& zda, + const ZRegister& zn, + const ZRegister& zm, + int index) { + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); + VIXL_ASSERT(AreSameLaneSize(zda, zn, zm)); + + Instr op_h = 0x44201000; + Instr op_s = op_h | (1 << 23); + Instr op_d = op_h | (3 << 22); + // The encoding of opcode, index, Zm, and size are synthesized in this + // variable. + Instr synthesized_op = SVEMulIndexHelper(zda.GetLaneSizeInBytesLog2(), + zm, + index, + op_h, + op_s, + op_d); + + Emit(synthesized_op | Rd(zda) | Rn(zn)); +} + +void Assembler::sqrdmlah(const ZRegister& zda, + const ZRegister& zn, + const ZRegister& zm) { + // SQRDMLAH ., ., . + // 0100 0100 ..0. .... 0111 00.. .... .... + // size<23:22> | Zm<20:16> | S<10> | Zn<9:5> | Zda<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); + VIXL_ASSERT(AreSameLaneSize(zda, zn, zm)); + + Emit(0x44007000 | SVESize(zda) | Rd(zda) | Rn(zn) | Rm(zm)); +} + +// This prototype maps to 3 instruction encodings: +// sqrdmlsh_z_zzzi_d +// sqrdmlsh_z_zzzi_h +// sqrdmlsh_z_zzzi_s +void Assembler::sqrdmlsh(const ZRegister& zda, + const ZRegister& zn, + const ZRegister& zm, + int index) { + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); + VIXL_ASSERT(AreSameLaneSize(zda, zn, zm)); + + Instr op_h = 0x44201400; + Instr op_s = op_h | (1 << 23); + Instr op_d = op_h | (3 << 22); + // The encoding of opcode, index, Zm, and size are synthesized in this + // variable. + Instr synthesized_op = SVEMulIndexHelper(zda.GetLaneSizeInBytesLog2(), + zm, + index, + op_h, + op_s, + op_d); + + Emit(synthesized_op | Rd(zda) | Rn(zn)); +} + +void Assembler::sqrdmlsh(const ZRegister& zda, + const ZRegister& zn, + const ZRegister& zm) { + // SQRDMLSH ., ., . + // 0100 0100 ..0. .... 0111 01.. .... .... 
+ // size<23:22> | Zm<20:16> | S<10> | Zn<9:5> | Zda<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); + VIXL_ASSERT(AreSameLaneSize(zda, zn, zm)); + + Emit(0x44007400 | SVESize(zda) | Rd(zda) | Rn(zn) | Rm(zm)); +} + +void Assembler::sqrdmulh(const ZRegister& zd, + const ZRegister& zn, + const ZRegister& zm, + int index) { + // SQRDMULH .D, .D, .D[] + // 0100 0100 111. .... 1111 01.. .... .... + // size<23:22> | opc<20:16> | R<10> | Zn<9:5> | Zd<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); + VIXL_ASSERT(AreSameLaneSize(zd, zn, zm)); + + Instr synthesised_op = SVEMulIndexHelper(zd.GetLaneSizeInBytesLog2(), + zm, + index, + 0x4420f400, + 0x44a0f400, + 0x44e0f400); + + Emit(synthesised_op | Rd(zd) | Rn(zn)); +} + +void Assembler::sqrdmulh(const ZRegister& zd, + const ZRegister& zn, + const ZRegister& zm) { + // SQRDMULH ., ., . + // 0000 0100 ..1. .... 0111 01.. .... .... + // size<23:22> | Zm<20:16> | R<10> | Zn<9:5> | Zd<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); + VIXL_ASSERT(AreSameLaneSize(zd, zn, zm)); + + Emit(0x04207400 | SVESize(zd) | Rd(zd) | Rn(zn) | Rm(zm)); +} + +void Assembler::sqrshl(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm) { + // SQRSHL ., /M, ., . + // 0100 0100 ..00 1010 100. .... .... .... + // size<23:22> | Q<19> | R<18> | N<17> | U<16> | Pg<12:10> | Zm<9:5> | + // Zdn<4:0> + + USE(zn); + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); + VIXL_ASSERT(zd.Is(zn)); + VIXL_ASSERT(AreSameLaneSize(zd, zn, zm)); + + Emit(0x440a8000 | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zm)); +} + +void Assembler::sqrshlr(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm) { + // SQRSHLR ., /M, ., . + // 0100 0100 ..00 1110 100. .... .... .... + // size<23:22> | Q<19> | R<18> | N<17> | U<16> | Pg<12:10> | Zm<9:5> | + // Zdn<4:0> + + USE(zn); + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); + VIXL_ASSERT(zd.Is(zn)); + VIXL_ASSERT(AreSameLaneSize(zd, zn, zm)); + + Emit(0x440e8000 | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zm)); +} + +void Assembler::sqshl(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + int shift) { + // SQSHL ., /M, ., # + // 0000 0100 ..00 0110 100. .... .... .... + // tszh<23:22> | opc<19:18> | L<17> | U<16> | Pg<12:10> | tszl<9:8> | + // imm3<7:5> | Zdn<4:0> + + USE(zn); + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); + VIXL_ASSERT(zd.Is(zn)); + Instr encoded_imm = + EncodeSVEShiftLeftImmediate(shift, zd.GetLaneSizeInBits()); + SVEBitwiseShiftImmediatePred(zd, pg, encoded_imm, 0x04068000); +} + +void Assembler::sqshl(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm) { + // SQSHL ., /M, ., . + // 0100 0100 ..00 1000 100. .... .... .... + // size<23:22> | Q<19> | R<18> | N<17> | U<16> | Pg<12:10> | Zm<9:5> | + // Zdn<4:0> + + USE(zn); + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); + VIXL_ASSERT(zd.Is(zn)); + VIXL_ASSERT(AreSameLaneSize(zd, zn, zm)); + + Emit(0x44088000 | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zm)); +} + +void Assembler::sqshlr(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm) { + // SQSHLR ., /M, ., . + // 0100 0100 ..00 1100 100. .... .... .... 
+ // size<23:22> | Q<19> | R<18> | N<17> | U<16> | Pg<12:10> | Zm<9:5> | + // Zdn<4:0> + + USE(zn); + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); + VIXL_ASSERT(zd.Is(zn)); + VIXL_ASSERT(AreSameLaneSize(zd, zn, zm)); + + Emit(0x440c8000 | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zm)); +} + +void Assembler::sqshlu(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + int shift) { + // SQSHLU ., /M, ., # + // 0000 0100 ..00 1111 100. .... .... .... + // tszh<23:22> | opc<19:18> | L<17> | U<16> | Pg<12:10> | tszl<9:8> | + // imm3<7:5> | Zdn<4:0> + + USE(zn); + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); + VIXL_ASSERT(zd.Is(zn)); + + Instr encoded_imm = + EncodeSVEShiftLeftImmediate(shift, zd.GetLaneSizeInBits()); + SVEBitwiseShiftImmediatePred(zd, pg, encoded_imm, 0x040f8000); +} + +void Assembler::sqsub(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm) { + // SQSUB ., /M, ., . + // 0100 0100 ..01 1010 100. .... .... .... + // size<23:22> | op<18> | S<17> | U<16> | Pg<12:10> | Zm<9:5> | Zdn<4:0> + + USE(zn); + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); + VIXL_ASSERT(zd.Is(zn)); + VIXL_ASSERT(AreSameLaneSize(zd, zn, zm)); + + Emit(0x441a8000 | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zm)); +} + +void Assembler::sqsubr(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm) { + // SQSUBR ., /M, ., . + // 0100 0100 ..01 1110 100. .... .... .... + // size<23:22> | op<18> | S<17> | U<16> | Pg<12:10> | Zm<9:5> | Zdn<4:0> + + USE(zn); + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); + VIXL_ASSERT(zd.Is(zn)); + VIXL_ASSERT(AreSameLaneSize(zd, zn, zm)); + + Emit(0x441e8000 | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zm)); +} + +void Assembler::sqxtnb(const ZRegister& zd, const ZRegister& zn) { + // SQXTNB ., . + // 0100 0101 0.1. .000 0100 00.. .... .... + // tszh<22> | tszl<20:19> | opc<12:11> | T<10> | Zn<9:5> | Zd<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); + VIXL_ASSERT(zd.IsLaneSizeS() || zd.IsLaneSizeH() || zd.IsLaneSizeB()); + VIXL_ASSERT(zd.GetLaneSizeInBytes() == (zn.GetLaneSizeInBytes() / 2)); + + // XTN instructions look like immediate shifts with zero shift distance. + Instr size = EncodeSVEShiftLeftImmediate(0, zd.GetLaneSizeInBits()); + SVEBitwiseShiftImmediate(zd, zn, size, 0x45204000); +} + +void Assembler::sqxtnt(const ZRegister& zd, const ZRegister& zn) { + // SQXTNT ., . + // 0100 0101 0.1. .000 0100 01.. .... .... + // tszh<22> | tszl<20:19> | opc<12:11> | T<10> | Zn<9:5> | Zd<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); + VIXL_ASSERT(zd.IsLaneSizeS() || zd.IsLaneSizeH() || zd.IsLaneSizeB()); + VIXL_ASSERT(zd.GetLaneSizeInBytes() == (zn.GetLaneSizeInBytes() / 2)); + + // XTN instructions look like immediate shifts with zero shift distance. + Instr size = EncodeSVEShiftLeftImmediate(0, zd.GetLaneSizeInBits()); + SVEBitwiseShiftImmediate(zd, zn, size, 0x45204400); +} + +void Assembler::sqxtunb(const ZRegister& zd, const ZRegister& zn) { + // SQXTUNB ., . + // 0100 0101 0.1. .000 0101 00.. .... .... + // tszh<22> | tszl<20:19> | opc<12:11> | T<10> | Zn<9:5> | Zd<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); + VIXL_ASSERT(zd.IsLaneSizeS() || zd.IsLaneSizeH() || zd.IsLaneSizeB()); + VIXL_ASSERT(zd.GetLaneSizeInBytes() == (zn.GetLaneSizeInBytes() / 2)); + + // XTN instructions look like immediate shifts with zero shift distance. 
+ Instr size = EncodeSVEShiftLeftImmediate(0, zd.GetLaneSizeInBits()); + SVEBitwiseShiftImmediate(zd, zn, size, 0x45205000); +} + +void Assembler::sqxtunt(const ZRegister& zd, const ZRegister& zn) { + // SQXTUNT ., . + // 0100 0101 0.1. .000 0101 01.. .... .... + // tszh<22> | tszl<20:19> | opc<12:11> | T<10> | Zn<9:5> | Zd<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); + VIXL_ASSERT(zd.IsLaneSizeS() || zd.IsLaneSizeH() || zd.IsLaneSizeB()); + VIXL_ASSERT(zd.GetLaneSizeInBytes() == (zn.GetLaneSizeInBytes() / 2)); + + // XTN instructions look like immediate shifts with zero shift distance. + Instr size = EncodeSVEShiftLeftImmediate(0, zd.GetLaneSizeInBits()); + SVEBitwiseShiftImmediate(zd, zn, size, 0x45205400); +} + +void Assembler::srhadd(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm) { + // SRHADD ., /M, ., . + // 0100 0100 ..01 0100 100. .... .... .... + // size<23:22> | R<18> | S<17> | U<16> | Pg<12:10> | Zm<9:5> | Zdn<4:0> + + USE(zn); + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); + VIXL_ASSERT(zd.Is(zn)); + VIXL_ASSERT(AreSameLaneSize(zd, zn, zm)); + + Emit(0x44148000 | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zm)); +} + +void Assembler::sri(const ZRegister& zd, const ZRegister& zn, int shift) { + // SRI ., ., # + // 0100 0101 ..0. .... 1111 00.. .... .... + // tszh<23:22> | tszl<20:19> | imm3<18:16> | op<10> | Zn<9:5> | Zd<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); + VIXL_ASSERT(AreSameLaneSize(zd, zn)); + Instr encoded_imm = + EncodeSVEShiftRightImmediate(shift, zd.GetLaneSizeInBits()); + + SVEBitwiseShiftImmediate(zd, zn, encoded_imm, 0x4500f000); +} + +void Assembler::srshl(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm) { + // SRSHL ., /M, ., . + // 0100 0100 ..00 0010 100. .... .... .... + // size<23:22> | Q<19> | R<18> | N<17> | U<16> | Pg<12:10> | Zm<9:5> | + // Zdn<4:0> + + USE(zn); + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); + VIXL_ASSERT(zd.Is(zn)); + VIXL_ASSERT(AreSameLaneSize(zd, zn, zm)); + + Emit(0x44028000 | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zm)); +} + +void Assembler::srshlr(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm) { + // SRSHLR ., /M, ., . + // 0100 0100 ..00 0110 100. .... .... .... + // size<23:22> | Q<19> | R<18> | N<17> | U<16> | Pg<12:10> | Zm<9:5> | + // Zdn<4:0> + + USE(zn); + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); + VIXL_ASSERT(zd.Is(zn)); + VIXL_ASSERT(AreSameLaneSize(zd, zn, zm)); + + Emit(0x44068000 | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zm)); +} + +void Assembler::srshr(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + int shift) { + // SRSHR ., /M, ., # + // 0000 0100 ..00 1100 100. .... .... .... + // tszh<23:22> | opc<19:18> | L<17> | U<16> | Pg<12:10> | tszl<9:8> | + // imm3<7:5> | Zdn<4:0> + + USE(zn); + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); + VIXL_ASSERT(zd.Is(zn)); + Instr encoded_imm = + EncodeSVEShiftRightImmediate(shift, zd.GetLaneSizeInBits()); + SVEBitwiseShiftImmediatePred(zd, pg, encoded_imm, 0x040c8000); +} + +void Assembler::srsra(const ZRegister& zda, const ZRegister& zn, int shift) { + // SRSRA ., ., # + // 0100 0101 ..0. .... 1110 10.. .... .... 
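+ // The shift must be in the range 1 to the lane size in bits; + // EncodeSVEShiftRightImmediate (below) packs it, together with the lane size, + // into the combined tszh:tszl:imm3 field.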
+ // tszh<23:22> | tszl<20:19> | imm3<18:16> | R<11> | U<10> | Zn<9:5> | + // Zda<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); + VIXL_ASSERT(AreSameLaneSize(zda, zn)); + Instr encoded_imm = + EncodeSVEShiftRightImmediate(shift, zda.GetLaneSizeInBits()); + + SVEBitwiseShiftImmediate(zda, zn, encoded_imm, 0x4500e800); +} + +void Assembler::sshllb(const ZRegister& zd, const ZRegister& zn, int shift) { + // SSHLLB ., ., # + // 0100 0101 0.0. .... 1010 00.. .... .... + // tszh<22> | tszl<20:19> | imm3<18:16> | U<11> | T<10> | Zn<9:5> | Zd<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); + VIXL_ASSERT(!zd.IsLaneSizeB()); + VIXL_ASSERT(zd.GetLaneSizeInBytes() == (zn.GetLaneSizeInBytes() * 2)); + + Instr encoded_imm = + EncodeSVEShiftLeftImmediate(shift, zn.GetLaneSizeInBits()); + SVEBitwiseShiftImmediate(zd, zn, encoded_imm, 0x4500a000); +} + +void Assembler::sshllt(const ZRegister& zd, const ZRegister& zn, int shift) { + // SSHLLT ., ., # + // 0100 0101 0.0. .... 1010 01.. .... .... + // tszh<22> | tszl<20:19> | imm3<18:16> | U<11> | T<10> | Zn<9:5> | Zd<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); + VIXL_ASSERT(!zd.IsLaneSizeB()); + VIXL_ASSERT(zd.GetLaneSizeInBytes() == (zn.GetLaneSizeInBytes() * 2)); + + Instr encoded_imm = + EncodeSVEShiftLeftImmediate(shift, zn.GetLaneSizeInBits()); + SVEBitwiseShiftImmediate(zd, zn, encoded_imm, 0x4500a400); +} + +void Assembler::ssra(const ZRegister& zda, const ZRegister& zn, int shift) { + // SSRA ., ., # + // 0100 0101 ..0. .... 1110 00.. .... .... + // tszh<23:22> | tszl<20:19> | imm3<18:16> | R<11> | U<10> | Zn<9:5> | + // Zda<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); + VIXL_ASSERT(AreSameLaneSize(zda, zn)); + Instr encoded_imm = + EncodeSVEShiftRightImmediate(shift, zda.GetLaneSizeInBits()); + + SVEBitwiseShiftImmediate(zda, zn, encoded_imm, 0x4500e000); +} + +void Assembler::ssublb(const ZRegister& zd, + const ZRegister& zn, + const ZRegister& zm) { + // SSUBLB ., ., . + // 0100 0101 ..0. .... 0001 00.. .... .... + // size<23:22> | Zm<20:16> | op<13> | S<12> | U<11> | T<10> | Zn<9:5> | + // Zd<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); + VIXL_ASSERT(AreSameLaneSize(zn, zm)); + VIXL_ASSERT(zd.GetLaneSizeInBytes() == (zn.GetLaneSizeInBytes() * 2)); + VIXL_ASSERT(zd.GetLaneSizeInBytes() != kBRegSizeInBytes); + + Emit(0x45001000 | SVESize(zd) | Rd(zd) | Rn(zn) | Rm(zm)); +} + +void Assembler::ssublbt(const ZRegister& zd, + const ZRegister& zn, + const ZRegister& zm) { + // SSUBLBT ., ., . + // 0100 0101 ..0. .... 1000 10.. .... .... + // size<23:22> | Zm<20:16> | S<11> | tb<10> | Zn<9:5> | Zd<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); + VIXL_ASSERT(AreSameLaneSize(zn, zm)); + VIXL_ASSERT(zd.GetLaneSizeInBytes() == (zn.GetLaneSizeInBytes() * 2)); + VIXL_ASSERT(zd.IsLaneSizeD() || zd.IsLaneSizeH() || zd.IsLaneSizeS()); + + Emit(0x45008800 | SVESize(zd) | Rd(zd) | Rn(zn) | Rm(zm)); +} + +void Assembler::ssublt(const ZRegister& zd, + const ZRegister& zn, + const ZRegister& zm) { + // SSUBLT ., ., . + // 0100 0101 ..0. .... 0001 01.. .... .... + // size<23:22> | Zm<20:16> | op<13> | S<12> | U<11> | T<10> | Zn<9:5> | + // Zd<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); + VIXL_ASSERT(AreSameLaneSize(zn, zm)); + VIXL_ASSERT(zd.GetLaneSizeInBytes() == (zn.GetLaneSizeInBytes() * 2)); + VIXL_ASSERT(zd.GetLaneSizeInBytes() != kBRegSizeInBytes); + + Emit(0x45001400 | SVESize(zd) | Rd(zd) | Rn(zn) | Rm(zm)); +} + +void Assembler::ssubltb(const ZRegister& zd, + const ZRegister& zn, + const ZRegister& zm) { + // SSUBLTB ., ., . 
+ // 0100 0101 ..0. .... 1000 11.. .... .... + // size<23:22> | Zm<20:16> | S<11> | tb<10> | Zn<9:5> | Zd<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); + VIXL_ASSERT(AreSameLaneSize(zn, zm)); + VIXL_ASSERT(zd.GetLaneSizeInBytes() == (zn.GetLaneSizeInBytes() * 2)); + VIXL_ASSERT(zd.IsLaneSizeD() || zd.IsLaneSizeH() || zd.IsLaneSizeS()); + + Emit(0x45008c00 | SVESize(zd) | Rd(zd) | Rn(zn) | Rm(zm)); +} + +void Assembler::ssubwb(const ZRegister& zd, + const ZRegister& zn, + const ZRegister& zm) { + // SSUBWB ., ., . + // 0100 0101 ..0. .... 0101 00.. .... .... + // size<23:22> | Zm<20:16> | S<12> | U<11> | T<10> | Zn<9:5> | Zd<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); + VIXL_ASSERT(AreSameLaneSize(zd, zn)); + VIXL_ASSERT(zd.GetLaneSizeInBytes() == (zm.GetLaneSizeInBytes() * 2)); + VIXL_ASSERT(zd.GetLaneSizeInBytes() != kBRegSizeInBytes); + + Emit(0x45005000 | SVESize(zd) | Rd(zd) | Rn(zn) | Rm(zm)); +} + +void Assembler::ssubwt(const ZRegister& zd, + const ZRegister& zn, + const ZRegister& zm) { + // SSUBWT ., ., . + // 0100 0101 ..0. .... 0101 01.. .... .... + // size<23:22> | Zm<20:16> | S<12> | U<11> | T<10> | Zn<9:5> | Zd<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); + VIXL_ASSERT(AreSameLaneSize(zd, zn)); + VIXL_ASSERT(zd.GetLaneSizeInBytes() == (zm.GetLaneSizeInBytes() * 2)); + VIXL_ASSERT(zd.GetLaneSizeInBytes() != kBRegSizeInBytes); + + Emit(0x45005400 | SVESize(zd) | Rd(zd) | Rn(zn) | Rm(zm)); +} + +#if 0 +// This prototype maps to 2 instruction encodings: +// stnt1b_z_p_ar_d_64_unscaled +// stnt1b_z_p_ar_s_x32_unscaled +void Assembler::stnt1b(const ZRegister& zt, const PRegister& pg, const ZRegister& zn, const Register& rm) { + // STNT1B { .D }, , [.D{, }] + // 1110 0100 000. .... 001. .... .... .... + // msz<24:23> | Rm<20:16> | Pg<12:10> | Zn<9:5> | Zt<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); + + Emit(0xe4002000 | Rt(zt) | PgLow8(pg) | Rn(zn) | Rm(rm)); +} + +void Assembler::stnt1d(const ZRegister& zt, const PRegister& pg, const ZRegister& zn, const Register& rm) { + // STNT1D { .D }, , [.D{, }] + // 1110 0101 100. .... 001. .... .... .... + // msz<24:23> | Rm<20:16> | Pg<12:10> | Zn<9:5> | Zt<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); + + Emit(0xe5802000 | Rt(zt) | PgLow8(pg) | Rn(zn) | Rm(rm)); +} + +// This prototype maps to 2 instruction encodings: +// stnt1h_z_p_ar_d_64_unscaled +// stnt1h_z_p_ar_s_x32_unscaled +void Assembler::stnt1h(const ZRegister& zt, const PRegister& pg, const ZRegister& zn, const Register& rm) { + // STNT1H { .D }, , [.D{, }] + // 1110 0100 100. .... 001. .... .... .... + // msz<24:23> | Rm<20:16> | Pg<12:10> | Zn<9:5> | Zt<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); + + Emit(0xe4802000 | Rt(zt) | PgLow8(pg) | Rn(zn) | Rm(rm)); +} + +// This prototype maps to 2 instruction encodings: +// stnt1w_z_p_ar_d_64_unscaled +// stnt1w_z_p_ar_s_x32_unscaled +void Assembler::stnt1w(const ZRegister& zt, const PRegister& pg, const ZRegister& zn, const Register& rm) { + // STNT1W { .D }, , [.D{, }] + // 1110 0101 000. .... 001. .... .... .... + // msz<24:23> | Rm<20:16> | Pg<12:10> | Zn<9:5> | Zt<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); + + Emit(0xe5002000 | Rt(zt) | PgLow8(pg) | Rn(zn) | Rm(rm)); +} +#endif + +void Assembler::subhnb(const ZRegister& zd, + const ZRegister& zn, + const ZRegister& zm) { + // SUBHNB ., ., . + // 0100 0101 ..1. .... 0111 00.. .... .... 
+ // size<23:22> | Zm<20:16> | S<12> | R<11> | T<10> | Zn<9:5> | Zd<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); + VIXL_ASSERT(AreSameLaneSize(zn, zm)); + VIXL_ASSERT(zn.GetLaneSizeInBytes() == (zd.GetLaneSizeInBytes() * 2)); + VIXL_ASSERT(zn.GetLaneSizeInBytes() != kBRegSizeInBytes); + + Emit(0x45207000 | SVESize(zn) | Rd(zd) | Rn(zn) | Rm(zm)); +} + +void Assembler::subhnt(const ZRegister& zd, + const ZRegister& zn, + const ZRegister& zm) { + // SUBHNT ., ., . + // 0100 0101 ..1. .... 0111 01.. .... .... + // size<23:22> | Zm<20:16> | S<12> | R<11> | T<10> | Zn<9:5> | Zd<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); + VIXL_ASSERT(AreSameLaneSize(zn, zm)); + VIXL_ASSERT(zn.GetLaneSizeInBytes() == (zd.GetLaneSizeInBytes() * 2)); + VIXL_ASSERT(zn.GetLaneSizeInBytes() != kBRegSizeInBytes); + + Emit(0x45207400 | SVESize(zn) | Rd(zd) | Rn(zn) | Rm(zm)); +} + +void Assembler::suqadd(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm) { + // SUQADD ., /M, ., . + // 0100 0100 ..01 1100 100. .... .... .... + // size<23:22> | op<18> | S<17> | U<16> | Pg<12:10> | Zm<9:5> | Zdn<4:0> + + USE(zn); + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); + VIXL_ASSERT(zd.Is(zn)); + VIXL_ASSERT(AreSameLaneSize(zd, zn, zm)); + + Emit(0x441c8000 | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zm)); +} + +void Assembler::tbl(const ZRegister& zd, + const ZRegister& zn1, + const ZRegister& zn2, + const ZRegister& zm) { + // TBL ., { ., . }, . + // 0000 0101 ..1. .... 0010 10.. .... .... + // size<23:22> | Zm<20:16> | op<10> | Zn<9:5> | Zd<4:0> + + USE(zn2); + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); + VIXL_ASSERT(AreConsecutive(zn1, zn2)); + VIXL_ASSERT(AreSameLaneSize(zd, zn1, zn2, zm)); + + Emit(0x05202800 | SVESize(zd) | Rd(zd) | Rn(zn1) | Rn(zn2) | Rm(zm)); +} + +void Assembler::tbx(const ZRegister& zd, + const ZRegister& zn, + const ZRegister& zm) { + // TBX ., ., . + // 0000 0101 ..1. .... 0010 11.. .... .... + // size<23:22> | Zm<20:16> | op<10> | Zn<9:5> | Zd<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); + VIXL_ASSERT(AreSameLaneSize(zd, zn, zm)); + + Emit(0x05202c00 | SVESize(zd) | Rd(zd) | Rn(zn) | Rm(zm)); +} + +void Assembler::uaba(const ZRegister& zda, + const ZRegister& zn, + const ZRegister& zm) { + // UABA ., ., . + // 0100 0101 ..0. .... 1111 11.. .... .... + // size<23:22> | Zm<20:16> | U<10> | Zn<9:5> | Zda<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); + VIXL_ASSERT(AreSameLaneSize(zda, zn, zm)); + + Emit(0x4500fc00 | SVESize(zda) | Rd(zda) | Rn(zn) | Rm(zm)); +} + +void Assembler::uabalb(const ZRegister& zda, + const ZRegister& zn, + const ZRegister& zm) { + // UABALB ., ., . + // 0100 0101 ..0. .... 1100 10.. .... .... + // size<23:22> | Zm<20:16> | U<11> | T<10> | Zn<9:5> | Zda<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); + VIXL_ASSERT(AreSameLaneSize(zn, zm)); + VIXL_ASSERT(zda.GetLaneSizeInBytes() == (zn.GetLaneSizeInBytes() * 2)); + VIXL_ASSERT(zda.GetLaneSizeInBytes() != kBRegSizeInBytes); + + Emit(0x4500c800 | SVESize(zda) | Rd(zda) | Rn(zn) | Rm(zm)); +} + +void Assembler::uabalt(const ZRegister& zda, + const ZRegister& zn, + const ZRegister& zm) { + // UABALT ., ., . + // 0100 0101 ..0. .... 1100 11.. .... .... 
+ // size<23:22> | Zm<20:16> | U<11> | T<10> | Zn<9:5> | Zda<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); + VIXL_ASSERT(AreSameLaneSize(zn, zm)); + VIXL_ASSERT(zda.GetLaneSizeInBytes() == (zn.GetLaneSizeInBytes() * 2)); + VIXL_ASSERT(zda.GetLaneSizeInBytes() != kBRegSizeInBytes); + + Emit(0x4500cc00 | SVESize(zda) | Rd(zda) | Rn(zn) | Rm(zm)); +} + +void Assembler::uabdlb(const ZRegister& zd, + const ZRegister& zn, + const ZRegister& zm) { + // UABDLB ., ., . + // 0100 0101 ..0. .... 0011 10.. .... .... + // size<23:22> | Zm<20:16> | op<13> | S<12> | U<11> | T<10> | Zn<9:5> | + // Zd<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); + VIXL_ASSERT(AreSameLaneSize(zn, zm)); + VIXL_ASSERT(zd.GetLaneSizeInBytes() == (zn.GetLaneSizeInBytes() * 2)); + VIXL_ASSERT(zd.GetLaneSizeInBytes() != kBRegSizeInBytes); + + Emit(0x45003800 | SVESize(zd) | Rd(zd) | Rn(zn) | Rm(zm)); +} + +void Assembler::uabdlt(const ZRegister& zd, + const ZRegister& zn, + const ZRegister& zm) { + // UABDLT ., ., . + // 0100 0101 ..0. .... 0011 11.. .... .... + // size<23:22> | Zm<20:16> | op<13> | S<12> | U<11> | T<10> | Zn<9:5> | + // Zd<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); + VIXL_ASSERT(AreSameLaneSize(zn, zm)); + VIXL_ASSERT(zd.GetLaneSizeInBytes() == (zn.GetLaneSizeInBytes() * 2)); + VIXL_ASSERT(zd.GetLaneSizeInBytes() != kBRegSizeInBytes); + + Emit(0x45003c00 | SVESize(zd) | Rd(zd) | Rn(zn) | Rm(zm)); +} + +void Assembler::uadalp(const ZRegister& zda, + const PRegisterM& pg, + const ZRegister& zn) { + // UADALP ., /M, . + // 0100 0100 ..00 0101 101. .... .... .... + // size<23:22> | U<16> | Pg<12:10> | Zn<9:5> | Zda<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); + VIXL_ASSERT(zda.GetLaneSizeInBytes() == (zn.GetLaneSizeInBytes() * 2)); + VIXL_ASSERT(zda.GetLaneSizeInBytes() != kBRegSizeInBytes); + + Emit(0x4405a000 | SVESize(zda) | Rd(zda) | PgLow8(pg) | Rn(zn)); +} + +void Assembler::uaddlb(const ZRegister& zd, + const ZRegister& zn, + const ZRegister& zm) { + // UADDLB ., ., . + // 0100 0101 ..0. .... 0000 10.. .... .... + // size<23:22> | Zm<20:16> | op<13> | S<12> | U<11> | T<10> | Zn<9:5> | + // Zd<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); + VIXL_ASSERT(AreSameLaneSize(zn, zm)); + VIXL_ASSERT(zd.GetLaneSizeInBytes() == (zn.GetLaneSizeInBytes() * 2)); + VIXL_ASSERT(zd.GetLaneSizeInBytes() != kBRegSizeInBytes); + + Emit(0x45000800 | SVESize(zd) | Rd(zd) | Rn(zn) | Rm(zm)); +} + +void Assembler::uaddlt(const ZRegister& zd, + const ZRegister& zn, + const ZRegister& zm) { + // UADDLT ., ., . + // 0100 0101 ..0. .... 0000 11.. .... .... + // size<23:22> | Zm<20:16> | op<13> | S<12> | U<11> | T<10> | Zn<9:5> | + // Zd<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); + VIXL_ASSERT(AreSameLaneSize(zn, zm)); + VIXL_ASSERT(zd.GetLaneSizeInBytes() == (zn.GetLaneSizeInBytes() * 2)); + VIXL_ASSERT(zd.GetLaneSizeInBytes() != kBRegSizeInBytes); + + Emit(0x45000c00 | SVESize(zd) | Rd(zd) | Rn(zn) | Rm(zm)); +} + +void Assembler::uaddwb(const ZRegister& zd, + const ZRegister& zn, + const ZRegister& zm) { + // UADDWB ., ., . + // 0100 0101 ..0. .... 0100 10.. .... .... 
+ // size<23:22> | Zm<20:16> | S<12> | U<11> | T<10> | Zn<9:5> | Zd<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); + VIXL_ASSERT(AreSameLaneSize(zd, zn)); + VIXL_ASSERT(zd.GetLaneSizeInBytes() == (zm.GetLaneSizeInBytes() * 2)); + VIXL_ASSERT(zd.GetLaneSizeInBytes() != kBRegSizeInBytes); + + Emit(0x45004800 | SVESize(zd) | Rd(zd) | Rn(zn) | Rm(zm)); +} + +void Assembler::uaddwt(const ZRegister& zd, + const ZRegister& zn, + const ZRegister& zm) { + // UADDWT ., ., . + // 0100 0101 ..0. .... 0100 11.. .... .... + // size<23:22> | Zm<20:16> | S<12> | U<11> | T<10> | Zn<9:5> | Zd<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); + VIXL_ASSERT(AreSameLaneSize(zd, zn)); + VIXL_ASSERT(zd.GetLaneSizeInBytes() == (zm.GetLaneSizeInBytes() * 2)); + VIXL_ASSERT(zd.GetLaneSizeInBytes() != kBRegSizeInBytes); + + Emit(0x45004c00 | SVESize(zd) | Rd(zd) | Rn(zn) | Rm(zm)); +} + +void Assembler::uhadd(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm) { + // UHADD ., /M, ., . + // 0100 0100 ..01 0001 100. .... .... .... + // size<23:22> | R<18> | S<17> | U<16> | Pg<12:10> | Zm<9:5> | Zdn<4:0> + + USE(zn); + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); + VIXL_ASSERT(zd.Is(zn)); + VIXL_ASSERT(AreSameLaneSize(zd, zn, zm)); + + Emit(0x44118000 | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zm)); +} + +void Assembler::uhsub(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm) { + // UHSUB ., /M, ., . + // 0100 0100 ..01 0011 100. .... .... .... + // size<23:22> | R<18> | S<17> | U<16> | Pg<12:10> | Zm<9:5> | Zdn<4:0> + + USE(zn); + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); + VIXL_ASSERT(zd.Is(zn)); + VIXL_ASSERT(AreSameLaneSize(zd, zn, zm)); + + Emit(0x44138000 | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zm)); +} + +void Assembler::uhsubr(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm) { + // UHSUBR ., /M, ., . + // 0100 0100 ..01 0111 100. .... .... .... + // size<23:22> | R<18> | S<17> | U<16> | Pg<12:10> | Zm<9:5> | Zdn<4:0> + + USE(zn); + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); + VIXL_ASSERT(zd.Is(zn)); + VIXL_ASSERT(AreSameLaneSize(zd, zn, zm)); + + Emit(0x44178000 | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zm)); +} + +void Assembler::umaxp(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm) { + // UMAXP ., /M, ., . + // 0100 0100 ..01 0101 101. .... .... .... + // size<23:22> | opc<18:17> | U<16> | Pg<12:10> | Zm<9:5> | Zdn<4:0> + + USE(zn); + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); + VIXL_ASSERT(zd.Is(zn)); + VIXL_ASSERT(AreSameLaneSize(zd, zn, zm)); + + Emit(0x4415a000 | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zm)); +} + +void Assembler::uminp(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm) { + // UMINP ., /M, ., . + // 0100 0100 ..01 0111 101. .... .... .... + // size<23:22> | opc<18:17> | U<16> | Pg<12:10> | Zm<9:5> | Zdn<4:0> + + USE(zn); + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); + VIXL_ASSERT(zd.Is(zn)); + VIXL_ASSERT(AreSameLaneSize(zd, zn, zm)); + + Emit(0x4417a000 | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zm)); +} + +void Assembler::umlalb(const ZRegister& zda, + const ZRegister& zn, + const ZRegister& zm) { + // UMLALB ., ., . + // 0100 0100 ..0. .... 0100 10.. .... .... 
+ // size<23:22> | Zm<20:16> | S<12> | U<11> | T<10> | Zn<9:5> | Zda<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); + VIXL_ASSERT(!zda.IsLaneSizeB()); + VIXL_ASSERT(AreSameLaneSize(zn, zm)); + VIXL_ASSERT(zda.GetLaneSizeInBytes() == (zn.GetLaneSizeInBytes() * 2)); + + Emit(0x44004800 | SVESize(zda) | Rd(zda) | Rn(zn) | Rm(zm)); +} + +void Assembler::umlalt(const ZRegister& zda, + const ZRegister& zn, + const ZRegister& zm) { + // UMLALT ., ., . + // 0100 0100 ..0. .... 0100 11.. .... .... + // size<23:22> | Zm<20:16> | S<12> | U<11> | T<10> | Zn<9:5> | Zda<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); + VIXL_ASSERT(!zda.IsLaneSizeB()); + VIXL_ASSERT(AreSameLaneSize(zn, zm)); + VIXL_ASSERT(zda.GetLaneSizeInBytes() == (zn.GetLaneSizeInBytes() * 2)); + + Emit(0x44004c00 | SVESize(zda) | Rd(zda) | Rn(zn) | Rm(zm)); +} + +void Assembler::umlslb(const ZRegister& zda, + const ZRegister& zn, + const ZRegister& zm) { + // UMLSLB ., ., . + // 0100 0100 ..0. .... 0101 10.. .... .... + // size<23:22> | Zm<20:16> | S<12> | U<11> | T<10> | Zn<9:5> | Zda<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); + VIXL_ASSERT(!zda.IsLaneSizeB()); + VIXL_ASSERT(AreSameLaneSize(zn, zm)); + VIXL_ASSERT(zda.GetLaneSizeInBytes() == (zn.GetLaneSizeInBytes() * 2)); + + Emit(0x44005800 | SVESize(zda) | Rd(zda) | Rn(zn) | Rm(zm)); +} + +void Assembler::umlslt(const ZRegister& zda, + const ZRegister& zn, + const ZRegister& zm) { + // UMLSLT ., ., . + // 0100 0100 ..0. .... 0101 11.. .... .... + // size<23:22> | Zm<20:16> | S<12> | U<11> | T<10> | Zn<9:5> | Zda<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); + VIXL_ASSERT(!zda.IsLaneSizeB()); + VIXL_ASSERT(AreSameLaneSize(zn, zm)); + VIXL_ASSERT(zda.GetLaneSizeInBytes() == (zn.GetLaneSizeInBytes() * 2)); + + Emit(0x44005c00 | SVESize(zda) | Rd(zda) | Rn(zn) | Rm(zm)); +} + +void Assembler::umulh(const ZRegister& zd, + const ZRegister& zn, + const ZRegister& zm) { + // UMULH ., ., . + // 0000 0100 ..1. .... 0110 11.. .... .... + // size<23:22> | Zm<20:16> | opc<11:10> | Zn<9:5> | Zd<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); + VIXL_ASSERT(AreSameLaneSize(zd, zn, zm)); + + Emit(0x04206c00 | SVESize(zd) | Rd(zd) | Rn(zn) | Rm(zm)); +} + +void Assembler::umullb(const ZRegister& zd, + const ZRegister& zn, + const ZRegister& zm) { + // UMULLB ., ., . + // 0100 0101 ..0. .... 0111 10.. .... .... + // size<23:22> | Zm<20:16> | op<12> | U<11> | T<10> | Zn<9:5> | Zd<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); + VIXL_ASSERT(AreSameLaneSize(zn, zm)); + VIXL_ASSERT(!zd.IsLaneSizeB() && !zd.IsLaneSizeQ()); + VIXL_ASSERT(zd.GetLaneSizeInBytes() == zn.GetLaneSizeInBytes() * 2); + + Emit(0x45007800 | SVESize(zd) | Rd(zd) | Rn(zn) | Rm(zm)); +} + +void Assembler::umullt(const ZRegister& zd, + const ZRegister& zn, + const ZRegister& zm) { + // UMULLT ., ., . + // 0100 0101 ..0. .... 0111 11.. .... .... + // size<23:22> | Zm<20:16> | op<12> | U<11> | T<10> | Zn<9:5> | Zd<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); + VIXL_ASSERT(AreSameLaneSize(zn, zm)); + VIXL_ASSERT(!zd.IsLaneSizeB() && !zd.IsLaneSizeQ()); + VIXL_ASSERT(zd.GetLaneSizeInBytes() == zn.GetLaneSizeInBytes() * 2); + + Emit(0x45007c00 | SVESize(zd) | Rd(zd) | Rn(zn) | Rm(zm)); +} + +void Assembler::uqadd(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm) { + // UQADD ., /M, ., . + // 0100 0100 ..01 1001 100. .... .... .... 
+ // size<23:22> | op<18> | S<17> | U<16> | Pg<12:10> | Zm<9:5> | Zdn<4:0> + + USE(zn); + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); + VIXL_ASSERT(zd.Is(zn)); + VIXL_ASSERT(AreSameLaneSize(zd, zn, zm)); + + Emit(0x44198000 | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zm)); +} + +void Assembler::uqrshl(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm) { + // UQRSHL ., /M, ., . + // 0100 0100 ..00 1011 100. .... .... .... + // size<23:22> | Q<19> | R<18> | N<17> | U<16> | Pg<12:10> | Zm<9:5> | + // Zdn<4:0> + + USE(zn); + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); + VIXL_ASSERT(zd.Is(zn)); + VIXL_ASSERT(AreSameLaneSize(zd, zn, zm)); + + Emit(0x440b8000 | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zm)); +} + +void Assembler::uqrshlr(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm) { + // UQRSHLR ., /M, ., . + // 0100 0100 ..00 1111 100. .... .... .... + // size<23:22> | Q<19> | R<18> | N<17> | U<16> | Pg<12:10> | Zm<9:5> | + // Zdn<4:0> + + USE(zn); + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); + VIXL_ASSERT(zd.Is(zn)); + VIXL_ASSERT(AreSameLaneSize(zd, zn, zm)); + + Emit(0x440f8000 | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zm)); +} + +void Assembler::uqshl(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + int shift) { + // UQSHL ., /M, ., # + // 0000 0100 ..00 0111 100. .... .... .... + // tszh<23:22> | opc<19:18> | L<17> | U<16> | Pg<12:10> | tszl<9:8> | + // imm3<7:5> | Zdn<4:0> + + USE(zn); + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); + VIXL_ASSERT(zd.Is(zn)); + Instr encoded_imm = + EncodeSVEShiftLeftImmediate(shift, zd.GetLaneSizeInBits()); + SVEBitwiseShiftImmediatePred(zd, pg, encoded_imm, 0x04078000); +} + +void Assembler::uqshl(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm) { + // UQSHL ., /M, ., . + // 0100 0100 ..00 1001 100. .... .... .... + // size<23:22> | Q<19> | R<18> | N<17> | U<16> | Pg<12:10> | Zm<9:5> | + // Zdn<4:0> + + USE(zn); + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); + VIXL_ASSERT(zd.Is(zn)); + VIXL_ASSERT(AreSameLaneSize(zd, zn, zm)); + + Emit(0x44098000 | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zm)); +} + +void Assembler::uqshlr(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm) { + // UQSHLR ., /M, ., . + // 0100 0100 ..00 1101 100. .... .... .... + // size<23:22> | Q<19> | R<18> | N<17> | U<16> | Pg<12:10> | Zm<9:5> | + // Zdn<4:0> + + USE(zn); + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); + VIXL_ASSERT(zd.Is(zn)); + VIXL_ASSERT(AreSameLaneSize(zd, zn, zm)); + + Emit(0x440d8000 | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zm)); +} + +void Assembler::uqsub(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm) { + // UQSUB ., /M, ., . + // 0100 0100 ..01 1011 100. .... .... .... + // size<23:22> | op<18> | S<17> | U<16> | Pg<12:10> | Zm<9:5> | Zdn<4:0> + + USE(zn); + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); + VIXL_ASSERT(zd.Is(zn)); + VIXL_ASSERT(AreSameLaneSize(zd, zn, zm)); + + Emit(0x441b8000 | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zm)); +} + +void Assembler::uqsubr(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm) { + // UQSUBR ., /M, ., . + // 0100 0100 ..01 1111 100. .... .... .... 
+ // size<23:22> | op<18> | S<17> | U<16> | Pg<12:10> | Zm<9:5> | Zdn<4:0> + + USE(zn); + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); + VIXL_ASSERT(zd.Is(zn)); + VIXL_ASSERT(AreSameLaneSize(zd, zn, zm)); + + Emit(0x441f8000 | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zm)); +} + +void Assembler::uqxtnb(const ZRegister& zd, const ZRegister& zn) { + // UQXTNB ., . + // 0100 0101 0.1. .000 0100 10.. .... .... + // tszh<22> | tszl<20:19> | opc<12:11> | T<10> | Zn<9:5> | Zd<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); + VIXL_ASSERT(zd.IsLaneSizeS() || zd.IsLaneSizeH() || zd.IsLaneSizeB()); + VIXL_ASSERT(zd.GetLaneSizeInBytes() == (zn.GetLaneSizeInBytes() / 2)); + + // XTN instructions look like immediate shifts with zero shift distance. + Instr size = EncodeSVEShiftLeftImmediate(0, zd.GetLaneSizeInBits()); + SVEBitwiseShiftImmediate(zd, zn, size, 0x45204800); +} + +void Assembler::uqxtnt(const ZRegister& zd, const ZRegister& zn) { + // UQXTNT ., . + // 0100 0101 0.1. .000 0100 11.. .... .... + // tszh<22> | tszl<20:19> | opc<12:11> | T<10> | Zn<9:5> | Zd<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); + VIXL_ASSERT(zd.IsLaneSizeS() || zd.IsLaneSizeH() || zd.IsLaneSizeB()); + VIXL_ASSERT(zd.GetLaneSizeInBytes() == (zn.GetLaneSizeInBytes() / 2)); + + // XTN instructions look like immediate shifts with zero shift distance. + Instr size = EncodeSVEShiftLeftImmediate(0, zd.GetLaneSizeInBits()); + SVEBitwiseShiftImmediate(zd, zn, size, 0x45204c00); +} + +void Assembler::urecpe(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn) { + // URECPE .S, /M, .S + // 0100 0100 ..00 0000 101. .... .... .... + // size<23:22> | Q<19> | opc<17:16> | Pg<12:10> | Zn<9:5> | Zd<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); + VIXL_ASSERT(zd.IsLaneSizeS() && zn.IsLaneSizeS()); + + Emit(0x4400a000 | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zn)); +} + +void Assembler::urhadd(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm) { + // URHADD ., /M, ., . + // 0100 0100 ..01 0101 100. .... .... .... + // size<23:22> | R<18> | S<17> | U<16> | Pg<12:10> | Zm<9:5> | Zdn<4:0> + + USE(zn); + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); + VIXL_ASSERT(zd.Is(zn)); + VIXL_ASSERT(AreSameLaneSize(zd, zn, zm)); + + Emit(0x44158000 | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zm)); +} + +void Assembler::urshl(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm) { + // URSHL ., /M, ., . + // 0100 0100 ..00 0011 100. .... .... .... + // size<23:22> | Q<19> | R<18> | N<17> | U<16> | Pg<12:10> | Zm<9:5> | + // Zdn<4:0> + + USE(zn); + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); + VIXL_ASSERT(zd.Is(zn)); + VIXL_ASSERT(AreSameLaneSize(zd, zn, zm)); + + Emit(0x44038000 | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zm)); +} + +void Assembler::urshlr(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm) { + // URSHLR ., /M, ., . + // 0100 0100 ..00 0111 100. .... .... .... + // size<23:22> | Q<19> | R<18> | N<17> | U<16> | Pg<12:10> | Zm<9:5> | + // Zdn<4:0> + + USE(zn); + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); + VIXL_ASSERT(zd.Is(zn)); + VIXL_ASSERT(AreSameLaneSize(zd, zn, zm)); + + Emit(0x44078000 | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zm)); +} + +void Assembler::urshr(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + int shift) { + // URSHR ., /M, ., # + // 0000 0100 ..00 1101 100. .... .... .... 
+ // tszh<23:22> | opc<19:18> | L<17> | U<16> | Pg<12:10> | tszl<9:8> | + // imm3<7:5> | Zdn<4:0> + + USE(zn); + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); + VIXL_ASSERT(zd.Is(zn)); + Instr encoded_imm = + EncodeSVEShiftRightImmediate(shift, zd.GetLaneSizeInBits()); + SVEBitwiseShiftImmediatePred(zd, pg, encoded_imm, 0x040d8000); +} + +void Assembler::ursqrte(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn) { + // URSQRTE .S, /M, .S + // 0100 0100 ..00 0001 101. .... .... .... + // size<23:22> | Q<19> | opc<17:16> | Pg<12:10> | Zn<9:5> | Zd<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); + VIXL_ASSERT(zd.IsLaneSizeS() && zn.IsLaneSizeS()); + + Emit(0x4401a000 | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zn)); +} + +void Assembler::ursra(const ZRegister& zda, const ZRegister& zn, int shift) { + // URSRA ., ., # + // 0100 0101 ..0. .... 1110 11.. .... .... + // tszh<23:22> | tszl<20:19> | imm3<18:16> | R<11> | U<10> | Zn<9:5> | + // Zda<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); + VIXL_ASSERT(AreSameLaneSize(zda, zn)); + Instr encoded_imm = + EncodeSVEShiftRightImmediate(shift, zda.GetLaneSizeInBits()); + + SVEBitwiseShiftImmediate(zda, zn, encoded_imm, 0x4500ec00); +} + +void Assembler::ushllb(const ZRegister& zd, const ZRegister& zn, int shift) { + // USHLLB ., ., # + // 0100 0101 0.0. .... 1010 10.. .... .... + // tszh<22> | tszl<20:19> | imm3<18:16> | U<11> | T<10> | Zn<9:5> | Zd<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); + VIXL_ASSERT(!zd.IsLaneSizeB()); + VIXL_ASSERT(zd.GetLaneSizeInBytes() == (zn.GetLaneSizeInBytes() * 2)); + + Instr encoded_imm = + EncodeSVEShiftLeftImmediate(shift, zn.GetLaneSizeInBits()); + SVEBitwiseShiftImmediate(zd, zn, encoded_imm, 0x4500a800); +} + +void Assembler::ushllt(const ZRegister& zd, const ZRegister& zn, int shift) { + // USHLLT ., ., # + // 0100 0101 0.0. .... 1010 11.. .... .... + // tszh<22> | tszl<20:19> | imm3<18:16> | U<11> | T<10> | Zn<9:5> | Zd<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); + VIXL_ASSERT(!zd.IsLaneSizeB()); + VIXL_ASSERT(zd.GetLaneSizeInBytes() == (zn.GetLaneSizeInBytes() * 2)); + + Instr encoded_imm = + EncodeSVEShiftLeftImmediate(shift, zn.GetLaneSizeInBits()); + SVEBitwiseShiftImmediate(zd, zn, encoded_imm, 0x4500ac00); +} + +void Assembler::usqadd(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm) { + // USQADD ., /M, ., . + // 0100 0100 ..01 1101 100. .... .... .... + // size<23:22> | op<18> | S<17> | U<16> | Pg<12:10> | Zm<9:5> | Zdn<4:0> + + USE(zn); + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); + VIXL_ASSERT(zd.Is(zn)); + VIXL_ASSERT(AreSameLaneSize(zd, zn, zm)); + + Emit(0x441d8000 | SVESize(zd) | Rd(zd) | PgLow8(pg) | Rn(zm)); +} + +void Assembler::usra(const ZRegister& zda, const ZRegister& zn, int shift) { + // USRA ., ., # + // 0100 0101 ..0. .... 1110 01.. .... .... + // tszh<23:22> | tszl<20:19> | imm3<18:16> | R<11> | U<10> | Zn<9:5> | + // Zda<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); + VIXL_ASSERT(AreSameLaneSize(zda, zn)); + Instr encoded_imm = + EncodeSVEShiftRightImmediate(shift, zda.GetLaneSizeInBits()); + + SVEBitwiseShiftImmediate(zda, zn, encoded_imm, 0x4500e400); +} + +void Assembler::usublb(const ZRegister& zd, + const ZRegister& zn, + const ZRegister& zm) { + // USUBLB ., ., . + // 0100 0101 ..0. .... 0001 10.. .... .... 
+ // size<23:22> | Zm<20:16> | op<13> | S<12> | U<11> | T<10> | Zn<9:5> | + // Zd<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); + VIXL_ASSERT(AreSameLaneSize(zn, zm)); + VIXL_ASSERT(zd.GetLaneSizeInBytes() == (zn.GetLaneSizeInBytes() * 2)); + VIXL_ASSERT(zd.GetLaneSizeInBytes() != kBRegSizeInBytes); + + Emit(0x45001800 | SVESize(zd) | Rd(zd) | Rn(zn) | Rm(zm)); +} + +void Assembler::usublt(const ZRegister& zd, + const ZRegister& zn, + const ZRegister& zm) { + // USUBLT ., ., . + // 0100 0101 ..0. .... 0001 11.. .... .... + // size<23:22> | Zm<20:16> | op<13> | S<12> | U<11> | T<10> | Zn<9:5> | + // Zd<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); + VIXL_ASSERT(AreSameLaneSize(zn, zm)); + VIXL_ASSERT(zd.GetLaneSizeInBytes() == (zn.GetLaneSizeInBytes() * 2)); + VIXL_ASSERT(zd.GetLaneSizeInBytes() != kBRegSizeInBytes); + + Emit(0x45001c00 | SVESize(zd) | Rd(zd) | Rn(zn) | Rm(zm)); +} + +void Assembler::usubwb(const ZRegister& zd, + const ZRegister& zn, + const ZRegister& zm) { + // USUBWB ., ., . + // 0100 0101 ..0. .... 0101 10.. .... .... + // size<23:22> | Zm<20:16> | S<12> | U<11> | T<10> | Zn<9:5> | Zd<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); + VIXL_ASSERT(AreSameLaneSize(zd, zn)); + VIXL_ASSERT(zd.GetLaneSizeInBytes() == (zm.GetLaneSizeInBytes() * 2)); + VIXL_ASSERT(zd.GetLaneSizeInBytes() != kBRegSizeInBytes); + + Emit(0x45005800 | SVESize(zd) | Rd(zd) | Rn(zn) | Rm(zm)); +} + +void Assembler::usubwt(const ZRegister& zd, + const ZRegister& zn, + const ZRegister& zm) { + // USUBWT ., ., . + // 0100 0101 ..0. .... 0101 11.. .... .... + // size<23:22> | Zm<20:16> | S<12> | U<11> | T<10> | Zn<9:5> | Zd<4:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); + VIXL_ASSERT(AreSameLaneSize(zd, zn)); + VIXL_ASSERT(zd.GetLaneSizeInBytes() == (zm.GetLaneSizeInBytes() * 2)); + VIXL_ASSERT(zd.GetLaneSizeInBytes() != kBRegSizeInBytes); + + Emit(0x45005c00 | SVESize(zd) | Rd(zd) | Rn(zn) | Rm(zm)); +} + +void Assembler::whilege(const PRegisterWithLaneSize& pd, + const Register& rn, + const Register& rm) { + // WHILEGE ., , + // 0010 0101 ..1. .... 000. 00.. ...0 .... + // size<23:22> | Rm<20:16> | sf<12> | U<11> | lt<10> | Rn<9:5> | eq<4> | + // Pd<3:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); + VIXL_ASSERT(AreSameSizeAndType(rn, rm)); + const Instr sf = rn.Is64Bits() ? 0x00001000 : 0x00000000; + + Emit(0x25200000 | SVESize(pd) | sf | Pd(pd) | Rn(rn) | Rm(rm)); +} + +void Assembler::whilegt(const PRegisterWithLaneSize& pd, + const Register& rn, + const Register& rm) { + // WHILEGT ., , + // 0010 0101 ..1. .... 000. 00.. ...1 .... + // size<23:22> | Rm<20:16> | sf<12> | U<11> | lt<10> | Rn<9:5> | eq<4> | + // Pd<3:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); + VIXL_ASSERT(AreSameSizeAndType(rn, rm)); + const Instr sf = rn.Is64Bits() ? 0x00001000 : 0x00000000; + + Emit(0x25200010 | SVESize(pd) | sf | Pd(pd) | Rn(rn) | Rm(rm)); +} + +void Assembler::whilehi(const PRegisterWithLaneSize& pd, + const Register& rn, + const Register& rm) { + // WHILEHI ., , + // 0010 0101 ..1. .... 000. 10.. ...1 .... + // size<23:22> | Rm<20:16> | sf<12> | U<11> | lt<10> | Rn<9:5> | eq<4> | + // Pd<3:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); + VIXL_ASSERT(AreSameSizeAndType(rn, rm)); + const Instr sf = rn.Is64Bits() ? 0x00001000 : 0x00000000; + + Emit(0x25200810 | SVESize(pd) | sf | Pd(pd) | Rn(rn) | Rm(rm)); +} + +void Assembler::whilehs(const PRegisterWithLaneSize& pd, + const Register& rn, + const Register& rm) { + // WHILEHS ., , + // 0010 0101 ..1. .... 000. 10.. ...0 .... 
+ // size<23:22> | Rm<20:16> | sf<12> | U<11> | lt<10> | Rn<9:5> | eq<4> | + // Pd<3:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); + VIXL_ASSERT(AreSameSizeAndType(rn, rm)); + const Instr sf = rn.Is64Bits() ? 0x00001000 : 0x00000000; + + Emit(0x25200800 | SVESize(pd) | sf | Pd(pd) | Rn(rn) | Rm(rm)); +} + +void Assembler::whilerw(const PRegisterWithLaneSize& pd, + const Register& rn, + const Register& rm) { + // WHILERW ., , + // 0010 0101 ..1. .... 0011 00.. ...1 .... + // size<23:22> | Rm<20:16> | Rn<9:5> | rw<4> | Pd<3:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); + VIXL_ASSERT(rn.IsX() && rm.IsX()); + + Emit(0x25203010 | SVESize(pd) | Pd(pd) | Rn(rn) | Rm(rm)); +} + +void Assembler::whilewr(const PRegisterWithLaneSize& pd, + const Register& rn, + const Register& rm) { + // WHILEWR ., , + // 0010 0101 ..1. .... 0011 00.. ...0 .... + // size<23:22> | Rm<20:16> | Rn<9:5> | rw<4> | Pd<3:0> + + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); + VIXL_ASSERT(rn.IsX() && rm.IsX()); + + Emit(0x25203000 | SVESize(pd) | Pd(pd) | Rn(rn) | Rm(rm)); +} + +void Assembler::xar(const ZRegister& zd, + const ZRegister& zn, + const ZRegister& zm, + int shift) { + // XAR ., ., ., # + // 0000 0100 ..1. .... 0011 01.. .... .... + // tszh<23:22> | tszl<20:19> | imm3<18:16> | Zm<9:5> | Zdn<4:0> + + USE(zn); + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE2)); + VIXL_ASSERT(zd.Is(zn)); + VIXL_ASSERT(AreSameLaneSize(zd, zm)); + + Instr encoded_imm = + EncodeSVEShiftRightImmediate(shift, zd.GetLaneSizeInBits()); + SVEBitwiseShiftImmediate(zd, zm, encoded_imm, 0x04203400); +} + +void Assembler::fmmla(const ZRegister& zda, + const ZRegister& zn, + const ZRegister& zm) { + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT((CPUHas(CPUFeatures::kSVEF32MM) && zda.IsLaneSizeS()) || + (CPUHas(CPUFeatures::kSVEF64MM) && zda.IsLaneSizeD())); + VIXL_ASSERT(AreSameLaneSize(zda, zn, zm)); + + Emit(0x6420e400 | SVESize(zda) | Rd(zda) | Rn(zn) | Rm(zm)); +} + +void Assembler::smmla(const ZRegister& zda, + const ZRegister& zn, + const ZRegister& zm) { + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(CPUHas(CPUFeatures::kSVEI8MM)); + VIXL_ASSERT(zda.IsLaneSizeS()); + VIXL_ASSERT(zn.IsLaneSizeB() && zm.IsLaneSizeB()); + + Emit(0x45009800 | Rd(zda) | Rn(zn) | Rm(zm)); +} + +void Assembler::usmmla(const ZRegister& zda, + const ZRegister& zn, + const ZRegister& zm) { + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(CPUHas(CPUFeatures::kSVEI8MM)); + VIXL_ASSERT(zda.IsLaneSizeS()); + VIXL_ASSERT(zn.IsLaneSizeB() && zm.IsLaneSizeB()); + + Emit(0x45809800 | Rd(zda) | Rn(zn) | Rm(zm)); +} + +void Assembler::ummla(const ZRegister& zda, + const ZRegister& zn, + const ZRegister& zm) { + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(CPUHas(CPUFeatures::kSVEI8MM)); + VIXL_ASSERT(zda.IsLaneSizeS()); + VIXL_ASSERT(zn.IsLaneSizeB() && zm.IsLaneSizeB()); + + Emit(0x45c09800 | Rd(zda) | Rn(zn) | Rm(zm)); +} + +void Assembler::usdot(const ZRegister& zda, + const ZRegister& zn, + const ZRegister& zm) { + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(CPUHas(CPUFeatures::kSVEI8MM)); + VIXL_ASSERT(zda.IsLaneSizeS()); + VIXL_ASSERT(zn.IsLaneSizeB() && zm.IsLaneSizeB()); + + Emit(0x44807800 | Rd(zda) | Rn(zn) | Rm(zm)); +} + +void Assembler::usdot(const ZRegister& zda, + const ZRegister& zn, + const ZRegister& zm, + int index) { + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(CPUHas(CPUFeatures::kSVEI8MM)); + VIXL_ASSERT(zda.IsLaneSizeS()); + VIXL_ASSERT(zn.IsLaneSizeB() && zm.IsLaneSizeB()); + VIXL_ASSERT(zm.GetCode() <= 7); + 
VIXL_ASSERT(IsUint2(index)); + + Emit(0x44a01800 | Rx<18, 16>(zm) | (index << 19) | Rd(zda) | Rn(zn)); +} + +void Assembler::sudot(const ZRegister& zda, + const ZRegister& zn, + const ZRegister& zm, + int index) { + VIXL_ASSERT(CPUHas(CPUFeatures::kSVE)); + VIXL_ASSERT(CPUHas(CPUFeatures::kSVEI8MM)); + VIXL_ASSERT(zda.IsLaneSizeS()); + VIXL_ASSERT(zn.IsLaneSizeB() && zm.IsLaneSizeB()); + VIXL_ASSERT(zm.GetCode() <= 7); + VIXL_ASSERT(IsUint2(index)); + + Emit(0x44a01c00 | Rx<18, 16>(zm) | (index << 19) | Rd(zda) | Rn(zn)); +} + +} // namespace aarch64 +} // namespace vixl diff --git a/3rdparty/vixl/src/aarch64/cpu-aarch64.cc b/3rdparty/vixl/src/aarch64/cpu-aarch64.cc new file mode 100644 index 0000000000..3b70cfcd05 --- /dev/null +++ b/3rdparty/vixl/src/aarch64/cpu-aarch64.cc @@ -0,0 +1,581 @@ +// Copyright 2015, VIXL authors +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// * Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// * Neither the name of ARM Limited nor the names of its contributors may be +// used to endorse or promote products derived from this software without +// specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS CONTRIBUTORS "AS IS" AND +// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE +// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+
+#if defined(__aarch64__) && (defined(__ANDROID__) || defined(__linux__))
+#include <sys/auxv.h>
+#define VIXL_USE_LINUX_HWCAP 1
+#endif
+
+#include "../utils-vixl.h"
+
+#include "cpu-aarch64.h"
+
+namespace vixl {
+namespace aarch64 {
+
+
+const IDRegister::Field AA64PFR0::kFP(16, Field::kSigned);
+const IDRegister::Field AA64PFR0::kAdvSIMD(20, Field::kSigned);
+const IDRegister::Field AA64PFR0::kRAS(28);
+const IDRegister::Field AA64PFR0::kSVE(32);
+const IDRegister::Field AA64PFR0::kDIT(48);
+const IDRegister::Field AA64PFR0::kCSV2(56);
+const IDRegister::Field AA64PFR0::kCSV3(60);
+
+const IDRegister::Field AA64PFR1::kBT(0);
+const IDRegister::Field AA64PFR1::kSSBS(4);
+const IDRegister::Field AA64PFR1::kMTE(8);
+const IDRegister::Field AA64PFR1::kSME(24);
+
+const IDRegister::Field AA64ISAR0::kAES(4);
+const IDRegister::Field AA64ISAR0::kSHA1(8);
+const IDRegister::Field AA64ISAR0::kSHA2(12);
+const IDRegister::Field AA64ISAR0::kCRC32(16);
+const IDRegister::Field AA64ISAR0::kAtomic(20);
+const IDRegister::Field AA64ISAR0::kRDM(28);
+const IDRegister::Field AA64ISAR0::kSHA3(32);
+const IDRegister::Field AA64ISAR0::kSM3(36);
+const IDRegister::Field AA64ISAR0::kSM4(40);
+const IDRegister::Field AA64ISAR0::kDP(44);
+const IDRegister::Field AA64ISAR0::kFHM(48);
+const IDRegister::Field AA64ISAR0::kTS(52);
+const IDRegister::Field AA64ISAR0::kRNDR(60);
+
+const IDRegister::Field AA64ISAR1::kDPB(0);
+const IDRegister::Field AA64ISAR1::kAPA(4);
+const IDRegister::Field AA64ISAR1::kAPI(8);
+const IDRegister::Field AA64ISAR1::kJSCVT(12);
+const IDRegister::Field AA64ISAR1::kFCMA(16);
+const IDRegister::Field AA64ISAR1::kLRCPC(20);
+const IDRegister::Field AA64ISAR1::kGPA(24);
+const IDRegister::Field AA64ISAR1::kGPI(28);
+const IDRegister::Field AA64ISAR1::kFRINTTS(32);
+const IDRegister::Field AA64ISAR1::kSB(36);
+const IDRegister::Field AA64ISAR1::kSPECRES(40);
+const IDRegister::Field AA64ISAR1::kBF16(44);
+const IDRegister::Field AA64ISAR1::kDGH(48);
+const IDRegister::Field AA64ISAR1::kI8MM(52);
+
+const IDRegister::Field AA64ISAR2::kWFXT(0);
+const IDRegister::Field AA64ISAR2::kRPRES(4);
+const IDRegister::Field AA64ISAR2::kMOPS(16);
+const IDRegister::Field AA64ISAR2::kCSSC(52);
+
+const IDRegister::Field AA64MMFR0::kECV(60);
+
+const IDRegister::Field AA64MMFR1::kLO(16);
+const IDRegister::Field AA64MMFR1::kAFP(44);
+
+const IDRegister::Field AA64MMFR2::kAT(32);
+
+const IDRegister::Field AA64ZFR0::kSVEver(0);
+const IDRegister::Field AA64ZFR0::kAES(4);
+const IDRegister::Field AA64ZFR0::kBitPerm(16);
+const IDRegister::Field AA64ZFR0::kBF16(20);
+const IDRegister::Field AA64ZFR0::kSHA3(32);
+const IDRegister::Field AA64ZFR0::kSM4(40);
+const IDRegister::Field AA64ZFR0::kI8MM(44);
+const IDRegister::Field AA64ZFR0::kF32MM(52);
+const IDRegister::Field AA64ZFR0::kF64MM(56);
+
+const IDRegister::Field AA64SMFR0::kSMEf32f32(32, 1);
+const IDRegister::Field AA64SMFR0::kSMEb16f32(34, 1);
+const IDRegister::Field AA64SMFR0::kSMEf16f32(35, 1);
+const IDRegister::Field AA64SMFR0::kSMEi8i32(36);
+const IDRegister::Field AA64SMFR0::kSMEf64f64(48, 1);
+const IDRegister::Field AA64SMFR0::kSMEi16i64(52);
+const IDRegister::Field AA64SMFR0::kSMEfa64(63, 1);
+
+CPUFeatures AA64PFR0::GetCPUFeatures() const {
+  CPUFeatures f;
+  if (Get(kFP) >= 0) f.Combine(CPUFeatures::kFP);
+  if (Get(kFP) >= 1) f.Combine(CPUFeatures::kFPHalf);
+  if (Get(kAdvSIMD) >= 0) f.Combine(CPUFeatures::kNEON);
+  if (Get(kAdvSIMD) >= 1) f.Combine(CPUFeatures::kNEONHalf);
+  if (Get(kRAS) >= 1) f.Combine(CPUFeatures::kRAS);
+  if (Get(kSVE)
>= 1) f.Combine(CPUFeatures::kSVE); + if (Get(kDIT) >= 1) f.Combine(CPUFeatures::kDIT); + if (Get(kCSV2) >= 1) f.Combine(CPUFeatures::kCSV2); + if (Get(kCSV2) >= 2) f.Combine(CPUFeatures::kSCXTNUM); + if (Get(kCSV3) >= 1) f.Combine(CPUFeatures::kCSV3); + return f; +} + +CPUFeatures AA64PFR1::GetCPUFeatures() const { + CPUFeatures f; + if (Get(kBT) >= 1) f.Combine(CPUFeatures::kBTI); + if (Get(kSSBS) >= 1) f.Combine(CPUFeatures::kSSBS); + if (Get(kSSBS) >= 2) f.Combine(CPUFeatures::kSSBSControl); + if (Get(kMTE) >= 1) f.Combine(CPUFeatures::kMTEInstructions); + if (Get(kMTE) >= 2) f.Combine(CPUFeatures::kMTE); + if (Get(kMTE) >= 3) f.Combine(CPUFeatures::kMTE3); + if (Get(kSME) >= 1) f.Combine(CPUFeatures::kSME); + return f; +} + +CPUFeatures AA64ISAR0::GetCPUFeatures() const { + CPUFeatures f; + if (Get(kAES) >= 1) f.Combine(CPUFeatures::kAES); + if (Get(kAES) >= 2) f.Combine(CPUFeatures::kPmull1Q); + if (Get(kSHA1) >= 1) f.Combine(CPUFeatures::kSHA1); + if (Get(kSHA2) >= 1) f.Combine(CPUFeatures::kSHA2); + if (Get(kSHA2) >= 2) f.Combine(CPUFeatures::kSHA512); + if (Get(kCRC32) >= 1) f.Combine(CPUFeatures::kCRC32); + if (Get(kAtomic) >= 1) f.Combine(CPUFeatures::kAtomics); + if (Get(kRDM) >= 1) f.Combine(CPUFeatures::kRDM); + if (Get(kSHA3) >= 1) f.Combine(CPUFeatures::kSHA3); + if (Get(kSM3) >= 1) f.Combine(CPUFeatures::kSM3); + if (Get(kSM4) >= 1) f.Combine(CPUFeatures::kSM4); + if (Get(kDP) >= 1) f.Combine(CPUFeatures::kDotProduct); + if (Get(kFHM) >= 1) f.Combine(CPUFeatures::kFHM); + if (Get(kTS) >= 1) f.Combine(CPUFeatures::kFlagM); + if (Get(kTS) >= 2) f.Combine(CPUFeatures::kAXFlag); + if (Get(kRNDR) >= 1) f.Combine(CPUFeatures::kRNG); + return f; +} + +CPUFeatures AA64ISAR1::GetCPUFeatures() const { + CPUFeatures f; + if (Get(kDPB) >= 1) f.Combine(CPUFeatures::kDCPoP); + if (Get(kDPB) >= 2) f.Combine(CPUFeatures::kDCCVADP); + if (Get(kJSCVT) >= 1) f.Combine(CPUFeatures::kJSCVT); + if (Get(kFCMA) >= 1) f.Combine(CPUFeatures::kFcma); + if (Get(kLRCPC) >= 1) f.Combine(CPUFeatures::kRCpc); + if (Get(kLRCPC) >= 2) f.Combine(CPUFeatures::kRCpcImm); + if (Get(kFRINTTS) >= 1) f.Combine(CPUFeatures::kFrintToFixedSizedInt); + if (Get(kSB) >= 1) f.Combine(CPUFeatures::kSB); + if (Get(kSPECRES) >= 1) f.Combine(CPUFeatures::kSPECRES); + if (Get(kBF16) >= 1) f.Combine(CPUFeatures::kBF16); + if (Get(kBF16) >= 2) f.Combine(CPUFeatures::kEBF16); + if (Get(kDGH) >= 1) f.Combine(CPUFeatures::kDGH); + if (Get(kI8MM) >= 1) f.Combine(CPUFeatures::kI8MM); + + // Only one of these fields should be non-zero, but they have the same + // encodings, so merge the logic. + int apx = std::max(Get(kAPI), Get(kAPA)); + if (apx >= 1) { + f.Combine(CPUFeatures::kPAuth); + // APA (rather than API) indicates QARMA. 
+    if (Get(kAPA) >= 1) f.Combine(CPUFeatures::kPAuthQARMA);
+    if (apx == 0b0010) f.Combine(CPUFeatures::kPAuthEnhancedPAC);
+    if (apx >= 0b0011) f.Combine(CPUFeatures::kPAuthEnhancedPAC2);
+    if (apx >= 0b0100) f.Combine(CPUFeatures::kPAuthFPAC);
+    if (apx >= 0b0101) f.Combine(CPUFeatures::kPAuthFPACCombined);
+  }
+
+  if (Get(kGPI) >= 1) f.Combine(CPUFeatures::kPAuthGeneric);
+  if (Get(kGPA) >= 1) {
+    f.Combine(CPUFeatures::kPAuthGeneric, CPUFeatures::kPAuthGenericQARMA);
+  }
+  return f;
+}
+
+CPUFeatures AA64ISAR2::GetCPUFeatures() const {
+  CPUFeatures f;
+  if (Get(kWFXT) >= 2) f.Combine(CPUFeatures::kWFXT);
+  if (Get(kRPRES) >= 1) f.Combine(CPUFeatures::kRPRES);
+  if (Get(kMOPS) >= 1) f.Combine(CPUFeatures::kMOPS);
+  if (Get(kCSSC) >= 1) f.Combine(CPUFeatures::kCSSC);
+  return f;
+}
+
+CPUFeatures AA64MMFR0::GetCPUFeatures() const {
+  CPUFeatures f;
+  if (Get(kECV) >= 1) f.Combine(CPUFeatures::kECV);
+  return f;
+}
+
+CPUFeatures AA64MMFR1::GetCPUFeatures() const {
+  CPUFeatures f;
+  if (Get(kLO) >= 1) f.Combine(CPUFeatures::kLORegions);
+  if (Get(kAFP) >= 1) f.Combine(CPUFeatures::kAFP);
+  return f;
+}
+
+CPUFeatures AA64MMFR2::GetCPUFeatures() const {
+  CPUFeatures f;
+  if (Get(kAT) >= 1) f.Combine(CPUFeatures::kUSCAT);
+  return f;
+}
+
+CPUFeatures AA64ZFR0::GetCPUFeatures() const {
+  // This register is only available with SVE, but reads-as-zero in its
+  // absence, so it's always safe to read it.
+  CPUFeatures f;
+  if (Get(kF64MM) >= 1) f.Combine(CPUFeatures::kSVEF64MM);
+  if (Get(kF32MM) >= 1) f.Combine(CPUFeatures::kSVEF32MM);
+  if (Get(kI8MM) >= 1) f.Combine(CPUFeatures::kSVEI8MM);
+  if (Get(kSM4) >= 1) f.Combine(CPUFeatures::kSVESM4);
+  if (Get(kSHA3) >= 1) f.Combine(CPUFeatures::kSVESHA3);
+  if (Get(kBF16) >= 1) f.Combine(CPUFeatures::kSVEBF16);
+  if (Get(kBF16) >= 2) f.Combine(CPUFeatures::kSVE_EBF16);
+  if (Get(kBitPerm) >= 1) f.Combine(CPUFeatures::kSVEBitPerm);
+  if (Get(kAES) >= 1) f.Combine(CPUFeatures::kSVEAES);
+  if (Get(kAES) >= 2) f.Combine(CPUFeatures::kSVEPmull128);
+  if (Get(kSVEver) >= 1) f.Combine(CPUFeatures::kSVE2);
+  return f;
+}
+
+CPUFeatures AA64SMFR0::GetCPUFeatures() const {
+  CPUFeatures f;
+  if (Get(kSMEf32f32) >= 1) f.Combine(CPUFeatures::kSMEf32f32);
+  if (Get(kSMEb16f32) >= 1) f.Combine(CPUFeatures::kSMEb16f32);
+  if (Get(kSMEf16f32) >= 1) f.Combine(CPUFeatures::kSMEf16f32);
+  if (Get(kSMEi8i32) >= 15) f.Combine(CPUFeatures::kSMEi8i32);
+  if (Get(kSMEf64f64) >= 1) f.Combine(CPUFeatures::kSMEf64f64);
+  if (Get(kSMEi16i64) >= 15) f.Combine(CPUFeatures::kSMEi16i64);
+  if (Get(kSMEfa64) >= 1) f.Combine(CPUFeatures::kSMEfa64);
+  return f;
+}
+
+int IDRegister::Get(IDRegister::Field field) const {
+  int msb = field.GetMsb();
+  int lsb = field.GetLsb();
+  VIXL_STATIC_ASSERT(static_cast<size_t>(Field::kMaxWidthInBits) <
+                     (sizeof(int) * kBitsPerByte));
+  switch (field.GetType()) {
+    case Field::kSigned:
+      return static_cast<int>(ExtractSignedBitfield64(msb, lsb, value_));
+    case Field::kUnsigned:
+      return static_cast<int>(ExtractUnsignedBitfield64(msb, lsb, value_));
+  }
+  VIXL_UNREACHABLE();
+  return 0;
+}
+
+CPUFeatures CPU::InferCPUFeaturesFromIDRegisters() {
+  CPUFeatures f;
+#define VIXL_COMBINE_ID_REG(NAME, MRS_ARG) \
+  f.Combine(Read##NAME().GetCPUFeatures());
+  VIXL_AARCH64_ID_REG_LIST(VIXL_COMBINE_ID_REG)
+#undef VIXL_COMBINE_ID_REG
+  return f;
+}
+
+CPUFeatures CPU::InferCPUFeaturesFromOS(
+    CPUFeatures::QueryIDRegistersOption option) {
+  CPUFeatures features;
+
+#ifdef VIXL_USE_LINUX_HWCAP
+  // Map each set bit onto a feature.
Ideally, we'd use HWCAP_* macros rather + // than explicit bits, but explicit bits allow us to identify features that + // the toolchain doesn't know about. + static const CPUFeatures::Feature kFeatureBitsLow[] = + {// Bits 0-7 + CPUFeatures::kFP, + CPUFeatures::kNEON, + CPUFeatures::kNone, // "EVTSTRM", which VIXL doesn't track. + CPUFeatures::kAES, + CPUFeatures::kPmull1Q, + CPUFeatures::kSHA1, + CPUFeatures::kSHA2, + CPUFeatures::kCRC32, + // Bits 8-15 + CPUFeatures::kAtomics, + CPUFeatures::kFPHalf, + CPUFeatures::kNEONHalf, + CPUFeatures::kIDRegisterEmulation, + CPUFeatures::kRDM, + CPUFeatures::kJSCVT, + CPUFeatures::kFcma, + CPUFeatures::kRCpc, + // Bits 16-23 + CPUFeatures::kDCPoP, + CPUFeatures::kSHA3, + CPUFeatures::kSM3, + CPUFeatures::kSM4, + CPUFeatures::kDotProduct, + CPUFeatures::kSHA512, + CPUFeatures::kSVE, + CPUFeatures::kFHM, + // Bits 24-31 + CPUFeatures::kDIT, + CPUFeatures::kUSCAT, + CPUFeatures::kRCpcImm, + CPUFeatures::kFlagM, + CPUFeatures::kSSBSControl, + CPUFeatures::kSB, + CPUFeatures::kPAuth, + CPUFeatures::kPAuthGeneric}; + VIXL_STATIC_ASSERT(ArrayLength(kFeatureBitsLow) < 64); + + static const CPUFeatures::Feature kFeatureBitsHigh[] = + {// Bits 0-7 + CPUFeatures::kDCCVADP, + CPUFeatures::kSVE2, + CPUFeatures::kSVEAES, + CPUFeatures::kSVEPmull128, + CPUFeatures::kSVEBitPerm, + CPUFeatures::kSVESHA3, + CPUFeatures::kSVESM4, + CPUFeatures::kAXFlag, + // Bits 8-15 + CPUFeatures::kFrintToFixedSizedInt, + CPUFeatures::kSVEI8MM, + CPUFeatures::kSVEF32MM, + CPUFeatures::kSVEF64MM, + CPUFeatures::kSVEBF16, + CPUFeatures::kI8MM, + CPUFeatures::kBF16, + CPUFeatures::kDGH, + // Bits 16-23 + CPUFeatures::kRNG, + CPUFeatures::kBTI, + CPUFeatures::kMTE, + CPUFeatures::kECV, + CPUFeatures::kAFP, + CPUFeatures::kRPRES, + CPUFeatures::kMTE3, + CPUFeatures::kSME, + // Bits 24-31 + CPUFeatures::kSMEi16i64, + CPUFeatures::kSMEf64f64, + CPUFeatures::kSMEi8i32, + CPUFeatures::kSMEf16f32, + CPUFeatures::kSMEb16f32, + CPUFeatures::kSMEf32f32, + CPUFeatures::kSMEfa64, + CPUFeatures::kWFXT, + // Bits 32-39 + CPUFeatures::kEBF16, + CPUFeatures::kSVE_EBF16}; + VIXL_STATIC_ASSERT(ArrayLength(kFeatureBitsHigh) < 64); + + auto combine_features = [&features](uint64_t hwcap, + const CPUFeatures::Feature* feature_array, + size_t features_size) { + for (size_t i = 0; i < features_size; i++) { + if (hwcap & (UINT64_C(1) << i)) features.Combine(feature_array[i]); + } + }; + + uint64_t hwcap_low = getauxval(AT_HWCAP); + uint64_t hwcap_high = getauxval(AT_HWCAP2); + + combine_features(hwcap_low, kFeatureBitsLow, ArrayLength(kFeatureBitsLow)); + combine_features(hwcap_high, kFeatureBitsHigh, ArrayLength(kFeatureBitsHigh)); + + // MTE support from HWCAP2 signifies FEAT_MTE1 and FEAT_MTE2 support + if (features.Has(CPUFeatures::kMTE)) { + features.Combine(CPUFeatures::kMTEInstructions); + } +#endif // VIXL_USE_LINUX_HWCAP + + if ((option == CPUFeatures::kQueryIDRegistersIfAvailable) && + (features.Has(CPUFeatures::kIDRegisterEmulation))) { + features.Combine(InferCPUFeaturesFromIDRegisters()); + } + return features; +} + + +#ifdef __aarch64__ +#define VIXL_READ_ID_REG(NAME, MRS_ARG) \ + NAME CPU::Read##NAME() { \ + uint64_t value = 0; \ + __asm__("mrs %0, " MRS_ARG : "=r"(value)); \ + return NAME(value); \ + } +#else // __aarch64__ +#define VIXL_READ_ID_REG(NAME, MRS_ARG) \ + NAME CPU::Read##NAME() { \ + VIXL_UNREACHABLE(); \ + return NAME(0); \ + } +#endif // __aarch64__ + +VIXL_AARCH64_ID_REG_LIST(VIXL_READ_ID_REG) + +#undef VIXL_READ_ID_REG + + +// Initialise to smallest possible cache size. 
+unsigned CPU::dcache_line_size_ = 1;
+unsigned CPU::icache_line_size_ = 1;
+
+
+// Currently computes I and D cache line size.
+void CPU::SetUp() {
+  uint32_t cache_type_register = GetCacheType();
+
+  // The cache type register holds information about the caches, including I
+  // and D caches line size.
+  static const int kDCacheLineSizeShift = 16;
+  static const int kICacheLineSizeShift = 0;
+  static const uint32_t kDCacheLineSizeMask = 0xf << kDCacheLineSizeShift;
+  static const uint32_t kICacheLineSizeMask = 0xf << kICacheLineSizeShift;
+
+  // The cache type register holds the size of the I and D caches in words as
+  // a power of two.
+  uint32_t dcache_line_size_power_of_two =
+      (cache_type_register & kDCacheLineSizeMask) >> kDCacheLineSizeShift;
+  uint32_t icache_line_size_power_of_two =
+      (cache_type_register & kICacheLineSizeMask) >> kICacheLineSizeShift;
+
+  dcache_line_size_ = 4 << dcache_line_size_power_of_two;
+  icache_line_size_ = 4 << icache_line_size_power_of_two;
+}
+
+
+uint32_t CPU::GetCacheType() {
+#ifdef __aarch64__
+  uint64_t cache_type_register;
+  // Copy the content of the cache type register to a core register.
+  __asm__ __volatile__("mrs %[ctr], ctr_el0"  // NOLINT(runtime/references)
+                       : [ctr] "=r"(cache_type_register));
+  VIXL_ASSERT(IsUint32(cache_type_register));
+  return static_cast<uint32_t>(cache_type_register);
+#else
+  // This will lead to a cache with 1 byte long lines, which is fine since
+  // neither EnsureIAndDCacheCoherency nor the simulator will need this
+  // information.
+  return 0;
+#endif
+}
+
+
+// Query the SVE vector length. This requires CPUFeatures::kSVE.
+int CPU::ReadSVEVectorLengthInBits() {
+#ifdef __aarch64__
+  uint64_t vl;
+  // To support compilers that don't understand `rdvl`, encode the value
+  // directly and move it manually.
+  __asm__(
+      " .word 0x04bf5100\n"  // rdvl x0, #8
+      " mov %[vl], x0\n"
+      : [vl] "=r"(vl)
+      :
+      : "x0");
+  VIXL_ASSERT(vl <= INT_MAX);
+  return static_cast<int>(vl);
+#else
+  VIXL_UNREACHABLE();
+  return 0;
+#endif
+}
+
+
+void CPU::EnsureIAndDCacheCoherency(void* address, size_t length) {
+#ifdef __aarch64__
+  // Implement the cache synchronisation for all targets where AArch64 is the
+  // host, even if we're building the simulator for an AArch64 host. This
+  // allows for cases where the user wants to simulate code as well as run it
+  // natively.
+
+  if (length == 0) {
+    return;
+  }
+
+  // The code below assumes user space cache operations are allowed.
+
+  // Work out the line sizes for each cache, and use them to determine the
+  // start addresses.
+  uintptr_t start = reinterpret_cast<uintptr_t>(address);
+  uintptr_t dsize = static_cast<uintptr_t>(dcache_line_size_);
+  uintptr_t isize = static_cast<uintptr_t>(icache_line_size_);
+  uintptr_t dline = start & ~(dsize - 1);
+  uintptr_t iline = start & ~(isize - 1);
+
+  // Cache line sizes are always a power of 2.
+  VIXL_ASSERT(IsPowerOf2(dsize));
+  VIXL_ASSERT(IsPowerOf2(isize));
+  uintptr_t end = start + length;
+
+  do {
+    __asm__ __volatile__(
+        // Clean each line of the D cache containing the target data.
+        //
+        //   dc      : Data Cache maintenance
+        //    c      : Clean
+        //     va    : by (Virtual) Address
+        //       u   : to the point of Unification
+        // The point of unification for a processor is the point by which the
+        // instruction and data caches are guaranteed to see the same copy of a
+        // memory location. See ARM DDI 0406B page B2-12 for more information.
+        "   dc    cvau, %[dline]\n"
+        :
+        : [dline] "r"(dline)
+        // This code does not write to memory, but the "memory" dependency
+        // prevents GCC from reordering the code.
+ : "memory"); + dline += dsize; + } while (dline < end); + + __asm__ __volatile__( + // Make sure that the data cache operations (above) complete before the + // instruction cache operations (below). + // + // dsb : Data Synchronisation Barrier + // ish : Inner SHareable domain + // + // The point of unification for an Inner Shareable shareability domain is + // the point by which the instruction and data caches of all the + // processors + // in that Inner Shareable shareability domain are guaranteed to see the + // same copy of a memory location. See ARM DDI 0406B page B2-12 for more + // information. + " dsb ish\n" + : + : + : "memory"); + + do { + __asm__ __volatile__( + // Invalidate each line of the I cache containing the target data. + // + // ic : Instruction Cache maintenance + // i : Invalidate + // va : by Address + // u : to the point of Unification + " ic ivau, %[iline]\n" + : + : [iline] "r"(iline) + : "memory"); + iline += isize; + } while (iline < end); + + __asm__ __volatile__( + // Make sure that the instruction cache operations (above) take effect + // before the isb (below). + " dsb ish\n" + + // Ensure that any instructions already in the pipeline are discarded and + // reloaded from the new data. + // isb : Instruction Synchronisation Barrier + " isb\n" + : + : + : "memory"); +#else + // If the host isn't AArch64, we must be using the simulator, so this function + // doesn't have to do anything. + USE(address, length); +#endif +} + + +} // namespace aarch64 +} // namespace vixl diff --git a/3rdparty/vixl/src/aarch64/cpu-features-auditor-aarch64.cc b/3rdparty/vixl/src/aarch64/cpu-features-auditor-aarch64.cc new file mode 100644 index 0000000000..563ee078d2 --- /dev/null +++ b/3rdparty/vixl/src/aarch64/cpu-features-auditor-aarch64.cc @@ -0,0 +1,1840 @@ +// Copyright 2018, VIXL authors +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// * Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// * Neither the name of Arm Limited nor the names of its contributors may be +// used to endorse or promote products derived from this software without +// specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS CONTRIBUTORS "AS IS" AND +// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE +// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +#include "cpu-features.h" +#include "globals-vixl.h" +#include "utils-vixl.h" +#include "decoder-aarch64.h" + +#include "cpu-features-auditor-aarch64.h" + +namespace vixl { +namespace aarch64 { + + +const CPUFeaturesAuditor::FormToVisitorFnMap* +CPUFeaturesAuditor::GetFormToVisitorFnMap() { + static const FormToVisitorFnMap form_to_visitor = { + DEFAULT_FORM_TO_VISITOR_MAP(CPUFeaturesAuditor), + SIM_AUD_VISITOR_MAP(CPUFeaturesAuditor), + {"fcmla_asimdelem_c_h"_h, &CPUFeaturesAuditor::VisitNEONByIndexedElement}, + {"fcmla_asimdelem_c_s"_h, &CPUFeaturesAuditor::VisitNEONByIndexedElement}, + {"fmlal2_asimdelem_lh"_h, &CPUFeaturesAuditor::VisitNEONByIndexedElement}, + {"fmlal_asimdelem_lh"_h, &CPUFeaturesAuditor::VisitNEONByIndexedElement}, + {"fmla_asimdelem_rh_h"_h, &CPUFeaturesAuditor::VisitNEONByIndexedElement}, + {"fmla_asimdelem_r_sd"_h, &CPUFeaturesAuditor::VisitNEONByIndexedElement}, + {"fmlsl2_asimdelem_lh"_h, &CPUFeaturesAuditor::VisitNEONByIndexedElement}, + {"fmlsl_asimdelem_lh"_h, &CPUFeaturesAuditor::VisitNEONByIndexedElement}, + {"fmls_asimdelem_rh_h"_h, &CPUFeaturesAuditor::VisitNEONByIndexedElement}, + {"fmls_asimdelem_r_sd"_h, &CPUFeaturesAuditor::VisitNEONByIndexedElement}, + {"fmulx_asimdelem_rh_h"_h, + &CPUFeaturesAuditor::VisitNEONByIndexedElement}, + {"fmulx_asimdelem_r_sd"_h, + &CPUFeaturesAuditor::VisitNEONByIndexedElement}, + {"fmul_asimdelem_rh_h"_h, &CPUFeaturesAuditor::VisitNEONByIndexedElement}, + {"fmul_asimdelem_r_sd"_h, &CPUFeaturesAuditor::VisitNEONByIndexedElement}, + {"sdot_asimdelem_d"_h, &CPUFeaturesAuditor::VisitNEONByIndexedElement}, + {"smlal_asimdelem_l"_h, &CPUFeaturesAuditor::VisitNEONByIndexedElement}, + {"smlsl_asimdelem_l"_h, &CPUFeaturesAuditor::VisitNEONByIndexedElement}, + {"smull_asimdelem_l"_h, &CPUFeaturesAuditor::VisitNEONByIndexedElement}, + {"sqdmlal_asimdelem_l"_h, &CPUFeaturesAuditor::VisitNEONByIndexedElement}, + {"sqdmlsl_asimdelem_l"_h, &CPUFeaturesAuditor::VisitNEONByIndexedElement}, + {"sqdmull_asimdelem_l"_h, &CPUFeaturesAuditor::VisitNEONByIndexedElement}, + {"udot_asimdelem_d"_h, &CPUFeaturesAuditor::VisitNEONByIndexedElement}, + {"umlal_asimdelem_l"_h, &CPUFeaturesAuditor::VisitNEONByIndexedElement}, + {"umlsl_asimdelem_l"_h, &CPUFeaturesAuditor::VisitNEONByIndexedElement}, + {"umull_asimdelem_l"_h, &CPUFeaturesAuditor::VisitNEONByIndexedElement}, + }; + return &form_to_visitor; +} + +// Every instruction must update last_instruction_, even if only to clear it, +// and every instruction must also update seen_ once it has been fully handled. +// This scope makes that simple, and allows early returns in the decode logic. +class CPUFeaturesAuditor::RecordInstructionFeaturesScope { + public: + explicit RecordInstructionFeaturesScope(CPUFeaturesAuditor* auditor) + : auditor_(auditor) { + auditor_->last_instruction_ = CPUFeatures::None(); + } + ~RecordInstructionFeaturesScope() { + auditor_->seen_.Combine(auditor_->last_instruction_); + } + + void Record(const CPUFeatures& features) { + auditor_->last_instruction_.Combine(features); + } + + void Record(CPUFeatures::Feature feature0, + CPUFeatures::Feature feature1 = CPUFeatures::kNone, + CPUFeatures::Feature feature2 = CPUFeatures::kNone, + CPUFeatures::Feature feature3 = CPUFeatures::kNone) { + auditor_->last_instruction_.Combine(feature0, feature1, feature2, feature3); + } + + // If exactly one of a or b is known to be available, record it. Otherwise, + // record both. This is intended for encodings that can be provided by two + // different features. 
+ void RecordOneOrBothOf(CPUFeatures::Feature a, CPUFeatures::Feature b) { + bool hint_a = auditor_->available_.Has(a); + bool hint_b = auditor_->available_.Has(b); + if (hint_a && !hint_b) { + Record(a); + } else if (hint_b && !hint_a) { + Record(b); + } else { + Record(a, b); + } + } + + private: + CPUFeaturesAuditor* auditor_; +}; + +void CPUFeaturesAuditor::LoadStoreHelper(const Instruction* instr) { + RecordInstructionFeaturesScope scope(this); + switch (instr->Mask(LoadStoreMask)) { + case LDR_b: + case LDR_q: + case STR_b: + case STR_q: + scope.Record(CPUFeatures::kNEON); + return; + case LDR_h: + case LDR_s: + case LDR_d: + case STR_h: + case STR_s: + case STR_d: + scope.RecordOneOrBothOf(CPUFeatures::kFP, CPUFeatures::kNEON); + return; + default: + // No special CPU features. + return; + } +} + +void CPUFeaturesAuditor::LoadStorePairHelper(const Instruction* instr) { + RecordInstructionFeaturesScope scope(this); + switch (instr->Mask(LoadStorePairMask)) { + case LDP_q: + case STP_q: + scope.Record(CPUFeatures::kNEON); + return; + case LDP_s: + case LDP_d: + case STP_s: + case STP_d: { + scope.RecordOneOrBothOf(CPUFeatures::kFP, CPUFeatures::kNEON); + return; + } + default: + // No special CPU features. + return; + } +} + +void CPUFeaturesAuditor::VisitAddSubExtended(const Instruction* instr) { + RecordInstructionFeaturesScope scope(this); + USE(instr); +} + +void CPUFeaturesAuditor::VisitAddSubImmediate(const Instruction* instr) { + RecordInstructionFeaturesScope scope(this); + USE(instr); +} + +void CPUFeaturesAuditor::VisitAddSubShifted(const Instruction* instr) { + RecordInstructionFeaturesScope scope(this); + USE(instr); +} + +void CPUFeaturesAuditor::VisitAddSubWithCarry(const Instruction* instr) { + RecordInstructionFeaturesScope scope(this); + USE(instr); +} + +void CPUFeaturesAuditor::VisitRotateRightIntoFlags(const Instruction* instr) { + RecordInstructionFeaturesScope scope(this); + switch (instr->Mask(RotateRightIntoFlagsMask)) { + case RMIF: + scope.Record(CPUFeatures::kFlagM); + return; + } +} + +void CPUFeaturesAuditor::VisitEvaluateIntoFlags(const Instruction* instr) { + RecordInstructionFeaturesScope scope(this); + switch (instr->Mask(EvaluateIntoFlagsMask)) { + case SETF8: + case SETF16: + scope.Record(CPUFeatures::kFlagM); + return; + } +} + +void CPUFeaturesAuditor::VisitAtomicMemory(const Instruction* instr) { + RecordInstructionFeaturesScope scope(this); + switch (instr->Mask(AtomicMemoryMask)) { + case LDAPRB: + case LDAPRH: + case LDAPR_w: + case LDAPR_x: + scope.Record(CPUFeatures::kRCpc); + return; + default: + // Everything else belongs to the Atomics extension. 
+ scope.Record(CPUFeatures::kAtomics); + return; + } +} + +void CPUFeaturesAuditor::VisitBitfield(const Instruction* instr) { + RecordInstructionFeaturesScope scope(this); + USE(instr); +} + +void CPUFeaturesAuditor::VisitCompareBranch(const Instruction* instr) { + RecordInstructionFeaturesScope scope(this); + USE(instr); +} + +void CPUFeaturesAuditor::VisitConditionalBranch(const Instruction* instr) { + RecordInstructionFeaturesScope scope(this); + USE(instr); +} + +void CPUFeaturesAuditor::VisitConditionalCompareImmediate( + const Instruction* instr) { + RecordInstructionFeaturesScope scope(this); + USE(instr); +} + +void CPUFeaturesAuditor::VisitConditionalCompareRegister( + const Instruction* instr) { + RecordInstructionFeaturesScope scope(this); + USE(instr); +} + +void CPUFeaturesAuditor::VisitConditionalSelect(const Instruction* instr) { + RecordInstructionFeaturesScope scope(this); + USE(instr); +} + +void CPUFeaturesAuditor::VisitCrypto2RegSHA(const Instruction* instr) { + RecordInstructionFeaturesScope scope(this); + USE(instr); +} + +void CPUFeaturesAuditor::VisitCrypto3RegSHA(const Instruction* instr) { + RecordInstructionFeaturesScope scope(this); + USE(instr); +} + +void CPUFeaturesAuditor::VisitCryptoAES(const Instruction* instr) { + RecordInstructionFeaturesScope scope(this); + USE(instr); +} + +void CPUFeaturesAuditor::VisitDataProcessing1Source(const Instruction* instr) { + RecordInstructionFeaturesScope scope(this); + switch (instr->Mask(DataProcessing1SourceMask)) { + case PACIA: + case PACIB: + case PACDA: + case PACDB: + case AUTIA: + case AUTIB: + case AUTDA: + case AUTDB: + case PACIZA: + case PACIZB: + case PACDZA: + case PACDZB: + case AUTIZA: + case AUTIZB: + case AUTDZA: + case AUTDZB: + case XPACI: + case XPACD: + scope.Record(CPUFeatures::kPAuth); + return; + default: + // No special CPU features. + return; + } +} + +void CPUFeaturesAuditor::VisitDataProcessing2Source(const Instruction* instr) { + RecordInstructionFeaturesScope scope(this); + switch (instr->Mask(DataProcessing2SourceMask)) { + case CRC32B: + case CRC32H: + case CRC32W: + case CRC32X: + case CRC32CB: + case CRC32CH: + case CRC32CW: + case CRC32CX: + scope.Record(CPUFeatures::kCRC32); + return; + case PACGA: + scope.Record(CPUFeatures::kPAuth, CPUFeatures::kPAuthGeneric); + return; + default: + // No special CPU features. + return; + } +} + +void CPUFeaturesAuditor::VisitLoadStoreRCpcUnscaledOffset( + const Instruction* instr) { + RecordInstructionFeaturesScope scope(this); + switch (instr->Mask(LoadStoreRCpcUnscaledOffsetMask)) { + case LDAPURB: + case LDAPURSB_w: + case LDAPURSB_x: + case LDAPURH: + case LDAPURSH_w: + case LDAPURSH_x: + case LDAPUR_w: + case LDAPURSW: + case LDAPUR_x: + + // These stores don't actually have RCpc semantics but they're included with + // the RCpc extensions. 
+ case STLURB: + case STLURH: + case STLUR_w: + case STLUR_x: + scope.Record(CPUFeatures::kRCpc, CPUFeatures::kRCpcImm); + return; + } +} + +void CPUFeaturesAuditor::VisitLoadStorePAC(const Instruction* instr) { + RecordInstructionFeaturesScope scope(this); + USE(instr); + scope.Record(CPUFeatures::kPAuth); +} + +void CPUFeaturesAuditor::VisitDataProcessing3Source(const Instruction* instr) { + RecordInstructionFeaturesScope scope(this); + USE(instr); +} + +void CPUFeaturesAuditor::VisitException(const Instruction* instr) { + RecordInstructionFeaturesScope scope(this); + USE(instr); +} + +void CPUFeaturesAuditor::VisitExtract(const Instruction* instr) { + RecordInstructionFeaturesScope scope(this); + USE(instr); +} + +void CPUFeaturesAuditor::VisitFPCompare(const Instruction* instr) { + RecordInstructionFeaturesScope scope(this); + // All of these instructions require FP. + scope.Record(CPUFeatures::kFP); + switch (instr->Mask(FPCompareMask)) { + case FCMP_h: + case FCMP_h_zero: + case FCMPE_h: + case FCMPE_h_zero: + scope.Record(CPUFeatures::kFPHalf); + return; + default: + // No special CPU features. + return; + } +} + +void CPUFeaturesAuditor::VisitFPConditionalCompare(const Instruction* instr) { + RecordInstructionFeaturesScope scope(this); + // All of these instructions require FP. + scope.Record(CPUFeatures::kFP); + switch (instr->Mask(FPConditionalCompareMask)) { + case FCCMP_h: + case FCCMPE_h: + scope.Record(CPUFeatures::kFPHalf); + return; + default: + // No special CPU features. + return; + } +} + +void CPUFeaturesAuditor::VisitFPConditionalSelect(const Instruction* instr) { + RecordInstructionFeaturesScope scope(this); + // All of these instructions require FP. + scope.Record(CPUFeatures::kFP); + if (instr->Mask(FPConditionalSelectMask) == FCSEL_h) { + scope.Record(CPUFeatures::kFPHalf); + } +} + +void CPUFeaturesAuditor::VisitFPDataProcessing1Source( + const Instruction* instr) { + RecordInstructionFeaturesScope scope(this); + // All of these instructions require FP. + scope.Record(CPUFeatures::kFP); + switch (instr->Mask(FPDataProcessing1SourceMask)) { + case FMOV_h: + case FABS_h: + case FNEG_h: + case FSQRT_h: + case FRINTN_h: + case FRINTP_h: + case FRINTM_h: + case FRINTZ_h: + case FRINTA_h: + case FRINTX_h: + case FRINTI_h: + scope.Record(CPUFeatures::kFPHalf); + return; + case FRINT32X_s: + case FRINT32X_d: + case FRINT32Z_s: + case FRINT32Z_d: + case FRINT64X_s: + case FRINT64X_d: + case FRINT64Z_s: + case FRINT64Z_d: + scope.Record(CPUFeatures::kFrintToFixedSizedInt); + return; + default: + // No special CPU features. + // This category includes some half-precision FCVT instructions that do + // not require FPHalf. + return; + } +} + +void CPUFeaturesAuditor::VisitFPDataProcessing2Source( + const Instruction* instr) { + RecordInstructionFeaturesScope scope(this); + // All of these instructions require FP. + scope.Record(CPUFeatures::kFP); + switch (instr->Mask(FPDataProcessing2SourceMask)) { + case FMUL_h: + case FDIV_h: + case FADD_h: + case FSUB_h: + case FMAX_h: + case FMIN_h: + case FMAXNM_h: + case FMINNM_h: + case FNMUL_h: + scope.Record(CPUFeatures::kFPHalf); + return; + default: + // No special CPU features. + return; + } +} + +void CPUFeaturesAuditor::VisitFPDataProcessing3Source( + const Instruction* instr) { + RecordInstructionFeaturesScope scope(this); + // All of these instructions require FP. 
+ scope.Record(CPUFeatures::kFP); + switch (instr->Mask(FPDataProcessing3SourceMask)) { + case FMADD_h: + case FMSUB_h: + case FNMADD_h: + case FNMSUB_h: + scope.Record(CPUFeatures::kFPHalf); + return; + default: + // No special CPU features. + return; + } +} + +void CPUFeaturesAuditor::VisitFPFixedPointConvert(const Instruction* instr) { + RecordInstructionFeaturesScope scope(this); + // All of these instructions require FP. + scope.Record(CPUFeatures::kFP); + switch (instr->Mask(FPFixedPointConvertMask)) { + case FCVTZS_wh_fixed: + case FCVTZS_xh_fixed: + case FCVTZU_wh_fixed: + case FCVTZU_xh_fixed: + case SCVTF_hw_fixed: + case SCVTF_hx_fixed: + case UCVTF_hw_fixed: + case UCVTF_hx_fixed: + scope.Record(CPUFeatures::kFPHalf); + return; + default: + // No special CPU features. + return; + } +} + +void CPUFeaturesAuditor::VisitFPImmediate(const Instruction* instr) { + RecordInstructionFeaturesScope scope(this); + // All of these instructions require FP. + scope.Record(CPUFeatures::kFP); + if (instr->Mask(FPImmediateMask) == FMOV_h_imm) { + scope.Record(CPUFeatures::kFPHalf); + } +} + +void CPUFeaturesAuditor::VisitFPIntegerConvert(const Instruction* instr) { + RecordInstructionFeaturesScope scope(this); + switch (instr->Mask(FPIntegerConvertMask)) { + case FCVTAS_wh: + case FCVTAS_xh: + case FCVTAU_wh: + case FCVTAU_xh: + case FCVTMS_wh: + case FCVTMS_xh: + case FCVTMU_wh: + case FCVTMU_xh: + case FCVTNS_wh: + case FCVTNS_xh: + case FCVTNU_wh: + case FCVTNU_xh: + case FCVTPS_wh: + case FCVTPS_xh: + case FCVTPU_wh: + case FCVTPU_xh: + case FCVTZS_wh: + case FCVTZS_xh: + case FCVTZU_wh: + case FCVTZU_xh: + case FMOV_hw: + case FMOV_hx: + case FMOV_wh: + case FMOV_xh: + case SCVTF_hw: + case SCVTF_hx: + case UCVTF_hw: + case UCVTF_hx: + scope.Record(CPUFeatures::kFP); + scope.Record(CPUFeatures::kFPHalf); + return; + case FMOV_dx: + scope.RecordOneOrBothOf(CPUFeatures::kFP, CPUFeatures::kNEON); + return; + case FMOV_d1_x: + case FMOV_x_d1: + scope.Record(CPUFeatures::kFP); + scope.Record(CPUFeatures::kNEON); + return; + case FJCVTZS: + scope.Record(CPUFeatures::kFP); + scope.Record(CPUFeatures::kJSCVT); + return; + default: + scope.Record(CPUFeatures::kFP); + return; + } +} + +void CPUFeaturesAuditor::VisitLoadLiteral(const Instruction* instr) { + RecordInstructionFeaturesScope scope(this); + switch (instr->Mask(LoadLiteralMask)) { + case LDR_s_lit: + case LDR_d_lit: + scope.RecordOneOrBothOf(CPUFeatures::kFP, CPUFeatures::kNEON); + return; + case LDR_q_lit: + scope.Record(CPUFeatures::kNEON); + return; + default: + // No special CPU features. + return; + } +} + +void CPUFeaturesAuditor::VisitLoadStoreExclusive(const Instruction* instr) { + RecordInstructionFeaturesScope scope(this); + switch (instr->Mask(LoadStoreExclusiveMask)) { + case CAS_w: + case CASA_w: + case CASL_w: + case CASAL_w: + case CAS_x: + case CASA_x: + case CASL_x: + case CASAL_x: + case CASB: + case CASAB: + case CASLB: + case CASALB: + case CASH: + case CASAH: + case CASLH: + case CASALH: + case CASP_w: + case CASPA_w: + case CASPL_w: + case CASPAL_w: + case CASP_x: + case CASPA_x: + case CASPL_x: + case CASPAL_x: + scope.Record(CPUFeatures::kAtomics); + return; + case STLLRB: + case LDLARB: + case STLLRH: + case LDLARH: + case STLLR_w: + case LDLAR_w: + case STLLR_x: + case LDLAR_x: + scope.Record(CPUFeatures::kLORegions); + return; + default: + // No special CPU features. 
+ return; + } +} + +void CPUFeaturesAuditor::VisitLoadStorePairNonTemporal( + const Instruction* instr) { + LoadStorePairHelper(instr); +} + +void CPUFeaturesAuditor::VisitLoadStorePairOffset(const Instruction* instr) { + LoadStorePairHelper(instr); +} + +void CPUFeaturesAuditor::VisitLoadStorePairPostIndex(const Instruction* instr) { + LoadStorePairHelper(instr); +} + +void CPUFeaturesAuditor::VisitLoadStorePairPreIndex(const Instruction* instr) { + LoadStorePairHelper(instr); +} + +void CPUFeaturesAuditor::VisitLoadStorePostIndex(const Instruction* instr) { + LoadStoreHelper(instr); +} + +void CPUFeaturesAuditor::VisitLoadStorePreIndex(const Instruction* instr) { + LoadStoreHelper(instr); +} + +void CPUFeaturesAuditor::VisitLoadStoreRegisterOffset( + const Instruction* instr) { + LoadStoreHelper(instr); +} + +void CPUFeaturesAuditor::VisitLoadStoreUnscaledOffset( + const Instruction* instr) { + LoadStoreHelper(instr); +} + +void CPUFeaturesAuditor::VisitLoadStoreUnsignedOffset( + const Instruction* instr) { + LoadStoreHelper(instr); +} + +void CPUFeaturesAuditor::VisitLogicalImmediate(const Instruction* instr) { + RecordInstructionFeaturesScope scope(this); + USE(instr); +} + +void CPUFeaturesAuditor::VisitLogicalShifted(const Instruction* instr) { + RecordInstructionFeaturesScope scope(this); + USE(instr); +} + +void CPUFeaturesAuditor::VisitMoveWideImmediate(const Instruction* instr) { + RecordInstructionFeaturesScope scope(this); + USE(instr); +} + +void CPUFeaturesAuditor::VisitNEON2RegMisc(const Instruction* instr) { + RecordInstructionFeaturesScope scope(this); + // All of these instructions require NEON. + scope.Record(CPUFeatures::kNEON); + switch (instr->Mask(NEON2RegMiscFPMask)) { + case NEON_FABS: + case NEON_FNEG: + case NEON_FSQRT: + case NEON_FCVTL: + case NEON_FCVTN: + case NEON_FCVTXN: + case NEON_FRINTI: + case NEON_FRINTX: + case NEON_FRINTA: + case NEON_FRINTM: + case NEON_FRINTN: + case NEON_FRINTP: + case NEON_FRINTZ: + case NEON_FCVTNS: + case NEON_FCVTNU: + case NEON_FCVTPS: + case NEON_FCVTPU: + case NEON_FCVTMS: + case NEON_FCVTMU: + case NEON_FCVTZS: + case NEON_FCVTZU: + case NEON_FCVTAS: + case NEON_FCVTAU: + case NEON_SCVTF: + case NEON_UCVTF: + case NEON_FRSQRTE: + case NEON_FRECPE: + case NEON_FCMGT_zero: + case NEON_FCMGE_zero: + case NEON_FCMEQ_zero: + case NEON_FCMLE_zero: + case NEON_FCMLT_zero: + scope.Record(CPUFeatures::kFP); + return; + case NEON_FRINT32X: + case NEON_FRINT32Z: + case NEON_FRINT64X: + case NEON_FRINT64Z: + scope.Record(CPUFeatures::kFP, CPUFeatures::kFrintToFixedSizedInt); + return; + default: + // No additional features. + return; + } +} + +void CPUFeaturesAuditor::VisitNEON2RegMiscFP16(const Instruction* instr) { + RecordInstructionFeaturesScope scope(this); + // All of these instructions require NEONHalf. + scope.Record(CPUFeatures::kFP, CPUFeatures::kNEON, CPUFeatures::kNEONHalf); + USE(instr); +} + +void CPUFeaturesAuditor::VisitNEON3Different(const Instruction* instr) { + RecordInstructionFeaturesScope scope(this); + // All of these instructions require NEON. + scope.Record(CPUFeatures::kNEON); + USE(instr); +} + +void CPUFeaturesAuditor::VisitNEON3Same(const Instruction* instr) { + RecordInstructionFeaturesScope scope(this); + // All of these instructions require NEON. 
+ scope.Record(CPUFeatures::kNEON); + if (instr->Mask(NEON3SameFPFMask) == NEON3SameFPFixed) { + scope.Record(CPUFeatures::kFP); + } + switch (instr->Mask(NEON3SameFHMMask)) { + case NEON_FMLAL: + case NEON_FMLAL2: + case NEON_FMLSL: + case NEON_FMLSL2: + scope.Record(CPUFeatures::kFP, CPUFeatures::kNEONHalf, CPUFeatures::kFHM); + return; + default: + // No additional features. + return; + } +} + +void CPUFeaturesAuditor::VisitNEON3SameExtra(const Instruction* instr) { + RecordInstructionFeaturesScope scope(this); + // All of these instructions require NEON. + scope.Record(CPUFeatures::kNEON); + if ((instr->Mask(NEON3SameExtraFCMLAMask) == NEON_FCMLA) || + (instr->Mask(NEON3SameExtraFCADDMask) == NEON_FCADD)) { + scope.Record(CPUFeatures::kFP, CPUFeatures::kFcma); + if (instr->GetNEONSize() == 1) scope.Record(CPUFeatures::kNEONHalf); + } else { + switch (instr->Mask(NEON3SameExtraMask)) { + case NEON_SDOT: + case NEON_UDOT: + scope.Record(CPUFeatures::kDotProduct); + return; + case NEON_SQRDMLAH: + case NEON_SQRDMLSH: + scope.Record(CPUFeatures::kRDM); + return; + default: + // No additional features. + return; + } + } +} + +void CPUFeaturesAuditor::VisitNEON3SameFP16(const Instruction* instr) { + RecordInstructionFeaturesScope scope(this); + // All of these instructions require NEON FP16 support. + scope.Record(CPUFeatures::kFP, CPUFeatures::kNEON, CPUFeatures::kNEONHalf); + USE(instr); +} + +void CPUFeaturesAuditor::VisitNEONAcrossLanes(const Instruction* instr) { + RecordInstructionFeaturesScope scope(this); + // All of these instructions require NEON. + scope.Record(CPUFeatures::kNEON); + if (instr->Mask(NEONAcrossLanesFP16FMask) == NEONAcrossLanesFP16Fixed) { + // FMAXV_H, FMINV_H, FMAXNMV_H, FMINNMV_H + scope.Record(CPUFeatures::kFP, CPUFeatures::kNEONHalf); + } else if (instr->Mask(NEONAcrossLanesFPFMask) == NEONAcrossLanesFPFixed) { + // FMAXV, FMINV, FMAXNMV, FMINNMV + scope.Record(CPUFeatures::kFP); + } +} + +void CPUFeaturesAuditor::VisitNEONByIndexedElement(const Instruction* instr) { + RecordInstructionFeaturesScope scope(this); + // All of these instructions require NEON. + scope.Record(CPUFeatures::kNEON); + switch (instr->Mask(NEONByIndexedElementMask)) { + case NEON_SDOT_byelement: + case NEON_UDOT_byelement: + scope.Record(CPUFeatures::kDotProduct); + return; + case NEON_SQRDMLAH_byelement: + case NEON_SQRDMLSH_byelement: + scope.Record(CPUFeatures::kRDM); + return; + default: + // Fall through to check other instructions. + break; + } + switch (instr->Mask(NEONByIndexedElementFPLongMask)) { + case NEON_FMLAL_H_byelement: + case NEON_FMLAL2_H_byelement: + case NEON_FMLSL_H_byelement: + case NEON_FMLSL2_H_byelement: + scope.Record(CPUFeatures::kFP, CPUFeatures::kNEONHalf, CPUFeatures::kFHM); + return; + default: + // Fall through to check other instructions. + break; + } + switch (instr->Mask(NEONByIndexedElementFPMask)) { + case NEON_FMLA_H_byelement: + case NEON_FMLS_H_byelement: + case NEON_FMUL_H_byelement: + case NEON_FMULX_H_byelement: + scope.Record(CPUFeatures::kNEONHalf); + VIXL_FALLTHROUGH(); + case NEON_FMLA_byelement: + case NEON_FMLS_byelement: + case NEON_FMUL_byelement: + case NEON_FMULX_byelement: + scope.Record(CPUFeatures::kFP); + return; + default: + switch (instr->Mask(NEONByIndexedElementFPComplexMask)) { + case NEON_FCMLA_byelement: + scope.Record(CPUFeatures::kFP, CPUFeatures::kFcma); + if (instr->GetNEONSize() == 1) scope.Record(CPUFeatures::kNEONHalf); + return; + } + // No additional features. 
+ return; + } +} + +void CPUFeaturesAuditor::VisitNEONCopy(const Instruction* instr) { + RecordInstructionFeaturesScope scope(this); + // All of these instructions require NEON. + scope.Record(CPUFeatures::kNEON); + USE(instr); +} + +void CPUFeaturesAuditor::VisitNEONExtract(const Instruction* instr) { + RecordInstructionFeaturesScope scope(this); + // All of these instructions require NEON. + scope.Record(CPUFeatures::kNEON); + USE(instr); +} + +void CPUFeaturesAuditor::VisitNEONLoadStoreMultiStruct( + const Instruction* instr) { + RecordInstructionFeaturesScope scope(this); + // All of these instructions require NEON. + scope.Record(CPUFeatures::kNEON); + USE(instr); +} + +void CPUFeaturesAuditor::VisitNEONLoadStoreMultiStructPostIndex( + const Instruction* instr) { + RecordInstructionFeaturesScope scope(this); + // All of these instructions require NEON. + scope.Record(CPUFeatures::kNEON); + USE(instr); +} + +void CPUFeaturesAuditor::VisitNEONLoadStoreSingleStruct( + const Instruction* instr) { + RecordInstructionFeaturesScope scope(this); + // All of these instructions require NEON. + scope.Record(CPUFeatures::kNEON); + USE(instr); +} + +void CPUFeaturesAuditor::VisitNEONLoadStoreSingleStructPostIndex( + const Instruction* instr) { + RecordInstructionFeaturesScope scope(this); + // All of these instructions require NEON. + scope.Record(CPUFeatures::kNEON); + USE(instr); +} + +void CPUFeaturesAuditor::VisitNEONModifiedImmediate(const Instruction* instr) { + RecordInstructionFeaturesScope scope(this); + // All of these instructions require NEON. + scope.Record(CPUFeatures::kNEON); + if (instr->GetNEONCmode() == 0xf) { + // FMOV (vector, immediate), double-, single- or half-precision. + scope.Record(CPUFeatures::kFP); + if (instr->ExtractBit(11)) scope.Record(CPUFeatures::kNEONHalf); + } +} + +void CPUFeaturesAuditor::VisitNEONPerm(const Instruction* instr) { + RecordInstructionFeaturesScope scope(this); + // All of these instructions require NEON. + scope.Record(CPUFeatures::kNEON); + USE(instr); +} + +void CPUFeaturesAuditor::VisitNEONScalar2RegMisc(const Instruction* instr) { + RecordInstructionFeaturesScope scope(this); + // All of these instructions require NEON. + scope.Record(CPUFeatures::kNEON); + switch (instr->Mask(NEONScalar2RegMiscFPMask)) { + case NEON_FRECPE_scalar: + case NEON_FRECPX_scalar: + case NEON_FRSQRTE_scalar: + case NEON_FCMGT_zero_scalar: + case NEON_FCMGE_zero_scalar: + case NEON_FCMEQ_zero_scalar: + case NEON_FCMLE_zero_scalar: + case NEON_FCMLT_zero_scalar: + case NEON_SCVTF_scalar: + case NEON_UCVTF_scalar: + case NEON_FCVTNS_scalar: + case NEON_FCVTNU_scalar: + case NEON_FCVTPS_scalar: + case NEON_FCVTPU_scalar: + case NEON_FCVTMS_scalar: + case NEON_FCVTMU_scalar: + case NEON_FCVTZS_scalar: + case NEON_FCVTZU_scalar: + case NEON_FCVTAS_scalar: + case NEON_FCVTAU_scalar: + case NEON_FCVTXN_scalar: + scope.Record(CPUFeatures::kFP); + return; + default: + // No additional features. + return; + } +} + +void CPUFeaturesAuditor::VisitNEONScalar2RegMiscFP16(const Instruction* instr) { + RecordInstructionFeaturesScope scope(this); + // All of these instructions require NEONHalf. + scope.Record(CPUFeatures::kFP, CPUFeatures::kNEON, CPUFeatures::kNEONHalf); + USE(instr); +} + +void CPUFeaturesAuditor::VisitNEONScalar3Diff(const Instruction* instr) { + RecordInstructionFeaturesScope scope(this); + // All of these instructions require NEON. 
+ scope.Record(CPUFeatures::kNEON); + USE(instr); +} + +void CPUFeaturesAuditor::VisitNEONScalar3Same(const Instruction* instr) { + RecordInstructionFeaturesScope scope(this); + // All of these instructions require NEON. + scope.Record(CPUFeatures::kNEON); + if (instr->Mask(NEONScalar3SameFPFMask) == NEONScalar3SameFPFixed) { + scope.Record(CPUFeatures::kFP); + } +} + +void CPUFeaturesAuditor::VisitNEONScalar3SameExtra(const Instruction* instr) { + RecordInstructionFeaturesScope scope(this); + // All of these instructions require NEON and RDM. + scope.Record(CPUFeatures::kNEON, CPUFeatures::kRDM); + USE(instr); +} + +void CPUFeaturesAuditor::VisitNEONScalar3SameFP16(const Instruction* instr) { + RecordInstructionFeaturesScope scope(this); + // All of these instructions require NEONHalf. + scope.Record(CPUFeatures::kFP, CPUFeatures::kNEON, CPUFeatures::kNEONHalf); + USE(instr); +} + +void CPUFeaturesAuditor::VisitNEONScalarByIndexedElement( + const Instruction* instr) { + RecordInstructionFeaturesScope scope(this); + // All of these instructions require NEON. + scope.Record(CPUFeatures::kNEON); + switch (instr->Mask(NEONScalarByIndexedElementMask)) { + case NEON_SQRDMLAH_byelement_scalar: + case NEON_SQRDMLSH_byelement_scalar: + scope.Record(CPUFeatures::kRDM); + return; + default: + switch (instr->Mask(NEONScalarByIndexedElementFPMask)) { + case NEON_FMLA_H_byelement_scalar: + case NEON_FMLS_H_byelement_scalar: + case NEON_FMUL_H_byelement_scalar: + case NEON_FMULX_H_byelement_scalar: + scope.Record(CPUFeatures::kNEONHalf); + VIXL_FALLTHROUGH(); + case NEON_FMLA_byelement_scalar: + case NEON_FMLS_byelement_scalar: + case NEON_FMUL_byelement_scalar: + case NEON_FMULX_byelement_scalar: + scope.Record(CPUFeatures::kFP); + return; + } + // No additional features. + return; + } +} + +void CPUFeaturesAuditor::VisitNEONScalarCopy(const Instruction* instr) { + RecordInstructionFeaturesScope scope(this); + // All of these instructions require NEON. + scope.Record(CPUFeatures::kNEON); + USE(instr); +} + +void CPUFeaturesAuditor::VisitNEONScalarPairwise(const Instruction* instr) { + RecordInstructionFeaturesScope scope(this); + // All of these instructions require NEON. + scope.Record(CPUFeatures::kNEON); + switch (instr->Mask(NEONScalarPairwiseMask)) { + case NEON_FMAXNMP_h_scalar: + case NEON_FADDP_h_scalar: + case NEON_FMAXP_h_scalar: + case NEON_FMINNMP_h_scalar: + case NEON_FMINP_h_scalar: + scope.Record(CPUFeatures::kNEONHalf); + VIXL_FALLTHROUGH(); + case NEON_FADDP_scalar: + case NEON_FMAXP_scalar: + case NEON_FMAXNMP_scalar: + case NEON_FMINP_scalar: + case NEON_FMINNMP_scalar: + scope.Record(CPUFeatures::kFP); + return; + default: + // No additional features. + return; + } +} + +void CPUFeaturesAuditor::VisitNEONScalarShiftImmediate( + const Instruction* instr) { + RecordInstructionFeaturesScope scope(this); + // All of these instructions require NEON. + scope.Record(CPUFeatures::kNEON); + switch (instr->Mask(NEONScalarShiftImmediateMask)) { + case NEON_FCVTZS_imm_scalar: + case NEON_FCVTZU_imm_scalar: + case NEON_SCVTF_imm_scalar: + case NEON_UCVTF_imm_scalar: + scope.Record(CPUFeatures::kFP); + // If immh is 0b001x then the data type is FP16, and requires kNEONHalf. + if ((instr->GetImmNEONImmh() & 0xe) == 0x2) { + scope.Record(CPUFeatures::kNEONHalf); + } + return; + default: + // No additional features. 
+ return; + } +} + +void CPUFeaturesAuditor::VisitNEONShiftImmediate(const Instruction* instr) { + RecordInstructionFeaturesScope scope(this); + // All of these instructions require NEON. + scope.Record(CPUFeatures::kNEON); + switch (instr->Mask(NEONShiftImmediateMask)) { + case NEON_SCVTF_imm: + case NEON_UCVTF_imm: + case NEON_FCVTZS_imm: + case NEON_FCVTZU_imm: + scope.Record(CPUFeatures::kFP); + // If immh is 0b001x then the data type is FP16, and requires kNEONHalf. + if ((instr->GetImmNEONImmh() & 0xe) == 0x2) { + scope.Record(CPUFeatures::kNEONHalf); + } + return; + default: + // No additional features. + return; + } +} + +void CPUFeaturesAuditor::VisitNEONTable(const Instruction* instr) { + RecordInstructionFeaturesScope scope(this); + // All of these instructions require NEON. + scope.Record(CPUFeatures::kNEON); + USE(instr); +} + +void CPUFeaturesAuditor::VisitPCRelAddressing(const Instruction* instr) { + RecordInstructionFeaturesScope scope(this); + USE(instr); +} + +// Most SVE visitors require only SVE. +#define VIXL_SIMPLE_SVE_VISITOR_LIST(V) \ + V(SVE32BitGatherLoad_ScalarPlus32BitUnscaledOffsets) \ + V(SVE32BitGatherLoad_VectorPlusImm) \ + V(SVE32BitGatherLoadHalfwords_ScalarPlus32BitScaledOffsets) \ + V(SVE32BitGatherLoadWords_ScalarPlus32BitScaledOffsets) \ + V(SVE32BitGatherPrefetch_ScalarPlus32BitScaledOffsets) \ + V(SVE32BitGatherPrefetch_VectorPlusImm) \ + V(SVE32BitScatterStore_ScalarPlus32BitScaledOffsets) \ + V(SVE32BitScatterStore_ScalarPlus32BitUnscaledOffsets) \ + V(SVE32BitScatterStore_VectorPlusImm) \ + V(SVE64BitGatherLoad_ScalarPlus32BitUnpackedScaledOffsets) \ + V(SVE64BitGatherLoad_ScalarPlus64BitScaledOffsets) \ + V(SVE64BitGatherLoad_ScalarPlus64BitUnscaledOffsets) \ + V(SVE64BitGatherLoad_ScalarPlusUnpacked32BitUnscaledOffsets) \ + V(SVE64BitGatherLoad_VectorPlusImm) \ + V(SVE64BitGatherPrefetch_ScalarPlus64BitScaledOffsets) \ + V(SVE64BitGatherPrefetch_ScalarPlusUnpacked32BitScaledOffsets) \ + V(SVE64BitGatherPrefetch_VectorPlusImm) \ + V(SVE64BitScatterStore_ScalarPlus64BitScaledOffsets) \ + V(SVE64BitScatterStore_ScalarPlus64BitUnscaledOffsets) \ + V(SVE64BitScatterStore_ScalarPlusUnpacked32BitScaledOffsets) \ + V(SVE64BitScatterStore_ScalarPlusUnpacked32BitUnscaledOffsets) \ + V(SVE64BitScatterStore_VectorPlusImm) \ + V(SVEAddressGeneration) \ + V(SVEBitwiseLogicalUnpredicated) \ + V(SVEBitwiseShiftUnpredicated) \ + V(SVEFFRInitialise) \ + V(SVEFFRWriteFromPredicate) \ + V(SVEFPAccumulatingReduction) \ + V(SVEFPArithmeticUnpredicated) \ + V(SVEFPCompareVectors) \ + V(SVEFPCompareWithZero) \ + V(SVEFPComplexAddition) \ + V(SVEFPComplexMulAdd) \ + V(SVEFPComplexMulAddIndex) \ + V(SVEFPFastReduction) \ + V(SVEFPMulIndex) \ + V(SVEFPMulAdd) \ + V(SVEFPMulAddIndex) \ + V(SVEFPUnaryOpUnpredicated) \ + V(SVEIncDecByPredicateCount) \ + V(SVEIndexGeneration) \ + V(SVEIntArithmeticUnpredicated) \ + V(SVEIntCompareSignedImm) \ + V(SVEIntCompareUnsignedImm) \ + V(SVEIntCompareVectors) \ + V(SVEIntMulAddPredicated) \ + V(SVEIntMulAddUnpredicated) \ + V(SVEIntReduction) \ + V(SVEIntUnaryArithmeticPredicated) \ + V(SVEMovprfx) \ + V(SVEMulIndex) \ + V(SVEPermuteVectorExtract) \ + V(SVEPermuteVectorInterleaving) \ + V(SVEPredicateCount) \ + V(SVEPredicateLogical) \ + V(SVEPropagateBreak) \ + V(SVEStackFrameAdjustment) \ + V(SVEStackFrameSize) \ + V(SVEVectorSelect) \ + V(SVEBitwiseLogical_Predicated) \ + V(SVEBitwiseLogicalWithImm_Unpredicated) \ + V(SVEBitwiseShiftByImm_Predicated) \ + V(SVEBitwiseShiftByVector_Predicated) \ + 
V(SVEBitwiseShiftByWideElements_Predicated) \ + V(SVEBroadcastBitmaskImm) \ + V(SVEBroadcastFPImm_Unpredicated) \ + V(SVEBroadcastGeneralRegister) \ + V(SVEBroadcastIndexElement) \ + V(SVEBroadcastIntImm_Unpredicated) \ + V(SVECompressActiveElements) \ + V(SVEConditionallyBroadcastElementToVector) \ + V(SVEConditionallyExtractElementToSIMDFPScalar) \ + V(SVEConditionallyExtractElementToGeneralRegister) \ + V(SVEConditionallyTerminateScalars) \ + V(SVEConstructivePrefix_Unpredicated) \ + V(SVEContiguousFirstFaultLoad_ScalarPlusScalar) \ + V(SVEContiguousLoad_ScalarPlusImm) \ + V(SVEContiguousLoad_ScalarPlusScalar) \ + V(SVEContiguousNonFaultLoad_ScalarPlusImm) \ + V(SVEContiguousNonTemporalLoad_ScalarPlusImm) \ + V(SVEContiguousNonTemporalLoad_ScalarPlusScalar) \ + V(SVEContiguousNonTemporalStore_ScalarPlusImm) \ + V(SVEContiguousNonTemporalStore_ScalarPlusScalar) \ + V(SVEContiguousPrefetch_ScalarPlusImm) \ + V(SVEContiguousPrefetch_ScalarPlusScalar) \ + V(SVEContiguousStore_ScalarPlusImm) \ + V(SVEContiguousStore_ScalarPlusScalar) \ + V(SVECopySIMDFPScalarRegisterToVector_Predicated) \ + V(SVECopyFPImm_Predicated) \ + V(SVECopyGeneralRegisterToVector_Predicated) \ + V(SVECopyIntImm_Predicated) \ + V(SVEElementCount) \ + V(SVEExtractElementToSIMDFPScalarRegister) \ + V(SVEExtractElementToGeneralRegister) \ + V(SVEFPArithmetic_Predicated) \ + V(SVEFPArithmeticWithImm_Predicated) \ + V(SVEFPConvertPrecision) \ + V(SVEFPConvertToInt) \ + V(SVEFPExponentialAccelerator) \ + V(SVEFPRoundToIntegralValue) \ + V(SVEFPTrigMulAddCoefficient) \ + V(SVEFPTrigSelectCoefficient) \ + V(SVEFPUnaryOp) \ + V(SVEIncDecRegisterByElementCount) \ + V(SVEIncDecVectorByElementCount) \ + V(SVEInsertSIMDFPScalarRegister) \ + V(SVEInsertGeneralRegister) \ + V(SVEIntAddSubtractImm_Unpredicated) \ + V(SVEIntAddSubtractVectors_Predicated) \ + V(SVEIntCompareScalarCountAndLimit) \ + V(SVEIntConvertToFP) \ + V(SVEIntDivideVectors_Predicated) \ + V(SVEIntMinMaxImm_Unpredicated) \ + V(SVEIntMinMaxDifference_Predicated) \ + V(SVEIntMulImm_Unpredicated) \ + V(SVEIntMulVectors_Predicated) \ + V(SVELoadAndBroadcastElement) \ + V(SVELoadAndBroadcastQOWord_ScalarPlusImm) \ + V(SVELoadAndBroadcastQOWord_ScalarPlusScalar) \ + V(SVELoadMultipleStructures_ScalarPlusImm) \ + V(SVELoadMultipleStructures_ScalarPlusScalar) \ + V(SVELoadPredicateRegister) \ + V(SVELoadVectorRegister) \ + V(SVEPartitionBreakCondition) \ + V(SVEPermutePredicateElements) \ + V(SVEPredicateFirstActive) \ + V(SVEPredicateInitialize) \ + V(SVEPredicateNextActive) \ + V(SVEPredicateReadFromFFR_Predicated) \ + V(SVEPredicateReadFromFFR_Unpredicated) \ + V(SVEPredicateTest) \ + V(SVEPredicateZero) \ + V(SVEPropagateBreakToNextPartition) \ + V(SVEReversePredicateElements) \ + V(SVEReverseVectorElements) \ + V(SVEReverseWithinElements) \ + V(SVESaturatingIncDecRegisterByElementCount) \ + V(SVESaturatingIncDecVectorByElementCount) \ + V(SVEStoreMultipleStructures_ScalarPlusImm) \ + V(SVEStoreMultipleStructures_ScalarPlusScalar) \ + V(SVEStorePredicateRegister) \ + V(SVEStoreVectorRegister) \ + V(SVETableLookup) \ + V(SVEUnpackPredicateElements) \ + V(SVEUnpackVectorElements) \ + V(SVEVectorSplice) + +#define VIXL_DEFINE_SIMPLE_SVE_VISITOR(NAME) \ + void CPUFeaturesAuditor::Visit##NAME(const Instruction* instr) { \ + RecordInstructionFeaturesScope scope(this); \ + scope.Record(CPUFeatures::kSVE); \ + USE(instr); \ + } +VIXL_SIMPLE_SVE_VISITOR_LIST(VIXL_DEFINE_SIMPLE_SVE_VISITOR) +#undef VIXL_DEFINE_SIMPLE_SVE_VISITOR +#undef VIXL_SIMPLE_SVE_VISITOR_LIST + +void 
CPUFeaturesAuditor::VisitSystem(const Instruction* instr) { + RecordInstructionFeaturesScope scope(this); + if (instr->Mask(SystemHintFMask) == SystemHintFixed) { + CPUFeatures required; + switch (instr->GetInstructionBits()) { + case PACIA1716: + case PACIB1716: + case AUTIA1716: + case AUTIB1716: + case PACIAZ: + case PACIASP: + case PACIBZ: + case PACIBSP: + case AUTIAZ: + case AUTIASP: + case AUTIBZ: + case AUTIBSP: + case XPACLRI: + required.Combine(CPUFeatures::kPAuth); + break; + default: + switch (instr->GetImmHint()) { + case ESB: + required.Combine(CPUFeatures::kRAS); + break; + case BTI: + case BTI_j: + case BTI_c: + case BTI_jc: + required.Combine(CPUFeatures::kBTI); + break; + default: + break; + } + break; + } + + // These are all HINT instructions, and behave as NOPs if the corresponding + // features are not implemented, so we record the corresponding features + // only if they are available. + if (available_.Has(required)) scope.Record(required); + } else if (instr->Mask(SystemSysMask) == SYS) { + switch (instr->GetSysOp()) { + // DC instruction variants. + case CGVAC: + case CGDVAC: + case CGVAP: + case CGDVAP: + case CIGVAC: + case CIGDVAC: + case GVA: + case GZVA: + scope.Record(CPUFeatures::kMTE); + break; + case CVAP: + scope.Record(CPUFeatures::kDCPoP); + break; + case CVADP: + scope.Record(CPUFeatures::kDCCVADP); + break; + case IVAU: + case CVAC: + case CVAU: + case CIVAC: + case ZVA: + // No special CPU features. + break; + } + } else if (instr->Mask(SystemPStateFMask) == SystemPStateFixed) { + switch (instr->Mask(SystemPStateMask)) { + case CFINV: + scope.Record(CPUFeatures::kFlagM); + break; + case AXFLAG: + case XAFLAG: + scope.Record(CPUFeatures::kAXFlag); + break; + } + } else if (instr->Mask(SystemSysRegFMask) == SystemSysRegFixed) { + if (instr->Mask(SystemSysRegMask) == MRS) { + switch (instr->GetImmSystemRegister()) { + case RNDR: + case RNDRRS: + scope.Record(CPUFeatures::kRNG); + break; + } + } + } +} + +void CPUFeaturesAuditor::VisitTestBranch(const Instruction* instr) { + RecordInstructionFeaturesScope scope(this); + USE(instr); +} + +void CPUFeaturesAuditor::VisitUnallocated(const Instruction* instr) { + RecordInstructionFeaturesScope scope(this); + USE(instr); +} + +void CPUFeaturesAuditor::VisitUnconditionalBranch(const Instruction* instr) { + RecordInstructionFeaturesScope scope(this); + USE(instr); +} + +void CPUFeaturesAuditor::VisitUnconditionalBranchToRegister( + const Instruction* instr) { + RecordInstructionFeaturesScope scope(this); + switch (instr->Mask(UnconditionalBranchToRegisterMask)) { + case BRAAZ: + case BRABZ: + case BLRAAZ: + case BLRABZ: + case RETAA: + case RETAB: + case BRAA: + case BRAB: + case BLRAA: + case BLRAB: + scope.Record(CPUFeatures::kPAuth); + return; + default: + // No additional features. 
+      return;
+  }
+}
+
+void CPUFeaturesAuditor::VisitReserved(const Instruction* instr) {
+  RecordInstructionFeaturesScope scope(this);
+  USE(instr);
+}
+
+void CPUFeaturesAuditor::VisitUnimplemented(const Instruction* instr) {
+  RecordInstructionFeaturesScope scope(this);
+  USE(instr);
+}
+
+void CPUFeaturesAuditor::Visit(Metadata* metadata, const Instruction* instr) {
+  VIXL_ASSERT(metadata->count("form") > 0);
+  const std::string& form = (*metadata)["form"];
+  uint32_t form_hash = Hash(form.c_str());
+  const FormToVisitorFnMap* fv = CPUFeaturesAuditor::GetFormToVisitorFnMap();
+  FormToVisitorFnMap::const_iterator it = fv->find(form_hash);
+  if (it == fv->end()) {
+    RecordInstructionFeaturesScope scope(this);
+    std::map<uint32_t, const CPUFeatures> features = {
+        {"adclb_z_zzz"_h, CPUFeatures::kSVE2},
+        {"adclt_z_zzz"_h, CPUFeatures::kSVE2},
+        {"addhnb_z_zz"_h, CPUFeatures::kSVE2},
+        {"addhnt_z_zz"_h, CPUFeatures::kSVE2},
+        {"addp_z_p_zz"_h, CPUFeatures::kSVE2},
+        {"bcax_z_zzz"_h, CPUFeatures::kSVE2},
+        {"bdep_z_zz"_h,
+         CPUFeatures(CPUFeatures::kSVE2, CPUFeatures::kSVEBitPerm)},
+        {"bext_z_zz"_h,
+         CPUFeatures(CPUFeatures::kSVE2, CPUFeatures::kSVEBitPerm)},
+        {"bgrp_z_zz"_h,
+         CPUFeatures(CPUFeatures::kSVE2, CPUFeatures::kSVEBitPerm)},
+        {"bsl1n_z_zzz"_h, CPUFeatures::kSVE2},
+        {"bsl2n_z_zzz"_h, CPUFeatures::kSVE2},
+        {"bsl_z_zzz"_h, CPUFeatures::kSVE2},
+        {"cadd_z_zz"_h, CPUFeatures::kSVE2},
+        {"cdot_z_zzz"_h, CPUFeatures::kSVE2},
+        {"cdot_z_zzzi_d"_h, CPUFeatures::kSVE2},
+        {"cdot_z_zzzi_s"_h, CPUFeatures::kSVE2},
+        {"cmla_z_zzz"_h, CPUFeatures::kSVE2},
+        {"cmla_z_zzzi_h"_h, CPUFeatures::kSVE2},
+        {"cmla_z_zzzi_s"_h, CPUFeatures::kSVE2},
+        {"eor3_z_zzz"_h, CPUFeatures::kSVE2},
+        {"eorbt_z_zz"_h, CPUFeatures::kSVE2},
+        {"eortb_z_zz"_h, CPUFeatures::kSVE2},
+        {"ext_z_zi_con"_h, CPUFeatures::kSVE2},
+        {"faddp_z_p_zz"_h, CPUFeatures::kSVE2},
+        {"fcvtlt_z_p_z_h2s"_h, CPUFeatures::kSVE2},
+        {"fcvtlt_z_p_z_s2d"_h, CPUFeatures::kSVE2},
+        {"fcvtnt_z_p_z_d2s"_h, CPUFeatures::kSVE2},
+        {"fcvtnt_z_p_z_s2h"_h, CPUFeatures::kSVE2},
+        {"fcvtx_z_p_z_d2s"_h, CPUFeatures::kSVE2},
+        {"fcvtxnt_z_p_z_d2s"_h, CPUFeatures::kSVE2},
+        {"flogb_z_p_z"_h, CPUFeatures::kSVE2},
+        {"fmaxnmp_z_p_zz"_h, CPUFeatures::kSVE2},
+        {"fmaxp_z_p_zz"_h, CPUFeatures::kSVE2},
+        {"fminnmp_z_p_zz"_h, CPUFeatures::kSVE2},
+        {"fminp_z_p_zz"_h, CPUFeatures::kSVE2},
+        {"fmlalb_z_zzz"_h, CPUFeatures::kSVE2},
+        {"fmlalb_z_zzzi_s"_h, CPUFeatures::kSVE2},
+        {"fmlalt_z_zzz"_h, CPUFeatures::kSVE2},
+        {"fmlalt_z_zzzi_s"_h, CPUFeatures::kSVE2},
+        {"fmlslb_z_zzz"_h, CPUFeatures::kSVE2},
+        {"fmlslb_z_zzzi_s"_h, CPUFeatures::kSVE2},
+        {"fmlslt_z_zzz"_h, CPUFeatures::kSVE2},
+        {"fmlslt_z_zzzi_s"_h, CPUFeatures::kSVE2},
+        {"histcnt_z_p_zz"_h, CPUFeatures::kSVE2},
+        {"histseg_z_zz"_h, CPUFeatures::kSVE2},
+        {"ldnt1b_z_p_ar_d_64_unscaled"_h, CPUFeatures::kSVE2},
+        {"ldnt1b_z_p_ar_s_x32_unscaled"_h, CPUFeatures::kSVE2},
+        {"ldnt1d_z_p_ar_d_64_unscaled"_h, CPUFeatures::kSVE2},
+        {"ldnt1h_z_p_ar_d_64_unscaled"_h, CPUFeatures::kSVE2},
+        {"ldnt1h_z_p_ar_s_x32_unscaled"_h, CPUFeatures::kSVE2},
+        {"ldnt1sb_z_p_ar_d_64_unscaled"_h, CPUFeatures::kSVE2},
+        {"ldnt1sb_z_p_ar_s_x32_unscaled"_h, CPUFeatures::kSVE2},
+        {"ldnt1sh_z_p_ar_d_64_unscaled"_h, CPUFeatures::kSVE2},
+        {"ldnt1sh_z_p_ar_s_x32_unscaled"_h, CPUFeatures::kSVE2},
+        {"ldnt1sw_z_p_ar_d_64_unscaled"_h, CPUFeatures::kSVE2},
+        {"ldnt1w_z_p_ar_d_64_unscaled"_h, CPUFeatures::kSVE2},
+        {"ldnt1w_z_p_ar_s_x32_unscaled"_h, CPUFeatures::kSVE2},
+        {"match_p_p_zz"_h, CPUFeatures::kSVE2},
+        {"mla_z_zzzi_d"_h, CPUFeatures::kSVE2},
+
{"mla_z_zzzi_h"_h, CPUFeatures::kSVE2}, + {"mla_z_zzzi_s"_h, CPUFeatures::kSVE2}, + {"mls_z_zzzi_d"_h, CPUFeatures::kSVE2}, + {"mls_z_zzzi_h"_h, CPUFeatures::kSVE2}, + {"mls_z_zzzi_s"_h, CPUFeatures::kSVE2}, + {"mul_z_zz"_h, CPUFeatures::kSVE2}, + {"mul_z_zzi_d"_h, CPUFeatures::kSVE2}, + {"mul_z_zzi_h"_h, CPUFeatures::kSVE2}, + {"mul_z_zzi_s"_h, CPUFeatures::kSVE2}, + {"nbsl_z_zzz"_h, CPUFeatures::kSVE2}, + {"nmatch_p_p_zz"_h, CPUFeatures::kSVE2}, + {"pmul_z_zz"_h, CPUFeatures::kSVE2}, + {"pmullb_z_zz"_h, CPUFeatures::kSVE2}, + {"pmullt_z_zz"_h, CPUFeatures::kSVE2}, + {"raddhnb_z_zz"_h, CPUFeatures::kSVE2}, + {"raddhnt_z_zz"_h, CPUFeatures::kSVE2}, + {"rshrnb_z_zi"_h, CPUFeatures::kSVE2}, + {"rshrnt_z_zi"_h, CPUFeatures::kSVE2}, + {"rsubhnb_z_zz"_h, CPUFeatures::kSVE2}, + {"rsubhnt_z_zz"_h, CPUFeatures::kSVE2}, + {"saba_z_zzz"_h, CPUFeatures::kSVE2}, + {"sabalb_z_zzz"_h, CPUFeatures::kSVE2}, + {"sabalt_z_zzz"_h, CPUFeatures::kSVE2}, + {"sabdlb_z_zz"_h, CPUFeatures::kSVE2}, + {"sabdlt_z_zz"_h, CPUFeatures::kSVE2}, + {"sadalp_z_p_z"_h, CPUFeatures::kSVE2}, + {"saddlb_z_zz"_h, CPUFeatures::kSVE2}, + {"saddlbt_z_zz"_h, CPUFeatures::kSVE2}, + {"saddlt_z_zz"_h, CPUFeatures::kSVE2}, + {"saddwb_z_zz"_h, CPUFeatures::kSVE2}, + {"saddwt_z_zz"_h, CPUFeatures::kSVE2}, + {"sbclb_z_zzz"_h, CPUFeatures::kSVE2}, + {"sbclt_z_zzz"_h, CPUFeatures::kSVE2}, + {"shadd_z_p_zz"_h, CPUFeatures::kSVE2}, + {"shrnb_z_zi"_h, CPUFeatures::kSVE2}, + {"shrnt_z_zi"_h, CPUFeatures::kSVE2}, + {"shsub_z_p_zz"_h, CPUFeatures::kSVE2}, + {"shsubr_z_p_zz"_h, CPUFeatures::kSVE2}, + {"sli_z_zzi"_h, CPUFeatures::kSVE2}, + {"smaxp_z_p_zz"_h, CPUFeatures::kSVE2}, + {"sminp_z_p_zz"_h, CPUFeatures::kSVE2}, + {"smlalb_z_zzz"_h, CPUFeatures::kSVE2}, + {"smlalb_z_zzzi_d"_h, CPUFeatures::kSVE2}, + {"smlalb_z_zzzi_s"_h, CPUFeatures::kSVE2}, + {"smlalt_z_zzz"_h, CPUFeatures::kSVE2}, + {"smlalt_z_zzzi_d"_h, CPUFeatures::kSVE2}, + {"smlalt_z_zzzi_s"_h, CPUFeatures::kSVE2}, + {"smlslb_z_zzz"_h, CPUFeatures::kSVE2}, + {"smlslb_z_zzzi_d"_h, CPUFeatures::kSVE2}, + {"smlslb_z_zzzi_s"_h, CPUFeatures::kSVE2}, + {"smlslt_z_zzz"_h, CPUFeatures::kSVE2}, + {"smlslt_z_zzzi_d"_h, CPUFeatures::kSVE2}, + {"smlslt_z_zzzi_s"_h, CPUFeatures::kSVE2}, + {"smulh_z_zz"_h, CPUFeatures::kSVE2}, + {"smullb_z_zz"_h, CPUFeatures::kSVE2}, + {"smullb_z_zzi_d"_h, CPUFeatures::kSVE2}, + {"smullb_z_zzi_s"_h, CPUFeatures::kSVE2}, + {"smullt_z_zz"_h, CPUFeatures::kSVE2}, + {"smullt_z_zzi_d"_h, CPUFeatures::kSVE2}, + {"smullt_z_zzi_s"_h, CPUFeatures::kSVE2}, + {"splice_z_p_zz_con"_h, CPUFeatures::kSVE2}, + {"sqabs_z_p_z"_h, CPUFeatures::kSVE2}, + {"sqadd_z_p_zz"_h, CPUFeatures::kSVE2}, + {"sqcadd_z_zz"_h, CPUFeatures::kSVE2}, + {"sqdmlalb_z_zzz"_h, CPUFeatures::kSVE2}, + {"sqdmlalb_z_zzzi_d"_h, CPUFeatures::kSVE2}, + {"sqdmlalb_z_zzzi_s"_h, CPUFeatures::kSVE2}, + {"sqdmlalbt_z_zzz"_h, CPUFeatures::kSVE2}, + {"sqdmlalt_z_zzz"_h, CPUFeatures::kSVE2}, + {"sqdmlalt_z_zzzi_d"_h, CPUFeatures::kSVE2}, + {"sqdmlalt_z_zzzi_s"_h, CPUFeatures::kSVE2}, + {"sqdmlslb_z_zzz"_h, CPUFeatures::kSVE2}, + {"sqdmlslb_z_zzzi_d"_h, CPUFeatures::kSVE2}, + {"sqdmlslb_z_zzzi_s"_h, CPUFeatures::kSVE2}, + {"sqdmlslbt_z_zzz"_h, CPUFeatures::kSVE2}, + {"sqdmlslt_z_zzz"_h, CPUFeatures::kSVE2}, + {"sqdmlslt_z_zzzi_d"_h, CPUFeatures::kSVE2}, + {"sqdmlslt_z_zzzi_s"_h, CPUFeatures::kSVE2}, + {"sqdmulh_z_zz"_h, CPUFeatures::kSVE2}, + {"sqdmulh_z_zzi_d"_h, CPUFeatures::kSVE2}, + {"sqdmulh_z_zzi_h"_h, CPUFeatures::kSVE2}, + {"sqdmulh_z_zzi_s"_h, CPUFeatures::kSVE2}, + {"sqdmullb_z_zz"_h, CPUFeatures::kSVE2}, + 
{"sqdmullb_z_zzi_d"_h, CPUFeatures::kSVE2}, + {"sqdmullb_z_zzi_s"_h, CPUFeatures::kSVE2}, + {"sqdmullt_z_zz"_h, CPUFeatures::kSVE2}, + {"sqdmullt_z_zzi_d"_h, CPUFeatures::kSVE2}, + {"sqdmullt_z_zzi_s"_h, CPUFeatures::kSVE2}, + {"sqneg_z_p_z"_h, CPUFeatures::kSVE2}, + {"sqrdcmlah_z_zzz"_h, CPUFeatures::kSVE2}, + {"sqrdcmlah_z_zzzi_h"_h, CPUFeatures::kSVE2}, + {"sqrdcmlah_z_zzzi_s"_h, CPUFeatures::kSVE2}, + {"sqrdmlah_z_zzz"_h, CPUFeatures::kSVE2}, + {"sqrdmlah_z_zzzi_d"_h, CPUFeatures::kSVE2}, + {"sqrdmlah_z_zzzi_h"_h, CPUFeatures::kSVE2}, + {"sqrdmlah_z_zzzi_s"_h, CPUFeatures::kSVE2}, + {"sqrdmlsh_z_zzz"_h, CPUFeatures::kSVE2}, + {"sqrdmlsh_z_zzzi_d"_h, CPUFeatures::kSVE2}, + {"sqrdmlsh_z_zzzi_h"_h, CPUFeatures::kSVE2}, + {"sqrdmlsh_z_zzzi_s"_h, CPUFeatures::kSVE2}, + {"sqrdmulh_z_zz"_h, CPUFeatures::kSVE2}, + {"sqrdmulh_z_zzi_d"_h, CPUFeatures::kSVE2}, + {"sqrdmulh_z_zzi_h"_h, CPUFeatures::kSVE2}, + {"sqrdmulh_z_zzi_s"_h, CPUFeatures::kSVE2}, + {"sqrshl_z_p_zz"_h, CPUFeatures::kSVE2}, + {"sqrshlr_z_p_zz"_h, CPUFeatures::kSVE2}, + {"sqrshrnb_z_zi"_h, CPUFeatures::kSVE2}, + {"sqrshrnt_z_zi"_h, CPUFeatures::kSVE2}, + {"sqrshrunb_z_zi"_h, CPUFeatures::kSVE2}, + {"sqrshrunt_z_zi"_h, CPUFeatures::kSVE2}, + {"sqshl_z_p_zi"_h, CPUFeatures::kSVE2}, + {"sqshl_z_p_zz"_h, CPUFeatures::kSVE2}, + {"sqshlr_z_p_zz"_h, CPUFeatures::kSVE2}, + {"sqshlu_z_p_zi"_h, CPUFeatures::kSVE2}, + {"sqshrnb_z_zi"_h, CPUFeatures::kSVE2}, + {"sqshrnt_z_zi"_h, CPUFeatures::kSVE2}, + {"sqshrunb_z_zi"_h, CPUFeatures::kSVE2}, + {"sqshrunt_z_zi"_h, CPUFeatures::kSVE2}, + {"sqsub_z_p_zz"_h, CPUFeatures::kSVE2}, + {"sqsubr_z_p_zz"_h, CPUFeatures::kSVE2}, + {"sqxtnb_z_zz"_h, CPUFeatures::kSVE2}, + {"sqxtnt_z_zz"_h, CPUFeatures::kSVE2}, + {"sqxtunb_z_zz"_h, CPUFeatures::kSVE2}, + {"sqxtunt_z_zz"_h, CPUFeatures::kSVE2}, + {"srhadd_z_p_zz"_h, CPUFeatures::kSVE2}, + {"sri_z_zzi"_h, CPUFeatures::kSVE2}, + {"srshl_z_p_zz"_h, CPUFeatures::kSVE2}, + {"srshlr_z_p_zz"_h, CPUFeatures::kSVE2}, + {"srshr_z_p_zi"_h, CPUFeatures::kSVE2}, + {"srsra_z_zi"_h, CPUFeatures::kSVE2}, + {"sshllb_z_zi"_h, CPUFeatures::kSVE2}, + {"sshllt_z_zi"_h, CPUFeatures::kSVE2}, + {"ssra_z_zi"_h, CPUFeatures::kSVE2}, + {"ssublb_z_zz"_h, CPUFeatures::kSVE2}, + {"ssublbt_z_zz"_h, CPUFeatures::kSVE2}, + {"ssublt_z_zz"_h, CPUFeatures::kSVE2}, + {"ssubltb_z_zz"_h, CPUFeatures::kSVE2}, + {"ssubwb_z_zz"_h, CPUFeatures::kSVE2}, + {"ssubwt_z_zz"_h, CPUFeatures::kSVE2}, + {"stnt1b_z_p_ar_d_64_unscaled"_h, CPUFeatures::kSVE2}, + {"stnt1b_z_p_ar_s_x32_unscaled"_h, CPUFeatures::kSVE2}, + {"stnt1d_z_p_ar_d_64_unscaled"_h, CPUFeatures::kSVE2}, + {"stnt1h_z_p_ar_d_64_unscaled"_h, CPUFeatures::kSVE2}, + {"stnt1h_z_p_ar_s_x32_unscaled"_h, CPUFeatures::kSVE2}, + {"stnt1w_z_p_ar_d_64_unscaled"_h, CPUFeatures::kSVE2}, + {"stnt1w_z_p_ar_s_x32_unscaled"_h, CPUFeatures::kSVE2}, + {"subhnb_z_zz"_h, CPUFeatures::kSVE2}, + {"subhnt_z_zz"_h, CPUFeatures::kSVE2}, + {"suqadd_z_p_zz"_h, CPUFeatures::kSVE2}, + {"tbl_z_zz_2"_h, CPUFeatures::kSVE2}, + {"tbx_z_zz"_h, CPUFeatures::kSVE2}, + {"uaba_z_zzz"_h, CPUFeatures::kSVE2}, + {"uabalb_z_zzz"_h, CPUFeatures::kSVE2}, + {"uabalt_z_zzz"_h, CPUFeatures::kSVE2}, + {"uabdlb_z_zz"_h, CPUFeatures::kSVE2}, + {"uabdlt_z_zz"_h, CPUFeatures::kSVE2}, + {"uadalp_z_p_z"_h, CPUFeatures::kSVE2}, + {"uaddlb_z_zz"_h, CPUFeatures::kSVE2}, + {"uaddlt_z_zz"_h, CPUFeatures::kSVE2}, + {"uaddwb_z_zz"_h, CPUFeatures::kSVE2}, + {"uaddwt_z_zz"_h, CPUFeatures::kSVE2}, + {"uhadd_z_p_zz"_h, CPUFeatures::kSVE2}, + {"uhsub_z_p_zz"_h, CPUFeatures::kSVE2}, + {"uhsubr_z_p_zz"_h, 
CPUFeatures::kSVE2}, + {"umaxp_z_p_zz"_h, CPUFeatures::kSVE2}, + {"uminp_z_p_zz"_h, CPUFeatures::kSVE2}, + {"umlalb_z_zzz"_h, CPUFeatures::kSVE2}, + {"umlalb_z_zzzi_d"_h, CPUFeatures::kSVE2}, + {"umlalb_z_zzzi_s"_h, CPUFeatures::kSVE2}, + {"umlalt_z_zzz"_h, CPUFeatures::kSVE2}, + {"umlalt_z_zzzi_d"_h, CPUFeatures::kSVE2}, + {"umlalt_z_zzzi_s"_h, CPUFeatures::kSVE2}, + {"umlslb_z_zzz"_h, CPUFeatures::kSVE2}, + {"umlslb_z_zzzi_d"_h, CPUFeatures::kSVE2}, + {"umlslb_z_zzzi_s"_h, CPUFeatures::kSVE2}, + {"umlslt_z_zzz"_h, CPUFeatures::kSVE2}, + {"umlslt_z_zzzi_d"_h, CPUFeatures::kSVE2}, + {"umlslt_z_zzzi_s"_h, CPUFeatures::kSVE2}, + {"umulh_z_zz"_h, CPUFeatures::kSVE2}, + {"umullb_z_zz"_h, CPUFeatures::kSVE2}, + {"umullb_z_zzi_d"_h, CPUFeatures::kSVE2}, + {"umullb_z_zzi_s"_h, CPUFeatures::kSVE2}, + {"umullt_z_zz"_h, CPUFeatures::kSVE2}, + {"umullt_z_zzi_d"_h, CPUFeatures::kSVE2}, + {"umullt_z_zzi_s"_h, CPUFeatures::kSVE2}, + {"uqadd_z_p_zz"_h, CPUFeatures::kSVE2}, + {"uqrshl_z_p_zz"_h, CPUFeatures::kSVE2}, + {"uqrshlr_z_p_zz"_h, CPUFeatures::kSVE2}, + {"uqrshrnb_z_zi"_h, CPUFeatures::kSVE2}, + {"uqrshrnt_z_zi"_h, CPUFeatures::kSVE2}, + {"uqshl_z_p_zi"_h, CPUFeatures::kSVE2}, + {"uqshl_z_p_zz"_h, CPUFeatures::kSVE2}, + {"uqshlr_z_p_zz"_h, CPUFeatures::kSVE2}, + {"uqshrnb_z_zi"_h, CPUFeatures::kSVE2}, + {"uqshrnt_z_zi"_h, CPUFeatures::kSVE2}, + {"uqsub_z_p_zz"_h, CPUFeatures::kSVE2}, + {"uqsubr_z_p_zz"_h, CPUFeatures::kSVE2}, + {"uqxtnb_z_zz"_h, CPUFeatures::kSVE2}, + {"uqxtnt_z_zz"_h, CPUFeatures::kSVE2}, + {"urecpe_z_p_z"_h, CPUFeatures::kSVE2}, + {"urhadd_z_p_zz"_h, CPUFeatures::kSVE2}, + {"urshl_z_p_zz"_h, CPUFeatures::kSVE2}, + {"urshlr_z_p_zz"_h, CPUFeatures::kSVE2}, + {"urshr_z_p_zi"_h, CPUFeatures::kSVE2}, + {"ursqrte_z_p_z"_h, CPUFeatures::kSVE2}, + {"ursra_z_zi"_h, CPUFeatures::kSVE2}, + {"ushllb_z_zi"_h, CPUFeatures::kSVE2}, + {"ushllt_z_zi"_h, CPUFeatures::kSVE2}, + {"usqadd_z_p_zz"_h, CPUFeatures::kSVE2}, + {"usra_z_zi"_h, CPUFeatures::kSVE2}, + {"usublb_z_zz"_h, CPUFeatures::kSVE2}, + {"usublt_z_zz"_h, CPUFeatures::kSVE2}, + {"usubwb_z_zz"_h, CPUFeatures::kSVE2}, + {"usubwt_z_zz"_h, CPUFeatures::kSVE2}, + {"whilege_p_p_rr"_h, CPUFeatures::kSVE2}, + {"whilegt_p_p_rr"_h, CPUFeatures::kSVE2}, + {"whilehi_p_p_rr"_h, CPUFeatures::kSVE2}, + {"whilehs_p_p_rr"_h, CPUFeatures::kSVE2}, + {"whilerw_p_rr"_h, CPUFeatures::kSVE2}, + {"whilewr_p_rr"_h, CPUFeatures::kSVE2}, + {"xar_z_zzi"_h, CPUFeatures::kSVE2}, + {"smmla_z_zzz"_h, + CPUFeatures(CPUFeatures::kSVE, CPUFeatures::kSVEI8MM)}, + {"ummla_z_zzz"_h, + CPUFeatures(CPUFeatures::kSVE, CPUFeatures::kSVEI8MM)}, + {"usmmla_z_zzz"_h, + CPUFeatures(CPUFeatures::kSVE, CPUFeatures::kSVEI8MM)}, + {"fmmla_z_zzz_s"_h, + CPUFeatures(CPUFeatures::kSVE, CPUFeatures::kSVEF32MM)}, + {"fmmla_z_zzz_d"_h, + CPUFeatures(CPUFeatures::kSVE, CPUFeatures::kSVEF64MM)}, + {"smmla_asimdsame2_g"_h, + CPUFeatures(CPUFeatures::kNEON, CPUFeatures::kI8MM)}, + {"ummla_asimdsame2_g"_h, + CPUFeatures(CPUFeatures::kNEON, CPUFeatures::kI8MM)}, + {"usmmla_asimdsame2_g"_h, + CPUFeatures(CPUFeatures::kNEON, CPUFeatures::kI8MM)}, + {"ld1row_z_p_bi_u32"_h, + CPUFeatures(CPUFeatures::kSVE, CPUFeatures::kSVEF64MM)}, + {"ld1row_z_p_br_contiguous"_h, + CPUFeatures(CPUFeatures::kSVE, CPUFeatures::kSVEF64MM)}, + {"ld1rod_z_p_bi_u64"_h, + CPUFeatures(CPUFeatures::kSVE, CPUFeatures::kSVEF64MM)}, + {"ld1rod_z_p_br_contiguous"_h, + CPUFeatures(CPUFeatures::kSVE, CPUFeatures::kSVEF64MM)}, + {"ld1rob_z_p_bi_u8"_h, + CPUFeatures(CPUFeatures::kSVE, CPUFeatures::kSVEF64MM)}, + 
{"ld1rob_z_p_br_contiguous"_h, + CPUFeatures(CPUFeatures::kSVE, CPUFeatures::kSVEF64MM)}, + {"ld1roh_z_p_bi_u16"_h, + CPUFeatures(CPUFeatures::kSVE, CPUFeatures::kSVEF64MM)}, + {"ld1roh_z_p_br_contiguous"_h, + CPUFeatures(CPUFeatures::kSVE, CPUFeatures::kSVEF64MM)}, + {"usdot_asimdsame2_d"_h, + CPUFeatures(CPUFeatures::kNEON, CPUFeatures::kI8MM)}, + {"sudot_asimdelem_d"_h, + CPUFeatures(CPUFeatures::kNEON, CPUFeatures::kI8MM)}, + {"usdot_asimdelem_d"_h, + CPUFeatures(CPUFeatures::kNEON, CPUFeatures::kI8MM)}, + {"usdot_z_zzz_s"_h, + CPUFeatures(CPUFeatures::kSVE, CPUFeatures::kSVEI8MM)}, + {"usdot_z_zzzi_s"_h, + CPUFeatures(CPUFeatures::kSVE, CPUFeatures::kSVEI8MM)}, + {"sudot_z_zzzi_s"_h, + CPUFeatures(CPUFeatures::kSVE, CPUFeatures::kSVEI8MM)}, + {"addg_64_addsub_immtags"_h, CPUFeatures::kMTE}, + {"gmi_64g_dp_2src"_h, CPUFeatures::kMTE}, + {"irg_64i_dp_2src"_h, CPUFeatures::kMTE}, + {"ldg_64loffset_ldsttags"_h, CPUFeatures::kMTE}, + {"st2g_64soffset_ldsttags"_h, CPUFeatures::kMTE}, + {"st2g_64spost_ldsttags"_h, CPUFeatures::kMTE}, + {"st2g_64spre_ldsttags"_h, CPUFeatures::kMTE}, + {"stgp_64_ldstpair_off"_h, CPUFeatures::kMTE}, + {"stgp_64_ldstpair_post"_h, CPUFeatures::kMTE}, + {"stgp_64_ldstpair_pre"_h, CPUFeatures::kMTE}, + {"stg_64soffset_ldsttags"_h, CPUFeatures::kMTE}, + {"stg_64spost_ldsttags"_h, CPUFeatures::kMTE}, + {"stg_64spre_ldsttags"_h, CPUFeatures::kMTE}, + {"stz2g_64soffset_ldsttags"_h, CPUFeatures::kMTE}, + {"stz2g_64spost_ldsttags"_h, CPUFeatures::kMTE}, + {"stz2g_64spre_ldsttags"_h, CPUFeatures::kMTE}, + {"stzg_64soffset_ldsttags"_h, CPUFeatures::kMTE}, + {"stzg_64spost_ldsttags"_h, CPUFeatures::kMTE}, + {"stzg_64spre_ldsttags"_h, CPUFeatures::kMTE}, + {"subg_64_addsub_immtags"_h, CPUFeatures::kMTE}, + {"subps_64s_dp_2src"_h, CPUFeatures::kMTE}, + {"subp_64s_dp_2src"_h, CPUFeatures::kMTE}, + {"cpyen_cpy_memcms"_h, CPUFeatures::kMOPS}, + {"cpyern_cpy_memcms"_h, CPUFeatures::kMOPS}, + {"cpyewn_cpy_memcms"_h, CPUFeatures::kMOPS}, + {"cpye_cpy_memcms"_h, CPUFeatures::kMOPS}, + {"cpyfen_cpy_memcms"_h, CPUFeatures::kMOPS}, + {"cpyfern_cpy_memcms"_h, CPUFeatures::kMOPS}, + {"cpyfewn_cpy_memcms"_h, CPUFeatures::kMOPS}, + {"cpyfe_cpy_memcms"_h, CPUFeatures::kMOPS}, + {"cpyfmn_cpy_memcms"_h, CPUFeatures::kMOPS}, + {"cpyfmrn_cpy_memcms"_h, CPUFeatures::kMOPS}, + {"cpyfmwn_cpy_memcms"_h, CPUFeatures::kMOPS}, + {"cpyfm_cpy_memcms"_h, CPUFeatures::kMOPS}, + {"cpyfpn_cpy_memcms"_h, CPUFeatures::kMOPS}, + {"cpyfprn_cpy_memcms"_h, CPUFeatures::kMOPS}, + {"cpyfpwn_cpy_memcms"_h, CPUFeatures::kMOPS}, + {"cpyfp_cpy_memcms"_h, CPUFeatures::kMOPS}, + {"cpymn_cpy_memcms"_h, CPUFeatures::kMOPS}, + {"cpymrn_cpy_memcms"_h, CPUFeatures::kMOPS}, + {"cpymwn_cpy_memcms"_h, CPUFeatures::kMOPS}, + {"cpym_cpy_memcms"_h, CPUFeatures::kMOPS}, + {"cpypn_cpy_memcms"_h, CPUFeatures::kMOPS}, + {"cpyprn_cpy_memcms"_h, CPUFeatures::kMOPS}, + {"cpypwn_cpy_memcms"_h, CPUFeatures::kMOPS}, + {"cpyp_cpy_memcms"_h, CPUFeatures::kMOPS}, + {"seten_set_memcms"_h, CPUFeatures::kMOPS}, + {"sete_set_memcms"_h, CPUFeatures::kMOPS}, + {"setgen_set_memcms"_h, + CPUFeatures(CPUFeatures::kMOPS, CPUFeatures::kMTE)}, + {"setge_set_memcms"_h, + CPUFeatures(CPUFeatures::kMOPS, CPUFeatures::kMTE)}, + {"setgmn_set_memcms"_h, + CPUFeatures(CPUFeatures::kMOPS, CPUFeatures::kMTE)}, + {"setgm_set_memcms"_h, + CPUFeatures(CPUFeatures::kMOPS, CPUFeatures::kMTE)}, + {"setgpn_set_memcms"_h, + CPUFeatures(CPUFeatures::kMOPS, CPUFeatures::kMTE)}, + {"setgp_set_memcms"_h, + CPUFeatures(CPUFeatures::kMOPS, CPUFeatures::kMTE)}, + 
{"setmn_set_memcms"_h, CPUFeatures::kMOPS}, + {"setm_set_memcms"_h, CPUFeatures::kMOPS}, + {"setpn_set_memcms"_h, CPUFeatures::kMOPS}, + {"setp_set_memcms"_h, CPUFeatures::kMOPS}, + {"abs_32_dp_1src"_h, CPUFeatures::kCSSC}, + {"abs_64_dp_1src"_h, CPUFeatures::kCSSC}, + {"cnt_32_dp_1src"_h, CPUFeatures::kCSSC}, + {"cnt_64_dp_1src"_h, CPUFeatures::kCSSC}, + {"ctz_32_dp_1src"_h, CPUFeatures::kCSSC}, + {"ctz_64_dp_1src"_h, CPUFeatures::kCSSC}, + {"smax_32_dp_2src"_h, CPUFeatures::kCSSC}, + {"smax_64_dp_2src"_h, CPUFeatures::kCSSC}, + {"smin_32_dp_2src"_h, CPUFeatures::kCSSC}, + {"smin_64_dp_2src"_h, CPUFeatures::kCSSC}, + {"umax_32_dp_2src"_h, CPUFeatures::kCSSC}, + {"umax_64_dp_2src"_h, CPUFeatures::kCSSC}, + {"umin_32_dp_2src"_h, CPUFeatures::kCSSC}, + {"umin_64_dp_2src"_h, CPUFeatures::kCSSC}, + {"smax_32_minmax_imm"_h, CPUFeatures::kCSSC}, + {"smax_64_minmax_imm"_h, CPUFeatures::kCSSC}, + {"smin_32_minmax_imm"_h, CPUFeatures::kCSSC}, + {"smin_64_minmax_imm"_h, CPUFeatures::kCSSC}, + {"umax_32u_minmax_imm"_h, CPUFeatures::kCSSC}, + {"umax_64u_minmax_imm"_h, CPUFeatures::kCSSC}, + {"umin_32u_minmax_imm"_h, CPUFeatures::kCSSC}, + {"umin_64u_minmax_imm"_h, CPUFeatures::kCSSC}, + }; + + if (features.count(form_hash) > 0) { + scope.Record(features[form_hash]); + } + } else { + (it->second)(this, instr); + } +} + +} // namespace aarch64 +} // namespace vixl diff --git a/3rdparty/vixl/src/aarch64/decoder-aarch64.cc b/3rdparty/vixl/src/aarch64/decoder-aarch64.cc new file mode 100644 index 0000000000..dc56633aa5 --- /dev/null +++ b/3rdparty/vixl/src/aarch64/decoder-aarch64.cc @@ -0,0 +1,575 @@ +// Copyright 2019, VIXL authors +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// * Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// * Neither the name of ARM Limited nor the names of its contributors may be +// used to endorse or promote products derived from this software without +// specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS CONTRIBUTORS "AS IS" AND +// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE +// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+
+#include <string>
+
+#include "../globals-vixl.h"
+#include "../utils-vixl.h"
+
+#include "decoder-aarch64.h"
+#include "decoder-constants-aarch64.h"
+
+namespace vixl {
+namespace aarch64 {
+
+void Decoder::Decode(const Instruction* instr) {
+  std::list<DecoderVisitor*>::iterator it;
+  for (it = visitors_.begin(); it != visitors_.end(); it++) {
+    VIXL_ASSERT((*it)->IsConstVisitor());
+  }
+  VIXL_ASSERT(compiled_decoder_root_ != NULL);
+  compiled_decoder_root_->Decode(instr);
+}
+
+void Decoder::Decode(Instruction* instr) {
+  compiled_decoder_root_->Decode(const_cast<Instruction*>(instr));
+}
+
+void Decoder::AddDecodeNode(const DecodeNode& node) {
+  if (decode_nodes_.count(node.GetName()) == 0) {
+    decode_nodes_.insert(std::make_pair(node.GetName(), node));
+  }
+}
+
+DecodeNode* Decoder::GetDecodeNode(std::string name) {
+  if (decode_nodes_.count(name) != 1) {
+    std::string msg = "Can't find decode node " + name + ".\n";
+    VIXL_ABORT_WITH_MSG(msg.c_str());
+  }
+  return &decode_nodes_[name];
+}
+
+void Decoder::ConstructDecodeGraph() {
+  // Add all of the decoding nodes to the Decoder.
+  for (unsigned i = 0; i < ArrayLength(kDecodeMapping); i++) {
+    AddDecodeNode(DecodeNode(kDecodeMapping[i], this));
+
+    // Add a node for each instruction form named, identified by having no '_'
+    // prefix on the node name.
+    const DecodeMapping& map = kDecodeMapping[i];
+    for (unsigned j = 0; j < map.mapping.size(); j++) {
+      if ((map.mapping[j].handler != NULL) &&
+          (map.mapping[j].handler[0] != '_')) {
+        AddDecodeNode(DecodeNode(map.mapping[j].handler, this));
+      }
+    }
+  }
+
+  // Add an "unallocated" node, used when an instruction encoding is not
+  // recognised by the decoding graph.
+  AddDecodeNode(DecodeNode("unallocated", this));
+
+  // Compile the graph from the root.
+  compiled_decoder_root_ = GetDecodeNode("Root")->Compile(this);
+}
+
+void Decoder::AppendVisitor(DecoderVisitor* new_visitor) {
+  visitors_.push_back(new_visitor);
+}
+
+
+void Decoder::PrependVisitor(DecoderVisitor* new_visitor) {
+  visitors_.push_front(new_visitor);
+}
+
+
+void Decoder::InsertVisitorBefore(DecoderVisitor* new_visitor,
+                                  DecoderVisitor* registered_visitor) {
+  std::list<DecoderVisitor*>::iterator it;
+  for (it = visitors_.begin(); it != visitors_.end(); it++) {
+    if (*it == registered_visitor) {
+      visitors_.insert(it, new_visitor);
+      return;
+    }
+  }
+  // We reached the end of the list. The last element must be
+  // registered_visitor.
+  VIXL_ASSERT(*it == registered_visitor);
+  visitors_.insert(it, new_visitor);
+}
+
+
+void Decoder::InsertVisitorAfter(DecoderVisitor* new_visitor,
+                                 DecoderVisitor* registered_visitor) {
+  std::list<DecoderVisitor*>::iterator it;
+  for (it = visitors_.begin(); it != visitors_.end(); it++) {
+    if (*it == registered_visitor) {
+      it++;
+      visitors_.insert(it, new_visitor);
+      return;
+    }
+  }
+  // We reached the end of the list. The last element must be
+  // registered_visitor.
+  VIXL_ASSERT(*it == registered_visitor);
+  visitors_.push_back(new_visitor);
+}
+
+
+void Decoder::RemoveVisitor(DecoderVisitor* visitor) {
+  visitors_.remove(visitor);
+}
+
+void Decoder::VisitNamedInstruction(const Instruction* instr,
+                                    const std::string& name) {
+  std::list<DecoderVisitor*>::iterator it;
+  Metadata m = {{"form", name}};
+  for (it = visitors_.begin(); it != visitors_.end(); it++) {
+    (*it)->Visit(&m, instr);
+  }
+}
+
+// Initialise empty vectors for sampled bits and pattern table.
+const std::vector<uint8_t> DecodeNode::kEmptySampledBits;
+const std::vector<DecodePattern> DecodeNode::kEmptyPatternTable;
+
+void DecodeNode::CompileNodeForBits(Decoder* decoder,
+                                    std::string name,
+                                    uint32_t bits) {
+  DecodeNode* n = decoder->GetDecodeNode(name);
+  VIXL_ASSERT(n != NULL);
+  if (!n->IsCompiled()) {
+    n->Compile(decoder);
+  }
+  VIXL_ASSERT(n->IsCompiled());
+  compiled_node_->SetNodeForBits(bits, n->GetCompiledNode());
+}
+
+
+#define INSTANTIATE_TEMPLATE_M(M)                      \
+  case 0x##M:                                          \
+    bit_extract_fn = &Instruction::ExtractBits<0x##M>; \
+    break;
+#define INSTANTIATE_TEMPLATE_MV(M, V)                           \
+  case 0x##M##V:                                                \
+    bit_extract_fn = &Instruction::IsMaskedValue<0x##M, 0x##V>; \
+    break;
+
+BitExtractFn DecodeNode::GetBitExtractFunctionHelper(uint32_t x, uint32_t y) {
+  // Instantiate a templated bit extraction function for every pattern we
+  // might encounter. If the assertion in the default clause is reached, add a
+  // new instantiation below using the information in the failure message.
+  BitExtractFn bit_extract_fn = NULL;
+
+  // The arguments x and y represent the mask and value. If y is 0, x is the
+  // mask. Otherwise, y is the mask, and x is the value to compare against a
+  // masked result.
+  uint64_t signature = (static_cast<uint64_t>(y) << 32) | x;
+  switch (signature) {
+    INSTANTIATE_TEMPLATE_M(00000002);
+    INSTANTIATE_TEMPLATE_M(00000010);
+    INSTANTIATE_TEMPLATE_M(00000060);
+    INSTANTIATE_TEMPLATE_M(000000df);
+    INSTANTIATE_TEMPLATE_M(00000100);
+    INSTANTIATE_TEMPLATE_M(00000200);
+    INSTANTIATE_TEMPLATE_M(00000400);
+    INSTANTIATE_TEMPLATE_M(00000800);
+    INSTANTIATE_TEMPLATE_M(00000c00);
+    INSTANTIATE_TEMPLATE_M(00000c10);
+    INSTANTIATE_TEMPLATE_M(00000fc0);
+    INSTANTIATE_TEMPLATE_M(00001000);
+    INSTANTIATE_TEMPLATE_M(00001400);
+    INSTANTIATE_TEMPLATE_M(00001800);
+    INSTANTIATE_TEMPLATE_M(00001c00);
+    INSTANTIATE_TEMPLATE_M(00002000);
+    INSTANTIATE_TEMPLATE_M(00002010);
+    INSTANTIATE_TEMPLATE_M(00002400);
+    INSTANTIATE_TEMPLATE_M(00003000);
+    INSTANTIATE_TEMPLATE_M(00003020);
+    INSTANTIATE_TEMPLATE_M(00003400);
+    INSTANTIATE_TEMPLATE_M(00003800);
+    INSTANTIATE_TEMPLATE_M(00003c00);
+    INSTANTIATE_TEMPLATE_M(00013000);
+    INSTANTIATE_TEMPLATE_M(000203e0);
+    INSTANTIATE_TEMPLATE_M(000303e0);
+    INSTANTIATE_TEMPLATE_M(00040000);
+    INSTANTIATE_TEMPLATE_M(00040010);
+    INSTANTIATE_TEMPLATE_M(00060000);
+    INSTANTIATE_TEMPLATE_M(00061000);
+    INSTANTIATE_TEMPLATE_M(00070000);
+    INSTANTIATE_TEMPLATE_M(000703c0);
+    INSTANTIATE_TEMPLATE_M(00080000);
+    INSTANTIATE_TEMPLATE_M(00090000);
+    INSTANTIATE_TEMPLATE_M(000f0000);
+    INSTANTIATE_TEMPLATE_M(000f0010);
+    INSTANTIATE_TEMPLATE_M(00100000);
+    INSTANTIATE_TEMPLATE_M(00180000);
+    INSTANTIATE_TEMPLATE_M(001b1c00);
+    INSTANTIATE_TEMPLATE_M(001f0000);
+    INSTANTIATE_TEMPLATE_M(001f0018);
+    INSTANTIATE_TEMPLATE_M(001f2000);
+    INSTANTIATE_TEMPLATE_M(001f3000);
+    INSTANTIATE_TEMPLATE_M(00400000);
+    INSTANTIATE_TEMPLATE_M(00400018);
+    INSTANTIATE_TEMPLATE_M(00400800);
+    INSTANTIATE_TEMPLATE_M(00403000);
+    INSTANTIATE_TEMPLATE_M(00500000);
+    INSTANTIATE_TEMPLATE_M(00500800);
+    INSTANTIATE_TEMPLATE_M(00583000);
+    INSTANTIATE_TEMPLATE_M(005f0000);
+    INSTANTIATE_TEMPLATE_M(00800000);
+    INSTANTIATE_TEMPLATE_M(00800400);
+    INSTANTIATE_TEMPLATE_M(00800c1d);
+    INSTANTIATE_TEMPLATE_M(0080101f);
+    INSTANTIATE_TEMPLATE_M(00801c00);
+    INSTANTIATE_TEMPLATE_M(00803000);
+    INSTANTIATE_TEMPLATE_M(00803c00);
+    INSTANTIATE_TEMPLATE_M(009f0000);
+    INSTANTIATE_TEMPLATE_M(009f2000);
+    INSTANTIATE_TEMPLATE_M(00c00000);
+    INSTANTIATE_TEMPLATE_M(00c00010);
+    INSTANTIATE_TEMPLATE_M(00c0001f); +
INSTANTIATE_TEMPLATE_M(00c00200); + INSTANTIATE_TEMPLATE_M(00c00400); + INSTANTIATE_TEMPLATE_M(00c00c00); + INSTANTIATE_TEMPLATE_M(00c00c19); + INSTANTIATE_TEMPLATE_M(00c01000); + INSTANTIATE_TEMPLATE_M(00c01400); + INSTANTIATE_TEMPLATE_M(00c01c00); + INSTANTIATE_TEMPLATE_M(00c02000); + INSTANTIATE_TEMPLATE_M(00c03000); + INSTANTIATE_TEMPLATE_M(00c03c00); + INSTANTIATE_TEMPLATE_M(00c70000); + INSTANTIATE_TEMPLATE_M(00c83000); + INSTANTIATE_TEMPLATE_M(00d00200); + INSTANTIATE_TEMPLATE_M(00d80800); + INSTANTIATE_TEMPLATE_M(00d81800); + INSTANTIATE_TEMPLATE_M(00d81c00); + INSTANTIATE_TEMPLATE_M(00d82800); + INSTANTIATE_TEMPLATE_M(00d82c00); + INSTANTIATE_TEMPLATE_M(00d92400); + INSTANTIATE_TEMPLATE_M(00d93000); + INSTANTIATE_TEMPLATE_M(00db0000); + INSTANTIATE_TEMPLATE_M(00db2000); + INSTANTIATE_TEMPLATE_M(00dc0000); + INSTANTIATE_TEMPLATE_M(00dc2000); + INSTANTIATE_TEMPLATE_M(00df0000); + INSTANTIATE_TEMPLATE_M(40000000); + INSTANTIATE_TEMPLATE_M(40000010); + INSTANTIATE_TEMPLATE_M(40000c00); + INSTANTIATE_TEMPLATE_M(40002000); + INSTANTIATE_TEMPLATE_M(40002010); + INSTANTIATE_TEMPLATE_M(40003000); + INSTANTIATE_TEMPLATE_M(40003c00); + INSTANTIATE_TEMPLATE_M(401f2000); + INSTANTIATE_TEMPLATE_M(40400800); + INSTANTIATE_TEMPLATE_M(40400c00); + INSTANTIATE_TEMPLATE_M(40403c00); + INSTANTIATE_TEMPLATE_M(405f0000); + INSTANTIATE_TEMPLATE_M(40800000); + INSTANTIATE_TEMPLATE_M(40800c00); + INSTANTIATE_TEMPLATE_M(40802000); + INSTANTIATE_TEMPLATE_M(40802010); + INSTANTIATE_TEMPLATE_M(40803400); + INSTANTIATE_TEMPLATE_M(40803c00); + INSTANTIATE_TEMPLATE_M(40c00000); + INSTANTIATE_TEMPLATE_M(40c00400); + INSTANTIATE_TEMPLATE_M(40c00800); + INSTANTIATE_TEMPLATE_M(40c00c00); + INSTANTIATE_TEMPLATE_M(40c00c10); + INSTANTIATE_TEMPLATE_M(40c02000); + INSTANTIATE_TEMPLATE_M(40c02010); + INSTANTIATE_TEMPLATE_M(40c02c00); + INSTANTIATE_TEMPLATE_M(40c03c00); + INSTANTIATE_TEMPLATE_M(40c80000); + INSTANTIATE_TEMPLATE_M(40c90000); + INSTANTIATE_TEMPLATE_M(40cf0000); + INSTANTIATE_TEMPLATE_M(40d02000); + INSTANTIATE_TEMPLATE_M(40d02010); + INSTANTIATE_TEMPLATE_M(40d80000); + INSTANTIATE_TEMPLATE_M(40d81800); + INSTANTIATE_TEMPLATE_M(40dc0000); + INSTANTIATE_TEMPLATE_M(bf20c000); + INSTANTIATE_TEMPLATE_MV(00000006, 00000000); + INSTANTIATE_TEMPLATE_MV(00000006, 00000006); + INSTANTIATE_TEMPLATE_MV(00000007, 00000000); + INSTANTIATE_TEMPLATE_MV(0000001f, 0000001f); + INSTANTIATE_TEMPLATE_MV(00000210, 00000000); + INSTANTIATE_TEMPLATE_MV(000003e0, 00000000); + INSTANTIATE_TEMPLATE_MV(000003e0, 000003e0); + INSTANTIATE_TEMPLATE_MV(000003e2, 000003e0); + INSTANTIATE_TEMPLATE_MV(000003e6, 000003e0); + INSTANTIATE_TEMPLATE_MV(000003e6, 000003e6); + INSTANTIATE_TEMPLATE_MV(00000c00, 00000000); + INSTANTIATE_TEMPLATE_MV(00000fc0, 00000000); + INSTANTIATE_TEMPLATE_MV(000013e0, 00001000); + INSTANTIATE_TEMPLATE_MV(00001c00, 00000000); + INSTANTIATE_TEMPLATE_MV(00002400, 00000000); + INSTANTIATE_TEMPLATE_MV(00003000, 00000000); + INSTANTIATE_TEMPLATE_MV(00003000, 00001000); + INSTANTIATE_TEMPLATE_MV(00003000, 00002000); + INSTANTIATE_TEMPLATE_MV(00003000, 00003000); + INSTANTIATE_TEMPLATE_MV(00003010, 00000000); + INSTANTIATE_TEMPLATE_MV(00003c00, 00003c00); + INSTANTIATE_TEMPLATE_MV(00040010, 00000000); + INSTANTIATE_TEMPLATE_MV(00060000, 00000000); + INSTANTIATE_TEMPLATE_MV(00061000, 00000000); + INSTANTIATE_TEMPLATE_MV(00070000, 00030000); + INSTANTIATE_TEMPLATE_MV(00073ee0, 00033060); + INSTANTIATE_TEMPLATE_MV(00073f9f, 0000001f); + INSTANTIATE_TEMPLATE_MV(000f0000, 00000000); + INSTANTIATE_TEMPLATE_MV(000f0010, 
00000000); + INSTANTIATE_TEMPLATE_MV(00100200, 00000000); + INSTANTIATE_TEMPLATE_MV(00100210, 00000000); + INSTANTIATE_TEMPLATE_MV(00160000, 00000000); + INSTANTIATE_TEMPLATE_MV(00170000, 00000000); + INSTANTIATE_TEMPLATE_MV(001c0000, 00000000); + INSTANTIATE_TEMPLATE_MV(001d0000, 00000000); + INSTANTIATE_TEMPLATE_MV(001e0000, 00000000); + INSTANTIATE_TEMPLATE_MV(001f0000, 00000000); + INSTANTIATE_TEMPLATE_MV(001f0000, 00010000); + INSTANTIATE_TEMPLATE_MV(001f0000, 00100000); + INSTANTIATE_TEMPLATE_MV(001f0000, 001f0000); + INSTANTIATE_TEMPLATE_MV(001f3000, 00000000); + INSTANTIATE_TEMPLATE_MV(001f3000, 00001000); + INSTANTIATE_TEMPLATE_MV(001f3000, 001f0000); + INSTANTIATE_TEMPLATE_MV(001f300f, 0000000d); + INSTANTIATE_TEMPLATE_MV(001f301f, 0000000d); + INSTANTIATE_TEMPLATE_MV(001f33e0, 000103e0); + INSTANTIATE_TEMPLATE_MV(001f3800, 00000000); + INSTANTIATE_TEMPLATE_MV(00401000, 00400000); + INSTANTIATE_TEMPLATE_MV(005f3000, 001f0000); + INSTANTIATE_TEMPLATE_MV(005f3000, 001f1000); + INSTANTIATE_TEMPLATE_MV(00800010, 00000000); + INSTANTIATE_TEMPLATE_MV(00800400, 00000000); + INSTANTIATE_TEMPLATE_MV(00800410, 00000000); + INSTANTIATE_TEMPLATE_MV(00803000, 00002000); + INSTANTIATE_TEMPLATE_MV(00870000, 00000000); + INSTANTIATE_TEMPLATE_MV(009f0000, 00010000); + INSTANTIATE_TEMPLATE_MV(00c00000, 00000000); + INSTANTIATE_TEMPLATE_MV(00c00000, 00400000); + INSTANTIATE_TEMPLATE_MV(00c0001f, 00000000); + INSTANTIATE_TEMPLATE_MV(00c001ff, 00000000); + INSTANTIATE_TEMPLATE_MV(00c00200, 00400000); + INSTANTIATE_TEMPLATE_MV(00c0020f, 00400000); + INSTANTIATE_TEMPLATE_MV(00c003e0, 00000000); + INSTANTIATE_TEMPLATE_MV(00c00800, 00000000); + INSTANTIATE_TEMPLATE_MV(00d80800, 00000000); + INSTANTIATE_TEMPLATE_MV(00df0000, 00000000); + INSTANTIATE_TEMPLATE_MV(00df3800, 001f0800); + INSTANTIATE_TEMPLATE_MV(40002000, 40000000); + INSTANTIATE_TEMPLATE_MV(40003c00, 00000000); + INSTANTIATE_TEMPLATE_MV(40040000, 00000000); + INSTANTIATE_TEMPLATE_MV(401f2000, 401f0000); + INSTANTIATE_TEMPLATE_MV(40800c00, 40000400); + INSTANTIATE_TEMPLATE_MV(40c00000, 00000000); + INSTANTIATE_TEMPLATE_MV(40c00000, 00400000); + INSTANTIATE_TEMPLATE_MV(40c00000, 40000000); + INSTANTIATE_TEMPLATE_MV(40c00000, 40800000); + INSTANTIATE_TEMPLATE_MV(40df0000, 00000000); + default: { + static bool printed_preamble = false; + if (!printed_preamble) { + printf("One or more missing template instantiations.\n"); + printf( + "Add the following to either GetBitExtractFunction() " + "implementations\n"); + printf("in %s near line %d:\n", __FILE__, __LINE__); + printed_preamble = true; + } + + if (y == 0) { + printf(" INSTANTIATE_TEMPLATE_M(%08x);\n", x); + bit_extract_fn = &Instruction::ExtractBitsAbsent; + } else { + printf(" INSTANTIATE_TEMPLATE_MV(%08x, %08x);\n", y, x); + bit_extract_fn = &Instruction::IsMaskedValueAbsent; + } + } + } + return bit_extract_fn; +} + +#undef INSTANTIATE_TEMPLATE_M +#undef INSTANTIATE_TEMPLATE_MV + +bool DecodeNode::TryCompileOptimisedDecodeTable(Decoder* decoder) { + // EitherOr optimisation: if there are only one or two patterns in the table, + // try to optimise the node to exploit that. + size_t table_size = pattern_table_.size(); + if ((table_size <= 2) && (GetSampledBitsCount() > 1)) { + // TODO: support 'x' in this optimisation by dropping the sampled bit + // positions before making the mask/value. 
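+    // For example, a node that samples four bits and holds the single fixed
+    // pattern "1101" compiles down to one mask/value test over a two-entry
+    // table ("unallocated" vs. the named handler) instead of a 16-entry
+    // lookup table.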
+    if (!PatternContainsSymbol(pattern_table_[0].pattern,
+                               PatternSymbol::kSymbolX) &&
+        (table_size == 1)) {
+      // A pattern table consisting of a fixed pattern with no x's, and an
+      // "otherwise" or absent case. Optimise this into an instruction mask and
+      // value test.
+      uint32_t single_decode_mask = 0;
+      uint32_t single_decode_value = 0;
+      const std::vector<uint8_t>& bits = GetSampledBits();
+
+      // Construct the instruction mask and value from the pattern.
+      VIXL_ASSERT(bits.size() == GetPatternLength(pattern_table_[0].pattern));
+      for (size_t i = 0; i < bits.size(); i++) {
+        single_decode_mask |= 1U << bits[i];
+        if (GetSymbolAt(pattern_table_[0].pattern, i) ==
+            PatternSymbol::kSymbol1) {
+          single_decode_value |= 1U << bits[i];
+        }
+      }
+      BitExtractFn bit_extract_fn =
+          GetBitExtractFunction(single_decode_mask, single_decode_value);
+
+      // Create a compiled node that contains a two entry table for the
+      // either/or cases.
+      CreateCompiledNode(bit_extract_fn, 2);
+
+      // Set DecodeNode for when the instruction after masking doesn't match the
+      // value.
+      CompileNodeForBits(decoder, "unallocated", 0);
+
+      // Set DecodeNode for when it does match.
+      CompileNodeForBits(decoder, pattern_table_[0].handler, 1);
+
+      return true;
+    }
+  }
+  return false;
+}
+
+CompiledDecodeNode* DecodeNode::Compile(Decoder* decoder) {
+  if (IsLeafNode()) {
+    // A leaf node is a simple wrapper around a visitor function, with no
+    // instruction decoding to do.
+    CreateVisitorNode();
+  } else if (!TryCompileOptimisedDecodeTable(decoder)) {
+    // The "otherwise" node is the default next node if no pattern matches.
+    std::string otherwise = "unallocated";
+
+    // For each pattern in pattern_table_, create an entry in matches that
+    // has a corresponding mask and value for the pattern.
+    std::vector<MaskValuePair> matches;
+    for (size_t i = 0; i < pattern_table_.size(); i++) {
+      matches.push_back(GenerateMaskValuePair(
+          GenerateOrderedPattern(pattern_table_[i].pattern)));
+    }
+
+    BitExtractFn bit_extract_fn =
+        GetBitExtractFunction(GenerateSampledBitsMask());
+
+    // Create a compiled node that contains a table with an entry for every bit
+    // pattern.
+    CreateCompiledNode(bit_extract_fn,
+                       static_cast<size_t>(1) << GetSampledBitsCount());
+    VIXL_ASSERT(compiled_node_ != NULL);
+
+    // When we find a pattern matches the representation, set the node's decode
+    // function for that representation to the corresponding function.
+    for (uint32_t bits = 0; bits < (1U << GetSampledBitsCount()); bits++) {
+      for (size_t i = 0; i < matches.size(); i++) {
+        if ((bits & matches[i].first) == matches[i].second) {
+          // Only one instruction class should match for each value of bits, so
+          // if we get here, the node pointed to should still be unallocated.
+          VIXL_ASSERT(compiled_node_->GetNodeForBits(bits) == NULL);
+          CompileNodeForBits(decoder, pattern_table_[i].handler, bits);
+          break;
+        }
+      }
+
+      // If the decode_table_ entry for these bits is still NULL, the
+      // instruction must be handled by the "otherwise" case, which by default
+      // is the Unallocated visitor.
+      if (compiled_node_->GetNodeForBits(bits) == NULL) {
+        CompileNodeForBits(decoder, otherwise, bits);
+      }
+    }
+  }
+
+  VIXL_ASSERT(compiled_node_ != NULL);
+  return compiled_node_;
+}
+
+void CompiledDecodeNode::Decode(const Instruction* instr) const {
+  if (IsLeafNode()) {
+    // If this node is a leaf, call the registered visitor function.
+    VIXL_ASSERT(decoder_ != NULL);
+    decoder_->VisitNamedInstruction(instr, instruction_name_);
+  } else {
+    // Otherwise, using the sampled bit extractor for this node, look up the
+    // next node in the decode tree, and call its Decode method.
+    VIXL_ASSERT(bit_extract_fn_ != NULL);
+    VIXL_ASSERT((instr->*bit_extract_fn_)() < decode_table_size_);
+    VIXL_ASSERT(decode_table_[(instr->*bit_extract_fn_)()] != NULL);
+    decode_table_[(instr->*bit_extract_fn_)()]->Decode(instr);
+  }
+}
+
+DecodeNode::MaskValuePair DecodeNode::GenerateMaskValuePair(
+    uint32_t pattern) const {
+  uint32_t mask = 0, value = 0;
+  for (size_t i = 0; i < GetPatternLength(pattern); i++) {
+    PatternSymbol sym = GetSymbolAt(pattern, i);
+    mask = (mask << 1) | ((sym == PatternSymbol::kSymbolX) ? 0 : 1);
+    value = (value << 1) | (static_cast<uint32_t>(sym) & 1);
+  }
+  return std::make_pair(mask, value);
+}
+
+uint32_t DecodeNode::GenerateOrderedPattern(uint32_t pattern) const {
+  const std::vector<uint8_t>& sampled_bits = GetSampledBits();
+  uint64_t temp = 0xffffffffffffffff;
+
+  // Place symbols into the field of set bits. Symbols are two bits wide and
+  // take values 0, 1 or 2, so 3 will represent "no symbol".
+  for (size_t i = 0; i < sampled_bits.size(); i++) {
+    int shift = sampled_bits[i] * 2;
+    temp ^= static_cast<uint64_t>(kEndOfPattern) << shift;
+    temp |= static_cast<uint64_t>(GetSymbolAt(pattern, i)) << shift;
+  }
+
+  // Iterate over temp and extract new pattern ordered by sample position.
+  uint32_t result = kEndOfPattern;  // End of pattern marker.
+
+  // Iterate over the pattern one symbol (two bits) at a time.
+  for (int i = 62; i >= 0; i -= 2) {
+    uint32_t sym = (temp >> i) & kPatternSymbolMask;
+
+    // If this is a valid symbol, shift into the result.
+    if (sym != kEndOfPattern) {
+      result = (result << 2) | sym;
+    }
+  }
+
+  // The length of the ordered pattern must be the same as the input pattern,
+  // and the number of sampled bits.
+  VIXL_ASSERT(GetPatternLength(result) == GetPatternLength(pattern));
+  VIXL_ASSERT(GetPatternLength(result) == sampled_bits.size());
+
+  return result;
+}
+
+uint32_t DecodeNode::GenerateSampledBitsMask() const {
+  uint32_t mask = 0;
+  for (int bit : GetSampledBits()) {
+    mask |= 1 << bit;
+  }
+  return mask;
+}
+
+}  // namespace aarch64
+}  // namespace vixl
diff --git a/3rdparty/vixl/src/aarch64/disasm-aarch64.cc b/3rdparty/vixl/src/aarch64/disasm-aarch64.cc
new file mode 100644
index 0000000000..3592752c7c
--- /dev/null
+++ b/3rdparty/vixl/src/aarch64/disasm-aarch64.cc
@@ -0,0 +1,7581 @@
+// Copyright 2015, VIXL authors
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright notice,
+//   this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above copyright notice,
+//   this list of conditions and the following disclaimer in the documentation
+//   and/or other materials provided with the distribution.
+// * Neither the name of ARM Limited nor the names of its contributors may be
+//   used to endorse or promote products derived from this software without
+//   specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS CONTRIBUTORS "AS IS" AND
+// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE +// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +#include +#include +#include + +#include "disasm-aarch64.h" + +namespace vixl { +namespace aarch64 { + +const Disassembler::FormToVisitorFnMap *Disassembler::GetFormToVisitorFnMap() { + static const FormToVisitorFnMap form_to_visitor = { + DEFAULT_FORM_TO_VISITOR_MAP(Disassembler), + {"autia1716_hi_hints"_h, &Disassembler::DisassembleNoArgs}, + {"autiasp_hi_hints"_h, &Disassembler::DisassembleNoArgs}, + {"autiaz_hi_hints"_h, &Disassembler::DisassembleNoArgs}, + {"autib1716_hi_hints"_h, &Disassembler::DisassembleNoArgs}, + {"autibsp_hi_hints"_h, &Disassembler::DisassembleNoArgs}, + {"autibz_hi_hints"_h, &Disassembler::DisassembleNoArgs}, + {"axflag_m_pstate"_h, &Disassembler::DisassembleNoArgs}, + {"cfinv_m_pstate"_h, &Disassembler::DisassembleNoArgs}, + {"csdb_hi_hints"_h, &Disassembler::DisassembleNoArgs}, + {"dgh_hi_hints"_h, &Disassembler::DisassembleNoArgs}, + {"ssbb_only_barriers"_h, &Disassembler::DisassembleNoArgs}, + {"esb_hi_hints"_h, &Disassembler::DisassembleNoArgs}, + {"isb_bi_barriers"_h, &Disassembler::DisassembleNoArgs}, + {"nop_hi_hints"_h, &Disassembler::DisassembleNoArgs}, + {"pacia1716_hi_hints"_h, &Disassembler::DisassembleNoArgs}, + {"paciasp_hi_hints"_h, &Disassembler::DisassembleNoArgs}, + {"paciaz_hi_hints"_h, &Disassembler::DisassembleNoArgs}, + {"pacib1716_hi_hints"_h, &Disassembler::DisassembleNoArgs}, + {"pacibsp_hi_hints"_h, &Disassembler::DisassembleNoArgs}, + {"pacibz_hi_hints"_h, &Disassembler::DisassembleNoArgs}, + {"sev_hi_hints"_h, &Disassembler::DisassembleNoArgs}, + {"sevl_hi_hints"_h, &Disassembler::DisassembleNoArgs}, + {"wfe_hi_hints"_h, &Disassembler::DisassembleNoArgs}, + {"wfi_hi_hints"_h, &Disassembler::DisassembleNoArgs}, + {"xaflag_m_pstate"_h, &Disassembler::DisassembleNoArgs}, + {"xpaclri_hi_hints"_h, &Disassembler::DisassembleNoArgs}, + {"yield_hi_hints"_h, &Disassembler::DisassembleNoArgs}, + {"abs_asimdmisc_r"_h, &Disassembler::VisitNEON2RegMisc}, + {"cls_asimdmisc_r"_h, &Disassembler::VisitNEON2RegMisc}, + {"clz_asimdmisc_r"_h, &Disassembler::VisitNEON2RegMisc}, + {"cnt_asimdmisc_r"_h, &Disassembler::VisitNEON2RegMisc}, + {"neg_asimdmisc_r"_h, &Disassembler::VisitNEON2RegMisc}, + {"rev16_asimdmisc_r"_h, &Disassembler::VisitNEON2RegMisc}, + {"rev32_asimdmisc_r"_h, &Disassembler::VisitNEON2RegMisc}, + {"rev64_asimdmisc_r"_h, &Disassembler::VisitNEON2RegMisc}, + {"sqabs_asimdmisc_r"_h, &Disassembler::VisitNEON2RegMisc}, + {"sqneg_asimdmisc_r"_h, &Disassembler::VisitNEON2RegMisc}, + {"suqadd_asimdmisc_r"_h, &Disassembler::VisitNEON2RegMisc}, + {"urecpe_asimdmisc_r"_h, &Disassembler::VisitNEON2RegMisc}, + {"ursqrte_asimdmisc_r"_h, &Disassembler::VisitNEON2RegMisc}, + {"usqadd_asimdmisc_r"_h, &Disassembler::VisitNEON2RegMisc}, + {"not_asimdmisc_r"_h, &Disassembler::DisassembleNEON2RegLogical}, + {"rbit_asimdmisc_r"_h, &Disassembler::DisassembleNEON2RegLogical}, + {"xtn_asimdmisc_n"_h, &Disassembler::DisassembleNEON2RegExtract}, + {"sqxtn_asimdmisc_n"_h, &Disassembler::DisassembleNEON2RegExtract}, + 
{"uqxtn_asimdmisc_n"_h, &Disassembler::DisassembleNEON2RegExtract}, + {"sqxtun_asimdmisc_n"_h, &Disassembler::DisassembleNEON2RegExtract}, + {"shll_asimdmisc_s"_h, &Disassembler::DisassembleNEON2RegExtract}, + {"sadalp_asimdmisc_p"_h, &Disassembler::DisassembleNEON2RegAddlp}, + {"saddlp_asimdmisc_p"_h, &Disassembler::DisassembleNEON2RegAddlp}, + {"uadalp_asimdmisc_p"_h, &Disassembler::DisassembleNEON2RegAddlp}, + {"uaddlp_asimdmisc_p"_h, &Disassembler::DisassembleNEON2RegAddlp}, + {"cmeq_asimdmisc_z"_h, &Disassembler::DisassembleNEON2RegCompare}, + {"cmge_asimdmisc_z"_h, &Disassembler::DisassembleNEON2RegCompare}, + {"cmgt_asimdmisc_z"_h, &Disassembler::DisassembleNEON2RegCompare}, + {"cmle_asimdmisc_z"_h, &Disassembler::DisassembleNEON2RegCompare}, + {"cmlt_asimdmisc_z"_h, &Disassembler::DisassembleNEON2RegCompare}, + {"fcmeq_asimdmisc_fz"_h, &Disassembler::DisassembleNEON2RegFPCompare}, + {"fcmge_asimdmisc_fz"_h, &Disassembler::DisassembleNEON2RegFPCompare}, + {"fcmgt_asimdmisc_fz"_h, &Disassembler::DisassembleNEON2RegFPCompare}, + {"fcmle_asimdmisc_fz"_h, &Disassembler::DisassembleNEON2RegFPCompare}, + {"fcmlt_asimdmisc_fz"_h, &Disassembler::DisassembleNEON2RegFPCompare}, + {"fcvtl_asimdmisc_l"_h, &Disassembler::DisassembleNEON2RegFPConvert}, + {"fcvtn_asimdmisc_n"_h, &Disassembler::DisassembleNEON2RegFPConvert}, + {"fcvtxn_asimdmisc_n"_h, &Disassembler::DisassembleNEON2RegFPConvert}, + {"fabs_asimdmisc_r"_h, &Disassembler::DisassembleNEON2RegFP}, + {"fcvtas_asimdmisc_r"_h, &Disassembler::DisassembleNEON2RegFP}, + {"fcvtau_asimdmisc_r"_h, &Disassembler::DisassembleNEON2RegFP}, + {"fcvtms_asimdmisc_r"_h, &Disassembler::DisassembleNEON2RegFP}, + {"fcvtmu_asimdmisc_r"_h, &Disassembler::DisassembleNEON2RegFP}, + {"fcvtns_asimdmisc_r"_h, &Disassembler::DisassembleNEON2RegFP}, + {"fcvtnu_asimdmisc_r"_h, &Disassembler::DisassembleNEON2RegFP}, + {"fcvtps_asimdmisc_r"_h, &Disassembler::DisassembleNEON2RegFP}, + {"fcvtpu_asimdmisc_r"_h, &Disassembler::DisassembleNEON2RegFP}, + {"fcvtzs_asimdmisc_r"_h, &Disassembler::DisassembleNEON2RegFP}, + {"fcvtzu_asimdmisc_r"_h, &Disassembler::DisassembleNEON2RegFP}, + {"fneg_asimdmisc_r"_h, &Disassembler::DisassembleNEON2RegFP}, + {"frecpe_asimdmisc_r"_h, &Disassembler::DisassembleNEON2RegFP}, + {"frint32x_asimdmisc_r"_h, &Disassembler::DisassembleNEON2RegFP}, + {"frint32z_asimdmisc_r"_h, &Disassembler::DisassembleNEON2RegFP}, + {"frint64x_asimdmisc_r"_h, &Disassembler::DisassembleNEON2RegFP}, + {"frint64z_asimdmisc_r"_h, &Disassembler::DisassembleNEON2RegFP}, + {"frinta_asimdmisc_r"_h, &Disassembler::DisassembleNEON2RegFP}, + {"frinti_asimdmisc_r"_h, &Disassembler::DisassembleNEON2RegFP}, + {"frintm_asimdmisc_r"_h, &Disassembler::DisassembleNEON2RegFP}, + {"frintn_asimdmisc_r"_h, &Disassembler::DisassembleNEON2RegFP}, + {"frintp_asimdmisc_r"_h, &Disassembler::DisassembleNEON2RegFP}, + {"frintx_asimdmisc_r"_h, &Disassembler::DisassembleNEON2RegFP}, + {"frintz_asimdmisc_r"_h, &Disassembler::DisassembleNEON2RegFP}, + {"frsqrte_asimdmisc_r"_h, &Disassembler::DisassembleNEON2RegFP}, + {"fsqrt_asimdmisc_r"_h, &Disassembler::DisassembleNEON2RegFP}, + {"scvtf_asimdmisc_r"_h, &Disassembler::DisassembleNEON2RegFP}, + {"ucvtf_asimdmisc_r"_h, &Disassembler::DisassembleNEON2RegFP}, + {"smlal_asimdelem_l"_h, &Disassembler::DisassembleNEONMulByElementLong}, + {"smlsl_asimdelem_l"_h, &Disassembler::DisassembleNEONMulByElementLong}, + {"smull_asimdelem_l"_h, &Disassembler::DisassembleNEONMulByElementLong}, + {"umlal_asimdelem_l"_h, 
&Disassembler::DisassembleNEONMulByElementLong}, + {"umlsl_asimdelem_l"_h, &Disassembler::DisassembleNEONMulByElementLong}, + {"umull_asimdelem_l"_h, &Disassembler::DisassembleNEONMulByElementLong}, + {"sqdmull_asimdelem_l"_h, &Disassembler::DisassembleNEONMulByElementLong}, + {"sqdmlal_asimdelem_l"_h, &Disassembler::DisassembleNEONMulByElementLong}, + {"sqdmlsl_asimdelem_l"_h, &Disassembler::DisassembleNEONMulByElementLong}, + {"sdot_asimdelem_d"_h, &Disassembler::DisassembleNEONDotProdByElement}, + {"udot_asimdelem_d"_h, &Disassembler::DisassembleNEONDotProdByElement}, + {"usdot_asimdelem_d"_h, &Disassembler::DisassembleNEONDotProdByElement}, + {"sudot_asimdelem_d"_h, &Disassembler::DisassembleNEONDotProdByElement}, + {"fmlal2_asimdelem_lh"_h, + &Disassembler::DisassembleNEONFPMulByElementLong}, + {"fmlal_asimdelem_lh"_h, + &Disassembler::DisassembleNEONFPMulByElementLong}, + {"fmlsl2_asimdelem_lh"_h, + &Disassembler::DisassembleNEONFPMulByElementLong}, + {"fmlsl_asimdelem_lh"_h, + &Disassembler::DisassembleNEONFPMulByElementLong}, + {"fcmla_asimdelem_c_h"_h, + &Disassembler::DisassembleNEONComplexMulByElement}, + {"fcmla_asimdelem_c_s"_h, + &Disassembler::DisassembleNEONComplexMulByElement}, + {"fmla_asimdelem_rh_h"_h, + &Disassembler::DisassembleNEONHalfFPMulByElement}, + {"fmls_asimdelem_rh_h"_h, + &Disassembler::DisassembleNEONHalfFPMulByElement}, + {"fmulx_asimdelem_rh_h"_h, + &Disassembler::DisassembleNEONHalfFPMulByElement}, + {"fmul_asimdelem_rh_h"_h, + &Disassembler::DisassembleNEONHalfFPMulByElement}, + {"fmla_asimdelem_r_sd"_h, &Disassembler::DisassembleNEONFPMulByElement}, + {"fmls_asimdelem_r_sd"_h, &Disassembler::DisassembleNEONFPMulByElement}, + {"fmulx_asimdelem_r_sd"_h, &Disassembler::DisassembleNEONFPMulByElement}, + {"fmul_asimdelem_r_sd"_h, &Disassembler::DisassembleNEONFPMulByElement}, + {"mla_asimdsame_only"_h, &Disassembler::DisassembleNEON3SameNoD}, + {"mls_asimdsame_only"_h, &Disassembler::DisassembleNEON3SameNoD}, + {"mul_asimdsame_only"_h, &Disassembler::DisassembleNEON3SameNoD}, + {"saba_asimdsame_only"_h, &Disassembler::DisassembleNEON3SameNoD}, + {"sabd_asimdsame_only"_h, &Disassembler::DisassembleNEON3SameNoD}, + {"shadd_asimdsame_only"_h, &Disassembler::DisassembleNEON3SameNoD}, + {"shsub_asimdsame_only"_h, &Disassembler::DisassembleNEON3SameNoD}, + {"smaxp_asimdsame_only"_h, &Disassembler::DisassembleNEON3SameNoD}, + {"smax_asimdsame_only"_h, &Disassembler::DisassembleNEON3SameNoD}, + {"sminp_asimdsame_only"_h, &Disassembler::DisassembleNEON3SameNoD}, + {"smin_asimdsame_only"_h, &Disassembler::DisassembleNEON3SameNoD}, + {"srhadd_asimdsame_only"_h, &Disassembler::DisassembleNEON3SameNoD}, + {"uaba_asimdsame_only"_h, &Disassembler::DisassembleNEON3SameNoD}, + {"uabd_asimdsame_only"_h, &Disassembler::DisassembleNEON3SameNoD}, + {"uhadd_asimdsame_only"_h, &Disassembler::DisassembleNEON3SameNoD}, + {"uhsub_asimdsame_only"_h, &Disassembler::DisassembleNEON3SameNoD}, + {"umaxp_asimdsame_only"_h, &Disassembler::DisassembleNEON3SameNoD}, + {"umax_asimdsame_only"_h, &Disassembler::DisassembleNEON3SameNoD}, + {"uminp_asimdsame_only"_h, &Disassembler::DisassembleNEON3SameNoD}, + {"umin_asimdsame_only"_h, &Disassembler::DisassembleNEON3SameNoD}, + {"urhadd_asimdsame_only"_h, &Disassembler::DisassembleNEON3SameNoD}, + {"and_asimdsame_only"_h, &Disassembler::DisassembleNEON3SameLogical}, + {"bic_asimdsame_only"_h, &Disassembler::DisassembleNEON3SameLogical}, + {"bif_asimdsame_only"_h, &Disassembler::DisassembleNEON3SameLogical}, + {"bit_asimdsame_only"_h, 
&Disassembler::DisassembleNEON3SameLogical}, + {"bsl_asimdsame_only"_h, &Disassembler::DisassembleNEON3SameLogical}, + {"eor_asimdsame_only"_h, &Disassembler::DisassembleNEON3SameLogical}, + {"orr_asimdsame_only"_h, &Disassembler::DisassembleNEON3SameLogical}, + {"orn_asimdsame_only"_h, &Disassembler::DisassembleNEON3SameLogical}, + {"pmul_asimdsame_only"_h, &Disassembler::DisassembleNEON3SameLogical}, + {"fmlal2_asimdsame_f"_h, &Disassembler::DisassembleNEON3SameFHM}, + {"fmlal_asimdsame_f"_h, &Disassembler::DisassembleNEON3SameFHM}, + {"fmlsl2_asimdsame_f"_h, &Disassembler::DisassembleNEON3SameFHM}, + {"fmlsl_asimdsame_f"_h, &Disassembler::DisassembleNEON3SameFHM}, + {"sri_asimdshf_r"_h, &Disassembler::DisassembleNEONShiftRightImm}, + {"srshr_asimdshf_r"_h, &Disassembler::DisassembleNEONShiftRightImm}, + {"srsra_asimdshf_r"_h, &Disassembler::DisassembleNEONShiftRightImm}, + {"sshr_asimdshf_r"_h, &Disassembler::DisassembleNEONShiftRightImm}, + {"ssra_asimdshf_r"_h, &Disassembler::DisassembleNEONShiftRightImm}, + {"urshr_asimdshf_r"_h, &Disassembler::DisassembleNEONShiftRightImm}, + {"ursra_asimdshf_r"_h, &Disassembler::DisassembleNEONShiftRightImm}, + {"ushr_asimdshf_r"_h, &Disassembler::DisassembleNEONShiftRightImm}, + {"usra_asimdshf_r"_h, &Disassembler::DisassembleNEONShiftRightImm}, + {"scvtf_asimdshf_c"_h, &Disassembler::DisassembleNEONShiftRightImm}, + {"ucvtf_asimdshf_c"_h, &Disassembler::DisassembleNEONShiftRightImm}, + {"fcvtzs_asimdshf_c"_h, &Disassembler::DisassembleNEONShiftRightImm}, + {"fcvtzu_asimdshf_c"_h, &Disassembler::DisassembleNEONShiftRightImm}, + {"ushll_asimdshf_l"_h, &Disassembler::DisassembleNEONShiftLeftLongImm}, + {"sshll_asimdshf_l"_h, &Disassembler::DisassembleNEONShiftLeftLongImm}, + {"shrn_asimdshf_n"_h, &Disassembler::DisassembleNEONShiftRightNarrowImm}, + {"rshrn_asimdshf_n"_h, &Disassembler::DisassembleNEONShiftRightNarrowImm}, + {"sqshrn_asimdshf_n"_h, + &Disassembler::DisassembleNEONShiftRightNarrowImm}, + {"sqrshrn_asimdshf_n"_h, + &Disassembler::DisassembleNEONShiftRightNarrowImm}, + {"sqshrun_asimdshf_n"_h, + &Disassembler::DisassembleNEONShiftRightNarrowImm}, + {"sqrshrun_asimdshf_n"_h, + &Disassembler::DisassembleNEONShiftRightNarrowImm}, + {"uqshrn_asimdshf_n"_h, + &Disassembler::DisassembleNEONShiftRightNarrowImm}, + {"uqrshrn_asimdshf_n"_h, + &Disassembler::DisassembleNEONShiftRightNarrowImm}, + {"sqdmlal_asisdelem_l"_h, + &Disassembler::DisassembleNEONScalarSatMulLongIndex}, + {"sqdmlsl_asisdelem_l"_h, + &Disassembler::DisassembleNEONScalarSatMulLongIndex}, + {"sqdmull_asisdelem_l"_h, + &Disassembler::DisassembleNEONScalarSatMulLongIndex}, + {"fmla_asisdelem_rh_h"_h, &Disassembler::DisassembleNEONFPScalarMulIndex}, + {"fmla_asisdelem_r_sd"_h, &Disassembler::DisassembleNEONFPScalarMulIndex}, + {"fmls_asisdelem_rh_h"_h, &Disassembler::DisassembleNEONFPScalarMulIndex}, + {"fmls_asisdelem_r_sd"_h, &Disassembler::DisassembleNEONFPScalarMulIndex}, + {"fmulx_asisdelem_rh_h"_h, + &Disassembler::DisassembleNEONFPScalarMulIndex}, + {"fmulx_asisdelem_r_sd"_h, + &Disassembler::DisassembleNEONFPScalarMulIndex}, + {"fmul_asisdelem_rh_h"_h, &Disassembler::DisassembleNEONFPScalarMulIndex}, + {"fmul_asisdelem_r_sd"_h, &Disassembler::DisassembleNEONFPScalarMulIndex}, + {"fabd_asisdsame_only"_h, &Disassembler::DisassembleNEONFPScalar3Same}, + {"facge_asisdsame_only"_h, &Disassembler::DisassembleNEONFPScalar3Same}, + {"facgt_asisdsame_only"_h, &Disassembler::DisassembleNEONFPScalar3Same}, + {"fcmeq_asisdsame_only"_h, &Disassembler::DisassembleNEONFPScalar3Same}, 
+ {"fcmge_asisdsame_only"_h, &Disassembler::DisassembleNEONFPScalar3Same}, + {"fcmgt_asisdsame_only"_h, &Disassembler::DisassembleNEONFPScalar3Same}, + {"fmulx_asisdsame_only"_h, &Disassembler::DisassembleNEONFPScalar3Same}, + {"frecps_asisdsame_only"_h, &Disassembler::DisassembleNEONFPScalar3Same}, + {"frsqrts_asisdsame_only"_h, &Disassembler::DisassembleNEONFPScalar3Same}, + {"sqrdmlah_asisdsame2_only"_h, &Disassembler::VisitNEONScalar3Same}, + {"sqrdmlsh_asisdsame2_only"_h, &Disassembler::VisitNEONScalar3Same}, + {"cmeq_asisdsame_only"_h, &Disassembler::DisassembleNEONScalar3SameOnlyD}, + {"cmge_asisdsame_only"_h, &Disassembler::DisassembleNEONScalar3SameOnlyD}, + {"cmgt_asisdsame_only"_h, &Disassembler::DisassembleNEONScalar3SameOnlyD}, + {"cmhi_asisdsame_only"_h, &Disassembler::DisassembleNEONScalar3SameOnlyD}, + {"cmhs_asisdsame_only"_h, &Disassembler::DisassembleNEONScalar3SameOnlyD}, + {"cmtst_asisdsame_only"_h, + &Disassembler::DisassembleNEONScalar3SameOnlyD}, + {"add_asisdsame_only"_h, &Disassembler::DisassembleNEONScalar3SameOnlyD}, + {"sub_asisdsame_only"_h, &Disassembler::DisassembleNEONScalar3SameOnlyD}, + {"fmaxnmv_asimdall_only_h"_h, + &Disassembler::DisassembleNEONFP16AcrossLanes}, + {"fmaxv_asimdall_only_h"_h, + &Disassembler::DisassembleNEONFP16AcrossLanes}, + {"fminnmv_asimdall_only_h"_h, + &Disassembler::DisassembleNEONFP16AcrossLanes}, + {"fminv_asimdall_only_h"_h, + &Disassembler::DisassembleNEONFP16AcrossLanes}, + {"fmaxnmv_asimdall_only_sd"_h, + &Disassembler::DisassembleNEONFPAcrossLanes}, + {"fminnmv_asimdall_only_sd"_h, + &Disassembler::DisassembleNEONFPAcrossLanes}, + {"fmaxv_asimdall_only_sd"_h, &Disassembler::DisassembleNEONFPAcrossLanes}, + {"fminv_asimdall_only_sd"_h, &Disassembler::DisassembleNEONFPAcrossLanes}, + {"shl_asisdshf_r"_h, &Disassembler::DisassembleNEONScalarShiftImmOnlyD}, + {"sli_asisdshf_r"_h, &Disassembler::DisassembleNEONScalarShiftImmOnlyD}, + {"sri_asisdshf_r"_h, &Disassembler::DisassembleNEONScalarShiftImmOnlyD}, + {"srshr_asisdshf_r"_h, &Disassembler::DisassembleNEONScalarShiftImmOnlyD}, + {"srsra_asisdshf_r"_h, &Disassembler::DisassembleNEONScalarShiftImmOnlyD}, + {"sshr_asisdshf_r"_h, &Disassembler::DisassembleNEONScalarShiftImmOnlyD}, + {"ssra_asisdshf_r"_h, &Disassembler::DisassembleNEONScalarShiftImmOnlyD}, + {"urshr_asisdshf_r"_h, &Disassembler::DisassembleNEONScalarShiftImmOnlyD}, + {"ursra_asisdshf_r"_h, &Disassembler::DisassembleNEONScalarShiftImmOnlyD}, + {"ushr_asisdshf_r"_h, &Disassembler::DisassembleNEONScalarShiftImmOnlyD}, + {"usra_asisdshf_r"_h, &Disassembler::DisassembleNEONScalarShiftImmOnlyD}, + {"sqrshrn_asisdshf_n"_h, + &Disassembler::DisassembleNEONScalarShiftRightNarrowImm}, + {"sqrshrun_asisdshf_n"_h, + &Disassembler::DisassembleNEONScalarShiftRightNarrowImm}, + {"sqshrn_asisdshf_n"_h, + &Disassembler::DisassembleNEONScalarShiftRightNarrowImm}, + {"sqshrun_asisdshf_n"_h, + &Disassembler::DisassembleNEONScalarShiftRightNarrowImm}, + {"uqrshrn_asisdshf_n"_h, + &Disassembler::DisassembleNEONScalarShiftRightNarrowImm}, + {"uqshrn_asisdshf_n"_h, + &Disassembler::DisassembleNEONScalarShiftRightNarrowImm}, + {"cmeq_asisdmisc_z"_h, &Disassembler::DisassembleNEONScalar2RegMiscOnlyD}, + {"cmge_asisdmisc_z"_h, &Disassembler::DisassembleNEONScalar2RegMiscOnlyD}, + {"cmgt_asisdmisc_z"_h, &Disassembler::DisassembleNEONScalar2RegMiscOnlyD}, + {"cmle_asisdmisc_z"_h, &Disassembler::DisassembleNEONScalar2RegMiscOnlyD}, + {"cmlt_asisdmisc_z"_h, &Disassembler::DisassembleNEONScalar2RegMiscOnlyD}, + {"abs_asisdmisc_r"_h, 
&Disassembler::DisassembleNEONScalar2RegMiscOnlyD}, + {"neg_asisdmisc_r"_h, &Disassembler::DisassembleNEONScalar2RegMiscOnlyD}, + {"fcmeq_asisdmisc_fz"_h, &Disassembler::DisassembleNEONFPScalar2RegMisc}, + {"fcmge_asisdmisc_fz"_h, &Disassembler::DisassembleNEONFPScalar2RegMisc}, + {"fcmgt_asisdmisc_fz"_h, &Disassembler::DisassembleNEONFPScalar2RegMisc}, + {"fcmle_asisdmisc_fz"_h, &Disassembler::DisassembleNEONFPScalar2RegMisc}, + {"fcmlt_asisdmisc_fz"_h, &Disassembler::DisassembleNEONFPScalar2RegMisc}, + {"fcvtas_asisdmisc_r"_h, &Disassembler::DisassembleNEONFPScalar2RegMisc}, + {"fcvtau_asisdmisc_r"_h, &Disassembler::DisassembleNEONFPScalar2RegMisc}, + {"fcvtms_asisdmisc_r"_h, &Disassembler::DisassembleNEONFPScalar2RegMisc}, + {"fcvtmu_asisdmisc_r"_h, &Disassembler::DisassembleNEONFPScalar2RegMisc}, + {"fcvtns_asisdmisc_r"_h, &Disassembler::DisassembleNEONFPScalar2RegMisc}, + {"fcvtnu_asisdmisc_r"_h, &Disassembler::DisassembleNEONFPScalar2RegMisc}, + {"fcvtps_asisdmisc_r"_h, &Disassembler::DisassembleNEONFPScalar2RegMisc}, + {"fcvtpu_asisdmisc_r"_h, &Disassembler::DisassembleNEONFPScalar2RegMisc}, + {"fcvtxn_asisdmisc_n"_h, &Disassembler::DisassembleNEONFPScalar2RegMisc}, + {"fcvtzs_asisdmisc_r"_h, &Disassembler::DisassembleNEONFPScalar2RegMisc}, + {"fcvtzu_asisdmisc_r"_h, &Disassembler::DisassembleNEONFPScalar2RegMisc}, + {"frecpe_asisdmisc_r"_h, &Disassembler::DisassembleNEONFPScalar2RegMisc}, + {"frecpx_asisdmisc_r"_h, &Disassembler::DisassembleNEONFPScalar2RegMisc}, + {"frsqrte_asisdmisc_r"_h, &Disassembler::DisassembleNEONFPScalar2RegMisc}, + {"scvtf_asisdmisc_r"_h, &Disassembler::DisassembleNEONFPScalar2RegMisc}, + {"ucvtf_asisdmisc_r"_h, &Disassembler::DisassembleNEONFPScalar2RegMisc}, + {"adclb_z_zzz"_h, &Disassembler::DisassembleSVEAddSubCarry}, + {"adclt_z_zzz"_h, &Disassembler::DisassembleSVEAddSubCarry}, + {"addhnb_z_zz"_h, &Disassembler::DisassembleSVEAddSubHigh}, + {"addhnt_z_zz"_h, &Disassembler::DisassembleSVEAddSubHigh}, + {"addp_z_p_zz"_h, &Disassembler::Disassemble_ZdnT_PgM_ZdnT_ZmT}, + {"aesd_z_zz"_h, &Disassembler::Disassemble_ZdnB_ZdnB_ZmB}, + {"aese_z_zz"_h, &Disassembler::Disassemble_ZdnB_ZdnB_ZmB}, + {"aesimc_z_z"_h, &Disassembler::Disassemble_ZdnB_ZdnB}, + {"aesmc_z_z"_h, &Disassembler::Disassemble_ZdnB_ZdnB}, + {"bcax_z_zzz"_h, &Disassembler::DisassembleSVEBitwiseTernary}, + {"bdep_z_zz"_h, &Disassembler::Disassemble_ZdT_ZnT_ZmT}, + {"bext_z_zz"_h, &Disassembler::Disassemble_ZdT_ZnT_ZmT}, + {"bgrp_z_zz"_h, &Disassembler::Disassemble_ZdT_ZnT_ZmT}, + {"bsl1n_z_zzz"_h, &Disassembler::DisassembleSVEBitwiseTernary}, + {"bsl2n_z_zzz"_h, &Disassembler::DisassembleSVEBitwiseTernary}, + {"bsl_z_zzz"_h, &Disassembler::DisassembleSVEBitwiseTernary}, + {"cadd_z_zz"_h, &Disassembler::DisassembleSVEComplexIntAddition}, + {"cdot_z_zzz"_h, &Disassembler::Disassemble_ZdaT_ZnTb_ZmTb_const}, + {"cdot_z_zzzi_d"_h, &Disassembler::Disassemble_ZdaD_ZnH_ZmH_imm_const}, + {"cdot_z_zzzi_s"_h, &Disassembler::Disassemble_ZdaS_ZnB_ZmB_imm_const}, + {"cmla_z_zzz"_h, &Disassembler::Disassemble_ZdaT_ZnT_ZmT_const}, + {"cmla_z_zzzi_h"_h, &Disassembler::Disassemble_ZdaH_ZnH_ZmH_imm_const}, + {"cmla_z_zzzi_s"_h, &Disassembler::Disassemble_ZdaS_ZnS_ZmS_imm_const}, + {"eor3_z_zzz"_h, &Disassembler::DisassembleSVEBitwiseTernary}, + {"eorbt_z_zz"_h, &Disassembler::Disassemble_ZdT_ZnT_ZmT}, + {"eortb_z_zz"_h, &Disassembler::Disassemble_ZdT_ZnT_ZmT}, + {"ext_z_zi_con"_h, &Disassembler::Disassemble_ZdB_Zn1B_Zn2B_imm}, + {"faddp_z_p_zz"_h, &Disassembler::DisassembleSVEFPPair}, + {"fcvtlt_z_p_z_h2s"_h, 
&Disassembler::Disassemble_ZdS_PgM_ZnH}, + {"fcvtlt_z_p_z_s2d"_h, &Disassembler::Disassemble_ZdD_PgM_ZnS}, + {"fcvtnt_z_p_z_d2s"_h, &Disassembler::Disassemble_ZdS_PgM_ZnD}, + {"fcvtnt_z_p_z_s2h"_h, &Disassembler::Disassemble_ZdH_PgM_ZnS}, + {"fcvtx_z_p_z_d2s"_h, &Disassembler::Disassemble_ZdS_PgM_ZnD}, + {"fcvtxnt_z_p_z_d2s"_h, &Disassembler::Disassemble_ZdS_PgM_ZnD}, + {"flogb_z_p_z"_h, &Disassembler::DisassembleSVEFlogb}, + {"fmaxnmp_z_p_zz"_h, &Disassembler::DisassembleSVEFPPair}, + {"fmaxp_z_p_zz"_h, &Disassembler::DisassembleSVEFPPair}, + {"fminnmp_z_p_zz"_h, &Disassembler::DisassembleSVEFPPair}, + {"fminp_z_p_zz"_h, &Disassembler::DisassembleSVEFPPair}, + {"fmlalb_z_zzz"_h, &Disassembler::Disassemble_ZdaS_ZnH_ZmH}, + {"fmlalb_z_zzzi_s"_h, &Disassembler::Disassemble_ZdaS_ZnH_ZmH_imm}, + {"fmlalt_z_zzz"_h, &Disassembler::Disassemble_ZdaS_ZnH_ZmH}, + {"fmlalt_z_zzzi_s"_h, &Disassembler::Disassemble_ZdaS_ZnH_ZmH_imm}, + {"fmlslb_z_zzz"_h, &Disassembler::Disassemble_ZdaS_ZnH_ZmH}, + {"fmlslb_z_zzzi_s"_h, &Disassembler::Disassemble_ZdaS_ZnH_ZmH_imm}, + {"fmlslt_z_zzz"_h, &Disassembler::Disassemble_ZdaS_ZnH_ZmH}, + {"fmlslt_z_zzzi_s"_h, &Disassembler::Disassemble_ZdaS_ZnH_ZmH_imm}, + {"histcnt_z_p_zz"_h, &Disassembler::Disassemble_ZdT_PgZ_ZnT_ZmT}, + {"histseg_z_zz"_h, &Disassembler::Disassemble_ZdB_ZnB_ZmB}, + {"ldnt1b_z_p_ar_d_64_unscaled"_h, + &Disassembler::Disassemble_ZtD_PgZ_ZnD_Xm}, + {"ldnt1b_z_p_ar_s_x32_unscaled"_h, + &Disassembler::Disassemble_ZtS_PgZ_ZnS_Xm}, + {"ldnt1d_z_p_ar_d_64_unscaled"_h, + &Disassembler::Disassemble_ZtD_PgZ_ZnD_Xm}, + {"ldnt1h_z_p_ar_d_64_unscaled"_h, + &Disassembler::Disassemble_ZtD_PgZ_ZnD_Xm}, + {"ldnt1h_z_p_ar_s_x32_unscaled"_h, + &Disassembler::Disassemble_ZtS_PgZ_ZnS_Xm}, + {"ldnt1sb_z_p_ar_d_64_unscaled"_h, + &Disassembler::Disassemble_ZtD_PgZ_ZnD_Xm}, + {"ldnt1sb_z_p_ar_s_x32_unscaled"_h, + &Disassembler::Disassemble_ZtS_PgZ_ZnS_Xm}, + {"ldnt1sh_z_p_ar_d_64_unscaled"_h, + &Disassembler::Disassemble_ZtD_PgZ_ZnD_Xm}, + {"ldnt1sh_z_p_ar_s_x32_unscaled"_h, + &Disassembler::Disassemble_ZtS_PgZ_ZnS_Xm}, + {"ldnt1sw_z_p_ar_d_64_unscaled"_h, + &Disassembler::Disassemble_ZtD_PgZ_ZnD_Xm}, + {"ldnt1w_z_p_ar_d_64_unscaled"_h, + &Disassembler::Disassemble_ZtD_PgZ_ZnD_Xm}, + {"ldnt1w_z_p_ar_s_x32_unscaled"_h, + &Disassembler::Disassemble_ZtS_PgZ_ZnS_Xm}, + {"match_p_p_zz"_h, &Disassembler::Disassemble_PdT_PgZ_ZnT_ZmT}, + {"mla_z_zzzi_d"_h, &Disassembler::Disassemble_ZdD_ZnD_ZmD_imm}, + {"mla_z_zzzi_h"_h, &Disassembler::Disassemble_ZdH_ZnH_ZmH_imm}, + {"mla_z_zzzi_s"_h, &Disassembler::Disassemble_ZdS_ZnS_ZmS_imm}, + {"mls_z_zzzi_d"_h, &Disassembler::Disassemble_ZdD_ZnD_ZmD_imm}, + {"mls_z_zzzi_h"_h, &Disassembler::Disassemble_ZdH_ZnH_ZmH_imm}, + {"mls_z_zzzi_s"_h, &Disassembler::Disassemble_ZdS_ZnS_ZmS_imm}, + {"mul_z_zz"_h, &Disassembler::Disassemble_ZdT_ZnT_ZmT}, + {"mul_z_zzi_d"_h, &Disassembler::Disassemble_ZdD_ZnD_ZmD_imm}, + {"mul_z_zzi_h"_h, &Disassembler::Disassemble_ZdH_ZnH_ZmH_imm}, + {"mul_z_zzi_s"_h, &Disassembler::Disassemble_ZdS_ZnS_ZmS_imm}, + {"nbsl_z_zzz"_h, &Disassembler::DisassembleSVEBitwiseTernary}, + {"nmatch_p_p_zz"_h, &Disassembler::Disassemble_PdT_PgZ_ZnT_ZmT}, + {"pmul_z_zz"_h, &Disassembler::Disassemble_ZdB_ZnB_ZmB}, + {"pmullb_z_zz"_h, &Disassembler::Disassemble_ZdT_ZnTb_ZmTb}, + {"pmullt_z_zz"_h, &Disassembler::Disassemble_ZdT_ZnTb_ZmTb}, + {"raddhnb_z_zz"_h, &Disassembler::DisassembleSVEAddSubHigh}, + {"raddhnt_z_zz"_h, &Disassembler::DisassembleSVEAddSubHigh}, + {"rax1_z_zz"_h, &Disassembler::Disassemble_ZdD_ZnD_ZmD}, + 
{"rshrnb_z_zi"_h, &Disassembler::DisassembleSVEShiftRightImm}, + {"rshrnt_z_zi"_h, &Disassembler::DisassembleSVEShiftRightImm}, + {"rsubhnb_z_zz"_h, &Disassembler::DisassembleSVEAddSubHigh}, + {"rsubhnt_z_zz"_h, &Disassembler::DisassembleSVEAddSubHigh}, + {"saba_z_zzz"_h, &Disassembler::Disassemble_ZdaT_ZnT_ZmT}, + {"sabalb_z_zzz"_h, &Disassembler::Disassemble_ZdT_ZnTb_ZmTb}, + {"sabalt_z_zzz"_h, &Disassembler::Disassemble_ZdT_ZnTb_ZmTb}, + {"sabdlb_z_zz"_h, &Disassembler::Disassemble_ZdT_ZnTb_ZmTb}, + {"sabdlt_z_zz"_h, &Disassembler::Disassemble_ZdT_ZnTb_ZmTb}, + {"sadalp_z_p_z"_h, &Disassembler::Disassemble_ZdaT_PgM_ZnTb}, + {"saddlb_z_zz"_h, &Disassembler::Disassemble_ZdT_ZnTb_ZmTb}, + {"saddlbt_z_zz"_h, &Disassembler::Disassemble_ZdT_ZnTb_ZmTb}, + {"saddlt_z_zz"_h, &Disassembler::Disassemble_ZdT_ZnTb_ZmTb}, + {"saddwb_z_zz"_h, &Disassembler::Disassemble_ZdT_ZnT_ZmTb}, + {"saddwt_z_zz"_h, &Disassembler::Disassemble_ZdT_ZnT_ZmTb}, + {"sbclb_z_zzz"_h, &Disassembler::DisassembleSVEAddSubCarry}, + {"sbclt_z_zzz"_h, &Disassembler::DisassembleSVEAddSubCarry}, + {"shadd_z_p_zz"_h, &Disassembler::Disassemble_ZdnT_PgM_ZdnT_ZmT}, + {"shrnb_z_zi"_h, &Disassembler::DisassembleSVEShiftRightImm}, + {"shrnt_z_zi"_h, &Disassembler::DisassembleSVEShiftRightImm}, + {"shsub_z_p_zz"_h, &Disassembler::Disassemble_ZdnT_PgM_ZdnT_ZmT}, + {"shsubr_z_p_zz"_h, &Disassembler::Disassemble_ZdnT_PgM_ZdnT_ZmT}, + {"sli_z_zzi"_h, &Disassembler::VisitSVEBitwiseShiftUnpredicated}, + {"sm4e_z_zz"_h, &Disassembler::Disassemble_ZdnS_ZdnS_ZmS}, + {"sm4ekey_z_zz"_h, &Disassembler::Disassemble_ZdS_ZnS_ZmS}, + {"smaxp_z_p_zz"_h, &Disassembler::Disassemble_ZdnT_PgM_ZdnT_ZmT}, + {"sminp_z_p_zz"_h, &Disassembler::Disassemble_ZdnT_PgM_ZdnT_ZmT}, + {"smlalb_z_zzz"_h, &Disassembler::Disassemble_ZdaT_ZnTb_ZmTb}, + {"smlalb_z_zzzi_d"_h, &Disassembler::Disassemble_ZdD_ZnS_ZmS_imm}, + {"smlalb_z_zzzi_s"_h, &Disassembler::Disassemble_ZdS_ZnH_ZmH_imm}, + {"smlalt_z_zzz"_h, &Disassembler::Disassemble_ZdaT_ZnTb_ZmTb}, + {"smlalt_z_zzzi_d"_h, &Disassembler::Disassemble_ZdD_ZnS_ZmS_imm}, + {"smlalt_z_zzzi_s"_h, &Disassembler::Disassemble_ZdS_ZnH_ZmH_imm}, + {"smlslb_z_zzz"_h, &Disassembler::Disassemble_ZdaT_ZnTb_ZmTb}, + {"smlslb_z_zzzi_d"_h, &Disassembler::Disassemble_ZdD_ZnS_ZmS_imm}, + {"smlslb_z_zzzi_s"_h, &Disassembler::Disassemble_ZdS_ZnH_ZmH_imm}, + {"smlslt_z_zzz"_h, &Disassembler::Disassemble_ZdaT_ZnTb_ZmTb}, + {"smlslt_z_zzzi_d"_h, &Disassembler::Disassemble_ZdD_ZnS_ZmS_imm}, + {"smlslt_z_zzzi_s"_h, &Disassembler::Disassemble_ZdS_ZnH_ZmH_imm}, + {"smulh_z_zz"_h, &Disassembler::Disassemble_ZdT_ZnT_ZmT}, + {"smullb_z_zz"_h, &Disassembler::Disassemble_ZdT_ZnTb_ZmTb}, + {"smullb_z_zzi_d"_h, &Disassembler::Disassemble_ZdD_ZnS_ZmS_imm}, + {"smullb_z_zzi_s"_h, &Disassembler::Disassemble_ZdS_ZnH_ZmH_imm}, + {"smullt_z_zz"_h, &Disassembler::Disassemble_ZdT_ZnTb_ZmTb}, + {"smullt_z_zzi_d"_h, &Disassembler::Disassemble_ZdD_ZnS_ZmS_imm}, + {"smullt_z_zzi_s"_h, &Disassembler::Disassemble_ZdS_ZnH_ZmH_imm}, + {"splice_z_p_zz_con"_h, &Disassembler::Disassemble_ZdT_Pg_Zn1T_Zn2T}, + {"sqabs_z_p_z"_h, &Disassembler::Disassemble_ZdT_PgM_ZnT}, + {"sqadd_z_p_zz"_h, &Disassembler::Disassemble_ZdnT_PgM_ZdnT_ZmT}, + {"sqcadd_z_zz"_h, &Disassembler::DisassembleSVEComplexIntAddition}, + {"sqdmlalb_z_zzz"_h, &Disassembler::Disassemble_ZdaT_ZnTb_ZmTb}, + {"sqdmlalb_z_zzzi_d"_h, &Disassembler::Disassemble_ZdaD_ZnS_ZmS_imm}, + {"sqdmlalb_z_zzzi_s"_h, &Disassembler::Disassemble_ZdaS_ZnH_ZmH_imm}, + {"sqdmlalbt_z_zzz"_h, &Disassembler::Disassemble_ZdaT_ZnTb_ZmTb}, + 
{"sqdmlalt_z_zzz"_h, &Disassembler::Disassemble_ZdaT_ZnTb_ZmTb}, + {"sqdmlalt_z_zzzi_d"_h, &Disassembler::Disassemble_ZdaD_ZnS_ZmS_imm}, + {"sqdmlalt_z_zzzi_s"_h, &Disassembler::Disassemble_ZdaS_ZnH_ZmH_imm}, + {"sqdmlslb_z_zzz"_h, &Disassembler::Disassemble_ZdaT_ZnTb_ZmTb}, + {"sqdmlslb_z_zzzi_d"_h, &Disassembler::Disassemble_ZdaD_ZnS_ZmS_imm}, + {"sqdmlslb_z_zzzi_s"_h, &Disassembler::Disassemble_ZdaS_ZnH_ZmH_imm}, + {"sqdmlslbt_z_zzz"_h, &Disassembler::Disassemble_ZdaT_ZnTb_ZmTb}, + {"sqdmlslt_z_zzz"_h, &Disassembler::Disassemble_ZdaT_ZnTb_ZmTb}, + {"sqdmlslt_z_zzzi_d"_h, &Disassembler::Disassemble_ZdaD_ZnS_ZmS_imm}, + {"sqdmlslt_z_zzzi_s"_h, &Disassembler::Disassemble_ZdaS_ZnH_ZmH_imm}, + {"sqdmulh_z_zz"_h, &Disassembler::Disassemble_ZdT_ZnT_ZmT}, + {"sqdmulh_z_zzi_d"_h, &Disassembler::Disassemble_ZdD_ZnD_ZmD_imm}, + {"sqdmulh_z_zzi_h"_h, &Disassembler::Disassemble_ZdH_ZnH_ZmH_imm}, + {"sqdmulh_z_zzi_s"_h, &Disassembler::Disassemble_ZdS_ZnS_ZmS_imm}, + {"sqdmullb_z_zz"_h, &Disassembler::Disassemble_ZdT_ZnTb_ZmTb}, + {"sqdmullb_z_zzi_d"_h, &Disassembler::Disassemble_ZdD_ZnS_ZmS_imm}, + {"sqdmullb_z_zzi_s"_h, &Disassembler::Disassemble_ZdS_ZnH_ZmH_imm}, + {"sqdmullt_z_zz"_h, &Disassembler::Disassemble_ZdT_ZnTb_ZmTb}, + {"sqdmullt_z_zzi_d"_h, &Disassembler::Disassemble_ZdD_ZnS_ZmS_imm}, + {"sqdmullt_z_zzi_s"_h, &Disassembler::Disassemble_ZdS_ZnH_ZmH_imm}, + {"sqneg_z_p_z"_h, &Disassembler::Disassemble_ZdT_PgM_ZnT}, + {"sqrdcmlah_z_zzz"_h, &Disassembler::Disassemble_ZdaT_ZnT_ZmT_const}, + {"sqrdcmlah_z_zzzi_h"_h, + &Disassembler::Disassemble_ZdaH_ZnH_ZmH_imm_const}, + {"sqrdcmlah_z_zzzi_s"_h, + &Disassembler::Disassemble_ZdaS_ZnS_ZmS_imm_const}, + {"sqrdmlah_z_zzz"_h, &Disassembler::Disassemble_ZdaT_ZnT_ZmT}, + {"sqrdmlah_z_zzzi_d"_h, &Disassembler::Disassemble_ZdaD_ZnD_ZmD_imm}, + {"sqrdmlah_z_zzzi_h"_h, &Disassembler::Disassemble_ZdaH_ZnH_ZmH_imm}, + {"sqrdmlah_z_zzzi_s"_h, &Disassembler::Disassemble_ZdaS_ZnS_ZmS_imm}, + {"sqrdmlsh_z_zzz"_h, &Disassembler::Disassemble_ZdaT_ZnT_ZmT}, + {"sqrdmlsh_z_zzzi_d"_h, &Disassembler::Disassemble_ZdaD_ZnD_ZmD_imm}, + {"sqrdmlsh_z_zzzi_h"_h, &Disassembler::Disassemble_ZdaH_ZnH_ZmH_imm}, + {"sqrdmlsh_z_zzzi_s"_h, &Disassembler::Disassemble_ZdaS_ZnS_ZmS_imm}, + {"sqrdmulh_z_zz"_h, &Disassembler::Disassemble_ZdT_ZnT_ZmT}, + {"sqrdmulh_z_zzi_d"_h, &Disassembler::Disassemble_ZdD_ZnD_ZmD_imm}, + {"sqrdmulh_z_zzi_h"_h, &Disassembler::Disassemble_ZdH_ZnH_ZmH_imm}, + {"sqrdmulh_z_zzi_s"_h, &Disassembler::Disassemble_ZdS_ZnS_ZmS_imm}, + {"sqrshl_z_p_zz"_h, &Disassembler::Disassemble_ZdnT_PgM_ZdnT_ZmT}, + {"sqrshlr_z_p_zz"_h, &Disassembler::Disassemble_ZdnT_PgM_ZdnT_ZmT}, + {"sqrshrnb_z_zi"_h, &Disassembler::DisassembleSVEShiftRightImm}, + {"sqrshrnt_z_zi"_h, &Disassembler::DisassembleSVEShiftRightImm}, + {"sqrshrunb_z_zi"_h, &Disassembler::DisassembleSVEShiftRightImm}, + {"sqrshrunt_z_zi"_h, &Disassembler::DisassembleSVEShiftRightImm}, + {"sqshl_z_p_zi"_h, &Disassembler::VisitSVEBitwiseShiftByImm_Predicated}, + {"sqshl_z_p_zz"_h, &Disassembler::Disassemble_ZdnT_PgM_ZdnT_ZmT}, + {"sqshlr_z_p_zz"_h, &Disassembler::Disassemble_ZdnT_PgM_ZdnT_ZmT}, + {"sqshlu_z_p_zi"_h, &Disassembler::VisitSVEBitwiseShiftByImm_Predicated}, + {"sqshrnb_z_zi"_h, &Disassembler::DisassembleSVEShiftRightImm}, + {"sqshrnt_z_zi"_h, &Disassembler::DisassembleSVEShiftRightImm}, + {"sqshrunb_z_zi"_h, &Disassembler::DisassembleSVEShiftRightImm}, + {"sqshrunt_z_zi"_h, &Disassembler::DisassembleSVEShiftRightImm}, + {"sqsub_z_p_zz"_h, &Disassembler::Disassemble_ZdnT_PgM_ZdnT_ZmT}, + 
{"sqsubr_z_p_zz"_h, &Disassembler::Disassemble_ZdnT_PgM_ZdnT_ZmT}, + {"sqxtnb_z_zz"_h, &Disassembler::Disassemble_ZdT_ZnTb}, + {"sqxtnt_z_zz"_h, &Disassembler::Disassemble_ZdT_ZnTb}, + {"sqxtunb_z_zz"_h, &Disassembler::Disassemble_ZdT_ZnTb}, + {"sqxtunt_z_zz"_h, &Disassembler::Disassemble_ZdT_ZnTb}, + {"srhadd_z_p_zz"_h, &Disassembler::Disassemble_ZdnT_PgM_ZdnT_ZmT}, + {"sri_z_zzi"_h, &Disassembler::VisitSVEBitwiseShiftUnpredicated}, + {"srshl_z_p_zz"_h, &Disassembler::Disassemble_ZdnT_PgM_ZdnT_ZmT}, + {"srshlr_z_p_zz"_h, &Disassembler::Disassemble_ZdnT_PgM_ZdnT_ZmT}, + {"srshr_z_p_zi"_h, &Disassembler::VisitSVEBitwiseShiftByImm_Predicated}, + {"srsra_z_zi"_h, &Disassembler::VisitSVEBitwiseShiftUnpredicated}, + {"sshllb_z_zi"_h, &Disassembler::DisassembleSVEShiftLeftImm}, + {"sshllt_z_zi"_h, &Disassembler::DisassembleSVEShiftLeftImm}, + {"ssra_z_zi"_h, &Disassembler::VisitSVEBitwiseShiftUnpredicated}, + {"ssublb_z_zz"_h, &Disassembler::Disassemble_ZdT_ZnTb_ZmTb}, + {"ssublbt_z_zz"_h, &Disassembler::Disassemble_ZdT_ZnTb_ZmTb}, + {"ssublt_z_zz"_h, &Disassembler::Disassemble_ZdT_ZnTb_ZmTb}, + {"ssubltb_z_zz"_h, &Disassembler::Disassemble_ZdT_ZnTb_ZmTb}, + {"ssubwb_z_zz"_h, &Disassembler::Disassemble_ZdT_ZnT_ZmTb}, + {"ssubwt_z_zz"_h, &Disassembler::Disassemble_ZdT_ZnT_ZmTb}, + {"stnt1b_z_p_ar_d_64_unscaled"_h, + &Disassembler::Disassemble_ZtD_Pg_ZnD_Xm}, + {"stnt1b_z_p_ar_s_x32_unscaled"_h, + &Disassembler::Disassemble_ZtS_Pg_ZnS_Xm}, + {"stnt1d_z_p_ar_d_64_unscaled"_h, + &Disassembler::Disassemble_ZtD_Pg_ZnD_Xm}, + {"stnt1h_z_p_ar_d_64_unscaled"_h, + &Disassembler::Disassemble_ZtD_Pg_ZnD_Xm}, + {"stnt1h_z_p_ar_s_x32_unscaled"_h, + &Disassembler::Disassemble_ZtS_Pg_ZnS_Xm}, + {"stnt1w_z_p_ar_d_64_unscaled"_h, + &Disassembler::Disassemble_ZtD_Pg_ZnD_Xm}, + {"stnt1w_z_p_ar_s_x32_unscaled"_h, + &Disassembler::Disassemble_ZtS_Pg_ZnS_Xm}, + {"subhnb_z_zz"_h, &Disassembler::DisassembleSVEAddSubHigh}, + {"subhnt_z_zz"_h, &Disassembler::DisassembleSVEAddSubHigh}, + {"suqadd_z_p_zz"_h, &Disassembler::Disassemble_ZdnT_PgM_ZdnT_ZmT}, + {"tbl_z_zz_2"_h, &Disassembler::Disassemble_ZdT_Zn1T_Zn2T_ZmT}, + {"tbx_z_zz"_h, &Disassembler::Disassemble_ZdT_ZnT_ZmT}, + {"uaba_z_zzz"_h, &Disassembler::Disassemble_ZdaT_ZnT_ZmT}, + {"uabalb_z_zzz"_h, &Disassembler::Disassemble_ZdT_ZnTb_ZmTb}, + {"uabalt_z_zzz"_h, &Disassembler::Disassemble_ZdT_ZnTb_ZmTb}, + {"uabdlb_z_zz"_h, &Disassembler::Disassemble_ZdT_ZnTb_ZmTb}, + {"uabdlt_z_zz"_h, &Disassembler::Disassemble_ZdT_ZnTb_ZmTb}, + {"uadalp_z_p_z"_h, &Disassembler::Disassemble_ZdaT_PgM_ZnTb}, + {"uaddlb_z_zz"_h, &Disassembler::Disassemble_ZdT_ZnTb_ZmTb}, + {"uaddlt_z_zz"_h, &Disassembler::Disassemble_ZdT_ZnTb_ZmTb}, + {"uaddwb_z_zz"_h, &Disassembler::Disassemble_ZdT_ZnT_ZmTb}, + {"uaddwt_z_zz"_h, &Disassembler::Disassemble_ZdT_ZnT_ZmTb}, + {"uhadd_z_p_zz"_h, &Disassembler::Disassemble_ZdnT_PgM_ZdnT_ZmT}, + {"uhsub_z_p_zz"_h, &Disassembler::Disassemble_ZdnT_PgM_ZdnT_ZmT}, + {"uhsubr_z_p_zz"_h, &Disassembler::Disassemble_ZdnT_PgM_ZdnT_ZmT}, + {"umaxp_z_p_zz"_h, &Disassembler::Disassemble_ZdnT_PgM_ZdnT_ZmT}, + {"uminp_z_p_zz"_h, &Disassembler::Disassemble_ZdnT_PgM_ZdnT_ZmT}, + {"umlalb_z_zzz"_h, &Disassembler::Disassemble_ZdaT_ZnTb_ZmTb}, + {"umlalb_z_zzzi_d"_h, &Disassembler::Disassemble_ZdD_ZnS_ZmS_imm}, + {"umlalb_z_zzzi_s"_h, &Disassembler::Disassemble_ZdS_ZnH_ZmH_imm}, + {"umlalt_z_zzz"_h, &Disassembler::Disassemble_ZdaT_ZnTb_ZmTb}, + {"umlalt_z_zzzi_d"_h, &Disassembler::Disassemble_ZdD_ZnS_ZmS_imm}, + {"umlalt_z_zzzi_s"_h, 
&Disassembler::Disassemble_ZdS_ZnH_ZmH_imm}, + {"umlslb_z_zzz"_h, &Disassembler::Disassemble_ZdaT_ZnTb_ZmTb}, + {"umlslb_z_zzzi_d"_h, &Disassembler::Disassemble_ZdD_ZnS_ZmS_imm}, + {"umlslb_z_zzzi_s"_h, &Disassembler::Disassemble_ZdS_ZnH_ZmH_imm}, + {"umlslt_z_zzz"_h, &Disassembler::Disassemble_ZdaT_ZnTb_ZmTb}, + {"umlslt_z_zzzi_d"_h, &Disassembler::Disassemble_ZdD_ZnS_ZmS_imm}, + {"umlslt_z_zzzi_s"_h, &Disassembler::Disassemble_ZdS_ZnH_ZmH_imm}, + {"umulh_z_zz"_h, &Disassembler::Disassemble_ZdT_ZnT_ZmT}, + {"umullb_z_zz"_h, &Disassembler::Disassemble_ZdT_ZnTb_ZmTb}, + {"umullb_z_zzi_d"_h, &Disassembler::Disassemble_ZdD_ZnS_ZmS_imm}, + {"umullb_z_zzi_s"_h, &Disassembler::Disassemble_ZdS_ZnH_ZmH_imm}, + {"umullt_z_zz"_h, &Disassembler::Disassemble_ZdT_ZnTb_ZmTb}, + {"umullt_z_zzi_d"_h, &Disassembler::Disassemble_ZdD_ZnS_ZmS_imm}, + {"umullt_z_zzi_s"_h, &Disassembler::Disassemble_ZdS_ZnH_ZmH_imm}, + {"uqadd_z_p_zz"_h, &Disassembler::Disassemble_ZdnT_PgM_ZdnT_ZmT}, + {"uqrshl_z_p_zz"_h, &Disassembler::Disassemble_ZdnT_PgM_ZdnT_ZmT}, + {"uqrshlr_z_p_zz"_h, &Disassembler::Disassemble_ZdnT_PgM_ZdnT_ZmT}, + {"uqrshrnb_z_zi"_h, &Disassembler::DisassembleSVEShiftRightImm}, + {"uqrshrnt_z_zi"_h, &Disassembler::DisassembleSVEShiftRightImm}, + {"uqshl_z_p_zi"_h, &Disassembler::VisitSVEBitwiseShiftByImm_Predicated}, + {"uqshl_z_p_zz"_h, &Disassembler::Disassemble_ZdnT_PgM_ZdnT_ZmT}, + {"uqshlr_z_p_zz"_h, &Disassembler::Disassemble_ZdnT_PgM_ZdnT_ZmT}, + {"uqshrnb_z_zi"_h, &Disassembler::DisassembleSVEShiftRightImm}, + {"uqshrnt_z_zi"_h, &Disassembler::DisassembleSVEShiftRightImm}, + {"uqsub_z_p_zz"_h, &Disassembler::Disassemble_ZdnT_PgM_ZdnT_ZmT}, + {"uqsubr_z_p_zz"_h, &Disassembler::Disassemble_ZdnT_PgM_ZdnT_ZmT}, + {"uqxtnb_z_zz"_h, &Disassembler::Disassemble_ZdT_ZnTb}, + {"uqxtnt_z_zz"_h, &Disassembler::Disassemble_ZdT_ZnTb}, + {"urecpe_z_p_z"_h, &Disassembler::Disassemble_ZdS_PgM_ZnS}, + {"urhadd_z_p_zz"_h, &Disassembler::Disassemble_ZdnT_PgM_ZdnT_ZmT}, + {"urshl_z_p_zz"_h, &Disassembler::Disassemble_ZdnT_PgM_ZdnT_ZmT}, + {"urshlr_z_p_zz"_h, &Disassembler::Disassemble_ZdnT_PgM_ZdnT_ZmT}, + {"urshr_z_p_zi"_h, &Disassembler::VisitSVEBitwiseShiftByImm_Predicated}, + {"ursqrte_z_p_z"_h, &Disassembler::Disassemble_ZdS_PgM_ZnS}, + {"ursra_z_zi"_h, &Disassembler::VisitSVEBitwiseShiftUnpredicated}, + {"ushllb_z_zi"_h, &Disassembler::DisassembleSVEShiftLeftImm}, + {"ushllt_z_zi"_h, &Disassembler::DisassembleSVEShiftLeftImm}, + {"usqadd_z_p_zz"_h, &Disassembler::Disassemble_ZdnT_PgM_ZdnT_ZmT}, + {"usra_z_zi"_h, &Disassembler::VisitSVEBitwiseShiftUnpredicated}, + {"usublb_z_zz"_h, &Disassembler::Disassemble_ZdT_ZnTb_ZmTb}, + {"usublt_z_zz"_h, &Disassembler::Disassemble_ZdT_ZnTb_ZmTb}, + {"usubwb_z_zz"_h, &Disassembler::Disassemble_ZdT_ZnT_ZmTb}, + {"usubwt_z_zz"_h, &Disassembler::Disassemble_ZdT_ZnT_ZmTb}, + {"whilege_p_p_rr"_h, + &Disassembler::VisitSVEIntCompareScalarCountAndLimit}, + {"whilegt_p_p_rr"_h, + &Disassembler::VisitSVEIntCompareScalarCountAndLimit}, + {"whilehi_p_p_rr"_h, + &Disassembler::VisitSVEIntCompareScalarCountAndLimit}, + {"whilehs_p_p_rr"_h, + &Disassembler::VisitSVEIntCompareScalarCountAndLimit}, + {"whilerw_p_rr"_h, &Disassembler::VisitSVEIntCompareScalarCountAndLimit}, + {"whilewr_p_rr"_h, &Disassembler::VisitSVEIntCompareScalarCountAndLimit}, + {"xar_z_zzi"_h, &Disassembler::Disassemble_ZdnT_ZdnT_ZmT_const}, + {"fmmla_z_zzz_s"_h, &Disassembler::Disassemble_ZdaT_ZnT_ZmT}, + {"fmmla_z_zzz_d"_h, &Disassembler::Disassemble_ZdaT_ZnT_ZmT}, + {"smmla_z_zzz"_h, 
&Disassembler::Disassemble_ZdaS_ZnB_ZmB}, + {"ummla_z_zzz"_h, &Disassembler::Disassemble_ZdaS_ZnB_ZmB}, + {"usmmla_z_zzz"_h, &Disassembler::Disassemble_ZdaS_ZnB_ZmB}, + {"usdot_z_zzz_s"_h, &Disassembler::Disassemble_ZdaS_ZnB_ZmB}, + {"smmla_asimdsame2_g"_h, &Disassembler::Disassemble_Vd4S_Vn16B_Vm16B}, + {"ummla_asimdsame2_g"_h, &Disassembler::Disassemble_Vd4S_Vn16B_Vm16B}, + {"usmmla_asimdsame2_g"_h, &Disassembler::Disassemble_Vd4S_Vn16B_Vm16B}, + {"ld1row_z_p_bi_u32"_h, + &Disassembler::VisitSVELoadAndBroadcastQOWord_ScalarPlusImm}, + {"ld1row_z_p_br_contiguous"_h, + &Disassembler::VisitSVELoadAndBroadcastQOWord_ScalarPlusScalar}, + {"ld1rod_z_p_bi_u64"_h, + &Disassembler::VisitSVELoadAndBroadcastQOWord_ScalarPlusImm}, + {"ld1rod_z_p_br_contiguous"_h, + &Disassembler::VisitSVELoadAndBroadcastQOWord_ScalarPlusScalar}, + {"ld1rob_z_p_bi_u8"_h, + &Disassembler::VisitSVELoadAndBroadcastQOWord_ScalarPlusImm}, + {"ld1rob_z_p_br_contiguous"_h, + &Disassembler::VisitSVELoadAndBroadcastQOWord_ScalarPlusScalar}, + {"ld1roh_z_p_bi_u16"_h, + &Disassembler::VisitSVELoadAndBroadcastQOWord_ScalarPlusImm}, + {"ld1roh_z_p_br_contiguous"_h, + &Disassembler::VisitSVELoadAndBroadcastQOWord_ScalarPlusScalar}, + {"usdot_z_zzzi_s"_h, &Disassembler::VisitSVEMulIndex}, + {"sudot_z_zzzi_s"_h, &Disassembler::VisitSVEMulIndex}, + {"usdot_asimdsame2_d"_h, &Disassembler::VisitNEON3SameExtra}, + {"addg_64_addsub_immtags"_h, + &Disassembler::Disassemble_XdSP_XnSP_uimm6_uimm4}, + {"gmi_64g_dp_2src"_h, &Disassembler::Disassemble_Xd_XnSP_Xm}, + {"irg_64i_dp_2src"_h, &Disassembler::Disassemble_XdSP_XnSP_Xm}, + {"ldg_64loffset_ldsttags"_h, &Disassembler::DisassembleMTELoadTag}, + {"st2g_64soffset_ldsttags"_h, &Disassembler::DisassembleMTEStoreTag}, + {"st2g_64spost_ldsttags"_h, &Disassembler::DisassembleMTEStoreTag}, + {"st2g_64spre_ldsttags"_h, &Disassembler::DisassembleMTEStoreTag}, + {"stgp_64_ldstpair_off"_h, &Disassembler::DisassembleMTEStoreTagPair}, + {"stgp_64_ldstpair_post"_h, &Disassembler::DisassembleMTEStoreTagPair}, + {"stgp_64_ldstpair_pre"_h, &Disassembler::DisassembleMTEStoreTagPair}, + {"stg_64soffset_ldsttags"_h, &Disassembler::DisassembleMTEStoreTag}, + {"stg_64spost_ldsttags"_h, &Disassembler::DisassembleMTEStoreTag}, + {"stg_64spre_ldsttags"_h, &Disassembler::DisassembleMTEStoreTag}, + {"stz2g_64soffset_ldsttags"_h, &Disassembler::DisassembleMTEStoreTag}, + {"stz2g_64spost_ldsttags"_h, &Disassembler::DisassembleMTEStoreTag}, + {"stz2g_64spre_ldsttags"_h, &Disassembler::DisassembleMTEStoreTag}, + {"stzg_64soffset_ldsttags"_h, &Disassembler::DisassembleMTEStoreTag}, + {"stzg_64spost_ldsttags"_h, &Disassembler::DisassembleMTEStoreTag}, + {"stzg_64spre_ldsttags"_h, &Disassembler::DisassembleMTEStoreTag}, + {"subg_64_addsub_immtags"_h, + &Disassembler::Disassemble_XdSP_XnSP_uimm6_uimm4}, + {"subps_64s_dp_2src"_h, &Disassembler::Disassemble_Xd_XnSP_XmSP}, + {"subp_64s_dp_2src"_h, &Disassembler::Disassemble_Xd_XnSP_XmSP}, + {"cpyen_cpy_memcms"_h, &Disassembler::DisassembleCpy}, + {"cpyern_cpy_memcms"_h, &Disassembler::DisassembleCpy}, + {"cpyewn_cpy_memcms"_h, &Disassembler::DisassembleCpy}, + {"cpye_cpy_memcms"_h, &Disassembler::DisassembleCpy}, + {"cpyfen_cpy_memcms"_h, &Disassembler::DisassembleCpy}, + {"cpyfern_cpy_memcms"_h, &Disassembler::DisassembleCpy}, + {"cpyfewn_cpy_memcms"_h, &Disassembler::DisassembleCpy}, + {"cpyfe_cpy_memcms"_h, &Disassembler::DisassembleCpy}, + {"cpyfmn_cpy_memcms"_h, &Disassembler::DisassembleCpy}, + {"cpyfmrn_cpy_memcms"_h, &Disassembler::DisassembleCpy}, + 
{"cpyfmwn_cpy_memcms"_h, &Disassembler::DisassembleCpy}, + {"cpyfm_cpy_memcms"_h, &Disassembler::DisassembleCpy}, + {"cpyfpn_cpy_memcms"_h, &Disassembler::DisassembleCpy}, + {"cpyfprn_cpy_memcms"_h, &Disassembler::DisassembleCpy}, + {"cpyfpwn_cpy_memcms"_h, &Disassembler::DisassembleCpy}, + {"cpyfp_cpy_memcms"_h, &Disassembler::DisassembleCpy}, + {"cpymn_cpy_memcms"_h, &Disassembler::DisassembleCpy}, + {"cpymrn_cpy_memcms"_h, &Disassembler::DisassembleCpy}, + {"cpymwn_cpy_memcms"_h, &Disassembler::DisassembleCpy}, + {"cpym_cpy_memcms"_h, &Disassembler::DisassembleCpy}, + {"cpypn_cpy_memcms"_h, &Disassembler::DisassembleCpy}, + {"cpyprn_cpy_memcms"_h, &Disassembler::DisassembleCpy}, + {"cpypwn_cpy_memcms"_h, &Disassembler::DisassembleCpy}, + {"cpyp_cpy_memcms"_h, &Disassembler::DisassembleCpy}, + {"seten_set_memcms"_h, &Disassembler::DisassembleSet}, + {"sete_set_memcms"_h, &Disassembler::DisassembleSet}, + {"setgen_set_memcms"_h, &Disassembler::DisassembleSet}, + {"setge_set_memcms"_h, &Disassembler::DisassembleSet}, + {"setgmn_set_memcms"_h, &Disassembler::DisassembleSet}, + {"setgm_set_memcms"_h, &Disassembler::DisassembleSet}, + {"setgpn_set_memcms"_h, &Disassembler::DisassembleSet}, + {"setgp_set_memcms"_h, &Disassembler::DisassembleSet}, + {"setmn_set_memcms"_h, &Disassembler::DisassembleSet}, + {"setm_set_memcms"_h, &Disassembler::DisassembleSet}, + {"setpn_set_memcms"_h, &Disassembler::DisassembleSet}, + {"setp_set_memcms"_h, &Disassembler::DisassembleSet}, + {"abs_32_dp_1src"_h, &Disassembler::VisitDataProcessing1Source}, + {"abs_64_dp_1src"_h, &Disassembler::VisitDataProcessing1Source}, + {"cnt_32_dp_1src"_h, &Disassembler::VisitDataProcessing1Source}, + {"cnt_64_dp_1src"_h, &Disassembler::VisitDataProcessing1Source}, + {"ctz_32_dp_1src"_h, &Disassembler::VisitDataProcessing1Source}, + {"ctz_64_dp_1src"_h, &Disassembler::VisitDataProcessing1Source}, + {"smax_32_dp_2src"_h, &Disassembler::VisitDataProcessing2Source}, + {"smax_64_dp_2src"_h, &Disassembler::VisitDataProcessing2Source}, + {"smin_32_dp_2src"_h, &Disassembler::VisitDataProcessing2Source}, + {"smin_64_dp_2src"_h, &Disassembler::VisitDataProcessing2Source}, + {"umax_32_dp_2src"_h, &Disassembler::VisitDataProcessing2Source}, + {"umax_64_dp_2src"_h, &Disassembler::VisitDataProcessing2Source}, + {"umin_32_dp_2src"_h, &Disassembler::VisitDataProcessing2Source}, + {"umin_64_dp_2src"_h, &Disassembler::VisitDataProcessing2Source}, + {"smax_32_minmax_imm"_h, &Disassembler::DisassembleMinMaxImm}, + {"smax_64_minmax_imm"_h, &Disassembler::DisassembleMinMaxImm}, + {"smin_32_minmax_imm"_h, &Disassembler::DisassembleMinMaxImm}, + {"smin_64_minmax_imm"_h, &Disassembler::DisassembleMinMaxImm}, + {"umax_32u_minmax_imm"_h, &Disassembler::DisassembleMinMaxImm}, + {"umax_64u_minmax_imm"_h, &Disassembler::DisassembleMinMaxImm}, + {"umin_32u_minmax_imm"_h, &Disassembler::DisassembleMinMaxImm}, + {"umin_64u_minmax_imm"_h, &Disassembler::DisassembleMinMaxImm}, + }; + return &form_to_visitor; +} // NOLINT(readability/fn_size) + +Disassembler::Disassembler() { + buffer_size_ = 256; + buffer_ = reinterpret_cast<char *>(malloc(buffer_size_)); + buffer_pos_ = 0; + own_buffer_ = true; + code_address_offset_ = 0; +} + +Disassembler::Disassembler(char *text_buffer, int buffer_size) { + buffer_size_ = buffer_size; + buffer_ = text_buffer; + buffer_pos_ = 0; + own_buffer_ = false; + code_address_offset_ = 0; +} + +Disassembler::~Disassembler() { + if (own_buffer_) { + free(buffer_); + } +} + +char *Disassembler::GetOutput() { return buffer_; } + +void 
Disassembler::VisitAddSubImmediate(const Instruction *instr) { + bool rd_is_zr = RdIsZROrSP(instr); + bool stack_op = + (rd_is_zr || RnIsZROrSP(instr)) && (instr->GetImmAddSub() == 0) ? true + : false; + const char *mnemonic = mnemonic_.c_str(); + const char *form = "'Rds, 'Rns, 'IAddSub"; + const char *form_cmp = "'Rns, 'IAddSub"; + const char *form_mov = "'Rds, 'Rns"; + + switch (form_hash_) { + case "add_32_addsub_imm"_h: + case "add_64_addsub_imm"_h: + if (stack_op) { + mnemonic = "mov"; + form = form_mov; + } + break; + case "adds_32s_addsub_imm"_h: + case "adds_64s_addsub_imm"_h: + if (rd_is_zr) { + mnemonic = "cmn"; + form = form_cmp; + } + break; + case "subs_32s_addsub_imm"_h: + case "subs_64s_addsub_imm"_h: + if (rd_is_zr) { + mnemonic = "cmp"; + form = form_cmp; + } + break; + } + Format(instr, mnemonic, form); +} + + +void Disassembler::VisitAddSubShifted(const Instruction *instr) { + bool rd_is_zr = RdIsZROrSP(instr); + bool rn_is_zr = RnIsZROrSP(instr); + const char *mnemonic = mnemonic_.c_str(); + const char *form = "'Rd, 'Rn, 'Rm'NDP"; + const char *form_cmp = "'Rn, 'Rm'NDP"; + const char *form_neg = "'Rd, 'Rm'NDP"; + + if (instr->GetShiftDP() == ROR) { + // Add/sub/adds/subs don't allow ROR as a shift mode. + VisitUnallocated(instr); + return; + } + + switch (form_hash_) { + case "adds_32_addsub_shift"_h: + case "adds_64_addsub_shift"_h: + if (rd_is_zr) { + mnemonic = "cmn"; + form = form_cmp; + } + break; + case "sub_32_addsub_shift"_h: + case "sub_64_addsub_shift"_h: + if (rn_is_zr) { + mnemonic = "neg"; + form = form_neg; + } + break; + case "subs_32_addsub_shift"_h: + case "subs_64_addsub_shift"_h: + if (rd_is_zr) { + mnemonic = "cmp"; + form = form_cmp; + } else if (rn_is_zr) { + mnemonic = "negs"; + form = form_neg; + } + } + Format(instr, mnemonic, form); +} + + +void Disassembler::VisitAddSubExtended(const Instruction *instr) { + bool rd_is_zr = RdIsZROrSP(instr); + const char *mnemonic = ""; + Extend mode = static_cast<Extend>(instr->GetExtendMode()); + const char *form = ((mode == UXTX) || (mode == SXTX)) ? "'Rds, 'Rns, 'Xm'Ext" + : "'Rds, 'Rns, 'Wm'Ext"; + const char *form_cmp = + ((mode == UXTX) || (mode == SXTX)) ? 
"'Rns, 'Xm'Ext" : "'Rns, 'Wm'Ext"; + + switch (instr->Mask(AddSubExtendedMask)) { + case ADD_w_ext: + case ADD_x_ext: + mnemonic = "add"; + break; + case ADDS_w_ext: + case ADDS_x_ext: { + mnemonic = "adds"; + if (rd_is_zr) { + mnemonic = "cmn"; + form = form_cmp; + } + break; + } + case SUB_w_ext: + case SUB_x_ext: + mnemonic = "sub"; + break; + case SUBS_w_ext: + case SUBS_x_ext: { + mnemonic = "subs"; + if (rd_is_zr) { + mnemonic = "cmp"; + form = form_cmp; + } + break; + } + default: + VIXL_UNREACHABLE(); + } + Format(instr, mnemonic, form); +} + + +void Disassembler::VisitAddSubWithCarry(const Instruction *instr) { + bool rn_is_zr = RnIsZROrSP(instr); + const char *mnemonic = ""; + const char *form = "'Rd, 'Rn, 'Rm"; + const char *form_neg = "'Rd, 'Rm"; + + switch (instr->Mask(AddSubWithCarryMask)) { + case ADC_w: + case ADC_x: + mnemonic = "adc"; + break; + case ADCS_w: + case ADCS_x: + mnemonic = "adcs"; + break; + case SBC_w: + case SBC_x: { + mnemonic = "sbc"; + if (rn_is_zr) { + mnemonic = "ngc"; + form = form_neg; + } + break; + } + case SBCS_w: + case SBCS_x: { + mnemonic = "sbcs"; + if (rn_is_zr) { + mnemonic = "ngcs"; + form = form_neg; + } + break; + } + default: + VIXL_UNREACHABLE(); + } + Format(instr, mnemonic, form); +} + + +void Disassembler::VisitRotateRightIntoFlags(const Instruction *instr) { + FormatWithDecodedMnemonic(instr, "'Xn, 'IRr, 'INzcv"); +} + + +void Disassembler::VisitEvaluateIntoFlags(const Instruction *instr) { + FormatWithDecodedMnemonic(instr, "'Wn"); +} + + +void Disassembler::VisitLogicalImmediate(const Instruction *instr) { + bool rd_is_zr = RdIsZROrSP(instr); + bool rn_is_zr = RnIsZROrSP(instr); + const char *mnemonic = ""; + const char *form = "'Rds, 'Rn, 'ITri"; + + if (instr->GetImmLogical() == 0) { + // The immediate encoded in the instruction is not in the expected format. + Format(instr, "unallocated", "(LogicalImmediate)"); + return; + } + + switch (instr->Mask(LogicalImmediateMask)) { + case AND_w_imm: + case AND_x_imm: + mnemonic = "and"; + break; + case ORR_w_imm: + case ORR_x_imm: { + mnemonic = "orr"; + unsigned reg_size = + (instr->GetSixtyFourBits() == 1) ? kXRegSize : kWRegSize; + if (rn_is_zr && !IsMovzMovnImm(reg_size, instr->GetImmLogical())) { + mnemonic = "mov"; + form = "'Rds, 'ITri"; + } + break; + } + case EOR_w_imm: + case EOR_x_imm: + mnemonic = "eor"; + break; + case ANDS_w_imm: + case ANDS_x_imm: { + mnemonic = "ands"; + if (rd_is_zr) { + mnemonic = "tst"; + form = "'Rn, 'ITri"; + } + break; + } + default: + VIXL_UNREACHABLE(); + } + Format(instr, mnemonic, form); +} + + +bool Disassembler::IsMovzMovnImm(unsigned reg_size, uint64_t value) { + VIXL_ASSERT((reg_size == kXRegSize) || + ((reg_size == kWRegSize) && (value <= 0xffffffff))); + + // Test for movz: 16 bits set at positions 0, 16, 32 or 48. + if (((value & UINT64_C(0xffffffffffff0000)) == 0) || + ((value & UINT64_C(0xffffffff0000ffff)) == 0) || + ((value & UINT64_C(0xffff0000ffffffff)) == 0) || + ((value & UINT64_C(0x0000ffffffffffff)) == 0)) { + return true; + } + + // Test for movn: NOT(16 bits set at positions 0, 16, 32 or 48). 
+ if ((reg_size == kXRegSize) && + (((~value & UINT64_C(0xffffffffffff0000)) == 0) || + ((~value & UINT64_C(0xffffffff0000ffff)) == 0) || + ((~value & UINT64_C(0xffff0000ffffffff)) == 0) || + ((~value & UINT64_C(0x0000ffffffffffff)) == 0))) { + return true; + } + if ((reg_size == kWRegSize) && (((value & 0xffff0000) == 0xffff0000) || + ((value & 0x0000ffff) == 0x0000ffff))) { + return true; + } + return false; +} + + +void Disassembler::VisitLogicalShifted(const Instruction *instr) { + bool rd_is_zr = RdIsZROrSP(instr); + bool rn_is_zr = RnIsZROrSP(instr); + const char *mnemonic = mnemonic_.c_str(); + const char *form = "'Rd, 'Rn, 'Rm'NLo"; + + switch (form_hash_) { + case "ands_32_log_shift"_h: + case "ands_64_log_shift"_h: + if (rd_is_zr) { + mnemonic = "tst"; + form = "'Rn, 'Rm'NLo"; + } + break; + case "orr_32_log_shift"_h: + case "orr_64_log_shift"_h: + if (rn_is_zr && (instr->GetImmDPShift() == 0) && + (instr->GetShiftDP() == LSL)) { + mnemonic = "mov"; + form = "'Rd, 'Rm"; + } + break; + case "orn_32_log_shift"_h: + case "orn_64_log_shift"_h: + if (rn_is_zr) { + mnemonic = "mvn"; + form = "'Rd, 'Rm'NLo"; + } + break; + } + + Format(instr, mnemonic, form); +} + + +void Disassembler::VisitConditionalCompareRegister(const Instruction *instr) { + FormatWithDecodedMnemonic(instr, "'Rn, 'Rm, 'INzcv, 'Cond"); +} + + +void Disassembler::VisitConditionalCompareImmediate(const Instruction *instr) { + FormatWithDecodedMnemonic(instr, "'Rn, 'IP, 'INzcv, 'Cond"); +} + + +void Disassembler::VisitConditionalSelect(const Instruction *instr) { + bool rnm_is_zr = (RnIsZROrSP(instr) && RmIsZROrSP(instr)); + bool rn_is_rm = (instr->GetRn() == instr->GetRm()); + const char *mnemonic = ""; + const char *form = "'Rd, 'Rn, 'Rm, 'Cond"; + const char *form_test = "'Rd, 'CInv"; + const char *form_update = "'Rd, 'Rn, 'CInv"; + + Condition cond = static_cast<Condition>(instr->GetCondition()); + bool invertible_cond = (cond != al) && (cond != nv); + + switch (instr->Mask(ConditionalSelectMask)) { + case CSEL_w: + case CSEL_x: + mnemonic = "csel"; + break; + case CSINC_w: + case CSINC_x: { + mnemonic = "csinc"; + if (rnm_is_zr && invertible_cond) { + mnemonic = "cset"; + form = form_test; + } else if (rn_is_rm && invertible_cond) { + mnemonic = "cinc"; + form = form_update; + } + break; + } + case CSINV_w: + case CSINV_x: { + mnemonic = "csinv"; + if (rnm_is_zr && invertible_cond) { + mnemonic = "csetm"; + form = form_test; + } else if (rn_is_rm && invertible_cond) { + mnemonic = "cinv"; + form = form_update; + } + break; + } + case CSNEG_w: + case CSNEG_x: { + mnemonic = "csneg"; + if (rn_is_rm && invertible_cond) { + mnemonic = "cneg"; + form = form_update; + } + break; + } + default: + VIXL_UNREACHABLE(); + } + Format(instr, mnemonic, form); +} + + +void Disassembler::VisitBitfield(const Instruction *instr) { + unsigned s = instr->GetImmS(); + unsigned r = instr->GetImmR(); + unsigned rd_size_minus_1 = + ((instr->GetSixtyFourBits() == 1) ? 
kXRegSize : kWRegSize) - 1; + const char *mnemonic = ""; + const char *form = ""; + const char *form_shift_right = "'Rd, 'Rn, 'IBr"; + const char *form_extend = "'Rd, 'Wn"; + const char *form_bfiz = "'Rd, 'Rn, 'IBZ-r, 'IBs+1"; + const char *form_bfc = "'Rd, 'IBZ-r, 'IBs+1"; + const char *form_bfx = "'Rd, 'Rn, 'IBr, 'IBs-r+1"; + const char *form_lsl = "'Rd, 'Rn, 'IBZ-r"; + + if (instr->GetSixtyFourBits() != instr->GetBitN()) { + VisitUnallocated(instr); + return; + } + + if ((instr->GetSixtyFourBits() == 0) && ((s > 31) || (r > 31))) { + VisitUnallocated(instr); + return; + } + + switch (instr->Mask(BitfieldMask)) { + case SBFM_w: + case SBFM_x: { + mnemonic = "sbfx"; + form = form_bfx; + if (r == 0) { + form = form_extend; + if (s == 7) { + mnemonic = "sxtb"; + } else if (s == 15) { + mnemonic = "sxth"; + } else if ((s == 31) && (instr->GetSixtyFourBits() == 1)) { + mnemonic = "sxtw"; + } else { + form = form_bfx; + } + } else if (s == rd_size_minus_1) { + mnemonic = "asr"; + form = form_shift_right; + } else if (s < r) { + mnemonic = "sbfiz"; + form = form_bfiz; + } + break; + } + case UBFM_w: + case UBFM_x: { + mnemonic = "ubfx"; + form = form_bfx; + if (r == 0) { + form = form_extend; + if (s == 7) { + mnemonic = "uxtb"; + } else if (s == 15) { + mnemonic = "uxth"; + } else { + form = form_bfx; + } + } + if (s == rd_size_minus_1) { + mnemonic = "lsr"; + form = form_shift_right; + } else if (r == s + 1) { + mnemonic = "lsl"; + form = form_lsl; + } else if (s < r) { + mnemonic = "ubfiz"; + form = form_bfiz; + } + break; + } + case BFM_w: + case BFM_x: { + mnemonic = "bfxil"; + form = form_bfx; + if (s < r) { + if (instr->GetRn() == kZeroRegCode) { + mnemonic = "bfc"; + form = form_bfc; + } else { + mnemonic = "bfi"; + form = form_bfiz; + } + } + } + } + Format(instr, mnemonic, form); +} + + +void Disassembler::VisitExtract(const Instruction *instr) { + const char *mnemonic = ""; + const char *form = "'Rd, 'Rn, 'Rm, 'IExtract"; + + switch (instr->Mask(ExtractMask)) { + case EXTR_w: + case EXTR_x: { + if (instr->GetRn() == instr->GetRm()) { + mnemonic = "ror"; + form = "'Rd, 'Rn, 'IExtract"; + } else { + mnemonic = "extr"; + } + break; + } + default: + VIXL_UNREACHABLE(); + } + Format(instr, mnemonic, form); +} + + +void Disassembler::VisitPCRelAddressing(const Instruction *instr) { + switch (instr->Mask(PCRelAddressingMask)) { + case ADR: + Format(instr, "adr", "'Xd, 'AddrPCRelByte"); + break; + case ADRP: + Format(instr, "adrp", "'Xd, 'AddrPCRelPage"); + break; + default: + Format(instr, "unimplemented", "(PCRelAddressing)"); + } +} + + +void Disassembler::VisitConditionalBranch(const Instruction *instr) { + // We can't use the mnemonic directly here, as there's no space between it and + // the condition. Assert that we have the correct mnemonic, then use "b" + // explicitly for formatting the output. 
+ VIXL_ASSERT(form_hash_ == "b_only_condbranch"_h); + Format(instr, "b.'CBrn", "'TImmCond"); +} + + +void Disassembler::VisitUnconditionalBranchToRegister( + const Instruction *instr) { + const char *form = "'Xn"; + + switch (form_hash_) { + case "ret_64r_branch_reg"_h: + if (instr->GetRn() == kLinkRegCode) { + form = ""; + } + break; + case "retaa_64e_branch_reg"_h: + case "retab_64e_branch_reg"_h: + form = ""; + break; + case "braa_64p_branch_reg"_h: + case "brab_64p_branch_reg"_h: + case "blraa_64p_branch_reg"_h: + case "blrab_64p_branch_reg"_h: + form = "'Xn, 'Xds"; + break; + } + + FormatWithDecodedMnemonic(instr, form); +} + + +void Disassembler::VisitUnconditionalBranch(const Instruction *instr) { + FormatWithDecodedMnemonic(instr, "'TImmUncn"); +} + + +void Disassembler::VisitDataProcessing1Source(const Instruction *instr) { + const char *form = "'Rd, 'Rn"; + + switch (form_hash_) { + case "pacia_64p_dp_1src"_h: + case "pacda_64p_dp_1src"_h: + case "autia_64p_dp_1src"_h: + case "autda_64p_dp_1src"_h: + case "pacib_64p_dp_1src"_h: + case "pacdb_64p_dp_1src"_h: + case "autib_64p_dp_1src"_h: + case "autdb_64p_dp_1src"_h: + form = "'Xd, 'Xns"; + break; + case "paciza_64z_dp_1src"_h: + case "pacdza_64z_dp_1src"_h: + case "autiza_64z_dp_1src"_h: + case "autdza_64z_dp_1src"_h: + case "pacizb_64z_dp_1src"_h: + case "pacdzb_64z_dp_1src"_h: + case "autizb_64z_dp_1src"_h: + case "autdzb_64z_dp_1src"_h: + case "xpacd_64z_dp_1src"_h: + case "xpaci_64z_dp_1src"_h: + form = "'Xd"; + break; + } + FormatWithDecodedMnemonic(instr, form); +} + + +void Disassembler::VisitDataProcessing2Source(const Instruction *instr) { + std::string mnemonic = mnemonic_; + const char *form = "'Rd, 'Rn, 'Rm"; + + switch (form_hash_) { + case "asrv_32_dp_2src"_h: + case "asrv_64_dp_2src"_h: + case "lslv_32_dp_2src"_h: + case "lslv_64_dp_2src"_h: + case "lsrv_32_dp_2src"_h: + case "lsrv_64_dp_2src"_h: + case "rorv_32_dp_2src"_h: + case "rorv_64_dp_2src"_h: + // Drop the last 'v' character. 
+ VIXL_ASSERT(mnemonic[3] == 'v'); + mnemonic.pop_back(); + break; + case "pacga_64p_dp_2src"_h: + form = "'Xd, 'Xn, 'Xms"; + break; + case "crc32x_64c_dp_2src"_h: + case "crc32cx_64c_dp_2src"_h: + form = "'Wd, 'Wn, 'Xm"; + break; + } + Format(instr, mnemonic.c_str(), form); +} + + +void Disassembler::VisitDataProcessing3Source(const Instruction *instr) { + bool ra_is_zr = RaIsZROrSP(instr); + const char *mnemonic = ""; + const char *form = "'Xd, 'Wn, 'Wm, 'Xa"; + const char *form_rrr = "'Rd, 'Rn, 'Rm"; + const char *form_rrrr = "'Rd, 'Rn, 'Rm, 'Ra"; + const char *form_xww = "'Xd, 'Wn, 'Wm"; + const char *form_xxx = "'Xd, 'Xn, 'Xm"; + + switch (instr->Mask(DataProcessing3SourceMask)) { + case MADD_w: + case MADD_x: { + mnemonic = "madd"; + form = form_rrrr; + if (ra_is_zr) { + mnemonic = "mul"; + form = form_rrr; + } + break; + } + case MSUB_w: + case MSUB_x: { + mnemonic = "msub"; + form = form_rrrr; + if (ra_is_zr) { + mnemonic = "mneg"; + form = form_rrr; + } + break; + } + case SMADDL_x: { + mnemonic = "smaddl"; + if (ra_is_zr) { + mnemonic = "smull"; + form = form_xww; + } + break; + } + case SMSUBL_x: { + mnemonic = "smsubl"; + if (ra_is_zr) { + mnemonic = "smnegl"; + form = form_xww; + } + break; + } + case UMADDL_x: { + mnemonic = "umaddl"; + if (ra_is_zr) { + mnemonic = "umull"; + form = form_xww; + } + break; + } + case UMSUBL_x: { + mnemonic = "umsubl"; + if (ra_is_zr) { + mnemonic = "umnegl"; + form = form_xww; + } + break; + } + case SMULH_x: { + mnemonic = "smulh"; + form = form_xxx; + break; + } + case UMULH_x: { + mnemonic = "umulh"; + form = form_xxx; + break; + } + default: + VIXL_UNREACHABLE(); + } + Format(instr, mnemonic, form); +} + +void Disassembler::DisassembleMinMaxImm(const Instruction *instr) { + const char *suffix = (instr->ExtractBit(18) == 0) ? "'s1710" : "'u1710"; + FormatWithDecodedMnemonic(instr, "'Rd, 'Rn, #", suffix); +} + +void Disassembler::VisitCompareBranch(const Instruction *instr) { + FormatWithDecodedMnemonic(instr, "'Rt, 'TImmCmpa"); +} + + +void Disassembler::VisitTestBranch(const Instruction *instr) { + // If the top bit of the immediate is clear, the tested register is + // disassembled as Wt, otherwise Xt. As the top bit of the immediate is + // encoded in bit 31 of the instruction, we can reuse the Rt form, which + // uses bit 31 (normally "sf") to choose the register size. + FormatWithDecodedMnemonic(instr, "'Rt, 'It, 'TImmTest"); +} + + +void Disassembler::VisitMoveWideImmediate(const Instruction *instr) { + const char *mnemonic = ""; + const char *form = "'Rd, 'IMoveImm"; + + // Print the shift separately for movk, to make it clear which half word will + // be overwritten. Movn and movz print the computed immediate, which includes + // shift calculation. 
+ switch (instr->Mask(MoveWideImmediateMask)) { + case MOVN_w: + case MOVN_x: + if ((instr->GetImmMoveWide()) || (instr->GetShiftMoveWide() == 0)) { + if ((instr->GetSixtyFourBits() == 0) && + (instr->GetImmMoveWide() == 0xffff)) { + mnemonic = "movn"; + } else { + mnemonic = "mov"; + form = "'Rd, 'IMoveNeg"; + } + } else { + mnemonic = "movn"; + } + break; + case MOVZ_w: + case MOVZ_x: + if ((instr->GetImmMoveWide()) || (instr->GetShiftMoveWide() == 0)) + mnemonic = "mov"; + else + mnemonic = "movz"; + break; + case MOVK_w: + case MOVK_x: + mnemonic = "movk"; + form = "'Rd, 'IMoveLSL"; + break; + default: + VIXL_UNREACHABLE(); + } + Format(instr, mnemonic, form); +} + + +#define LOAD_STORE_LIST(V) \ + V(STRB_w, "'Wt") \ + V(STRH_w, "'Wt") \ + V(STR_w, "'Wt") \ + V(STR_x, "'Xt") \ + V(LDRB_w, "'Wt") \ + V(LDRH_w, "'Wt") \ + V(LDR_w, "'Wt") \ + V(LDR_x, "'Xt") \ + V(LDRSB_x, "'Xt") \ + V(LDRSH_x, "'Xt") \ + V(LDRSW_x, "'Xt") \ + V(LDRSB_w, "'Wt") \ + V(LDRSH_w, "'Wt") \ + V(STR_b, "'Bt") \ + V(STR_h, "'Ht") \ + V(STR_s, "'St") \ + V(STR_d, "'Dt") \ + V(LDR_b, "'Bt") \ + V(LDR_h, "'Ht") \ + V(LDR_s, "'St") \ + V(LDR_d, "'Dt") \ + V(STR_q, "'Qt") \ + V(LDR_q, "'Qt") + +void Disassembler::VisitLoadStorePreIndex(const Instruction *instr) { + const char *form = "(LoadStorePreIndex)"; + const char *suffix = ", ['Xns'ILSi]!"; + + switch (instr->Mask(LoadStorePreIndexMask)) { +#define LS_PREINDEX(A, B) \ + case A##_pre: \ + form = B; \ + break; + LOAD_STORE_LIST(LS_PREINDEX) +#undef LS_PREINDEX + } + FormatWithDecodedMnemonic(instr, form, suffix); +} + + +void Disassembler::VisitLoadStorePostIndex(const Instruction *instr) { + const char *form = "(LoadStorePostIndex)"; + const char *suffix = ", ['Xns]'ILSi"; + + switch (instr->Mask(LoadStorePostIndexMask)) { +#define LS_POSTINDEX(A, B) \ + case A##_post: \ + form = B; \ + break; + LOAD_STORE_LIST(LS_POSTINDEX) +#undef LS_POSTINDEX + } + FormatWithDecodedMnemonic(instr, form, suffix); +} + + +void Disassembler::VisitLoadStoreUnsignedOffset(const Instruction *instr) { + const char *form = "(LoadStoreUnsignedOffset)"; + const char *suffix = ", ['Xns'ILU]"; + + switch (instr->Mask(LoadStoreUnsignedOffsetMask)) { +#define LS_UNSIGNEDOFFSET(A, B) \ + case A##_unsigned: \ + form = B; \ + break; + LOAD_STORE_LIST(LS_UNSIGNEDOFFSET) +#undef LS_UNSIGNEDOFFSET + case PRFM_unsigned: + form = "'prefOp"; + } + FormatWithDecodedMnemonic(instr, form, suffix); +} + + +void Disassembler::VisitLoadStoreRCpcUnscaledOffset(const Instruction *instr) { + const char *mnemonic = mnemonic_.c_str(); + const char *form = "'Wt, ['Xns'ILS]"; + const char *form_x = "'Xt, ['Xns'ILS]"; + + switch (form_hash_) { + case "ldapursb_64_ldapstl_unscaled"_h: + case "ldapursh_64_ldapstl_unscaled"_h: + case "ldapursw_64_ldapstl_unscaled"_h: + case "ldapur_64_ldapstl_unscaled"_h: + case "stlur_64_ldapstl_unscaled"_h: + form = form_x; + break; + } + + Format(instr, mnemonic, form); +} + + +void Disassembler::VisitLoadStoreRegisterOffset(const Instruction *instr) { + const char *form = "(LoadStoreRegisterOffset)"; + const char *suffix = ", ['Xns, 'Offsetreg]"; + + switch (instr->Mask(LoadStoreRegisterOffsetMask)) { +#define LS_REGISTEROFFSET(A, B) \ + case A##_reg: \ + form = B; \ + break; + LOAD_STORE_LIST(LS_REGISTEROFFSET) +#undef LS_REGISTEROFFSET + case PRFM_reg: + form = "'prefOp"; + } + FormatWithDecodedMnemonic(instr, form, suffix); +} + + +void Disassembler::VisitLoadStoreUnscaledOffset(const Instruction *instr) { + const char *form = "'Wt"; + const char *suffix = ", ['Xns'ILS]"; + + 
switch (form_hash_) { + case "ldur_64_ldst_unscaled"_h: + case "ldursb_64_ldst_unscaled"_h: + case "ldursh_64_ldst_unscaled"_h: + case "ldursw_64_ldst_unscaled"_h: + case "stur_64_ldst_unscaled"_h: + form = "'Xt"; + break; + case "ldur_b_ldst_unscaled"_h: + case "stur_b_ldst_unscaled"_h: + form = "'Bt"; + break; + case "ldur_h_ldst_unscaled"_h: + case "stur_h_ldst_unscaled"_h: + form = "'Ht"; + break; + case "ldur_s_ldst_unscaled"_h: + case "stur_s_ldst_unscaled"_h: + form = "'St"; + break; + case "ldur_d_ldst_unscaled"_h: + case "stur_d_ldst_unscaled"_h: + form = "'Dt"; + break; + case "ldur_q_ldst_unscaled"_h: + case "stur_q_ldst_unscaled"_h: + form = "'Qt"; + break; + case "prfum_p_ldst_unscaled"_h: + form = "'prefOp"; + break; + } + FormatWithDecodedMnemonic(instr, form, suffix); +} + + +void Disassembler::VisitLoadLiteral(const Instruction *instr) { + const char *form = "'Wt"; + const char *suffix = ", 'ILLiteral 'LValue"; + + switch (form_hash_) { + case "ldr_64_loadlit"_h: + case "ldrsw_64_loadlit"_h: + form = "'Xt"; + break; + case "ldr_s_loadlit"_h: + form = "'St"; + break; + case "ldr_d_loadlit"_h: + form = "'Dt"; + break; + case "ldr_q_loadlit"_h: + form = "'Qt"; + break; + case "prfm_p_loadlit"_h: + form = "'prefOp"; + break; + } + FormatWithDecodedMnemonic(instr, form, suffix); +} + + +#define LOAD_STORE_PAIR_LIST(V) \ + V(STP_w, "'Wt, 'Wt2", "2") \ + V(LDP_w, "'Wt, 'Wt2", "2") \ + V(LDPSW_x, "'Xt, 'Xt2", "2") \ + V(STP_x, "'Xt, 'Xt2", "3") \ + V(LDP_x, "'Xt, 'Xt2", "3") \ + V(STP_s, "'St, 'St2", "2") \ + V(LDP_s, "'St, 'St2", "2") \ + V(STP_d, "'Dt, 'Dt2", "3") \ + V(LDP_d, "'Dt, 'Dt2", "3") \ + V(LDP_q, "'Qt, 'Qt2", "4") \ + V(STP_q, "'Qt, 'Qt2", "4") + +void Disassembler::VisitLoadStorePairPostIndex(const Instruction *instr) { + const char *form = "(LoadStorePairPostIndex)"; + + switch (instr->Mask(LoadStorePairPostIndexMask)) { +#define LSP_POSTINDEX(A, B, C) \ + case A##_post: \ + form = B ", ['Xns]'ILP" C "i"; \ + break; + LOAD_STORE_PAIR_LIST(LSP_POSTINDEX) +#undef LSP_POSTINDEX + } + FormatWithDecodedMnemonic(instr, form); +} + + +void Disassembler::VisitLoadStorePairPreIndex(const Instruction *instr) { + const char *form = "(LoadStorePairPreIndex)"; + + switch (instr->Mask(LoadStorePairPreIndexMask)) { +#define LSP_PREINDEX(A, B, C) \ + case A##_pre: \ + form = B ", ['Xns'ILP" C "i]!"; \ + break; + LOAD_STORE_PAIR_LIST(LSP_PREINDEX) +#undef LSP_PREINDEX + } + FormatWithDecodedMnemonic(instr, form); +} + + +void Disassembler::VisitLoadStorePairOffset(const Instruction *instr) { + const char *form = "(LoadStorePairOffset)"; + + switch (instr->Mask(LoadStorePairOffsetMask)) { +#define LSP_OFFSET(A, B, C) \ + case A##_off: \ + form = B ", ['Xns'ILP" C "]"; \ + break; + LOAD_STORE_PAIR_LIST(LSP_OFFSET) +#undef LSP_OFFSET + } + FormatWithDecodedMnemonic(instr, form); +} + + +void Disassembler::VisitLoadStorePairNonTemporal(const Instruction *instr) { + const char *form = "'Wt, 'Wt2, ['Xns'ILP2]"; + + switch (form_hash_) { + case "ldnp_64_ldstnapair_offs"_h: + case "stnp_64_ldstnapair_offs"_h: + form = "'Xt, 'Xt2, ['Xns'ILP3]"; + break; + case "ldnp_s_ldstnapair_offs"_h: + case "stnp_s_ldstnapair_offs"_h: + form = "'St, 'St2, ['Xns'ILP2]"; + break; + case "ldnp_d_ldstnapair_offs"_h: + case "stnp_d_ldstnapair_offs"_h: + form = "'Dt, 'Dt2, ['Xns'ILP3]"; + break; + case "ldnp_q_ldstnapair_offs"_h: + case "stnp_q_ldstnapair_offs"_h: + form = "'Qt, 'Qt2, ['Xns'ILP4]"; + break; + } + FormatWithDecodedMnemonic(instr, form); +} + +// clang-format off +#define 
LOAD_STORE_EXCLUSIVE_LIST(V) \ + V(STXRB_w, "'Ws, 'Wt") \ + V(STXRH_w, "'Ws, 'Wt") \ + V(STXR_w, "'Ws, 'Wt") \ + V(STXR_x, "'Ws, 'Xt") \ + V(LDXR_x, "'Xt") \ + V(STXP_w, "'Ws, 'Wt, 'Wt2") \ + V(STXP_x, "'Ws, 'Xt, 'Xt2") \ + V(LDXP_w, "'Wt, 'Wt2") \ + V(LDXP_x, "'Xt, 'Xt2") \ + V(STLXRB_w, "'Ws, 'Wt") \ + V(STLXRH_w, "'Ws, 'Wt") \ + V(STLXR_w, "'Ws, 'Wt") \ + V(STLXR_x, "'Ws, 'Xt") \ + V(LDAXR_x, "'Xt") \ + V(STLXP_w, "'Ws, 'Wt, 'Wt2") \ + V(STLXP_x, "'Ws, 'Xt, 'Xt2") \ + V(LDAXP_w, "'Wt, 'Wt2") \ + V(LDAXP_x, "'Xt, 'Xt2") \ + V(STLR_x, "'Xt") \ + V(LDAR_x, "'Xt") \ + V(STLLR_x, "'Xt") \ + V(LDLAR_x, "'Xt") \ + V(CAS_w, "'Ws, 'Wt") \ + V(CAS_x, "'Xs, 'Xt") \ + V(CASA_w, "'Ws, 'Wt") \ + V(CASA_x, "'Xs, 'Xt") \ + V(CASL_w, "'Ws, 'Wt") \ + V(CASL_x, "'Xs, 'Xt") \ + V(CASAL_w, "'Ws, 'Wt") \ + V(CASAL_x, "'Xs, 'Xt") \ + V(CASB, "'Ws, 'Wt") \ + V(CASAB, "'Ws, 'Wt") \ + V(CASLB, "'Ws, 'Wt") \ + V(CASALB, "'Ws, 'Wt") \ + V(CASH, "'Ws, 'Wt") \ + V(CASAH, "'Ws, 'Wt") \ + V(CASLH, "'Ws, 'Wt") \ + V(CASALH, "'Ws, 'Wt") \ + V(CASP_w, "'Ws, 'Ws+, 'Wt, 'Wt+") \ + V(CASP_x, "'Xs, 'Xs+, 'Xt, 'Xt+") \ + V(CASPA_w, "'Ws, 'Ws+, 'Wt, 'Wt+") \ + V(CASPA_x, "'Xs, 'Xs+, 'Xt, 'Xt+") \ + V(CASPL_w, "'Ws, 'Ws+, 'Wt, 'Wt+") \ + V(CASPL_x, "'Xs, 'Xs+, 'Xt, 'Xt+") \ + V(CASPAL_w, "'Ws, 'Ws+, 'Wt, 'Wt+") \ + V(CASPAL_x, "'Xs, 'Xs+, 'Xt, 'Xt+") +// clang-format on + + +void Disassembler::VisitLoadStoreExclusive(const Instruction *instr) { + const char *form = "'Wt"; + const char *suffix = ", ['Xns]"; + + switch (instr->Mask(LoadStoreExclusiveMask)) { +#define LSX(A, B) \ + case A: \ + form = B; \ + break; + LOAD_STORE_EXCLUSIVE_LIST(LSX) +#undef LSX + } + + switch (instr->Mask(LoadStoreExclusiveMask)) { + case CASP_w: + case CASP_x: + case CASPA_w: + case CASPA_x: + case CASPL_w: + case CASPL_x: + case CASPAL_w: + case CASPAL_x: + if ((instr->GetRs() % 2 == 1) || (instr->GetRt() % 2 == 1)) { + VisitUnallocated(instr); + return; + } + break; + } + + FormatWithDecodedMnemonic(instr, form, suffix); +} + +void Disassembler::VisitLoadStorePAC(const Instruction *instr) { + const char *form = "'Xt, ['Xns'ILA]"; + const char *suffix = ""; + switch (form_hash_) { + case "ldraa_64w_ldst_pac"_h: + case "ldrab_64w_ldst_pac"_h: + suffix = "!"; + break; + } + FormatWithDecodedMnemonic(instr, form, suffix); +} + +void Disassembler::VisitAtomicMemory(const Instruction *instr) { + bool is_x = (instr->ExtractBits(31, 30) == 3); + const char *form = is_x ? "'Xs, 'Xt" : "'Ws, 'Wt"; + const char *suffix = ", ['Xns]"; + + std::string mnemonic = mnemonic_; + + switch (form_hash_) { + case "ldaprb_32l_memop"_h: + case "ldaprh_32l_memop"_h: + case "ldapr_32l_memop"_h: + form = "'Wt"; + break; + case "ldapr_64l_memop"_h: + form = "'Xt"; + break; + default: + // Zero register implies a store instruction. + if (instr->GetRt() == kZeroRegCode) { + mnemonic.replace(0, 2, "st"); + form = is_x ? 
"'Xs" : "'Ws"; + } + } + Format(instr, mnemonic.c_str(), form, suffix); +} + + +void Disassembler::VisitFPCompare(const Instruction *instr) { + const char *form = "'Fn, 'Fm"; + switch (form_hash_) { + case "fcmpe_dz_floatcmp"_h: + case "fcmpe_hz_floatcmp"_h: + case "fcmpe_sz_floatcmp"_h: + case "fcmp_dz_floatcmp"_h: + case "fcmp_hz_floatcmp"_h: + case "fcmp_sz_floatcmp"_h: + form = "'Fn, #0.0"; + } + FormatWithDecodedMnemonic(instr, form); +} + + +void Disassembler::VisitFPConditionalCompare(const Instruction *instr) { + FormatWithDecodedMnemonic(instr, "'Fn, 'Fm, 'INzcv, 'Cond"); +} + + +void Disassembler::VisitFPConditionalSelect(const Instruction *instr) { + FormatWithDecodedMnemonic(instr, "'Fd, 'Fn, 'Fm, 'Cond"); +} + + +void Disassembler::VisitFPDataProcessing1Source(const Instruction *instr) { + const char *form = "'Fd, 'Fn"; + switch (form_hash_) { + case "fcvt_ds_floatdp1"_h: + form = "'Dd, 'Sn"; + break; + case "fcvt_sd_floatdp1"_h: + form = "'Sd, 'Dn"; + break; + case "fcvt_hs_floatdp1"_h: + form = "'Hd, 'Sn"; + break; + case "fcvt_sh_floatdp1"_h: + form = "'Sd, 'Hn"; + break; + case "fcvt_dh_floatdp1"_h: + form = "'Dd, 'Hn"; + break; + case "fcvt_hd_floatdp1"_h: + form = "'Hd, 'Dn"; + break; + } + FormatWithDecodedMnemonic(instr, form); +} + + +void Disassembler::VisitFPDataProcessing2Source(const Instruction *instr) { + FormatWithDecodedMnemonic(instr, "'Fd, 'Fn, 'Fm"); +} + + +void Disassembler::VisitFPDataProcessing3Source(const Instruction *instr) { + FormatWithDecodedMnemonic(instr, "'Fd, 'Fn, 'Fm, 'Fa"); +} + + +void Disassembler::VisitFPImmediate(const Instruction *instr) { + const char *form = "'Hd"; + const char *suffix = ", 'IFP"; + switch (form_hash_) { + case "fmov_s_floatimm"_h: + form = "'Sd"; + break; + case "fmov_d_floatimm"_h: + form = "'Dd"; + break; + } + FormatWithDecodedMnemonic(instr, form, suffix); +} + + +void Disassembler::VisitFPIntegerConvert(const Instruction *instr) { + const char *form = "'Rd, 'Fn"; + switch (form_hash_) { + case "fmov_h32_float2int"_h: + case "fmov_h64_float2int"_h: + case "fmov_s32_float2int"_h: + case "fmov_d64_float2int"_h: + case "scvtf_d32_float2int"_h: + case "scvtf_d64_float2int"_h: + case "scvtf_h32_float2int"_h: + case "scvtf_h64_float2int"_h: + case "scvtf_s32_float2int"_h: + case "scvtf_s64_float2int"_h: + case "ucvtf_d32_float2int"_h: + case "ucvtf_d64_float2int"_h: + case "ucvtf_h32_float2int"_h: + case "ucvtf_h64_float2int"_h: + case "ucvtf_s32_float2int"_h: + case "ucvtf_s64_float2int"_h: + form = "'Fd, 'Rn"; + break; + case "fmov_v64i_float2int"_h: + form = "'Vd.D[1], 'Rn"; + break; + case "fmov_64vx_float2int"_h: + form = "'Rd, 'Vn.D[1]"; + break; + } + FormatWithDecodedMnemonic(instr, form); +} + + +void Disassembler::VisitFPFixedPointConvert(const Instruction *instr) { + const char *form = "'Rd, 'Fn"; + const char *suffix = ", 'IFPFBits"; + + switch (form_hash_) { + case "scvtf_d32_float2fix"_h: + case "scvtf_d64_float2fix"_h: + case "scvtf_h32_float2fix"_h: + case "scvtf_h64_float2fix"_h: + case "scvtf_s32_float2fix"_h: + case "scvtf_s64_float2fix"_h: + case "ucvtf_d32_float2fix"_h: + case "ucvtf_d64_float2fix"_h: + case "ucvtf_h32_float2fix"_h: + case "ucvtf_h64_float2fix"_h: + case "ucvtf_s32_float2fix"_h: + case "ucvtf_s64_float2fix"_h: + form = "'Fd, 'Rn"; + break; + } + FormatWithDecodedMnemonic(instr, form, suffix); +} + +void Disassembler::DisassembleNoArgs(const Instruction *instr) { + Format(instr, mnemonic_.c_str(), ""); +} + +void Disassembler::VisitSystem(const Instruction *instr) { + const char 
*mnemonic = mnemonic_.c_str(); + const char *form = "(System)"; + const char *suffix = NULL; + + switch (form_hash_) { + case "clrex_bn_barriers"_h: + form = (instr->GetCRm() == 0xf) ? "" : "'IX"; + break; + case "mrs_rs_systemmove"_h: + form = "'Xt, 'IY"; + break; + case "msr_sr_systemmove"_h: + form = "'IY, 'Xt"; + break; + case "bti_hb_hints"_h: + switch (instr->ExtractBits(7, 6)) { + case 0: + form = ""; + break; + case 1: + form = "c"; + break; + case 2: + form = "j"; + break; + case 3: + form = "jc"; + break; + } + break; + case "hint_hm_hints"_h: + form = "'IH"; + break; + case Hash("dmb_bo_barriers"): + form = "'M"; + break; + case Hash("dsb_bo_barriers"): { + int crm = instr->GetCRm(); + if (crm == 0) { + mnemonic = "ssbb"; + form = ""; + } else if (crm == 4) { + mnemonic = "pssbb"; + form = ""; + } else { + form = "'M"; + } + break; + } + case Hash("sys_cr_systeminstrs"): { + mnemonic = "dc"; + suffix = ", 'Xt"; + + const std::map<uint32_t, const char *> dcop = { + {IVAU, "ivau"}, + {CVAC, "cvac"}, + {CVAU, "cvau"}, + {CVAP, "cvap"}, + {CVADP, "cvadp"}, + {CIVAC, "civac"}, + {ZVA, "zva"}, + {GVA, "gva"}, + {GZVA, "gzva"}, + {CGVAC, "cgvac"}, + {CGDVAC, "cgdvac"}, + {CGVAP, "cgvap"}, + {CGDVAP, "cgdvap"}, + {CIGVAC, "cigvac"}, + {CIGDVAC, "cigdvac"}, + }; + + uint32_t sysop = instr->GetSysOp(); + if (dcop.count(sysop)) { + if (sysop == IVAU) { + mnemonic = "ic"; + } + form = dcop.at(sysop); + } else { + mnemonic = "sys"; + form = "'G1, 'Kn, 'Km, 'G2"; + if (instr->GetRt() == 31) { + suffix = NULL; + } + break; + } + } + } + Format(instr, mnemonic, form, suffix); +} + + +void Disassembler::VisitException(const Instruction *instr) { + const char *mnemonic = "unimplemented"; + const char *form = "'IDebug"; + + switch (instr->Mask(ExceptionMask)) { + case HLT: + mnemonic = "hlt"; + break; + case BRK: + mnemonic = "brk"; + break; + case SVC: + mnemonic = "svc"; + break; + case HVC: + mnemonic = "hvc"; + break; + case SMC: + mnemonic = "smc"; + break; + case DCPS1: + mnemonic = "dcps1"; + form = "{'IDebug}"; + break; + case DCPS2: + mnemonic = "dcps2"; + form = "{'IDebug}"; + break; + case DCPS3: + mnemonic = "dcps3"; + form = "{'IDebug}"; + break; + default: + form = "(Exception)"; + } + Format(instr, mnemonic, form); +} + + +void Disassembler::VisitCrypto2RegSHA(const Instruction *instr) { + VisitUnimplemented(instr); +} + + +void Disassembler::VisitCrypto3RegSHA(const Instruction *instr) { + VisitUnimplemented(instr); +} + + +void Disassembler::VisitCryptoAES(const Instruction *instr) { + VisitUnimplemented(instr); +} + +void Disassembler::DisassembleNEON2RegAddlp(const Instruction *instr) { + const char *mnemonic = mnemonic_.c_str(); + const char *form = "'Vd.%s, 'Vn.%s"; + + static const NEONFormatMap map_lp_ta = + {{23, 22, 30}, {NF_4H, NF_8H, NF_2S, NF_4S, NF_1D, NF_2D}}; + NEONFormatDecoder nfd(instr); + nfd.SetFormatMap(0, &map_lp_ta); + Format(instr, mnemonic, nfd.Substitute(form)); +} + +void Disassembler::DisassembleNEON2RegCompare(const Instruction *instr) { + const char *mnemonic = mnemonic_.c_str(); + const char *form = "'Vd.%s, 'Vn.%s, #0"; + NEONFormatDecoder nfd(instr); + Format(instr, mnemonic, nfd.Substitute(form)); +} + +void Disassembler::DisassembleNEON2RegFPCompare(const Instruction *instr) { + const char *mnemonic = mnemonic_.c_str(); + const char *form = "'Vd.%s, 'Vn.%s, #0.0"; + NEONFormatDecoder nfd(instr, NEONFormatDecoder::FPFormatMap()); + Format(instr, mnemonic, nfd.Substitute(form)); +} + +void Disassembler::DisassembleNEON2RegFPConvert(const Instruction *instr) { + const char 
*mnemonic = mnemonic_.c_str(); + const char *form = "'Vd.%s, 'Vn.%s"; + static const NEONFormatMap map_cvt_ta = {{22}, {NF_4S, NF_2D}}; + + static const NEONFormatMap map_cvt_tb = {{22, 30}, + {NF_4H, NF_8H, NF_2S, NF_4S}}; + NEONFormatDecoder nfd(instr, &map_cvt_tb, &map_cvt_ta); + + VectorFormat vform_dst = nfd.GetVectorFormat(0); + switch (form_hash_) { + case "fcvtl_asimdmisc_l"_h: + nfd.SetFormatMaps(&map_cvt_ta, &map_cvt_tb); + break; + case "fcvtxn_asimdmisc_n"_h: + if ((vform_dst != kFormat2S) && (vform_dst != kFormat4S)) { + mnemonic = NULL; + } + break; + } + Format(instr, nfd.Mnemonic(mnemonic), nfd.Substitute(form)); +} + +void Disassembler::DisassembleNEON2RegFP(const Instruction *instr) { + const char *mnemonic = mnemonic_.c_str(); + const char *form = "'Vd.%s, 'Vn.%s"; + NEONFormatDecoder nfd(instr, NEONFormatDecoder::FPFormatMap()); + Format(instr, mnemonic, nfd.Substitute(form)); +} + +void Disassembler::DisassembleNEON2RegLogical(const Instruction *instr) { + const char *mnemonic = mnemonic_.c_str(); + const char *form = "'Vd.%s, 'Vn.%s"; + NEONFormatDecoder nfd(instr, NEONFormatDecoder::LogicalFormatMap()); + if (form_hash_ == "not_asimdmisc_r"_h) { + mnemonic = "mvn"; + } + Format(instr, mnemonic, nfd.Substitute(form)); +} + +void Disassembler::DisassembleNEON2RegExtract(const Instruction *instr) { + const char *mnemonic = mnemonic_.c_str(); + const char *form = "'Vd.%s, 'Vn.%s"; + const char *suffix = NULL; + NEONFormatDecoder nfd(instr, + NEONFormatDecoder::IntegerFormatMap(), + NEONFormatDecoder::LongIntegerFormatMap()); + + if (form_hash_ == "shll_asimdmisc_s"_h) { + nfd.SetFormatMaps(nfd.LongIntegerFormatMap(), nfd.IntegerFormatMap()); + switch (instr->GetNEONSize()) { + case 0: + suffix = ", #8"; + break; + case 1: + suffix = ", #16"; + break; + case 2: + suffix = ", #32"; + break; + } + } + Format(instr, nfd.Mnemonic(mnemonic), nfd.Substitute(form), suffix); +} + +void Disassembler::VisitNEON2RegMisc(const Instruction *instr) { + const char *mnemonic = mnemonic_.c_str(); + const char *form = "'Vd.%s, 'Vn.%s"; + NEONFormatDecoder nfd(instr); + + VectorFormat vform_dst = nfd.GetVectorFormat(0); + if (vform_dst != kFormatUndefined) { + uint32_t ls_dst = LaneSizeInBitsFromFormat(vform_dst); + switch (form_hash_) { + case "cnt_asimdmisc_r"_h: + case "rev16_asimdmisc_r"_h: + if (ls_dst != kBRegSize) { + mnemonic = NULL; + } + break; + case "rev32_asimdmisc_r"_h: + if ((ls_dst == kDRegSize) || (ls_dst == kSRegSize)) { + mnemonic = NULL; + } + break; + case "urecpe_asimdmisc_r"_h: + case "ursqrte_asimdmisc_r"_h: + // For urecpe and ursqrte, only S-sized elements are supported. The MSB + // of the size field is always set by the instruction (0b1x) so we need + // only check and discard D-sized elements here. 
+ VIXL_ASSERT((ls_dst == kSRegSize) || (ls_dst == kDRegSize)); + VIXL_FALLTHROUGH(); + case "clz_asimdmisc_r"_h: + case "cls_asimdmisc_r"_h: + case "rev64_asimdmisc_r"_h: + if (ls_dst == kDRegSize) { + mnemonic = NULL; + } + break; + } + } + + Format(instr, mnemonic, nfd.Substitute(form)); +} + +void Disassembler::VisitNEON2RegMiscFP16(const Instruction *instr) { + const char *mnemonic = mnemonic_.c_str(); + const char *form = "'Vd.'?30:84h, 'Vn.'?30:84h"; + const char *suffix = NULL; + + switch (form_hash_) { + case "fcmeq_asimdmiscfp16_fz"_h: + case "fcmge_asimdmiscfp16_fz"_h: + case "fcmgt_asimdmiscfp16_fz"_h: + case "fcmle_asimdmiscfp16_fz"_h: + case "fcmlt_asimdmiscfp16_fz"_h: + suffix = ", #0.0"; + } + Format(instr, mnemonic, form, suffix); +} + +void Disassembler::DisassembleNEON3SameLogical(const Instruction *instr) { + const char *mnemonic = mnemonic_.c_str(); + const char *form = "'Vd.%s, 'Vn.%s, 'Vm.%s"; + NEONFormatDecoder nfd(instr, NEONFormatDecoder::LogicalFormatMap()); + + switch (form_hash_) { + case "orr_asimdsame_only"_h: + if (instr->GetRm() == instr->GetRn()) { + mnemonic = "mov"; + form = "'Vd.%s, 'Vn.%s"; + } + break; + case "pmul_asimdsame_only"_h: + if (instr->GetNEONSize() != 0) { + mnemonic = NULL; + } + } + Format(instr, mnemonic, nfd.Substitute(form)); +} + +void Disassembler::DisassembleNEON3SameFHM(const Instruction *instr) { + FormatWithDecodedMnemonic(instr, "'Vd.'?30:42s, 'Vn.'?30:42h, 'Vm.'?30:42h"); +} + +void Disassembler::DisassembleNEON3SameNoD(const Instruction *instr) { + const char *mnemonic = mnemonic_.c_str(); + const char *form = "'Vd.%s, 'Vn.%s, 'Vm.%s"; + static const NEONFormatMap map = + {{23, 22, 30}, + {NF_8B, NF_16B, NF_4H, NF_8H, NF_2S, NF_4S, NF_UNDEF, NF_UNDEF}}; + NEONFormatDecoder nfd(instr, &map); + Format(instr, mnemonic, nfd.Substitute(form)); +} + +void Disassembler::VisitNEON3Same(const Instruction *instr) { + const char *mnemonic = mnemonic_.c_str(); + const char *form = "'Vd.%s, 'Vn.%s, 'Vm.%s"; + NEONFormatDecoder nfd(instr); + + if (instr->Mask(NEON3SameFPFMask) == NEON3SameFPFixed) { + nfd.SetFormatMaps(nfd.FPFormatMap()); + } + + VectorFormat vform_dst = nfd.GetVectorFormat(0); + if (vform_dst != kFormatUndefined) { + uint32_t ls_dst = LaneSizeInBitsFromFormat(vform_dst); + switch (form_hash_) { + case "sqdmulh_asimdsame_only"_h: + case "sqrdmulh_asimdsame_only"_h: + if ((ls_dst == kBRegSize) || (ls_dst == kDRegSize)) { + mnemonic = NULL; + } + break; + } + } + Format(instr, mnemonic, nfd.Substitute(form)); +} + +void Disassembler::VisitNEON3SameFP16(const Instruction *instr) { + const char *mnemonic = mnemonic_.c_str(); + const char *form = "'Vd.%s, 'Vn.%s, 'Vm.%s"; + NEONFormatDecoder nfd(instr); + nfd.SetFormatMaps(nfd.FP16FormatMap()); + Format(instr, mnemonic, nfd.Substitute(form)); +} + +void Disassembler::VisitNEON3SameExtra(const Instruction *instr) { + static const NEONFormatMap map_usdot = {{30}, {NF_8B, NF_16B}}; + + const char *mnemonic = mnemonic_.c_str(); + const char *form = "'Vd.%s, 'Vn.%s, 'Vm.%s"; + const char *suffix = NULL; + + NEONFormatDecoder nfd(instr); + + switch (form_hash_) { + case "fcmla_asimdsame2_c"_h: + suffix = ", #'u1211*90"; + break; + case "fcadd_asimdsame2_c"_h: + // Bit 10 is always set, so this gives 90 * 1 or 3. + suffix = ", #'u1212:1010*90"; + break; + case "sdot_asimdsame2_d"_h: + case "udot_asimdsame2_d"_h: + case "usdot_asimdsame2_d"_h: + nfd.SetFormatMap(1, &map_usdot); + nfd.SetFormatMap(2, &map_usdot); + break; + default: + // sqrdml[as]h - nothing to do. 
+ break; + } + + Format(instr, mnemonic, nfd.Substitute(form), suffix); +} + + +void Disassembler::VisitNEON3Different(const Instruction *instr) { + const char *mnemonic = mnemonic_.c_str(); + const char *form = "'Vd.%s, 'Vn.%s, 'Vm.%s"; + + NEONFormatDecoder nfd(instr); + nfd.SetFormatMap(0, nfd.LongIntegerFormatMap()); + + switch (form_hash_) { + case "saddw_asimddiff_w"_h: + case "ssubw_asimddiff_w"_h: + case "uaddw_asimddiff_w"_h: + case "usubw_asimddiff_w"_h: + nfd.SetFormatMap(1, nfd.LongIntegerFormatMap()); + break; + case "addhn_asimddiff_n"_h: + case "raddhn_asimddiff_n"_h: + case "rsubhn_asimddiff_n"_h: + case "subhn_asimddiff_n"_h: + nfd.SetFormatMaps(nfd.LongIntegerFormatMap()); + nfd.SetFormatMap(0, nfd.IntegerFormatMap()); + break; + case "pmull_asimddiff_l"_h: + if (nfd.GetVectorFormat(0) != kFormat8H) { + mnemonic = NULL; + } + break; + case "sqdmlal_asimddiff_l"_h: + case "sqdmlsl_asimddiff_l"_h: + case "sqdmull_asimddiff_l"_h: + if (nfd.GetVectorFormat(0) == kFormat8H) { + mnemonic = NULL; + } + break; + } + Format(instr, nfd.Mnemonic(mnemonic), nfd.Substitute(form)); +} + +void Disassembler::DisassembleNEONFPAcrossLanes(const Instruction *instr) { + const char *mnemonic = mnemonic_.c_str(); + const char *form = "'Sd, 'Vn.4s"; + if ((instr->GetNEONQ() == 0) || (instr->ExtractBit(22) == 1)) { + mnemonic = NULL; + } + Format(instr, mnemonic, form); +} + +void Disassembler::DisassembleNEONFP16AcrossLanes(const Instruction *instr) { + FormatWithDecodedMnemonic(instr, "'Hd, 'Vn.'?30:84h"); +} + +void Disassembler::VisitNEONAcrossLanes(const Instruction *instr) { + const char *mnemonic = mnemonic_.c_str(); + const char *form = "%sd, 'Vn.%s"; + + NEONFormatDecoder nfd(instr, + NEONFormatDecoder::ScalarFormatMap(), + NEONFormatDecoder::IntegerFormatMap()); + + switch (form_hash_) { + case "saddlv_asimdall_only"_h: + case "uaddlv_asimdall_only"_h: + nfd.SetFormatMap(0, nfd.LongScalarFormatMap()); + } + + VectorFormat vform_src = nfd.GetVectorFormat(1); + if ((vform_src == kFormat2S) || (vform_src == kFormat2D)) { + mnemonic = NULL; + } + + Format(instr, + mnemonic, + nfd.Substitute(form, + NEONFormatDecoder::kPlaceholder, + NEONFormatDecoder::kFormat)); +} + +void Disassembler::VisitNEONByIndexedElement(const Instruction *instr) { + const char *form = "'Vd.%s, 'Vn.%s, 'Vf.%s['IVByElemIndex]"; + static const NEONFormatMap map_v = + {{23, 22, 30}, + {NF_UNDEF, NF_UNDEF, NF_4H, NF_8H, NF_2S, NF_4S, NF_UNDEF, NF_UNDEF}}; + static const NEONFormatMap map_s = {{23, 22}, + {NF_UNDEF, NF_H, NF_S, NF_UNDEF}}; + NEONFormatDecoder nfd(instr, &map_v, &map_v, &map_s); + Format(instr, mnemonic_.c_str(), nfd.Substitute(form)); +} + +void Disassembler::DisassembleNEONMulByElementLong(const Instruction *instr) { + const char *form = "'Vd.%s, 'Vn.%s, 'Vf.%s['IVByElemIndex]"; + // TODO: Disallow undefined element types for this instruction. + static const NEONFormatMap map_ta = {{23, 22}, {NF_UNDEF, NF_4S, NF_2D}}; + NEONFormatDecoder nfd(instr, + &map_ta, + NEONFormatDecoder::IntegerFormatMap(), + NEONFormatDecoder::ScalarFormatMap()); + Format(instr, nfd.Mnemonic(mnemonic_.c_str()), nfd.Substitute(form)); +} + +void Disassembler::DisassembleNEONDotProdByElement(const Instruction *instr) { + const char *form = instr->ExtractBit(30) ? 
"'Vd.4s, 'Vn.16" : "'Vd.2s, 'Vn.8"; + const char *suffix = "b, 'Vm.4b['u1111:2121]"; + Format(instr, mnemonic_.c_str(), form, suffix); +} + +void Disassembler::DisassembleNEONFPMulByElement(const Instruction *instr) { + const char *form = "'Vd.%s, 'Vn.%s, 'Vf.%s['IVByElemIndex]"; + NEONFormatDecoder nfd(instr, + NEONFormatDecoder::FPFormatMap(), + NEONFormatDecoder::FPFormatMap(), + NEONFormatDecoder::FPScalarFormatMap()); + Format(instr, mnemonic_.c_str(), nfd.Substitute(form)); +} + +void Disassembler::DisassembleNEONHalfFPMulByElement(const Instruction *instr) { + FormatWithDecodedMnemonic(instr, + "'Vd.'?30:84h, 'Vn.'?30:84h, " + "'Ve.h['IVByElemIndex]"); +} + +void Disassembler::DisassembleNEONFPMulByElementLong(const Instruction *instr) { + FormatWithDecodedMnemonic(instr, + "'Vd.'?30:42s, 'Vn.'?30:42h, " + "'Ve.h['IVByElemIndexFHM]"); +} + +void Disassembler::DisassembleNEONComplexMulByElement( + const Instruction *instr) { + const char *form = "'Vd.%s, 'Vn.%s, 'Vm.%s['IVByElemIndexRot], #'u1413*90"; + // TODO: Disallow undefined element types for this instruction. + static const NEONFormatMap map_cn = + {{23, 22, 30}, + {NF_UNDEF, NF_UNDEF, NF_4H, NF_8H, NF_UNDEF, NF_4S, NF_UNDEF, NF_UNDEF}}; + NEONFormatDecoder nfd(instr, + &map_cn, + &map_cn, + NEONFormatDecoder::ScalarFormatMap()); + Format(instr, mnemonic_.c_str(), nfd.Substitute(form)); +} + +void Disassembler::VisitNEONCopy(const Instruction *instr) { + const char *mnemonic = mnemonic_.c_str(); + const char *form = "(NEONCopy)"; + + NEONFormatDecoder nfd(instr, + NEONFormatDecoder::TriangularFormatMap(), + NEONFormatDecoder::TriangularScalarFormatMap()); + + switch (form_hash_) { + case "ins_asimdins_iv_v"_h: + mnemonic = "mov"; + nfd.SetFormatMap(0, nfd.TriangularScalarFormatMap()); + form = "'Vd.%s['IVInsIndex1], 'Vn.%s['IVInsIndex2]"; + break; + case "ins_asimdins_ir_r"_h: + mnemonic = "mov"; + nfd.SetFormatMap(0, nfd.TriangularScalarFormatMap()); + if (nfd.GetVectorFormat() == kFormatD) { + form = "'Vd.%s['IVInsIndex1], 'Xn"; + } else { + form = "'Vd.%s['IVInsIndex1], 'Wn"; + } + break; + case "umov_asimdins_w_w"_h: + case "umov_asimdins_x_x"_h: + if (instr->Mask(NEON_Q) || ((instr->GetImmNEON5() & 7) == 4)) { + mnemonic = "mov"; + } + nfd.SetFormatMap(0, nfd.TriangularScalarFormatMap()); + if (nfd.GetVectorFormat() == kFormatD) { + form = "'Xd, 'Vn.%s['IVInsIndex1]"; + } else { + form = "'Wd, 'Vn.%s['IVInsIndex1]"; + } + break; + case "smov_asimdins_w_w"_h: + case "smov_asimdins_x_x"_h: { + nfd.SetFormatMap(0, nfd.TriangularScalarFormatMap()); + VectorFormat vform = nfd.GetVectorFormat(); + if ((vform == kFormatD) || + ((vform == kFormatS) && (instr->ExtractBit(30) == 0))) { + mnemonic = NULL; + } + form = "'R30d, 'Vn.%s['IVInsIndex1]"; + break; + } + case "dup_asimdins_dv_v"_h: + form = "'Vd.%s, 'Vn.%s['IVInsIndex1]"; + break; + case "dup_asimdins_dr_r"_h: + if (nfd.GetVectorFormat() == kFormat2D) { + form = "'Vd.%s, 'Xn"; + } else { + form = "'Vd.%s, 'Wn"; + } + } + Format(instr, mnemonic, nfd.Substitute(form)); +} + + +void Disassembler::VisitNEONExtract(const Instruction *instr) { + const char *mnemonic = mnemonic_.c_str(); + const char *form = "'Vd.%s, 'Vn.%s, 'Vm.%s, 'IVExtract"; + NEONFormatDecoder nfd(instr, NEONFormatDecoder::LogicalFormatMap()); + if ((instr->GetImmNEONExt() > 7) && (instr->GetNEONQ() == 0)) { + mnemonic = NULL; + } + Format(instr, mnemonic, nfd.Substitute(form)); +} + + +void Disassembler::VisitNEONLoadStoreMultiStruct(const Instruction *instr) { + const char *mnemonic = NULL; + const char 
*form = NULL; + const char *form_1v = "{'Vt.%1$s}, ['Xns]"; + const char *form_2v = "{'Vt.%1$s, 'Vt2.%1$s}, ['Xns]"; + const char *form_3v = "{'Vt.%1$s, 'Vt2.%1$s, 'Vt3.%1$s}, ['Xns]"; + const char *form_4v = "{'Vt.%1$s, 'Vt2.%1$s, 'Vt3.%1$s, 'Vt4.%1$s}, ['Xns]"; + NEONFormatDecoder nfd(instr, NEONFormatDecoder::LoadStoreFormatMap()); + + switch (instr->Mask(NEONLoadStoreMultiStructMask)) { + case NEON_LD1_1v: + mnemonic = "ld1"; + form = form_1v; + break; + case NEON_LD1_2v: + mnemonic = "ld1"; + form = form_2v; + break; + case NEON_LD1_3v: + mnemonic = "ld1"; + form = form_3v; + break; + case NEON_LD1_4v: + mnemonic = "ld1"; + form = form_4v; + break; + case NEON_LD2: + mnemonic = "ld2"; + form = form_2v; + break; + case NEON_LD3: + mnemonic = "ld3"; + form = form_3v; + break; + case NEON_LD4: + mnemonic = "ld4"; + form = form_4v; + break; + case NEON_ST1_1v: + mnemonic = "st1"; + form = form_1v; + break; + case NEON_ST1_2v: + mnemonic = "st1"; + form = form_2v; + break; + case NEON_ST1_3v: + mnemonic = "st1"; + form = form_3v; + break; + case NEON_ST1_4v: + mnemonic = "st1"; + form = form_4v; + break; + case NEON_ST2: + mnemonic = "st2"; + form = form_2v; + break; + case NEON_ST3: + mnemonic = "st3"; + form = form_3v; + break; + case NEON_ST4: + mnemonic = "st4"; + form = form_4v; + break; + default: + break; + } + + // Work out unallocated encodings. + bool allocated = (mnemonic != NULL); + switch (instr->Mask(NEONLoadStoreMultiStructMask)) { + case NEON_LD2: + case NEON_LD3: + case NEON_LD4: + case NEON_ST2: + case NEON_ST3: + case NEON_ST4: + // LD[2-4] and ST[2-4] cannot use .1d format. + allocated = (instr->GetNEONQ() != 0) || (instr->GetNEONLSSize() != 3); + break; + default: + break; + } + if (allocated) { + VIXL_ASSERT(mnemonic != NULL); + VIXL_ASSERT(form != NULL); + } else { + mnemonic = "unallocated"; + form = "(NEONLoadStoreMultiStruct)"; + } + + Format(instr, mnemonic, nfd.Substitute(form)); +} + + +void Disassembler::VisitNEONLoadStoreMultiStructPostIndex( + const Instruction *instr) { + const char *mnemonic = NULL; + const char *form = NULL; + const char *form_1v = "{'Vt.%1$s}, ['Xns], 'Xmr1"; + const char *form_2v = "{'Vt.%1$s, 'Vt2.%1$s}, ['Xns], 'Xmr2"; + const char *form_3v = "{'Vt.%1$s, 'Vt2.%1$s, 'Vt3.%1$s}, ['Xns], 'Xmr3"; + const char *form_4v = + "{'Vt.%1$s, 'Vt2.%1$s, 'Vt3.%1$s, 'Vt4.%1$s}, ['Xns], 'Xmr4"; + NEONFormatDecoder nfd(instr, NEONFormatDecoder::LoadStoreFormatMap()); + + switch (instr->Mask(NEONLoadStoreMultiStructPostIndexMask)) { + case NEON_LD1_1v_post: + mnemonic = "ld1"; + form = form_1v; + break; + case NEON_LD1_2v_post: + mnemonic = "ld1"; + form = form_2v; + break; + case NEON_LD1_3v_post: + mnemonic = "ld1"; + form = form_3v; + break; + case NEON_LD1_4v_post: + mnemonic = "ld1"; + form = form_4v; + break; + case NEON_LD2_post: + mnemonic = "ld2"; + form = form_2v; + break; + case NEON_LD3_post: + mnemonic = "ld3"; + form = form_3v; + break; + case NEON_LD4_post: + mnemonic = "ld4"; + form = form_4v; + break; + case NEON_ST1_1v_post: + mnemonic = "st1"; + form = form_1v; + break; + case NEON_ST1_2v_post: + mnemonic = "st1"; + form = form_2v; + break; + case NEON_ST1_3v_post: + mnemonic = "st1"; + form = form_3v; + break; + case NEON_ST1_4v_post: + mnemonic = "st1"; + form = form_4v; + break; + case NEON_ST2_post: + mnemonic = "st2"; + form = form_2v; + break; + case NEON_ST3_post: + mnemonic = "st3"; + form = form_3v; + break; + case NEON_ST4_post: + mnemonic = "st4"; + form = form_4v; + break; + default: + break; + } + + // Work out 
unallocated encodings. + bool allocated = (mnemonic != NULL); + switch (instr->Mask(NEONLoadStoreMultiStructPostIndexMask)) { + case NEON_LD2_post: + case NEON_LD3_post: + case NEON_LD4_post: + case NEON_ST2_post: + case NEON_ST3_post: + case NEON_ST4_post: + // LD[2-4] and ST[2-4] cannot use .1d format. + allocated = (instr->GetNEONQ() != 0) || (instr->GetNEONLSSize() != 3); + break; + default: + break; + } + if (allocated) { + VIXL_ASSERT(mnemonic != NULL); + VIXL_ASSERT(form != NULL); + } else { + mnemonic = "unallocated"; + form = "(NEONLoadStoreMultiStructPostIndex)"; + } + + Format(instr, mnemonic, nfd.Substitute(form)); +} + + +void Disassembler::VisitNEONLoadStoreSingleStruct(const Instruction *instr) { + const char *mnemonic = NULL; + const char *form = NULL; + + const char *form_1b = "{'Vt.b}['IVLSLane0], ['Xns]"; + const char *form_1h = "{'Vt.h}['IVLSLane1], ['Xns]"; + const char *form_1s = "{'Vt.s}['IVLSLane2], ['Xns]"; + const char *form_1d = "{'Vt.d}['IVLSLane3], ['Xns]"; + NEONFormatDecoder nfd(instr, NEONFormatDecoder::LoadStoreFormatMap()); + + switch (instr->Mask(NEONLoadStoreSingleStructMask)) { + case NEON_LD1_b: + mnemonic = "ld1"; + form = form_1b; + break; + case NEON_LD1_h: + mnemonic = "ld1"; + form = form_1h; + break; + case NEON_LD1_s: + mnemonic = "ld1"; + VIXL_STATIC_ASSERT((NEON_LD1_s | (1 << NEONLSSize_offset)) == NEON_LD1_d); + form = ((instr->GetNEONLSSize() & 1) == 0) ? form_1s : form_1d; + break; + case NEON_ST1_b: + mnemonic = "st1"; + form = form_1b; + break; + case NEON_ST1_h: + mnemonic = "st1"; + form = form_1h; + break; + case NEON_ST1_s: + mnemonic = "st1"; + VIXL_STATIC_ASSERT((NEON_ST1_s | (1 << NEONLSSize_offset)) == NEON_ST1_d); + form = ((instr->GetNEONLSSize() & 1) == 0) ? form_1s : form_1d; + break; + case NEON_LD1R: + mnemonic = "ld1r"; + form = "{'Vt.%s}, ['Xns]"; + break; + case NEON_LD2_b: + case NEON_ST2_b: + mnemonic = (instr->GetLdStXLoad() == 1) ? "ld2" : "st2"; + form = "{'Vt.b, 'Vt2.b}['IVLSLane0], ['Xns]"; + break; + case NEON_LD2_h: + case NEON_ST2_h: + mnemonic = (instr->GetLdStXLoad() == 1) ? "ld2" : "st2"; + form = "{'Vt.h, 'Vt2.h}['IVLSLane1], ['Xns]"; + break; + case NEON_LD2_s: + case NEON_ST2_s: + VIXL_STATIC_ASSERT((NEON_ST2_s | (1 << NEONLSSize_offset)) == NEON_ST2_d); + VIXL_STATIC_ASSERT((NEON_LD2_s | (1 << NEONLSSize_offset)) == NEON_LD2_d); + mnemonic = (instr->GetLdStXLoad() == 1) ? "ld2" : "st2"; + if ((instr->GetNEONLSSize() & 1) == 0) { + form = "{'Vt.s, 'Vt2.s}['IVLSLane2], ['Xns]"; + } else { + form = "{'Vt.d, 'Vt2.d}['IVLSLane3], ['Xns]"; + } + break; + case NEON_LD2R: + mnemonic = "ld2r"; + form = "{'Vt.%s, 'Vt2.%s}, ['Xns]"; + break; + case NEON_LD3_b: + case NEON_ST3_b: + mnemonic = (instr->GetLdStXLoad() == 1) ? "ld3" : "st3"; + form = "{'Vt.b, 'Vt2.b, 'Vt3.b}['IVLSLane0], ['Xns]"; + break; + case NEON_LD3_h: + case NEON_ST3_h: + mnemonic = (instr->GetLdStXLoad() == 1) ? "ld3" : "st3"; + form = "{'Vt.h, 'Vt2.h, 'Vt3.h}['IVLSLane1], ['Xns]"; + break; + case NEON_LD3_s: + case NEON_ST3_s: + mnemonic = (instr->GetLdStXLoad() == 1) ? "ld3" : "st3"; + if ((instr->GetNEONLSSize() & 1) == 0) { + form = "{'Vt.s, 'Vt2.s, 'Vt3.s}['IVLSLane2], ['Xns]"; + } else { + form = "{'Vt.d, 'Vt2.d, 'Vt3.d}['IVLSLane3], ['Xns]"; + } + break; + case NEON_LD3R: + mnemonic = "ld3r"; + form = "{'Vt.%s, 'Vt2.%s, 'Vt3.%s}, ['Xns]"; + break; + case NEON_LD4_b: + case NEON_ST4_b: + mnemonic = (instr->GetLdStXLoad() == 1) ? 
"ld4" : "st4"; + form = "{'Vt.b, 'Vt2.b, 'Vt3.b, 'Vt4.b}['IVLSLane0], ['Xns]"; + break; + case NEON_LD4_h: + case NEON_ST4_h: + mnemonic = (instr->GetLdStXLoad() == 1) ? "ld4" : "st4"; + form = "{'Vt.h, 'Vt2.h, 'Vt3.h, 'Vt4.h}['IVLSLane1], ['Xns]"; + break; + case NEON_LD4_s: + case NEON_ST4_s: + VIXL_STATIC_ASSERT((NEON_LD4_s | (1 << NEONLSSize_offset)) == NEON_LD4_d); + VIXL_STATIC_ASSERT((NEON_ST4_s | (1 << NEONLSSize_offset)) == NEON_ST4_d); + mnemonic = (instr->GetLdStXLoad() == 1) ? "ld4" : "st4"; + if ((instr->GetNEONLSSize() & 1) == 0) { + form = "{'Vt.s, 'Vt2.s, 'Vt3.s, 'Vt4.s}['IVLSLane2], ['Xns]"; + } else { + form = "{'Vt.d, 'Vt2.d, 'Vt3.d, 'Vt4.d}['IVLSLane3], ['Xns]"; + } + break; + case NEON_LD4R: + mnemonic = "ld4r"; + form = "{'Vt.%1$s, 'Vt2.%1$s, 'Vt3.%1$s, 'Vt4.%1$s}, ['Xns]"; + break; + default: + break; + } + + // Work out unallocated encodings. + bool allocated = (mnemonic != NULL); + switch (instr->Mask(NEONLoadStoreSingleStructMask)) { + case NEON_LD1_h: + case NEON_LD2_h: + case NEON_LD3_h: + case NEON_LD4_h: + case NEON_ST1_h: + case NEON_ST2_h: + case NEON_ST3_h: + case NEON_ST4_h: + VIXL_ASSERT(allocated); + allocated = ((instr->GetNEONLSSize() & 1) == 0); + break; + case NEON_LD1_s: + case NEON_LD2_s: + case NEON_LD3_s: + case NEON_LD4_s: + case NEON_ST1_s: + case NEON_ST2_s: + case NEON_ST3_s: + case NEON_ST4_s: + VIXL_ASSERT(allocated); + allocated = (instr->GetNEONLSSize() <= 1) && + ((instr->GetNEONLSSize() == 0) || (instr->GetNEONS() == 0)); + break; + case NEON_LD1R: + case NEON_LD2R: + case NEON_LD3R: + case NEON_LD4R: + VIXL_ASSERT(allocated); + allocated = (instr->GetNEONS() == 0); + break; + default: + break; + } + if (allocated) { + VIXL_ASSERT(mnemonic != NULL); + VIXL_ASSERT(form != NULL); + } else { + mnemonic = "unallocated"; + form = "(NEONLoadStoreSingleStruct)"; + } + + Format(instr, mnemonic, nfd.Substitute(form)); +} + + +void Disassembler::VisitNEONLoadStoreSingleStructPostIndex( + const Instruction *instr) { + const char *mnemonic = NULL; + const char *form = NULL; + + const char *form_1b = "{'Vt.b}['IVLSLane0], ['Xns], 'Xmb1"; + const char *form_1h = "{'Vt.h}['IVLSLane1], ['Xns], 'Xmb2"; + const char *form_1s = "{'Vt.s}['IVLSLane2], ['Xns], 'Xmb4"; + const char *form_1d = "{'Vt.d}['IVLSLane3], ['Xns], 'Xmb8"; + NEONFormatDecoder nfd(instr, NEONFormatDecoder::LoadStoreFormatMap()); + + switch (instr->Mask(NEONLoadStoreSingleStructPostIndexMask)) { + case NEON_LD1_b_post: + mnemonic = "ld1"; + form = form_1b; + break; + case NEON_LD1_h_post: + mnemonic = "ld1"; + form = form_1h; + break; + case NEON_LD1_s_post: + mnemonic = "ld1"; + VIXL_STATIC_ASSERT((NEON_LD1_s | (1 << NEONLSSize_offset)) == NEON_LD1_d); + form = ((instr->GetNEONLSSize() & 1) == 0) ? form_1s : form_1d; + break; + case NEON_ST1_b_post: + mnemonic = "st1"; + form = form_1b; + break; + case NEON_ST1_h_post: + mnemonic = "st1"; + form = form_1h; + break; + case NEON_ST1_s_post: + mnemonic = "st1"; + VIXL_STATIC_ASSERT((NEON_ST1_s | (1 << NEONLSSize_offset)) == NEON_ST1_d); + form = ((instr->GetNEONLSSize() & 1) == 0) ? form_1s : form_1d; + break; + case NEON_LD1R_post: + mnemonic = "ld1r"; + form = "{'Vt.%s}, ['Xns], 'Xmz1"; + break; + case NEON_LD2_b_post: + case NEON_ST2_b_post: + mnemonic = (instr->GetLdStXLoad() == 1) ? "ld2" : "st2"; + form = "{'Vt.b, 'Vt2.b}['IVLSLane0], ['Xns], 'Xmb2"; + break; + case NEON_ST2_h_post: + case NEON_LD2_h_post: + mnemonic = (instr->GetLdStXLoad() == 1) ? 
"ld2" : "st2"; + form = "{'Vt.h, 'Vt2.h}['IVLSLane1], ['Xns], 'Xmb4"; + break; + case NEON_LD2_s_post: + case NEON_ST2_s_post: + mnemonic = (instr->GetLdStXLoad() == 1) ? "ld2" : "st2"; + if ((instr->GetNEONLSSize() & 1) == 0) + form = "{'Vt.s, 'Vt2.s}['IVLSLane2], ['Xns], 'Xmb8"; + else + form = "{'Vt.d, 'Vt2.d}['IVLSLane3], ['Xns], 'Xmb16"; + break; + case NEON_LD2R_post: + mnemonic = "ld2r"; + form = "{'Vt.%s, 'Vt2.%s}, ['Xns], 'Xmz2"; + break; + case NEON_LD3_b_post: + case NEON_ST3_b_post: + mnemonic = (instr->GetLdStXLoad() == 1) ? "ld3" : "st3"; + form = "{'Vt.b, 'Vt2.b, 'Vt3.b}['IVLSLane0], ['Xns], 'Xmb3"; + break; + case NEON_LD3_h_post: + case NEON_ST3_h_post: + mnemonic = (instr->GetLdStXLoad() == 1) ? "ld3" : "st3"; + form = "{'Vt.h, 'Vt2.h, 'Vt3.h}['IVLSLane1], ['Xns], 'Xmb6"; + break; + case NEON_LD3_s_post: + case NEON_ST3_s_post: + mnemonic = (instr->GetLdStXLoad() == 1) ? "ld3" : "st3"; + if ((instr->GetNEONLSSize() & 1) == 0) + form = "{'Vt.s, 'Vt2.s, 'Vt3.s}['IVLSLane2], ['Xns], 'Xmb12"; + else + form = "{'Vt.d, 'Vt2.d, 'Vt3.d}['IVLSLane3], ['Xns], 'Xmb24"; + break; + case NEON_LD3R_post: + mnemonic = "ld3r"; + form = "{'Vt.%s, 'Vt2.%s, 'Vt3.%s}, ['Xns], 'Xmz3"; + break; + case NEON_LD4_b_post: + case NEON_ST4_b_post: + mnemonic = (instr->GetLdStXLoad() == 1) ? "ld4" : "st4"; + form = "{'Vt.b, 'Vt2.b, 'Vt3.b, 'Vt4.b}['IVLSLane0], ['Xns], 'Xmb4"; + break; + case NEON_LD4_h_post: + case NEON_ST4_h_post: + mnemonic = (instr->GetLdStXLoad()) == 1 ? "ld4" : "st4"; + form = "{'Vt.h, 'Vt2.h, 'Vt3.h, 'Vt4.h}['IVLSLane1], ['Xns], 'Xmb8"; + break; + case NEON_LD4_s_post: + case NEON_ST4_s_post: + mnemonic = (instr->GetLdStXLoad() == 1) ? "ld4" : "st4"; + if ((instr->GetNEONLSSize() & 1) == 0) + form = "{'Vt.s, 'Vt2.s, 'Vt3.s, 'Vt4.s}['IVLSLane2], ['Xns], 'Xmb16"; + else + form = "{'Vt.d, 'Vt2.d, 'Vt3.d, 'Vt4.d}['IVLSLane3], ['Xns], 'Xmb32"; + break; + case NEON_LD4R_post: + mnemonic = "ld4r"; + form = "{'Vt.%1$s, 'Vt2.%1$s, 'Vt3.%1$s, 'Vt4.%1$s}, ['Xns], 'Xmz4"; + break; + default: + break; + } + + // Work out unallocated encodings. 
+ bool allocated = (mnemonic != NULL); + switch (instr->Mask(NEONLoadStoreSingleStructPostIndexMask)) { + case NEON_LD1_h_post: + case NEON_LD2_h_post: + case NEON_LD3_h_post: + case NEON_LD4_h_post: + case NEON_ST1_h_post: + case NEON_ST2_h_post: + case NEON_ST3_h_post: + case NEON_ST4_h_post: + VIXL_ASSERT(allocated); + allocated = ((instr->GetNEONLSSize() & 1) == 0); + break; + case NEON_LD1_s_post: + case NEON_LD2_s_post: + case NEON_LD3_s_post: + case NEON_LD4_s_post: + case NEON_ST1_s_post: + case NEON_ST2_s_post: + case NEON_ST3_s_post: + case NEON_ST4_s_post: + VIXL_ASSERT(allocated); + allocated = (instr->GetNEONLSSize() <= 1) && + ((instr->GetNEONLSSize() == 0) || (instr->GetNEONS() == 0)); + break; + case NEON_LD1R_post: + case NEON_LD2R_post: + case NEON_LD3R_post: + case NEON_LD4R_post: + VIXL_ASSERT(allocated); + allocated = (instr->GetNEONS() == 0); + break; + default: + break; + } + if (allocated) { + VIXL_ASSERT(mnemonic != NULL); + VIXL_ASSERT(form != NULL); + } else { + mnemonic = "unallocated"; + form = "(NEONLoadStoreSingleStructPostIndex)"; + } + + Format(instr, mnemonic, nfd.Substitute(form)); +} + + +void Disassembler::VisitNEONModifiedImmediate(const Instruction *instr) { + const char *mnemonic = mnemonic_.c_str(); + const char *form = "'Vt.%s, 'IVMIImm8, lsl 'IVMIShiftAmt1"; + + static const NEONFormatMap map_h = {{30}, {NF_4H, NF_8H}}; + static const NEONFormatMap map_s = {{30}, {NF_2S, NF_4S}}; + NEONFormatDecoder nfd(instr, NEONFormatDecoder::LogicalFormatMap()); + + switch (form_hash_) { + case "movi_asimdimm_n_b"_h: + form = "'Vt.%s, 'IVMIImm8"; + break; + case "bic_asimdimm_l_hl"_h: + case "movi_asimdimm_l_hl"_h: + case "mvni_asimdimm_l_hl"_h: + case "orr_asimdimm_l_hl"_h: + nfd.SetFormatMap(0, &map_h); + break; + case "movi_asimdimm_m_sm"_h: + case "mvni_asimdimm_m_sm"_h: + form = "'Vt.%s, 'IVMIImm8, msl 'IVMIShiftAmt2"; + VIXL_FALLTHROUGH(); + case "bic_asimdimm_l_sl"_h: + case "movi_asimdimm_l_sl"_h: + case "mvni_asimdimm_l_sl"_h: + case "orr_asimdimm_l_sl"_h: + nfd.SetFormatMap(0, &map_s); + break; + case "movi_asimdimm_d_ds"_h: + form = "'Dd, 'IVMIImm"; + break; + case "movi_asimdimm_d2_d"_h: + form = "'Vt.2d, 'IVMIImm"; + break; + case "fmov_asimdimm_h_h"_h: + form = "'Vt.%s, 'IFPNeon"; + nfd.SetFormatMap(0, &map_h); + break; + case "fmov_asimdimm_s_s"_h: + form = "'Vt.%s, 'IFPNeon"; + nfd.SetFormatMap(0, &map_s); + break; + case "fmov_asimdimm_d2_d"_h: + form = "'Vt.2d, 'IFPNeon"; + break; + } + + Format(instr, mnemonic, nfd.Substitute(form)); +} + +void Disassembler::DisassembleNEONScalar2RegMiscOnlyD( + const Instruction *instr) { + const char *mnemonic = mnemonic_.c_str(); + const char *form = "'Dd, 'Dn"; + const char *suffix = ", #0"; + if (instr->GetNEONSize() != 3) { + mnemonic = NULL; + } + switch (form_hash_) { + case "abs_asisdmisc_r"_h: + case "neg_asisdmisc_r"_h: + suffix = NULL; + } + Format(instr, mnemonic, form, suffix); +} + +void Disassembler::DisassembleNEONFPScalar2RegMisc(const Instruction *instr) { + const char *mnemonic = mnemonic_.c_str(); + const char *form = "%sd, %sn"; + const char *suffix = NULL; + NEONFormatDecoder nfd(instr, NEONFormatDecoder::FPScalarFormatMap()); + switch (form_hash_) { + case "fcmeq_asisdmisc_fz"_h: + case "fcmge_asisdmisc_fz"_h: + case "fcmgt_asisdmisc_fz"_h: + case "fcmle_asisdmisc_fz"_h: + case "fcmlt_asisdmisc_fz"_h: + suffix = ", #0.0"; + break; + case "fcvtxn_asisdmisc_n"_h: + if (nfd.GetVectorFormat(0) == kFormatS) { // Source format. 
+ mnemonic = NULL; + } + form = "'Sd, 'Dn"; + } + Format(instr, mnemonic, nfd.SubstitutePlaceholders(form), suffix); +} + +void Disassembler::VisitNEONScalar2RegMisc(const Instruction *instr) { + const char *mnemonic = mnemonic_.c_str(); + const char *form = "%sd, %sn"; + NEONFormatDecoder nfd(instr, NEONFormatDecoder::ScalarFormatMap()); + switch (form_hash_) { + case "sqxtn_asisdmisc_n"_h: + case "sqxtun_asisdmisc_n"_h: + case "uqxtn_asisdmisc_n"_h: + nfd.SetFormatMap(1, nfd.LongScalarFormatMap()); + } + Format(instr, mnemonic, nfd.SubstitutePlaceholders(form)); +} + +void Disassembler::VisitNEONScalar2RegMiscFP16(const Instruction *instr) { + const char *mnemonic = mnemonic_.c_str(); + const char *form = "'Hd, 'Hn"; + const char *suffix = NULL; + + switch (form_hash_) { + case "fcmeq_asisdmiscfp16_fz"_h: + case "fcmge_asisdmiscfp16_fz"_h: + case "fcmgt_asisdmiscfp16_fz"_h: + case "fcmle_asisdmiscfp16_fz"_h: + case "fcmlt_asisdmiscfp16_fz"_h: + suffix = ", #0.0"; + } + Format(instr, mnemonic, form, suffix); +} + + +void Disassembler::VisitNEONScalar3Diff(const Instruction *instr) { + const char *mnemonic = mnemonic_.c_str(); + const char *form = "%sd, %sn, %sm"; + NEONFormatDecoder nfd(instr, + NEONFormatDecoder::LongScalarFormatMap(), + NEONFormatDecoder::ScalarFormatMap()); + if (nfd.GetVectorFormat(0) == kFormatH) { + mnemonic = NULL; + } + Format(instr, mnemonic, nfd.SubstitutePlaceholders(form)); +} + +void Disassembler::DisassembleNEONFPScalar3Same(const Instruction *instr) { + const char *mnemonic = mnemonic_.c_str(); + const char *form = "%sd, %sn, %sm"; + NEONFormatDecoder nfd(instr, NEONFormatDecoder::FPScalarFormatMap()); + Format(instr, mnemonic, nfd.SubstitutePlaceholders(form)); +} + +void Disassembler::DisassembleNEONScalar3SameOnlyD(const Instruction *instr) { + const char *mnemonic = mnemonic_.c_str(); + const char *form = "'Dd, 'Dn, 'Dm"; + if (instr->GetNEONSize() != 3) { + mnemonic = NULL; + } + Format(instr, mnemonic, form); +} + +void Disassembler::VisitNEONScalar3Same(const Instruction *instr) { + const char *mnemonic = mnemonic_.c_str(); + const char *form = "%sd, %sn, %sm"; + NEONFormatDecoder nfd(instr, NEONFormatDecoder::ScalarFormatMap()); + VectorFormat vform = nfd.GetVectorFormat(0); + switch (form_hash_) { + case "srshl_asisdsame_only"_h: + case "urshl_asisdsame_only"_h: + case "sshl_asisdsame_only"_h: + case "ushl_asisdsame_only"_h: + if (vform != kFormatD) { + mnemonic = NULL; + } + break; + case "sqdmulh_asisdsame_only"_h: + case "sqrdmulh_asisdsame_only"_h: + if ((vform == kFormatB) || (vform == kFormatD)) { + mnemonic = NULL; + } + } + Format(instr, mnemonic, nfd.SubstitutePlaceholders(form)); +} + +void Disassembler::VisitNEONScalar3SameFP16(const Instruction *instr) { + FormatWithDecodedMnemonic(instr, "'Hd, 'Hn, 'Hm"); +} + +void Disassembler::VisitNEONScalar3SameExtra(const Instruction *instr) { + USE(instr); + // Nothing to do - handled by VisitNEONScalar3Same. 
+ VIXL_UNREACHABLE(); +} + +void Disassembler::DisassembleNEONScalarSatMulLongIndex( + const Instruction *instr) { + const char *mnemonic = mnemonic_.c_str(); + const char *form = "%sd, %sn, 'Vf.%s['IVByElemIndex]"; + NEONFormatDecoder nfd(instr, + NEONFormatDecoder::LongScalarFormatMap(), + NEONFormatDecoder::ScalarFormatMap()); + if (nfd.GetVectorFormat(0) == kFormatH) { + mnemonic = NULL; + } + Format(instr, + mnemonic, + nfd.Substitute(form, nfd.kPlaceholder, nfd.kPlaceholder, nfd.kFormat)); +} + +void Disassembler::DisassembleNEONFPScalarMulIndex(const Instruction *instr) { + const char *mnemonic = mnemonic_.c_str(); + const char *form = "%sd, %sn, 'Vf.%s['IVByElemIndex]"; + static const NEONFormatMap map = {{23, 22}, {NF_H, NF_UNDEF, NF_S, NF_D}}; + NEONFormatDecoder nfd(instr, &map); + Format(instr, + mnemonic, + nfd.Substitute(form, nfd.kPlaceholder, nfd.kPlaceholder, nfd.kFormat)); +} + +void Disassembler::VisitNEONScalarByIndexedElement(const Instruction *instr) { + const char *mnemonic = mnemonic_.c_str(); + const char *form = "%sd, %sn, 'Vf.%s['IVByElemIndex]"; + NEONFormatDecoder nfd(instr, NEONFormatDecoder::ScalarFormatMap()); + VectorFormat vform_dst = nfd.GetVectorFormat(0); + if ((vform_dst == kFormatB) || (vform_dst == kFormatD)) { + mnemonic = NULL; + } + Format(instr, + mnemonic, + nfd.Substitute(form, nfd.kPlaceholder, nfd.kPlaceholder, nfd.kFormat)); +} + + +void Disassembler::VisitNEONScalarCopy(const Instruction *instr) { + const char *mnemonic = "unimplemented"; + const char *form = "(NEONScalarCopy)"; + + NEONFormatDecoder nfd(instr, NEONFormatDecoder::TriangularScalarFormatMap()); + + if (instr->Mask(NEONScalarCopyMask) == NEON_DUP_ELEMENT_scalar) { + mnemonic = "mov"; + form = "%sd, 'Vn.%s['IVInsIndex1]"; + } + + Format(instr, mnemonic, nfd.Substitute(form, nfd.kPlaceholder, nfd.kFormat)); +} + + +void Disassembler::VisitNEONScalarPairwise(const Instruction *instr) { + const char *mnemonic = mnemonic_.c_str(); + if (form_hash_ == "addp_asisdpair_only"_h) { + // All pairwise operations except ADDP use bit U to differentiate FP16 + // from FP32/FP64 variations. + if (instr->GetNEONSize() != 3) { + mnemonic = NULL; + } + Format(instr, mnemonic, "'Dd, 'Vn.2d"); + } else { + const char *form = "%sd, 'Vn.2%s"; + NEONFormatDecoder nfd(instr, + NEONFormatDecoder::FPScalarPairwiseFormatMap()); + + Format(instr, + mnemonic, + nfd.Substitute(form, + NEONFormatDecoder::kPlaceholder, + NEONFormatDecoder::kFormat)); + } +} + +void Disassembler::DisassembleNEONScalarShiftImmOnlyD( + const Instruction *instr) { + const char *mnemonic = mnemonic_.c_str(); + const char *form = "'Dd, 'Dn, "; + const char *suffix = "'IsR"; + + if (instr->ExtractBit(22) == 0) { + // Only D registers are supported. 
+ mnemonic = NULL; + } + + switch (form_hash_) { + case "shl_asisdshf_r"_h: + case "sli_asisdshf_r"_h: + suffix = "'IsL"; + } + + Format(instr, mnemonic, form, suffix); +} + +void Disassembler::DisassembleNEONScalarShiftRightNarrowImm( + const Instruction *instr) { + const char *mnemonic = mnemonic_.c_str(); + const char *form = "%sd, %sn, 'IsR"; + static const NEONFormatMap map_dst = + {{22, 21, 20, 19}, {NF_UNDEF, NF_B, NF_H, NF_H, NF_S, NF_S, NF_S, NF_S}}; + static const NEONFormatMap map_src = + {{22, 21, 20, 19}, {NF_UNDEF, NF_H, NF_S, NF_S, NF_D, NF_D, NF_D, NF_D}}; + NEONFormatDecoder nfd(instr, &map_dst, &map_src); + Format(instr, mnemonic, nfd.SubstitutePlaceholders(form)); +} + +void Disassembler::VisitNEONScalarShiftImmediate(const Instruction *instr) { + const char *mnemonic = mnemonic_.c_str(); + const char *form = "%sd, %sn, "; + const char *suffix = "'IsR"; + + // clang-format off + static const NEONFormatMap map = {{22, 21, 20, 19}, + {NF_UNDEF, NF_B, NF_H, NF_H, + NF_S, NF_S, NF_S, NF_S, + NF_D, NF_D, NF_D, NF_D, + NF_D, NF_D, NF_D, NF_D}}; + // clang-format on + NEONFormatDecoder nfd(instr, &map); + switch (form_hash_) { + case "sqshlu_asisdshf_r"_h: + case "sqshl_asisdshf_r"_h: + case "uqshl_asisdshf_r"_h: + suffix = "'IsL"; + break; + default: + if (nfd.GetVectorFormat(0) == kFormatB) { + mnemonic = NULL; + } + } + Format(instr, mnemonic, nfd.SubstitutePlaceholders(form), suffix); +} + +void Disassembler::DisassembleNEONShiftLeftLongImm(const Instruction *instr) { + const char *mnemonic = mnemonic_.c_str(); + const char *form = "'Vd.%s, 'Vn.%s"; + const char *suffix = ", 'IsL"; + + NEONFormatDecoder nfd(instr, + NEONFormatDecoder::ShiftLongNarrowImmFormatMap(), + NEONFormatDecoder::ShiftImmFormatMap()); + + if (instr->GetImmNEONImmb() == 0 && + CountSetBits(instr->GetImmNEONImmh(), 32) == 1) { // xtl variant. + VIXL_ASSERT((form_hash_ == "sshll_asimdshf_l"_h) || + (form_hash_ == "ushll_asimdshf_l"_h)); + mnemonic = (form_hash_ == "sshll_asimdshf_l"_h) ? 
"sxtl" : "uxtl"; + suffix = NULL; + } + Format(instr, nfd.Mnemonic(mnemonic), nfd.Substitute(form), suffix); +} + +void Disassembler::DisassembleNEONShiftRightImm(const Instruction *instr) { + const char *mnemonic = mnemonic_.c_str(); + const char *form = "'Vd.%s, 'Vn.%s, 'IsR"; + NEONFormatDecoder nfd(instr, NEONFormatDecoder::ShiftImmFormatMap()); + + VectorFormat vform_dst = nfd.GetVectorFormat(0); + if (vform_dst != kFormatUndefined) { + uint32_t ls_dst = LaneSizeInBitsFromFormat(vform_dst); + switch (form_hash_) { + case "scvtf_asimdshf_c"_h: + case "ucvtf_asimdshf_c"_h: + case "fcvtzs_asimdshf_c"_h: + case "fcvtzu_asimdshf_c"_h: + if (ls_dst == kBRegSize) { + mnemonic = NULL; + } + break; + } + } + Format(instr, mnemonic, nfd.Substitute(form)); +} + +void Disassembler::DisassembleNEONShiftRightNarrowImm( + const Instruction *instr) { + const char *mnemonic = mnemonic_.c_str(); + const char *form = "'Vd.%s, 'Vn.%s, 'IsR"; + + NEONFormatDecoder nfd(instr, + NEONFormatDecoder::ShiftImmFormatMap(), + NEONFormatDecoder::ShiftLongNarrowImmFormatMap()); + Format(instr, nfd.Mnemonic(mnemonic), nfd.Substitute(form)); +} + +void Disassembler::VisitNEONShiftImmediate(const Instruction *instr) { + const char *mnemonic = mnemonic_.c_str(); + const char *form = "'Vd.%s, 'Vn.%s, 'IsL"; + NEONFormatDecoder nfd(instr, NEONFormatDecoder::ShiftImmFormatMap()); + Format(instr, mnemonic, nfd.Substitute(form)); +} + + +void Disassembler::VisitNEONTable(const Instruction *instr) { + const char *mnemonic = mnemonic_.c_str(); + const char form_1v[] = "'Vd.%%s, {'Vn.16b}, 'Vm.%%s"; + const char form_2v[] = "'Vd.%%s, {'Vn.16b, v%d.16b}, 'Vm.%%s"; + const char form_3v[] = "'Vd.%%s, {'Vn.16b, v%d.16b, v%d.16b}, 'Vm.%%s"; + const char form_4v[] = + "'Vd.%%s, {'Vn.16b, v%d.16b, v%d.16b, v%d.16b}, 'Vm.%%s"; + const char *form = form_1v; + + NEONFormatDecoder nfd(instr, NEONFormatDecoder::LogicalFormatMap()); + + switch (form_hash_) { + case "tbl_asimdtbl_l2_2"_h: + case "tbx_asimdtbl_l2_2"_h: + form = form_2v; + break; + case "tbl_asimdtbl_l3_3"_h: + case "tbx_asimdtbl_l3_3"_h: + form = form_3v; + break; + case "tbl_asimdtbl_l4_4"_h: + case "tbx_asimdtbl_l4_4"_h: + form = form_4v; + break; + } + VIXL_ASSERT(form != NULL); + + char re_form[sizeof(form_4v) + 6]; // 3 * two-digit substitutions => 6 + int reg_num = instr->GetRn(); + snprintf(re_form, + sizeof(re_form), + form, + (reg_num + 1) % kNumberOfVRegisters, + (reg_num + 2) % kNumberOfVRegisters, + (reg_num + 3) % kNumberOfVRegisters); + + Format(instr, mnemonic, nfd.Substitute(re_form)); +} + + +void Disassembler::VisitNEONPerm(const Instruction *instr) { + NEONFormatDecoder nfd(instr); + FormatWithDecodedMnemonic(instr, nfd.Substitute("'Vd.%s, 'Vn.%s, 'Vm.%s")); +} + +void Disassembler::Disassemble_Vd4S_Vn16B_Vm16B(const Instruction *instr) { + FormatWithDecodedMnemonic(instr, "'Vd.4s, 'Vn.16b, 'Vm.16b"); +} + +void Disassembler:: + VisitSVE32BitGatherLoadHalfwords_ScalarPlus32BitScaledOffsets( + const Instruction *instr) { + FormatWithDecodedMnemonic(instr, + "{'Zt.s}, 'Pgl/z, ['Xns, 'Zm.s, '?22:suxtw #1]"); +} + +void Disassembler::VisitSVE32BitGatherLoadWords_ScalarPlus32BitScaledOffsets( + const Instruction *instr) { + FormatWithDecodedMnemonic(instr, + "{'Zt.s}, 'Pgl/z, ['Xns, 'Zm.s, '?22:suxtw #2]"); +} + +void Disassembler::VisitSVE32BitGatherLoad_ScalarPlus32BitUnscaledOffsets( + const Instruction *instr) { + FormatWithDecodedMnemonic(instr, + "{'Zt.s}, 'Pgl/z, ['Xns, 'Zm.s, '?22:suxtw]"); +} + +void Disassembler::VisitSVE32BitGatherLoad_VectorPlusImm( 
+ const Instruction *instr) { + const char *form = "{'Zt.s}, 'Pgl/z, ['Zn.s]"; + const char *form_imm = "{'Zt.s}, 'Pgl/z, ['Zn.s, #'u2016]"; + const char *form_imm_h = "{'Zt.s}, 'Pgl/z, ['Zn.s, #'u2016*2]"; + const char *form_imm_w = "{'Zt.s}, 'Pgl/z, ['Zn.s, #'u2016*4]"; + + const char *mnemonic = mnemonic_.c_str(); + switch (form_hash_) { + case "ld1h_z_p_ai_s"_h: + case "ld1sh_z_p_ai_s"_h: + case "ldff1h_z_p_ai_s"_h: + case "ldff1sh_z_p_ai_s"_h: + form_imm = form_imm_h; + break; + case "ld1w_z_p_ai_s"_h: + case "ldff1w_z_p_ai_s"_h: + form_imm = form_imm_w; + break; + } + if (instr->ExtractBits(20, 16) != 0) form = form_imm; + + Format(instr, mnemonic, form); +} + +void Disassembler::VisitSVE32BitGatherPrefetch_ScalarPlus32BitScaledOffsets( + const Instruction *instr) { + const char *mnemonic = "unimplemented"; + const char *form = "'prefSVEOp, 'Pgl, ['Xns, 'Zm.s, '?22:suxtw"; + const char *suffix = NULL; + + switch ( + instr->Mask(SVE32BitGatherPrefetch_ScalarPlus32BitScaledOffsetsMask)) { + case PRFB_i_p_bz_s_x32_scaled: + mnemonic = "prfb"; + suffix = "]"; + break; + case PRFD_i_p_bz_s_x32_scaled: + mnemonic = "prfd"; + suffix = " #3]"; + break; + case PRFH_i_p_bz_s_x32_scaled: + mnemonic = "prfh"; + suffix = " #1]"; + break; + case PRFW_i_p_bz_s_x32_scaled: + mnemonic = "prfw"; + suffix = " #2]"; + break; + default: + form = "(SVE32BitGatherPrefetch_ScalarPlus32BitScaledOffsets)"; + break; + } + Format(instr, mnemonic, form, suffix); +} + +void Disassembler::VisitSVE32BitGatherPrefetch_VectorPlusImm( + const Instruction *instr) { + const char *form = (instr->ExtractBits(20, 16) != 0) + ? "'prefSVEOp, 'Pgl, ['Zn.s, #'u2016]" + : "'prefSVEOp, 'Pgl, ['Zn.s]"; + FormatWithDecodedMnemonic(instr, form); +} + +void Disassembler::VisitSVE32BitScatterStore_ScalarPlus32BitScaledOffsets( + const Instruction *instr) { + FormatWithDecodedMnemonic(instr, + "{'Zt.s}, 'Pgl, ['Xns, 'Zm.s, '?14:suxtw #'u2423]"); +} + +void Disassembler::VisitSVE32BitScatterStore_ScalarPlus32BitUnscaledOffsets( + const Instruction *instr) { + FormatWithDecodedMnemonic(instr, "{'Zt.s}, 'Pgl, ['Xns, 'Zm.s, '?14:suxtw]"); +} + +void Disassembler::VisitSVE32BitScatterStore_VectorPlusImm( + const Instruction *instr) { + const char *mnemonic = "unimplemented"; + const char *form = "{'Zt.s}, 'Pgl, ['Zn.s"; + const char *suffix = NULL; + + bool is_zero = instr->ExtractBits(20, 16) == 0; + + switch (instr->Mask(SVE32BitScatterStore_VectorPlusImmMask)) { + case ST1B_z_p_ai_s: + mnemonic = "st1b"; + suffix = is_zero ? "]" : ", #'u2016]"; + break; + case ST1H_z_p_ai_s: + mnemonic = "st1h"; + suffix = is_zero ? "]" : ", #'u2016*2]"; + break; + case ST1W_z_p_ai_s: + mnemonic = "st1w"; + suffix = is_zero ? 
"]" : ", #'u2016*4]"; + break; + default: + form = "(SVE32BitScatterStore_VectorPlusImm)"; + break; + } + Format(instr, mnemonic, form, suffix); +} + +void Disassembler::VisitSVE64BitGatherLoad_ScalarPlus32BitUnpackedScaledOffsets( + const Instruction *instr) { + FormatWithDecodedMnemonic(instr, + "{'Zt.d}, 'Pgl/z, ['Xns, 'Zm.d, '?22:suxtw " + "#'u2423]"); +} + +void Disassembler::VisitSVE64BitGatherLoad_ScalarPlus64BitScaledOffsets( + const Instruction *instr) { + FormatWithDecodedMnemonic(instr, + "{'Zt.d}, 'Pgl/z, ['Xns, 'Zm.d, lsl #'u2423]"); +} + +void Disassembler::VisitSVE64BitGatherLoad_ScalarPlus64BitUnscaledOffsets( + const Instruction *instr) { + FormatWithDecodedMnemonic(instr, "{'Zt.d}, 'Pgl/z, ['Xns, 'Zm.d]"); +} + +void Disassembler:: + VisitSVE64BitGatherLoad_ScalarPlusUnpacked32BitUnscaledOffsets( + const Instruction *instr) { + FormatWithDecodedMnemonic(instr, + "{'Zt.d}, 'Pgl/z, ['Xns, 'Zm.d, '?22:suxtw]"); +} + +void Disassembler::VisitSVE64BitGatherLoad_VectorPlusImm( + const Instruction *instr) { + const char *form = "{'Zt.d}, 'Pgl/z, ['Zn.d]"; + const char *form_imm[4] = {"{'Zt.d}, 'Pgl/z, ['Zn.d, #'u2016]", + "{'Zt.d}, 'Pgl/z, ['Zn.d, #'u2016*2]", + "{'Zt.d}, 'Pgl/z, ['Zn.d, #'u2016*4]", + "{'Zt.d}, 'Pgl/z, ['Zn.d, #'u2016*8]"}; + + if (instr->ExtractBits(20, 16) != 0) { + unsigned msz = instr->ExtractBits(24, 23); + bool sign_extend = instr->ExtractBit(14) == 0; + if ((msz == kDRegSizeInBytesLog2) && sign_extend) { + form = "(SVE64BitGatherLoad_VectorPlusImm)"; + } else { + VIXL_ASSERT(msz < ArrayLength(form_imm)); + form = form_imm[msz]; + } + } + + FormatWithDecodedMnemonic(instr, form); +} + +void Disassembler::VisitSVE64BitGatherPrefetch_ScalarPlus64BitScaledOffsets( + const Instruction *instr) { + const char *form = "'prefSVEOp, 'Pgl, ['Xns, 'Zm.d"; + const char *suffix = "]"; + + switch (form_hash_) { + case "prfh_i_p_bz_d_64_scaled"_h: + suffix = ", lsl #1]"; + break; + case "prfs_i_p_bz_d_64_scaled"_h: + suffix = ", lsl #2]"; + break; + case "prfd_i_p_bz_d_64_scaled"_h: + suffix = ", lsl #3]"; + break; + } + FormatWithDecodedMnemonic(instr, form, suffix); +} + +void Disassembler:: + VisitSVE64BitGatherPrefetch_ScalarPlusUnpacked32BitScaledOffsets( + const Instruction *instr) { + const char *form = "'prefSVEOp, 'Pgl, ['Xns, 'Zm.d, '?22:suxtw "; + const char *suffix = "]"; + + switch (form_hash_) { + case "prfh_i_p_bz_d_x32_scaled"_h: + suffix = "#1]"; + break; + case "prfs_i_p_bz_d_x32_scaled"_h: + suffix = "#2]"; + break; + case "prfd_i_p_bz_d_x32_scaled"_h: + suffix = "#3]"; + break; + } + FormatWithDecodedMnemonic(instr, form, suffix); +} + +void Disassembler::VisitSVE64BitGatherPrefetch_VectorPlusImm( + const Instruction *instr) { + const char *form = (instr->ExtractBits(20, 16) != 0) + ? 
"'prefSVEOp, 'Pgl, ['Zn.d, #'u2016]" + : "'prefSVEOp, 'Pgl, ['Zn.d]"; + + FormatWithDecodedMnemonic(instr, form); +} + +void Disassembler::VisitSVE64BitScatterStore_ScalarPlus64BitScaledOffsets( + const Instruction *instr) { + FormatWithDecodedMnemonic(instr, "{'Zt.d}, 'Pgl, ['Xns, 'Zm.d, lsl #'u2423]"); +} + +void Disassembler::VisitSVE64BitScatterStore_ScalarPlus64BitUnscaledOffsets( + const Instruction *instr) { + FormatWithDecodedMnemonic(instr, "{'Zt.d}, 'Pgl, ['Xns, 'Zm.d]"); +} + +void Disassembler:: + VisitSVE64BitScatterStore_ScalarPlusUnpacked32BitScaledOffsets( + const Instruction *instr) { + FormatWithDecodedMnemonic(instr, + "{'Zt.d}, 'Pgl, ['Xns, 'Zm.d, '?14:suxtw #'u2423]"); +} + +void Disassembler:: + VisitSVE64BitScatterStore_ScalarPlusUnpacked32BitUnscaledOffsets( + const Instruction *instr) { + FormatWithDecodedMnemonic(instr, "{'Zt.d}, 'Pgl, ['Xns, 'Zm.d, '?14:suxtw]"); +} + +void Disassembler::VisitSVE64BitScatterStore_VectorPlusImm( + const Instruction *instr) { + const char *form = "{'Zt.d}, 'Pgl, ['Zn.d"; + const char *suffix = "]"; + + if (instr->ExtractBits(20, 16) != 0) { + switch (form_hash_) { + case "st1b_z_p_ai_d"_h: + suffix = ", #'u2016]"; + break; + case "st1h_z_p_ai_d"_h: + suffix = ", #'u2016*2]"; + break; + case "st1w_z_p_ai_d"_h: + suffix = ", #'u2016*4]"; + break; + case "st1d_z_p_ai_d"_h: + suffix = ", #'u2016*8]"; + break; + } + } + FormatWithDecodedMnemonic(instr, form, suffix); +} + +void Disassembler::VisitSVEBitwiseLogicalWithImm_Unpredicated( + const Instruction *instr) { + if (instr->GetSVEImmLogical() == 0) { + // The immediate encoded in the instruction is not in the expected format. + Format(instr, "unallocated", "(SVEBitwiseImm)"); + } else { + FormatWithDecodedMnemonic(instr, "'Zd.'tl, 'Zd.'tl, 'ITriSvel"); + } +} + +void Disassembler::VisitSVEBitwiseLogical_Predicated(const Instruction *instr) { + FormatWithDecodedMnemonic(instr, "'Zd.'t, 'Pgl/m, 'Zd.'t, 'Zn.'t"); +} + +void Disassembler::VisitSVEBitwiseShiftByImm_Predicated( + const Instruction *instr) { + const char *mnemonic = mnemonic_.c_str(); + const char *form = "'Zd.'tszp, 'Pgl/m, 'Zd.'tszp, "; + const char *suffix = NULL; + unsigned tsize = (instr->ExtractBits(23, 22) << 2) | instr->ExtractBits(9, 8); + + if (tsize == 0) { + mnemonic = "unimplemented"; + form = "(SVEBitwiseShiftByImm_Predicated)"; + } else { + switch (form_hash_) { + case "lsl_z_p_zi"_h: + case "sqshl_z_p_zi"_h: + case "sqshlu_z_p_zi"_h: + case "uqshl_z_p_zi"_h: + suffix = "'ITriSvep"; + break; + case "asrd_z_p_zi"_h: + case "asr_z_p_zi"_h: + case "lsr_z_p_zi"_h: + case "srshr_z_p_zi"_h: + case "urshr_z_p_zi"_h: + suffix = "'ITriSveq"; + break; + default: + mnemonic = "unimplemented"; + form = "(SVEBitwiseShiftByImm_Predicated)"; + break; + } + } + Format(instr, mnemonic, form, suffix); +} + +void Disassembler::VisitSVEBitwiseShiftByVector_Predicated( + const Instruction *instr) { + FormatWithDecodedMnemonic(instr, "'Zd.'t, 'Pgl/m, 'Zd.'t, 'Zn.'t"); +} + +void Disassembler::VisitSVEBitwiseShiftByWideElements_Predicated( + const Instruction *instr) { + if (instr->GetSVESize() == kDRegSizeInBytesLog2) { + Format(instr, "unallocated", "(SVEBitwiseShiftByWideElements_Predicated)"); + } else { + FormatWithDecodedMnemonic(instr, "'Zd.'t, 'Pgl/m, 'Zd.'t, 'Zn.d"); + } +} + +static bool SVEMoveMaskPreferred(uint64_t value, int lane_bytes_log2) { + VIXL_ASSERT(IsUintN(8 << lane_bytes_log2, value)); + + // Duplicate lane-sized value across double word. 
+ switch (lane_bytes_log2) { + case 0: + value *= 0x0101010101010101; + break; + case 1: + value *= 0x0001000100010001; + break; + case 2: + value *= 0x0000000100000001; + break; + case 3: // Nothing to do + break; + default: + VIXL_UNREACHABLE(); + } + + if ((value & 0xff) == 0) { + // Check for 16-bit patterns. Set least-significant 16 bits, to make tests + // easier; we already checked least-significant byte is zero above. + uint64_t generic_value = value | 0xffff; + + // Check 0x00000000_0000pq00 or 0xffffffff_ffffpq00. + if ((generic_value == 0xffff) || (generic_value == UINT64_MAX)) { + return false; + } + + // Check 0x0000pq00_0000pq00 or 0xffffpq00_ffffpq00. + uint64_t rotvalue = RotateRight(value, 32, 64); + if (value == rotvalue) { + generic_value &= 0xffffffff; + if ((generic_value == 0xffff) || (generic_value == UINT32_MAX)) { + return false; + } + } + + // Check 0xpq00pq00_pq00pq00. + rotvalue = RotateRight(value, 16, 64); + if (value == rotvalue) { + return false; + } + } else { + // Check for 8-bit patterns. Set least-significant byte, to make tests + // easier. + uint64_t generic_value = value | 0xff; + + // Check 0x00000000_000000pq or 0xffffffff_ffffffpq. + if ((generic_value == 0xff) || (generic_value == UINT64_MAX)) { + return false; + } + + // Check 0x000000pq_000000pq or 0xffffffpq_ffffffpq. + uint64_t rotvalue = RotateRight(value, 32, 64); + if (value == rotvalue) { + generic_value &= 0xffffffff; + if ((generic_value == 0xff) || (generic_value == UINT32_MAX)) { + return false; + } + } + + // Check 0x00pq00pq_00pq00pq or 0xffpqffpq_ffpqffpq. + rotvalue = RotateRight(value, 16, 64); + if (value == rotvalue) { + generic_value &= 0xffff; + if ((generic_value == 0xff) || (generic_value == UINT16_MAX)) { + return false; + } + } + + // Check 0xpqpqpqpq_pqpqpqpq. + rotvalue = RotateRight(value, 8, 64); + if (value == rotvalue) { + return false; + } + } + return true; +} + +void Disassembler::VisitSVEBroadcastBitmaskImm(const Instruction *instr) { + const char *mnemonic = "unimplemented"; + const char *form = "(SVEBroadcastBitmaskImm)"; + + switch (instr->Mask(SVEBroadcastBitmaskImmMask)) { + case DUPM_z_i: { + uint64_t imm = instr->GetSVEImmLogical(); + if (imm != 0) { + int lane_size = instr->GetSVEBitwiseImmLaneSizeInBytesLog2(); + mnemonic = SVEMoveMaskPreferred(imm, lane_size) ? "mov" : "dupm"; + form = "'Zd.'tl, 'ITriSvel"; + } + break; + } + default: + break; + } + Format(instr, mnemonic, form); +} + +void Disassembler::VisitSVEBroadcastFPImm_Unpredicated( + const Instruction *instr) { + const char *mnemonic = "unimplemented"; + const char *form = "(SVEBroadcastFPImm_Unpredicated)"; + + if (instr->GetSVEVectorFormat() != kFormatVnB) { + switch (instr->Mask(SVEBroadcastFPImm_UnpredicatedMask)) { + case FDUP_z_i: + // The preferred disassembly for fdup is "fmov". + mnemonic = "fmov"; + form = "'Zd.'t, 'IFPSve"; + break; + default: + break; + } + } + Format(instr, mnemonic, form); +} + +void Disassembler::VisitSVEBroadcastGeneralRegister(const Instruction *instr) { + const char *mnemonic = "unimplemented"; + const char *form = "(SVEBroadcastGeneralRegister)"; + + switch (instr->Mask(SVEBroadcastGeneralRegisterMask)) { + case DUP_z_r: + // The preferred disassembly for dup is "mov". 
+ mnemonic = "mov"; + if (instr->GetSVESize() == kDRegSizeInBytesLog2) { + form = "'Zd.'t, 'Xns"; + } else { + form = "'Zd.'t, 'Wns"; + } + break; + default: + break; + } + Format(instr, mnemonic, form); +} + +void Disassembler::VisitSVEBroadcastIndexElement(const Instruction *instr) { + const char *mnemonic = "unimplemented"; + const char *form = "(SVEBroadcastIndexElement)"; + + switch (instr->Mask(SVEBroadcastIndexElementMask)) { + case DUP_z_zi: { + // The tsz field must not be zero. + int tsz = instr->ExtractBits(20, 16); + if (tsz != 0) { + // The preferred disassembly for dup is "mov". + mnemonic = "mov"; + int imm2 = instr->ExtractBits(23, 22); + if ((CountSetBits(imm2) + CountSetBits(tsz)) == 1) { + // If imm2:tsz has one set bit, the index is zero. This is + // disassembled as a mov from a b/h/s/d/q scalar register. + form = "'Zd.'ti, 'ti'u0905"; + } else { + form = "'Zd.'ti, 'Zn.'ti['IVInsSVEIndex]"; + } + } + break; + } + default: + break; + } + Format(instr, mnemonic, form); +} + +void Disassembler::VisitSVEBroadcastIntImm_Unpredicated( + const Instruction *instr) { + const char *mnemonic = "unimplemented"; + const char *form = "(SVEBroadcastIntImm_Unpredicated)"; + + switch (instr->Mask(SVEBroadcastIntImm_UnpredicatedMask)) { + case DUP_z_i: + // The encoding of byte-sized lanes with lsl #8 is undefined. + if ((instr->GetSVEVectorFormat() == kFormatVnB) && + (instr->ExtractBit(13) == 1)) + break; + + // The preferred disassembly for dup is "mov". + mnemonic = "mov"; + form = (instr->ExtractBit(13) == 0) ? "'Zd.'t, #'s1205" + : "'Zd.'t, #'s1205, lsl #8"; + break; + default: + break; + } + Format(instr, mnemonic, form); +} + +void Disassembler::VisitSVECompressActiveElements(const Instruction *instr) { + // The top bit of size is always set for compact, so 't can only be + // substituted with types S and D. + if (instr->ExtractBit(23) == 1) { + FormatWithDecodedMnemonic(instr, "'Zd.'t, 'Pgl, 'Zn.'t"); + } else { + VisitUnallocated(instr); + } +} + +void Disassembler::VisitSVEConditionallyBroadcastElementToVector( + const Instruction *instr) { + FormatWithDecodedMnemonic(instr, "'Zd.'t, 'Pgl, 'Zd.'t, 'Zn.'t"); +} + +void Disassembler::VisitSVEConditionallyExtractElementToGeneralRegister( + const Instruction *instr) { + const char *form = "'Wd, 'Pgl, 'Wd, 'Zn.'t"; + + if (instr->GetSVESize() == kDRegSizeInBytesLog2) { + form = "'Xd, p'u1210, 'Xd, 'Zn.'t"; + } + FormatWithDecodedMnemonic(instr, form); +} + +void Disassembler::VisitSVEConditionallyExtractElementToSIMDFPScalar( + const Instruction *instr) { + FormatWithDecodedMnemonic(instr, "'t'u0400, 'Pgl, 't'u0400, 'Zn.'t"); +} + +void Disassembler::VisitSVEConditionallyTerminateScalars( + const Instruction *instr) { + const char *form = (instr->ExtractBit(22) == 0) ? 
"'Wn, 'Wm" : "'Xn, 'Xm"; + FormatWithDecodedMnemonic(instr, form); +} + +void Disassembler::VisitSVEConstructivePrefix_Unpredicated( + const Instruction *instr) { + FormatWithDecodedMnemonic(instr, "'Zd, 'Zn"); +} + +void Disassembler::VisitSVEContiguousFirstFaultLoad_ScalarPlusScalar( + const Instruction *instr) { + const char *form = "{'Zt.'tlss}, 'Pgl/z, ['Xns"; + const char *suffix = "]"; + + if (instr->GetRm() != kZeroRegCode) { + switch (form_hash_) { + case "ldff1b_z_p_br_u8"_h: + case "ldff1b_z_p_br_u16"_h: + case "ldff1b_z_p_br_u32"_h: + case "ldff1b_z_p_br_u64"_h: + case "ldff1sb_z_p_br_s16"_h: + case "ldff1sb_z_p_br_s32"_h: + case "ldff1sb_z_p_br_s64"_h: + suffix = ", 'Xm]"; + break; + case "ldff1h_z_p_br_u16"_h: + case "ldff1h_z_p_br_u32"_h: + case "ldff1h_z_p_br_u64"_h: + case "ldff1sh_z_p_br_s32"_h: + case "ldff1sh_z_p_br_s64"_h: + suffix = ", 'Xm, lsl #1]"; + break; + case "ldff1w_z_p_br_u32"_h: + case "ldff1w_z_p_br_u64"_h: + case "ldff1sw_z_p_br_s64"_h: + suffix = ", 'Xm, lsl #2]"; + break; + case "ldff1d_z_p_br_u64"_h: + suffix = ", 'Xm, lsl #3]"; + break; + } + } + + FormatWithDecodedMnemonic(instr, form, suffix); +} + +void Disassembler::VisitSVEContiguousNonFaultLoad_ScalarPlusImm( + const Instruction *instr) { + const char *form = "{'Zt.'tlss}, 'Pgl/z, ['Xns"; + const char *suffix = + (instr->ExtractBits(19, 16) == 0) ? "]" : ", #'s1916, mul vl]"; + FormatWithDecodedMnemonic(instr, form, suffix); +} + +void Disassembler::VisitSVEContiguousNonTemporalLoad_ScalarPlusImm( + const Instruction *instr) { + const char *form = "{'Zt.b}, 'Pgl/z, ['Xns"; + const char *suffix = + (instr->ExtractBits(19, 16) == 0) ? "]" : ", #'s1916, mul vl]"; + switch (form_hash_) { + case "ldnt1d_z_p_bi_contiguous"_h: + form = "{'Zt.d}, 'Pgl/z, ['Xns"; + break; + case "ldnt1h_z_p_bi_contiguous"_h: + form = "{'Zt.h}, 'Pgl/z, ['Xns"; + break; + case "ldnt1w_z_p_bi_contiguous"_h: + form = "{'Zt.s}, 'Pgl/z, ['Xns"; + break; + } + FormatWithDecodedMnemonic(instr, form, suffix); +} + +void Disassembler::VisitSVEContiguousNonTemporalLoad_ScalarPlusScalar( + const Instruction *instr) { + const char *form = "{'Zt.b}, 'Pgl/z, ['Xns, 'Rm]"; + switch (form_hash_) { + case "ldnt1d_z_p_br_contiguous"_h: + form = "{'Zt.d}, 'Pgl/z, ['Xns, 'Rm, lsl #3]"; + break; + case "ldnt1h_z_p_br_contiguous"_h: + form = "{'Zt.h}, 'Pgl/z, ['Xns, 'Rm, lsl #1]"; + break; + case "ldnt1w_z_p_br_contiguous"_h: + form = "{'Zt.s}, 'Pgl/z, ['Xns, 'Rm, lsl #2]"; + break; + } + FormatWithDecodedMnemonic(instr, form); +} + +void Disassembler::VisitSVEContiguousNonTemporalStore_ScalarPlusImm( + const Instruction *instr) { + const char *form = "{'Zt.b}, 'Pgl, ['Xns"; + const char *suffix = + (instr->ExtractBits(19, 16) == 0) ? 
"]" : ", #'s1916, mul vl]"; + + switch (form_hash_) { + case "stnt1d_z_p_bi_contiguous"_h: + form = "{'Zt.d}, 'Pgl, ['Xns"; + break; + case "stnt1h_z_p_bi_contiguous"_h: + form = "{'Zt.h}, 'Pgl, ['Xns"; + break; + case "stnt1w_z_p_bi_contiguous"_h: + form = "{'Zt.s}, 'Pgl, ['Xns"; + break; + } + FormatWithDecodedMnemonic(instr, form, suffix); +} + +void Disassembler::VisitSVEContiguousNonTemporalStore_ScalarPlusScalar( + const Instruction *instr) { + const char *mnemonic = "unimplemented"; + const char *form = "(SVEContiguousNonTemporalStore_ScalarPlusScalar)"; + + switch (instr->Mask(SVEContiguousNonTemporalStore_ScalarPlusScalarMask)) { + case STNT1B_z_p_br_contiguous: + mnemonic = "stnt1b"; + form = "{'Zt.b}, 'Pgl, ['Xns, 'Rm]"; + break; + case STNT1D_z_p_br_contiguous: + mnemonic = "stnt1d"; + form = "{'Zt.d}, 'Pgl, ['Xns, 'Rm, lsl #3]"; + break; + case STNT1H_z_p_br_contiguous: + mnemonic = "stnt1h"; + form = "{'Zt.h}, 'Pgl, ['Xns, 'Rm, lsl #1]"; + break; + case STNT1W_z_p_br_contiguous: + mnemonic = "stnt1w"; + form = "{'Zt.s}, 'Pgl, ['Xns, 'Rm, lsl #2]"; + break; + default: + break; + } + Format(instr, mnemonic, form); +} + +void Disassembler::VisitSVEContiguousPrefetch_ScalarPlusImm( + const Instruction *instr) { + const char *form = (instr->ExtractBits(21, 16) != 0) + ? "'prefSVEOp, 'Pgl, ['Xns, #'s2116, mul vl]" + : "'prefSVEOp, 'Pgl, ['Xns]"; + FormatWithDecodedMnemonic(instr, form); +} + +void Disassembler::VisitSVEContiguousPrefetch_ScalarPlusScalar( + const Instruction *instr) { + const char *mnemonic = "unimplemented"; + const char *form = "(SVEContiguousPrefetch_ScalarPlusScalar)"; + + if (instr->GetRm() != kZeroRegCode) { + switch (instr->Mask(SVEContiguousPrefetch_ScalarPlusScalarMask)) { + case PRFB_i_p_br_s: + mnemonic = "prfb"; + form = "'prefSVEOp, 'Pgl, ['Xns, 'Rm]"; + break; + case PRFD_i_p_br_s: + mnemonic = "prfd"; + form = "'prefSVEOp, 'Pgl, ['Xns, 'Rm, lsl #3]"; + break; + case PRFH_i_p_br_s: + mnemonic = "prfh"; + form = "'prefSVEOp, 'Pgl, ['Xns, 'Rm, lsl #1]"; + break; + case PRFW_i_p_br_s: + mnemonic = "prfw"; + form = "'prefSVEOp, 'Pgl, ['Xns, 'Rm, lsl #2]"; + break; + default: + break; + } + } + Format(instr, mnemonic, form); +} + +void Disassembler::VisitSVEContiguousStore_ScalarPlusImm( + const Instruction *instr) { + // The 'size' field isn't in the usual place here. + const char *form = "{'Zt.'tls}, 'Pgl, ['Xns, #'s1916, mul vl]"; + if (instr->ExtractBits(19, 16) == 0) { + form = "{'Zt.'tls}, 'Pgl, ['Xns]"; + } + FormatWithDecodedMnemonic(instr, form); +} + +void Disassembler::VisitSVEContiguousStore_ScalarPlusScalar( + const Instruction *instr) { + // The 'size' field isn't in the usual place here. + FormatWithDecodedMnemonic(instr, "{'Zt.'tls}, 'Pgl, ['Xns, 'Xm'NSveS]"); +} + +void Disassembler::VisitSVECopyFPImm_Predicated(const Instruction *instr) { + const char *mnemonic = "unimplemented"; + const char *form = "(SVECopyFPImm_Predicated)"; + + if (instr->GetSVEVectorFormat() != kFormatVnB) { + switch (instr->Mask(SVECopyFPImm_PredicatedMask)) { + case FCPY_z_p_i: + // The preferred disassembly for fcpy is "fmov". 
+ mnemonic = "fmov"; + form = "'Zd.'t, 'Pm/m, 'IFPSve"; + break; + default: + break; + } + } + Format(instr, mnemonic, form); +} + +void Disassembler::VisitSVECopyGeneralRegisterToVector_Predicated( + const Instruction *instr) { + const char *mnemonic = "unimplemented"; + const char *form = "(SVECopyGeneralRegisterToVector_Predicated)"; + + switch (instr->Mask(SVECopyGeneralRegisterToVector_PredicatedMask)) { + case CPY_z_p_r: + // The preferred disassembly for cpy is "mov". + mnemonic = "mov"; + form = "'Zd.'t, 'Pgl/m, 'Wns"; + if (instr->GetSVESize() == kXRegSizeInBytesLog2) { + form = "'Zd.'t, 'Pgl/m, 'Xns"; + } + break; + default: + break; + } + Format(instr, mnemonic, form); +} + +void Disassembler::VisitSVECopyIntImm_Predicated(const Instruction *instr) { + const char *mnemonic = "unimplemented"; + const char *form = "(SVECopyIntImm_Predicated)"; + const char *suffix = NULL; + + switch (instr->Mask(SVECopyIntImm_PredicatedMask)) { + case CPY_z_p_i: { + // The preferred disassembly for cpy is "mov". + mnemonic = "mov"; + form = "'Zd.'t, 'Pm/'?14:mz, #'s1205"; + if (instr->ExtractBit(13) != 0) suffix = ", lsl #8"; + break; + } + default: + break; + } + Format(instr, mnemonic, form, suffix); +} + +void Disassembler::VisitSVECopySIMDFPScalarRegisterToVector_Predicated( + const Instruction *instr) { + const char *mnemonic = "unimplemented"; + const char *form = "(SVECopySIMDFPScalarRegisterToVector_Predicated)"; + + switch (instr->Mask(SVECopySIMDFPScalarRegisterToVector_PredicatedMask)) { + case CPY_z_p_v: + // The preferred disassembly for cpy is "mov". + mnemonic = "mov"; + form = "'Zd.'t, 'Pgl/m, 'Vnv"; + break; + default: + break; + } + Format(instr, mnemonic, form); +} + +void Disassembler::VisitSVEExtractElementToGeneralRegister( + const Instruction *instr) { + const char *form = "'Wd, 'Pgl, 'Zn.'t"; + if (instr->GetSVESize() == kDRegSizeInBytesLog2) { + form = "'Xd, p'u1210, 'Zn.'t"; + } + FormatWithDecodedMnemonic(instr, form); +} + +void Disassembler::VisitSVEExtractElementToSIMDFPScalarRegister( + const Instruction *instr) { + FormatWithDecodedMnemonic(instr, "'t'u0400, 'Pgl, 'Zn.'t"); +} + +void Disassembler::VisitSVEFFRInitialise(const Instruction *instr) { + DisassembleNoArgs(instr); +} + +void Disassembler::VisitSVEFFRWriteFromPredicate(const Instruction *instr) { + FormatWithDecodedMnemonic(instr, "'Pn.b"); +} + +void Disassembler::VisitSVEFPArithmeticWithImm_Predicated( + const Instruction *instr) { + const char *form = "'Zd.'t, 'Pgl/m, 'Zd.'t, #"; + const char *suffix00 = "0.0"; + const char *suffix05 = "0.5"; + const char *suffix10 = "1.0"; + const char *suffix20 = "2.0"; + int i1 = instr->ExtractBit(5); + const char *suffix = i1 ? suffix10 : suffix00; + + if (instr->GetSVEVectorFormat() == kFormatVnB) { + VisitUnallocated(instr); + return; + } + + switch (form_hash_) { + case "fadd_z_p_zs"_h: + case "fsubr_z_p_zs"_h: + case "fsub_z_p_zs"_h: + suffix = i1 ? suffix10 : suffix05; + break; + case "fmul_z_p_zs"_h: + suffix = i1 ? 
suffix20 : suffix05; + break; + } + FormatWithDecodedMnemonic(instr, form, suffix); +} + +void Disassembler::VisitSVEFPArithmetic_Predicated(const Instruction *instr) { + if (instr->GetSVEVectorFormat() == kFormatVnB) { + VisitUnallocated(instr); + } else { + FormatWithDecodedMnemonic(instr, "'Zd.'t, 'Pgl/m, 'Zd.'t, 'Zn.'t"); + } +} + +void Disassembler::VisitSVEFPConvertPrecision(const Instruction *instr) { + const char *form = NULL; + + switch (form_hash_) { + case "fcvt_z_p_z_d2h"_h: + form = "'Zd.h, 'Pgl/m, 'Zn.d"; + break; + case "fcvt_z_p_z_d2s"_h: + form = "'Zd.s, 'Pgl/m, 'Zn.d"; + break; + case "fcvt_z_p_z_h2d"_h: + form = "'Zd.d, 'Pgl/m, 'Zn.h"; + break; + case "fcvt_z_p_z_h2s"_h: + form = "'Zd.s, 'Pgl/m, 'Zn.h"; + break; + case "fcvt_z_p_z_s2d"_h: + form = "'Zd.d, 'Pgl/m, 'Zn.s"; + break; + case "fcvt_z_p_z_s2h"_h: + form = "'Zd.h, 'Pgl/m, 'Zn.s"; + break; + } + FormatWithDecodedMnemonic(instr, form); +} + +void Disassembler::VisitSVEFPConvertToInt(const Instruction *instr) { + const char *form = NULL; + + switch (form_hash_) { + case "fcvtzs_z_p_z_d2w"_h: + case "fcvtzu_z_p_z_d2w"_h: + form = "'Zd.s, 'Pgl/m, 'Zn.d"; + break; + case "fcvtzs_z_p_z_d2x"_h: + case "fcvtzu_z_p_z_d2x"_h: + form = "'Zd.d, 'Pgl/m, 'Zn.d"; + break; + case "fcvtzs_z_p_z_fp162h"_h: + case "fcvtzu_z_p_z_fp162h"_h: + form = "'Zd.h, 'Pgl/m, 'Zn.h"; + break; + case "fcvtzs_z_p_z_fp162w"_h: + case "fcvtzu_z_p_z_fp162w"_h: + form = "'Zd.s, 'Pgl/m, 'Zn.h"; + break; + case "fcvtzs_z_p_z_fp162x"_h: + case "fcvtzu_z_p_z_fp162x"_h: + form = "'Zd.d, 'Pgl/m, 'Zn.h"; + break; + case "fcvtzs_z_p_z_s2w"_h: + case "fcvtzu_z_p_z_s2w"_h: + form = "'Zd.s, 'Pgl/m, 'Zn.s"; + break; + case "fcvtzs_z_p_z_s2x"_h: + case "fcvtzu_z_p_z_s2x"_h: + form = "'Zd.d, 'Pgl/m, 'Zn.s"; + break; + } + FormatWithDecodedMnemonic(instr, form); +} + +void Disassembler::VisitSVEFPExponentialAccelerator(const Instruction *instr) { + unsigned size = instr->GetSVESize(); + if ((size == kHRegSizeInBytesLog2) || (size == kSRegSizeInBytesLog2) || + (size == kDRegSizeInBytesLog2)) { + FormatWithDecodedMnemonic(instr, "'Zd.'t, 'Zn.'t"); + } else { + VisitUnallocated(instr); + } +} + +void Disassembler::VisitSVEFPRoundToIntegralValue(const Instruction *instr) { + if (instr->GetSVEVectorFormat() == kFormatVnB) { + VisitUnallocated(instr); + } else { + FormatWithDecodedMnemonic(instr, "'Zd.'t, 'Pgl/m, 'Zn.'t"); + } +} + +void Disassembler::VisitSVEFPTrigMulAddCoefficient(const Instruction *instr) { + unsigned size = instr->GetSVESize(); + if ((size == kHRegSizeInBytesLog2) || (size == kSRegSizeInBytesLog2) || + (size == kDRegSizeInBytesLog2)) { + FormatWithDecodedMnemonic(instr, "'Zd.'t, 'Zd.'t, 'Zn.'t, #'u1816"); + } else { + VisitUnallocated(instr); + } +} + +void Disassembler::VisitSVEFPTrigSelectCoefficient(const Instruction *instr) { + unsigned size = instr->GetSVESize(); + if ((size == kHRegSizeInBytesLog2) || (size == kSRegSizeInBytesLog2) || + (size == kDRegSizeInBytesLog2)) { + FormatWithDecodedMnemonic(instr, "'Zd.'t, 'Zn.'t, 'Zm.'t"); + } else { + VisitUnallocated(instr); + } +} + +void Disassembler::VisitSVEFPUnaryOp(const Instruction *instr) { + if (instr->GetSVESize() == kBRegSizeInBytesLog2) { + VisitUnallocated(instr); + } else { + FormatWithDecodedMnemonic(instr, "'Zd.'t, 'Pgl/m, 'Zn.'t"); + } +} + +static const char *IncDecFormHelper(const Instruction *instr, + const char *reg_pat_mul_form, + const char *reg_pat_form, + const char *reg_form) { + if (instr->ExtractBits(19, 16) == 0) { + if (instr->ExtractBits(9, 5) == SVE_ALL) { + // Use the 
register only form if the multiplier is one (encoded as zero) + // and the pattern is SVE_ALL. + return reg_form; + } + // Use the register and pattern form if the multiplier is one. + return reg_pat_form; + } + return reg_pat_mul_form; +} + +void Disassembler::VisitSVEIncDecRegisterByElementCount( + const Instruction *instr) { + const char *form = + IncDecFormHelper(instr, "'Xd, 'Ipc, mul #'u1916+1", "'Xd, 'Ipc", "'Xd"); + FormatWithDecodedMnemonic(instr, form); +} + +void Disassembler::VisitSVEIncDecVectorByElementCount( + const Instruction *instr) { + const char *form = IncDecFormHelper(instr, + "'Zd.'t, 'Ipc, mul #'u1916+1", + "'Zd.'t, 'Ipc", + "'Zd.'t"); + FormatWithDecodedMnemonic(instr, form); +} + +void Disassembler::VisitSVEInsertGeneralRegister(const Instruction *instr) { + const char *form = "'Zd.'t, 'Wn"; + if (instr->GetSVESize() == kDRegSizeInBytesLog2) { + form = "'Zd.'t, 'Xn"; + } + FormatWithDecodedMnemonic(instr, form); +} + +void Disassembler::VisitSVEInsertSIMDFPScalarRegister( + const Instruction *instr) { + FormatWithDecodedMnemonic(instr, "'Zd.'t, 'Vnv"); +} + +void Disassembler::VisitSVEIntAddSubtractImm_Unpredicated( + const Instruction *instr) { + const char *form = (instr->ExtractBit(13) == 0) + ? "'Zd.'t, 'Zd.'t, #'u1205" + : "'Zd.'t, 'Zd.'t, #'u1205, lsl #8"; + FormatWithDecodedMnemonic(instr, form); +} + +void Disassembler::VisitSVEIntAddSubtractVectors_Predicated( + const Instruction *instr) { + FormatWithDecodedMnemonic(instr, "'Zd.'t, 'Pgl/m, 'Zd.'t, 'Zn.'t"); +} + +void Disassembler::VisitSVEIntCompareScalarCountAndLimit( + const Instruction *instr) { + const char *form = + (instr->ExtractBit(12) == 0) ? "'Pd.'t, 'Wn, 'Wm" : "'Pd.'t, 'Xn, 'Xm"; + FormatWithDecodedMnemonic(instr, form); +} + +void Disassembler::VisitSVEIntConvertToFP(const Instruction *instr) { + const char *form = NULL; + switch (form_hash_) { + case "scvtf_z_p_z_h2fp16"_h: + case "ucvtf_z_p_z_h2fp16"_h: + form = "'Zd.h, 'Pgl/m, 'Zn.h"; + break; + case "scvtf_z_p_z_w2d"_h: + case "ucvtf_z_p_z_w2d"_h: + form = "'Zd.d, 'Pgl/m, 'Zn.s"; + break; + case "scvtf_z_p_z_w2fp16"_h: + case "ucvtf_z_p_z_w2fp16"_h: + form = "'Zd.h, 'Pgl/m, 'Zn.s"; + break; + case "scvtf_z_p_z_w2s"_h: + case "ucvtf_z_p_z_w2s"_h: + form = "'Zd.s, 'Pgl/m, 'Zn.s"; + break; + case "scvtf_z_p_z_x2d"_h: + case "ucvtf_z_p_z_x2d"_h: + form = "'Zd.d, 'Pgl/m, 'Zn.d"; + break; + case "scvtf_z_p_z_x2fp16"_h: + case "ucvtf_z_p_z_x2fp16"_h: + form = "'Zd.h, 'Pgl/m, 'Zn.d"; + break; + case "scvtf_z_p_z_x2s"_h: + case "ucvtf_z_p_z_x2s"_h: + form = "'Zd.s, 'Pgl/m, 'Zn.d"; + break; + } + FormatWithDecodedMnemonic(instr, form); +} + +void Disassembler::VisitSVEIntDivideVectors_Predicated( + const Instruction *instr) { + unsigned size = instr->GetSVESize(); + if ((size == kSRegSizeInBytesLog2) || (size == kDRegSizeInBytesLog2)) { + FormatWithDecodedMnemonic(instr, "'Zd.'t, 'Pgl/m, 'Zd.'t, 'Zn.'t"); + } else { + VisitUnallocated(instr); + } +} + +void Disassembler::VisitSVEIntMinMaxDifference_Predicated( + const Instruction *instr) { + FormatWithDecodedMnemonic(instr, "'Zd.'t, 'Pgl/m, 'Zd.'t, 'Zn.'t"); +} + +void Disassembler::VisitSVEIntMinMaxImm_Unpredicated(const Instruction *instr) { + const char *form = "'Zd.'t, 'Zd.'t, #"; + const char *suffix = "'u1205"; + + switch (form_hash_) { + case "smax_z_zi"_h: + case "smin_z_zi"_h: + suffix = "'s1205"; + break; + } + FormatWithDecodedMnemonic(instr, form, suffix); +} + +void Disassembler::VisitSVEIntMulImm_Unpredicated(const Instruction *instr) { + FormatWithDecodedMnemonic(instr, "'Zd.'t, 
'Zd.'t, #'s1205"); +} + +void Disassembler::VisitSVEIntMulVectors_Predicated(const Instruction *instr) { + FormatWithDecodedMnemonic(instr, "'Zd.'t, 'Pgl/m, 'Zd.'t, 'Zn.'t"); +} + +void Disassembler::VisitSVELoadAndBroadcastElement(const Instruction *instr) { + const char *form = "(SVELoadAndBroadcastElement)"; + const char *suffix_b = ", #'u2116]"; + const char *suffix_h = ", #'u2116*2]"; + const char *suffix_w = ", #'u2116*4]"; + const char *suffix_d = ", #'u2116*8]"; + const char *suffix = NULL; + + switch (form_hash_) { + case "ld1rb_z_p_bi_u8"_h: + form = "{'Zt.b}, 'Pgl/z, ['Xns"; + suffix = suffix_b; + break; + case "ld1rb_z_p_bi_u16"_h: + case "ld1rsb_z_p_bi_s16"_h: + form = "{'Zt.h}, 'Pgl/z, ['Xns"; + suffix = suffix_b; + break; + case "ld1rb_z_p_bi_u32"_h: + case "ld1rsb_z_p_bi_s32"_h: + form = "{'Zt.s}, 'Pgl/z, ['Xns"; + suffix = suffix_b; + break; + case "ld1rb_z_p_bi_u64"_h: + case "ld1rsb_z_p_bi_s64"_h: + form = "{'Zt.d}, 'Pgl/z, ['Xns"; + suffix = suffix_b; + break; + case "ld1rh_z_p_bi_u16"_h: + form = "{'Zt.h}, 'Pgl/z, ['Xns"; + suffix = suffix_h; + break; + case "ld1rh_z_p_bi_u32"_h: + case "ld1rsh_z_p_bi_s32"_h: + form = "{'Zt.s}, 'Pgl/z, ['Xns"; + suffix = suffix_h; + break; + case "ld1rh_z_p_bi_u64"_h: + case "ld1rsh_z_p_bi_s64"_h: + form = "{'Zt.d}, 'Pgl/z, ['Xns"; + suffix = suffix_h; + break; + case "ld1rw_z_p_bi_u32"_h: + form = "{'Zt.s}, 'Pgl/z, ['Xns"; + suffix = suffix_w; + break; + case "ld1rsw_z_p_bi_s64"_h: + case "ld1rw_z_p_bi_u64"_h: + form = "{'Zt.d}, 'Pgl/z, ['Xns"; + suffix = suffix_w; + break; + case "ld1rd_z_p_bi_u64"_h: + form = "{'Zt.d}, 'Pgl/z, ['Xns"; + suffix = suffix_d; + break; + } + + // Hide curly brackets if immediate is zero. + if (instr->ExtractBits(21, 16) == 0) { + suffix = "]"; + } + + FormatWithDecodedMnemonic(instr, form, suffix); +} + +void Disassembler::VisitSVELoadAndBroadcastQOWord_ScalarPlusImm( + const Instruction *instr) { + const char *form = "{'Zt.'tmsz}, 'Pgl/z, ['Xns"; + const char *suffix = ", #'s1916*16]"; + + switch (form_hash_) { + case "ld1rob_z_p_bi_u8"_h: + case "ld1rod_z_p_bi_u64"_h: + case "ld1roh_z_p_bi_u16"_h: + case "ld1row_z_p_bi_u32"_h: + suffix = ", #'s1916*32]"; + break; + } + if (instr->ExtractBits(19, 16) == 0) suffix = "]"; + + FormatWithDecodedMnemonic(instr, form, suffix); +} + +void Disassembler::VisitSVELoadAndBroadcastQOWord_ScalarPlusScalar( + const Instruction *instr) { + const char *form = "{'Zt.'tmsz}, 'Pgl/z, ['Xns, "; + const char *suffix = "'Rm, lsl #'u2423]"; + + switch (form_hash_) { + case "ld1rqb_z_p_br_contiguous"_h: + case "ld1rob_z_p_br_contiguous"_h: + suffix = "'Rm]"; + break; + } + FormatWithDecodedMnemonic(instr, form, suffix); +} + +void Disassembler::VisitSVELoadMultipleStructures_ScalarPlusImm( + const Instruction *instr) { + const char *form = "{'Zt.'tmsz, 'Zt2.'tmsz}"; + const char *form_3 = "{'Zt.'tmsz, 'Zt2.'tmsz, 'Zt3.'tmsz}"; + const char *form_4 = "{'Zt.'tmsz, 'Zt2.'tmsz, 'Zt3.'tmsz, 'Zt4.'tmsz}"; + const char *suffix = ", 'Pgl/z, ['Xns'ISveSvl]"; + + switch (form_hash_) { + case "ld3b_z_p_bi_contiguous"_h: + case "ld3d_z_p_bi_contiguous"_h: + case "ld3h_z_p_bi_contiguous"_h: + case "ld3w_z_p_bi_contiguous"_h: + form = form_3; + break; + case "ld4b_z_p_bi_contiguous"_h: + case "ld4d_z_p_bi_contiguous"_h: + case "ld4h_z_p_bi_contiguous"_h: + case "ld4w_z_p_bi_contiguous"_h: + form = form_4; + break; + } + FormatWithDecodedMnemonic(instr, form, suffix); +} + +void Disassembler::VisitSVELoadMultipleStructures_ScalarPlusScalar( + const Instruction *instr) { + const char *form = 
"{'Zt.'tmsz, 'Zt2.'tmsz}"; + const char *form_3 = "{'Zt.'tmsz, 'Zt2.'tmsz, 'Zt3.'tmsz}"; + const char *form_4 = "{'Zt.'tmsz, 'Zt2.'tmsz, 'Zt3.'tmsz, 'Zt4.'tmsz}"; + const char *suffix = ", 'Pgl/z, ['Xns, 'Xm'NSveS]"; + + switch (form_hash_) { + case "ld3b_z_p_br_contiguous"_h: + case "ld3d_z_p_br_contiguous"_h: + case "ld3h_z_p_br_contiguous"_h: + case "ld3w_z_p_br_contiguous"_h: + form = form_3; + break; + case "ld4b_z_p_br_contiguous"_h: + case "ld4d_z_p_br_contiguous"_h: + case "ld4h_z_p_br_contiguous"_h: + case "ld4w_z_p_br_contiguous"_h: + form = form_4; + break; + } + FormatWithDecodedMnemonic(instr, form, suffix); +} + +void Disassembler::VisitSVELoadPredicateRegister(const Instruction *instr) { + const char *form = "'Pd, ['Xns, #'s2116:1210, mul vl]"; + if (instr->Mask(0x003f1c00) == 0) { + form = "'Pd, ['Xns]"; + } + FormatWithDecodedMnemonic(instr, form); +} + +void Disassembler::VisitSVELoadVectorRegister(const Instruction *instr) { + const char *form = "'Zt, ['Xns, #'s2116:1210, mul vl]"; + if (instr->Mask(0x003f1c00) == 0) { + form = "'Zd, ['Xns]"; + } + FormatWithDecodedMnemonic(instr, form); +} + +void Disassembler::VisitSVEPartitionBreakCondition(const Instruction *instr) { + FormatWithDecodedMnemonic(instr, "'Pd.b, p'u1310/'?04:mz, 'Pn.b"); +} + +void Disassembler::VisitSVEPermutePredicateElements(const Instruction *instr) { + FormatWithDecodedMnemonic(instr, "'Pd.'t, 'Pn.'t, 'Pm.'t"); +} + +void Disassembler::VisitSVEPredicateFirstActive(const Instruction *instr) { + FormatWithDecodedMnemonic(instr, "'Pd.b, 'Pn, 'Pd.b"); +} + +void Disassembler::VisitSVEPredicateReadFromFFR_Unpredicated( + const Instruction *instr) { + FormatWithDecodedMnemonic(instr, "'Pd.b"); +} + +void Disassembler::VisitSVEPredicateTest(const Instruction *instr) { + FormatWithDecodedMnemonic(instr, "p'u1310, 'Pn.b"); +} + +void Disassembler::VisitSVEPredicateZero(const Instruction *instr) { + FormatWithDecodedMnemonic(instr, "'Pd.b"); +} + +void Disassembler::VisitSVEPropagateBreakToNextPartition( + const Instruction *instr) { + FormatWithDecodedMnemonic(instr, "'Pd.b, p'u1310/z, 'Pn.b, 'Pd.b"); +} + +void Disassembler::VisitSVEReversePredicateElements(const Instruction *instr) { + FormatWithDecodedMnemonic(instr, "'Pd.'t, 'Pn.'t"); +} + +void Disassembler::VisitSVEReverseVectorElements(const Instruction *instr) { + FormatWithDecodedMnemonic(instr, "'Zd.'t, 'Zn.'t"); +} + +void Disassembler::VisitSVEReverseWithinElements(const Instruction *instr) { + const char *mnemonic = "unimplemented"; + const char *form = "'Zd.'t, 'Pgl/m, 'Zn.'t"; + + unsigned size = instr->GetSVESize(); + switch (instr->Mask(SVEReverseWithinElementsMask)) { + case RBIT_z_p_z: + mnemonic = "rbit"; + break; + case REVB_z_z: + if ((size == kHRegSizeInBytesLog2) || (size == kSRegSizeInBytesLog2) || + (size == kDRegSizeInBytesLog2)) { + mnemonic = "revb"; + } else { + form = "(SVEReverseWithinElements)"; + } + break; + case REVH_z_z: + if ((size == kSRegSizeInBytesLog2) || (size == kDRegSizeInBytesLog2)) { + mnemonic = "revh"; + } else { + form = "(SVEReverseWithinElements)"; + } + break; + case REVW_z_z: + if (size == kDRegSizeInBytesLog2) { + mnemonic = "revw"; + } else { + form = "(SVEReverseWithinElements)"; + } + break; + default: + break; + } + Format(instr, mnemonic, form); +} + +void Disassembler::VisitSVESaturatingIncDecRegisterByElementCount( + const Instruction *instr) { + const char *form = IncDecFormHelper(instr, + "'R20d, 'Ipc, mul #'u1916+1", + "'R20d, 'Ipc", + "'R20d"); + const char *form_sx = IncDecFormHelper(instr, 
+ "'Xd, 'Wd, 'Ipc, mul #'u1916+1", + "'Xd, 'Wd, 'Ipc", + "'Xd, 'Wd"); + + switch (form_hash_) { + case "sqdecb_r_rs_sx"_h: + case "sqdecd_r_rs_sx"_h: + case "sqdech_r_rs_sx"_h: + case "sqdecw_r_rs_sx"_h: + case "sqincb_r_rs_sx"_h: + case "sqincd_r_rs_sx"_h: + case "sqinch_r_rs_sx"_h: + case "sqincw_r_rs_sx"_h: + form = form_sx; + break; + } + FormatWithDecodedMnemonic(instr, form); +} + +void Disassembler::VisitSVESaturatingIncDecVectorByElementCount( + const Instruction *instr) { + const char *form = IncDecFormHelper(instr, + "'Zd.'t, 'Ipc, mul #'u1916+1", + "'Zd.'t, 'Ipc", + "'Zd.'t"); + FormatWithDecodedMnemonic(instr, form); +} + +void Disassembler::VisitSVEStoreMultipleStructures_ScalarPlusImm( + const Instruction *instr) { + const char *form = "{'Zt.'tmsz, 'Zt2.'tmsz}"; + const char *form_3 = "{'Zt.'tmsz, 'Zt2.'tmsz, 'Zt3.'tmsz}"; + const char *form_4 = "{'Zt.'tmsz, 'Zt2.'tmsz, 'Zt3.'tmsz, 'Zt4.'tmsz}"; + const char *suffix = ", 'Pgl, ['Xns'ISveSvl]"; + + switch (form_hash_) { + case "st3b_z_p_bi_contiguous"_h: + case "st3h_z_p_bi_contiguous"_h: + case "st3w_z_p_bi_contiguous"_h: + case "st3d_z_p_bi_contiguous"_h: + form = form_3; + break; + case "st4b_z_p_bi_contiguous"_h: + case "st4h_z_p_bi_contiguous"_h: + case "st4w_z_p_bi_contiguous"_h: + case "st4d_z_p_bi_contiguous"_h: + form = form_4; + break; + } + FormatWithDecodedMnemonic(instr, form, suffix); +} + +void Disassembler::VisitSVEStoreMultipleStructures_ScalarPlusScalar( + const Instruction *instr) { + const char *form = "{'Zt.'tmsz, 'Zt2.'tmsz}"; + const char *form_3 = "{'Zt.'tmsz, 'Zt2.'tmsz, 'Zt3.'tmsz}"; + const char *form_4 = "{'Zt.'tmsz, 'Zt2.'tmsz, 'Zt3.'tmsz, 'Zt4.'tmsz}"; + const char *suffix = ", 'Pgl, ['Xns, 'Xm'NSveS]"; + + switch (form_hash_) { + case "st3b_z_p_br_contiguous"_h: + case "st3d_z_p_br_contiguous"_h: + case "st3h_z_p_br_contiguous"_h: + case "st3w_z_p_br_contiguous"_h: + form = form_3; + break; + case "st4b_z_p_br_contiguous"_h: + case "st4d_z_p_br_contiguous"_h: + case "st4h_z_p_br_contiguous"_h: + case "st4w_z_p_br_contiguous"_h: + form = form_4; + break; + } + FormatWithDecodedMnemonic(instr, form, suffix); +} + +void Disassembler::VisitSVEStorePredicateRegister(const Instruction *instr) { + const char *form = "'Pd, ['Xns, #'s2116:1210, mul vl]"; + if (instr->Mask(0x003f1c00) == 0) { + form = "'Pd, ['Xns]"; + } + FormatWithDecodedMnemonic(instr, form); +} + +void Disassembler::VisitSVEStoreVectorRegister(const Instruction *instr) { + const char *form = "'Zt, ['Xns, #'s2116:1210, mul vl]"; + if (instr->Mask(0x003f1c00) == 0) { + form = "'Zd, ['Xns]"; + } + FormatWithDecodedMnemonic(instr, form); +} + +void Disassembler::VisitSVETableLookup(const Instruction *instr) { + FormatWithDecodedMnemonic(instr, "'Zd.'t, {'Zn.'t}, 'Zm.'t"); +} + +void Disassembler::VisitSVEUnpackPredicateElements(const Instruction *instr) { + FormatWithDecodedMnemonic(instr, "'Pd.h, 'Pn.b"); +} + +void Disassembler::VisitSVEUnpackVectorElements(const Instruction *instr) { + if (instr->GetSVESize() == 0) { + // The lowest lane size of the destination vector is H-sized lane. 
+ VisitUnallocated(instr); + } else { + FormatWithDecodedMnemonic(instr, "'Zd.'t, 'Zn.'th"); + } +} + +void Disassembler::VisitSVEVectorSplice(const Instruction *instr) { + FormatWithDecodedMnemonic(instr, "'Zd.'t, 'Pgl, 'Zd.'t, 'Zn.'t"); +} + +void Disassembler::VisitSVEAddressGeneration(const Instruction *instr) { + const char *mnemonic = "adr"; + const char *form = "'Zd.d, ['Zn.d, 'Zm.d"; + const char *suffix = NULL; + + bool msz_is_zero = (instr->ExtractBits(11, 10) == 0); + + switch (instr->Mask(SVEAddressGenerationMask)) { + case ADR_z_az_d_s32_scaled: + suffix = msz_is_zero ? ", sxtw]" : ", sxtw #'u1110]"; + break; + case ADR_z_az_d_u32_scaled: + suffix = msz_is_zero ? ", uxtw]" : ", uxtw #'u1110]"; + break; + case ADR_z_az_s_same_scaled: + case ADR_z_az_d_same_scaled: + form = "'Zd.'t, ['Zn.'t, 'Zm.'t"; + suffix = msz_is_zero ? "]" : ", lsl #'u1110]"; + break; + default: + mnemonic = "unimplemented"; + form = "(SVEAddressGeneration)"; + break; + } + Format(instr, mnemonic, form, suffix); +} + +void Disassembler::VisitSVEBitwiseLogicalUnpredicated( + const Instruction *instr) { + const char *mnemonic = "unimplemented"; + const char *form = "'Zd.d, 'Zn.d, 'Zm.d"; + + switch (instr->Mask(SVEBitwiseLogicalUnpredicatedMask)) { + case AND_z_zz: + mnemonic = "and"; + break; + case BIC_z_zz: + mnemonic = "bic"; + break; + case EOR_z_zz: + mnemonic = "eor"; + break; + case ORR_z_zz: + mnemonic = "orr"; + if (instr->GetRn() == instr->GetRm()) { + mnemonic = "mov"; + form = "'Zd.d, 'Zn.d"; + } + break; + default: + break; + } + Format(instr, mnemonic, form); +} + +void Disassembler::VisitSVEBitwiseShiftUnpredicated(const Instruction *instr) { + const char *mnemonic = "unimplemented"; + const char *form = "(SVEBitwiseShiftUnpredicated)"; + unsigned tsize = + (instr->ExtractBits(23, 22) << 2) | instr->ExtractBits(20, 19); + unsigned lane_size = instr->GetSVESize(); + + const char *suffix = NULL; + const char *form_i = "'Zd.'tszs, 'Zn.'tszs, "; + + switch (form_hash_) { + case "asr_z_zi"_h: + case "lsr_z_zi"_h: + case "sri_z_zzi"_h: + case "srsra_z_zi"_h: + case "ssra_z_zi"_h: + case "ursra_z_zi"_h: + case "usra_z_zi"_h: + if (tsize != 0) { + // The tsz field must not be zero. + mnemonic = mnemonic_.c_str(); + form = form_i; + suffix = "'ITriSves"; + } + break; + case "lsl_z_zi"_h: + case "sli_z_zzi"_h: + if (tsize != 0) { + // The tsz field must not be zero. 
+ mnemonic = mnemonic_.c_str(); + form = form_i; + suffix = "'ITriSver"; + } + break; + case "asr_z_zw"_h: + case "lsl_z_zw"_h: + case "lsr_z_zw"_h: + if (lane_size <= kSRegSizeInBytesLog2) { + mnemonic = mnemonic_.c_str(); + form = "'Zd.'t, 'Zn.'t, 'Zm.d"; + } + break; + default: + break; + } + + Format(instr, mnemonic, form, suffix); +} + +void Disassembler::VisitSVEElementCount(const Instruction *instr) { + const char *form = + IncDecFormHelper(instr, "'Xd, 'Ipc, mul #'u1916+1", "'Xd, 'Ipc", "'Xd"); + FormatWithDecodedMnemonic(instr, form); +} + +void Disassembler::VisitSVEFPAccumulatingReduction(const Instruction *instr) { + if (instr->GetSVEVectorFormat() == kFormatVnB) { + VisitUnallocated(instr); + } else { + FormatWithDecodedMnemonic(instr, "'t'u0400, 'Pgl, 't'u0400, 'Zn.'t"); + } +} + +void Disassembler::VisitSVEFPArithmeticUnpredicated(const Instruction *instr) { + if (instr->GetSVEVectorFormat() == kFormatVnB) { + VisitUnallocated(instr); + } else { + FormatWithDecodedMnemonic(instr, "'Zd.'t, 'Zn.'t, 'Zm.'t"); + } +} + +void Disassembler::VisitSVEFPCompareVectors(const Instruction *instr) { + if (instr->GetSVEVectorFormat() == kFormatVnB) { + VisitUnallocated(instr); + } else { + FormatWithDecodedMnemonic(instr, "'Pd.'t, 'Pgl/z, 'Zn.'t, 'Zm.'t"); + } +} + +void Disassembler::VisitSVEFPCompareWithZero(const Instruction *instr) { + if (instr->GetSVEVectorFormat() == kFormatVnB) { + VisitUnallocated(instr); + } else { + FormatWithDecodedMnemonic(instr, "'Pd.'t, 'Pgl/z, 'Zn.'t, #0.0"); + } +} + +void Disassembler::VisitSVEFPComplexAddition(const Instruction *instr) { + // Bit 15 is always set, so this gives 90 * 1 or 3. + const char *form = "'Zd.'t, 'Pgl/m, 'Zd.'t, 'Zn.'t, #'u1615*90"; + if (instr->GetSVEVectorFormat() == kFormatVnB) { + VisitUnallocated(instr); + } else { + FormatWithDecodedMnemonic(instr, form); + } +} + +void Disassembler::VisitSVEFPComplexMulAdd(const Instruction *instr) { + const char *form = "'Zd.'t, 'Pgl/m, 'Zn.'t, 'Zm.'t, #'u1413*90"; + if (instr->GetSVEVectorFormat() == kFormatVnB) { + VisitUnallocated(instr); + } else { + FormatWithDecodedMnemonic(instr, form); + } +} + +void Disassembler::VisitSVEFPComplexMulAddIndex(const Instruction *instr) { + const char *form = "'Zd.h, 'Zn.h, z'u1816.h['u2019]"; + const char *suffix = ", #'u1110*90"; + switch (form_hash_) { + case "fcmla_z_zzzi_s"_h: + form = "'Zd.s, 'Zn.s, z'u1916.s['u2020]"; + break; + } + FormatWithDecodedMnemonic(instr, form, suffix); +} + +void Disassembler::VisitSVEFPFastReduction(const Instruction *instr) { + if (instr->GetSVEVectorFormat() == kFormatVnB) { + VisitUnallocated(instr); + } else { + FormatWithDecodedMnemonic(instr, "'t'u0400, 'Pgl, 'Zn.'t"); + } +} + +void Disassembler::VisitSVEFPMulIndex(const Instruction *instr) { + const char *form = "'Zd.h, 'Zn.h, z'u1816.h['u2222:2019]"; + switch (form_hash_) { + case "fmul_z_zzi_d"_h: + form = "'Zd.d, 'Zn.d, z'u1916.d['u2020]"; + break; + case "fmul_z_zzi_s"_h: + form = "'Zd.s, 'Zn.s, z'u1816.s['u2019]"; + break; + } + FormatWithDecodedMnemonic(instr, form); +} + +void Disassembler::VisitSVEFPMulAdd(const Instruction *instr) { + if (instr->GetSVEVectorFormat() == kFormatVnB) { + VisitUnallocated(instr); + } else { + FormatWithDecodedMnemonic(instr, "'Zd.'t, 'Pgl/m, 'Zn.'t, 'Zm.'t"); + } +} + +void Disassembler::VisitSVEFPMulAddIndex(const Instruction *instr) { + const char *form = "'Zd.h, 'Zn.h, z'u1816.h['u2222:2019]"; + switch (form_hash_) { + case "fmla_z_zzzi_s"_h: + case "fmls_z_zzzi_s"_h: + form = "'Zd.s, 'Zn.s, z'u1816.s['u2019]"; + 
break; + case "fmla_z_zzzi_d"_h: + case "fmls_z_zzzi_d"_h: + form = "'Zd.d, 'Zn.d, z'u1916.d['u2020]"; + break; + } + FormatWithDecodedMnemonic(instr, form); +} + +void Disassembler::VisitSVEFPUnaryOpUnpredicated(const Instruction *instr) { + if (instr->GetSVEVectorFormat() == kFormatVnB) { + VisitUnallocated(instr); + } else { + FormatWithDecodedMnemonic(instr, "'Zd.'t, 'Zn.'t"); + } +} + +void Disassembler::VisitSVEIncDecByPredicateCount(const Instruction *instr) { + const char *form = "'Zd.'t, 'Pn"; + switch (form_hash_) { + // , . + case "decp_r_p_r"_h: + case "incp_r_p_r"_h: + form = "'Xd, 'Pn.'t"; + break; + // , ., + case "sqdecp_r_p_r_sx"_h: + case "sqincp_r_p_r_sx"_h: + form = "'Xd, 'Pn.'t, 'Wd"; + break; + // , . + case "sqdecp_r_p_r_x"_h: + case "sqincp_r_p_r_x"_h: + case "uqdecp_r_p_r_x"_h: + case "uqincp_r_p_r_x"_h: + form = "'Xd, 'Pn.'t"; + break; + // , . + case "uqdecp_r_p_r_uw"_h: + case "uqincp_r_p_r_uw"_h: + form = "'Wd, 'Pn.'t"; + break; + } + FormatWithDecodedMnemonic(instr, form); +} + +void Disassembler::VisitSVEIndexGeneration(const Instruction *instr) { + const char *form = "'Zd.'t, #'s0905, #'s2016"; + bool w_inputs = + static_cast(instr->GetSVESize()) <= kWRegSizeInBytesLog2; + + switch (form_hash_) { + case "index_z_ir"_h: + form = w_inputs ? "'Zd.'t, #'s0905, 'Wm" : "'Zd.'t, #'s0905, 'Xm"; + break; + case "index_z_ri"_h: + form = w_inputs ? "'Zd.'t, 'Wn, #'s2016" : "'Zd.'t, 'Xn, #'s2016"; + break; + case "index_z_rr"_h: + form = w_inputs ? "'Zd.'t, 'Wn, 'Wm" : "'Zd.'t, 'Xn, 'Xm"; + break; + } + FormatWithDecodedMnemonic(instr, form); +} + +void Disassembler::VisitSVEIntArithmeticUnpredicated(const Instruction *instr) { + FormatWithDecodedMnemonic(instr, "'Zd.'t, 'Zn.'t, 'Zm.'t"); +} + +void Disassembler::VisitSVEIntCompareSignedImm(const Instruction *instr) { + FormatWithDecodedMnemonic(instr, "'Pd.'t, 'Pgl/z, 'Zn.'t, #'s2016"); +} + +void Disassembler::VisitSVEIntCompareUnsignedImm(const Instruction *instr) { + FormatWithDecodedMnemonic(instr, "'Pd.'t, 'Pgl/z, 'Zn.'t, #'u2014"); +} + +void Disassembler::VisitSVEIntCompareVectors(const Instruction *instr) { + const char *form = "'Pd.'t, 'Pgl/z, 'Zn.'t, 'Zm."; + const char *suffix = "d"; + switch (form_hash_) { + case "cmpeq_p_p_zz"_h: + case "cmpge_p_p_zz"_h: + case "cmpgt_p_p_zz"_h: + case "cmphi_p_p_zz"_h: + case "cmphs_p_p_zz"_h: + case "cmpne_p_p_zz"_h: + suffix = "'t"; + break; + } + FormatWithDecodedMnemonic(instr, form, suffix); +} + +void Disassembler::VisitSVEIntMulAddPredicated(const Instruction *instr) { + const char *form = "'Zd.'t, 'Pgl/m, "; + const char *suffix = "'Zn.'t, 'Zm.'t"; + switch (form_hash_) { + case "mad_z_p_zzz"_h: + case "msb_z_p_zzz"_h: + suffix = "'Zm.'t, 'Zn.'t"; + break; + } + FormatWithDecodedMnemonic(instr, form, suffix); +} + +void Disassembler::VisitSVEIntMulAddUnpredicated(const Instruction *instr) { + if (static_cast(instr->GetSVESize()) >= kSRegSizeInBytesLog2) { + FormatWithDecodedMnemonic(instr, "'Zd.'t, 'Zn.'tq, 'Zm.'tq"); + } else { + VisitUnallocated(instr); + } +} + +void Disassembler::VisitSVEMovprfx(const Instruction *instr) { + FormatWithDecodedMnemonic(instr, "'Zd.'t, 'Pgl/'?16:mz, 'Zn.'t"); +} + +void Disassembler::VisitSVEIntReduction(const Instruction *instr) { + const char *form = "'Vdv, 'Pgl, 'Zn.'t"; + switch (form_hash_) { + case "saddv_r_p_z"_h: + case "uaddv_r_p_z"_h: + form = "'Dd, 'Pgl, 'Zn.'t"; + break; + } + FormatWithDecodedMnemonic(instr, form); +} + +void Disassembler::VisitSVEIntUnaryArithmeticPredicated( + const Instruction *instr) { + 
VectorFormat vform = instr->GetSVEVectorFormat(); + + switch (form_hash_) { + case "sxtw_z_p_z"_h: + case "uxtw_z_p_z"_h: + if (vform == kFormatVnS) { + VisitUnallocated(instr); + return; + } + VIXL_FALLTHROUGH(); + case "sxth_z_p_z"_h: + case "uxth_z_p_z"_h: + if (vform == kFormatVnH) { + VisitUnallocated(instr); + return; + } + VIXL_FALLTHROUGH(); + case "sxtb_z_p_z"_h: + case "uxtb_z_p_z"_h: + case "fabs_z_p_z"_h: + case "fneg_z_p_z"_h: + if (vform == kFormatVnB) { + VisitUnallocated(instr); + return; + } + break; + } + + FormatWithDecodedMnemonic(instr, "'Zd.'t, 'Pgl/m, 'Zn.'t"); +} + +void Disassembler::VisitSVEMulIndex(const Instruction *instr) { + const char *form = "'Zd.s, 'Zn.b, z'u1816.b['u2019]"; + + switch (form_hash_) { + case "sdot_z_zzzi_d"_h: + case "udot_z_zzzi_d"_h: + form = "'Zd.d, 'Zn.h, z'u1916.h['u2020]"; + break; + } + + FormatWithDecodedMnemonic(instr, form); +} + +void Disassembler::VisitSVEPermuteVectorExtract(const Instruction *instr) { + FormatWithDecodedMnemonic(instr, "'Zd.b, 'Zd.b, 'Zn.b, #'u2016:1210"); +} + +void Disassembler::VisitSVEPermuteVectorInterleaving(const Instruction *instr) { + FormatWithDecodedMnemonic(instr, "'Zd.'t, 'Zn.'t, 'Zm.'t"); +} + +void Disassembler::VisitSVEPredicateCount(const Instruction *instr) { + FormatWithDecodedMnemonic(instr, "'Xd, p'u1310, 'Pn.'t"); +} + +void Disassembler::VisitSVEPredicateLogical(const Instruction *instr) { + const char *mnemonic = mnemonic_.c_str(); + const char *form = "'Pd.b, p'u1310/z, 'Pn.b, 'Pm.b"; + + int pd = instr->GetPd(); + int pn = instr->GetPn(); + int pm = instr->GetPm(); + int pg = instr->ExtractBits(13, 10); + + switch (form_hash_) { + case "ands_p_p_pp_z"_h: + if (pn == pm) { + mnemonic = "movs"; + form = "'Pd.b, p'u1310/z, 'Pn.b"; + } + break; + case "and_p_p_pp_z"_h: + if (pn == pm) { + mnemonic = "mov"; + form = "'Pd.b, p'u1310/z, 'Pn.b"; + } + break; + case "eors_p_p_pp_z"_h: + if (pm == pg) { + mnemonic = "nots"; + form = "'Pd.b, 'Pm/z, 'Pn.b"; + } + break; + case "eor_p_p_pp_z"_h: + if (pm == pg) { + mnemonic = "not"; + form = "'Pd.b, 'Pm/z, 'Pn.b"; + } + break; + case "orrs_p_p_pp_z"_h: + if ((pn == pm) && (pn == pg)) { + mnemonic = "movs"; + form = "'Pd.b, 'Pn.b"; + } + break; + case "orr_p_p_pp_z"_h: + if ((pn == pm) && (pn == pg)) { + mnemonic = "mov"; + form = "'Pd.b, 'Pn.b"; + } + break; + case "sel_p_p_pp"_h: + if (pd == pm) { + mnemonic = "mov"; + form = "'Pd.b, p'u1310/m, 'Pn.b"; + } else { + form = "'Pd.b, p'u1310, 'Pn.b, 'Pm.b"; + } + break; + } + Format(instr, mnemonic, form); +} + +void Disassembler::VisitSVEPredicateInitialize(const Instruction *instr) { + const char *form = "'Pd.'t, 'Ipc"; + // Omit the pattern if it is the default ('ALL'). 
+ if (instr->ExtractBits(9, 5) == SVE_ALL) form = "'Pd.'t"; + FormatWithDecodedMnemonic(instr, form); +} + +void Disassembler::VisitSVEPredicateNextActive(const Instruction *instr) { + FormatWithDecodedMnemonic(instr, "'Pd.'t, 'Pn, 'Pd.'t"); +} + +void Disassembler::VisitSVEPredicateReadFromFFR_Predicated( + const Instruction *instr) { + FormatWithDecodedMnemonic(instr, "'Pd.b, 'Pn/z"); +} + +void Disassembler::VisitSVEPropagateBreak(const Instruction *instr) { + FormatWithDecodedMnemonic(instr, "'Pd.b, p'u1310/z, 'Pn.b, 'Pm.b"); +} + +void Disassembler::VisitSVEStackFrameAdjustment(const Instruction *instr) { + FormatWithDecodedMnemonic(instr, "'Xds, 'Xms, #'s1005"); +} + +void Disassembler::VisitSVEStackFrameSize(const Instruction *instr) { + FormatWithDecodedMnemonic(instr, "'Xd, #'s1005"); +} + +void Disassembler::VisitSVEVectorSelect(const Instruction *instr) { + const char *mnemonic = mnemonic_.c_str(); + const char *form = "'Zd.'t, p'u1310, 'Zn.'t, 'Zm.'t"; + + if (instr->GetRd() == instr->GetRm()) { + mnemonic = "mov"; + form = "'Zd.'t, p'u1310/m, 'Zn.'t"; + } + + Format(instr, mnemonic, form); +} + +void Disassembler::VisitSVEContiguousLoad_ScalarPlusImm( + const Instruction *instr) { + const char *form = "{'Zt.'tlss}, 'Pgl/z, ['Xns"; + const char *suffix = + (instr->ExtractBits(19, 16) == 0) ? "]" : ", #'s1916, mul vl]"; + FormatWithDecodedMnemonic(instr, form, suffix); +} + +void Disassembler::VisitSVEContiguousLoad_ScalarPlusScalar( + const Instruction *instr) { + const char *form = "{'Zt.'tlss}, 'Pgl/z, ['Xns, 'Xm"; + const char *suffix = "]"; + + switch (form_hash_) { + case "ld1h_z_p_br_u16"_h: + case "ld1h_z_p_br_u32"_h: + case "ld1h_z_p_br_u64"_h: + case "ld1w_z_p_br_u32"_h: + case "ld1w_z_p_br_u64"_h: + case "ld1d_z_p_br_u64"_h: + suffix = ", lsl #'u2423]"; + break; + case "ld1sh_z_p_br_s32"_h: + case "ld1sh_z_p_br_s64"_h: + suffix = ", lsl #1]"; + break; + case "ld1sw_z_p_br_s64"_h: + suffix = ", lsl #2]"; + break; + } + + FormatWithDecodedMnemonic(instr, form, suffix); +} + +void Disassembler::VisitReserved(const Instruction *instr) { + // UDF is the only instruction in this group, and the Decoder is precise. 
+ VIXL_ASSERT(instr->Mask(ReservedMask) == UDF); + Format(instr, "udf", "'IUdf"); +} + +void Disassembler::VisitUnimplemented(const Instruction *instr) { + Format(instr, "unimplemented", "(Unimplemented)"); +} + + +void Disassembler::VisitUnallocated(const Instruction *instr) { + Format(instr, "unallocated", "(Unallocated)"); +} + +void Disassembler::Visit(Metadata *metadata, const Instruction *instr) { + VIXL_ASSERT(metadata->count("form") > 0); + const std::string &form = (*metadata)["form"]; + form_hash_ = Hash(form.c_str()); + const FormToVisitorFnMap *fv = Disassembler::GetFormToVisitorFnMap(); + FormToVisitorFnMap::const_iterator it = fv->find(form_hash_); + if (it == fv->end()) { + VisitUnimplemented(instr); + } else { + SetMnemonicFromForm(form); + (it->second)(this, instr); + } +} + +void Disassembler::Disassemble_PdT_PgZ_ZnT_ZmT(const Instruction *instr) { + const char *form = "'Pd.'t, 'Pgl/z, 'Zn.'t, 'Zm.'t"; + VectorFormat vform = instr->GetSVEVectorFormat(); + + if ((vform == kFormatVnS) || (vform == kFormatVnD)) { + Format(instr, "unimplemented", "(PdT_PgZ_ZnT_ZmT)"); + } else { + Format(instr, mnemonic_.c_str(), form); + } +} + +void Disassembler::Disassemble_ZdB_Zn1B_Zn2B_imm(const Instruction *instr) { + const char *form = "'Zd.b, {'Zn.b, 'Zn2.b}, #'u2016:1210"; + Format(instr, mnemonic_.c_str(), form); +} + +void Disassembler::Disassemble_ZdB_ZnB_ZmB(const Instruction *instr) { + const char *form = "'Zd.b, 'Zn.b, 'Zm.b"; + if (instr->GetSVEVectorFormat() == kFormatVnB) { + Format(instr, mnemonic_.c_str(), form); + } else { + Format(instr, "unimplemented", "(ZdB_ZnB_ZmB)"); + } +} + +void Disassembler::Disassemble_ZdD_PgM_ZnS(const Instruction *instr) { + const char *form = "'Zd.d, 'Pgl/m, 'Zn.s"; + Format(instr, mnemonic_.c_str(), form); +} + +void Disassembler::Disassemble_ZdD_ZnD_ZmD(const Instruction *instr) { + const char *form = "'Zd.d, 'Zn.d, 'Zm.d"; + Format(instr, mnemonic_.c_str(), form); +} + +void Disassembler::Disassemble_ZdD_ZnD_ZmD_imm(const Instruction *instr) { + const char *form = "'Zd.d, 'Zn.d, z'u1916.d['u2020]"; + Format(instr, mnemonic_.c_str(), form); +} + +void Disassembler::Disassemble_ZdD_ZnS_ZmS_imm(const Instruction *instr) { + const char *form = "'Zd.d, 'Zn.s, z'u1916.s['u2020:1111]"; + Format(instr, mnemonic_.c_str(), form); +} + +void Disassembler::Disassemble_ZdH_PgM_ZnS(const Instruction *instr) { + const char *form = "'Zd.h, 'Pgl/m, 'Zn.s"; + Format(instr, mnemonic_.c_str(), form); +} + +void Disassembler::Disassemble_ZdH_ZnH_ZmH_imm(const Instruction *instr) { + const char *form = "'Zd.h, 'Zn.h, z'u1816.h['u2222:2019]"; + Format(instr, mnemonic_.c_str(), form); +} + +void Disassembler::Disassemble_ZdS_PgM_ZnD(const Instruction *instr) { + const char *form = "'Zd.s, 'Pgl/m, 'Zn.d"; + Format(instr, mnemonic_.c_str(), form); +} + +void Disassembler::Disassemble_ZdS_PgM_ZnH(const Instruction *instr) { + const char *form = "'Zd.s, 'Pgl/m, 'Zn.h"; + Format(instr, mnemonic_.c_str(), form); +} + +void Disassembler::Disassemble_ZdS_PgM_ZnS(const Instruction *instr) { + const char *form = "'Zd.s, 'Pgl/m, 'Zn.s"; + if (instr->GetSVEVectorFormat() == kFormatVnS) { + Format(instr, mnemonic_.c_str(), form); + } else { + Format(instr, "unimplemented", "(ZdS_PgM_ZnS)"); + } +} + +void Disassembler::Disassemble_ZdS_ZnH_ZmH_imm(const Instruction *instr) { + const char *form = "'Zd.s, 'Zn.h, z'u1816.h['u2019:1111]"; + Format(instr, mnemonic_.c_str(), form); +} + +void Disassembler::Disassemble_ZdS_ZnS_ZmS(const Instruction *instr) { + const char *form = 
"'Zd.s, 'Zn.s, 'Zm.s"; + Format(instr, mnemonic_.c_str(), form); +} + +void Disassembler::Disassemble_ZdS_ZnS_ZmS_imm(const Instruction *instr) { + const char *form = "'Zd.s, 'Zn.s, z'u1816.s['u2019]"; + Format(instr, mnemonic_.c_str(), form); +} + +void Disassembler::DisassembleSVEFlogb(const Instruction *instr) { + const char *form = "'Zd.'tf, 'Pgl/m, 'Zn.'tf"; + if (instr->GetSVEVectorFormat(17) == kFormatVnB) { + Format(instr, "unimplemented", "(SVEFlogb)"); + } else { + Format(instr, mnemonic_.c_str(), form); + } +} + +void Disassembler::Disassemble_ZdT_PgM_ZnT(const Instruction *instr) { + const char *form = "'Zd.'t, 'Pgl/m, 'Zn.'t"; + Format(instr, mnemonic_.c_str(), form); +} + +void Disassembler::Disassemble_ZdT_PgZ_ZnT_ZmT(const Instruction *instr) { + const char *form = "'Zd.'t, 'Pgl/z, 'Zn.'t, 'Zm.'t"; + VectorFormat vform = instr->GetSVEVectorFormat(); + if ((vform == kFormatVnS) || (vform == kFormatVnD)) { + Format(instr, mnemonic_.c_str(), form); + } else { + Format(instr, "unimplemented", "(ZdT_PgZ_ZnT_ZmT)"); + } +} + +void Disassembler::Disassemble_ZdT_Pg_Zn1T_Zn2T(const Instruction *instr) { + const char *form = "'Zd.'t, 'Pgl, {'Zn.'t, 'Zn2.'t}"; + Format(instr, mnemonic_.c_str(), form); +} + +void Disassembler::Disassemble_ZdT_Zn1T_Zn2T_ZmT(const Instruction *instr) { + const char *form = "'Zd.'t, {'Zn.'t, 'Zn2.'t}, 'Zm.'t"; + Format(instr, mnemonic_.c_str(), form); +} + +void Disassembler::Disassemble_ZdT_ZnT_ZmT(const Instruction *instr) { + const char *form = "'Zd.'t, 'Zn.'t, 'Zm.'t"; + Format(instr, mnemonic_.c_str(), form); +} + +void Disassembler::Disassemble_ZdT_ZnT_ZmTb(const Instruction *instr) { + const char *form = "'Zd.'t, 'Zn.'t, 'Zm.'th"; + if (instr->GetSVEVectorFormat() == kFormatVnB) { + Format(instr, "unimplemented", "(ZdT_ZnT_ZmTb)"); + } else { + Format(instr, mnemonic_.c_str(), form); + } +} + +void Disassembler::Disassemble_ZdT_ZnTb(const Instruction *instr) { + const char *form = "'Zd.'tszs, 'Zn.'tszd"; + std::pair shift_and_lane_size = + instr->GetSVEImmShiftAndLaneSizeLog2(/* is_predicated = */ false); + int shift_dist = shift_and_lane_size.first; + int lane_size = shift_and_lane_size.second; + // Convert shift_dist from a right to left shift. Valid xtn instructions + // must have a left shift_dist equivalent of zero. + shift_dist = (8 << lane_size) - shift_dist; + if ((lane_size >= static_cast(kBRegSizeInBytesLog2)) && + (lane_size <= static_cast(kSRegSizeInBytesLog2)) && + (shift_dist == 0)) { + Format(instr, mnemonic_.c_str(), form); + } else { + Format(instr, "unimplemented", "(ZdT_ZnTb)"); + } +} + +void Disassembler::Disassemble_ZdT_ZnTb_ZmTb(const Instruction *instr) { + const char *form = "'Zd.'t, 'Zn.'th, 'Zm.'th"; + if (instr->GetSVEVectorFormat() == kFormatVnB) { + // TODO: This is correct for saddlbt, ssublbt, subltb, which don't have + // b-lane sized form, and for pmull[b|t] as feature `SVEPmull128` isn't + // supported, but may need changes for other instructions reaching here. 
+ Format(instr, "unimplemented", "(ZdT_ZnTb_ZmTb)"); + } else { + Format(instr, mnemonic_.c_str(), form); + } +} + +void Disassembler::DisassembleSVEAddSubHigh(const Instruction *instr) { + const char *form = "'Zd.'th, 'Zn.'t, 'Zm.'t"; + if (instr->GetSVEVectorFormat() == kFormatVnB) { + Format(instr, "unimplemented", "(SVEAddSubHigh)"); + } else { + Format(instr, mnemonic_.c_str(), form); + } +} + +void Disassembler::DisassembleSVEShiftLeftImm(const Instruction *instr) { + const char *form = "'Zd.'tszd, 'Zn.'tszs, 'ITriSver"; + std::pair shift_and_lane_size = + instr->GetSVEImmShiftAndLaneSizeLog2(/* is_predicated = */ false); + int lane_size = shift_and_lane_size.second; + if ((lane_size >= static_cast(kBRegSizeInBytesLog2)) && + (lane_size <= static_cast(kSRegSizeInBytesLog2))) { + Format(instr, mnemonic_.c_str(), form); + } else { + Format(instr, "unimplemented", "(SVEShiftLeftImm)"); + } +} + +void Disassembler::DisassembleSVEShiftRightImm(const Instruction *instr) { + const char *form = "'Zd.'tszs, 'Zn.'tszd, 'ITriSves"; + std::pair shift_and_lane_size = + instr->GetSVEImmShiftAndLaneSizeLog2(/* is_predicated = */ false); + int lane_size = shift_and_lane_size.second; + if ((lane_size >= static_cast(kBRegSizeInBytesLog2)) && + (lane_size <= static_cast(kSRegSizeInBytesLog2))) { + Format(instr, mnemonic_.c_str(), form); + } else { + Format(instr, "unimplemented", "(SVEShiftRightImm)"); + } +} + +void Disassembler::Disassemble_ZdaD_ZnD_ZmD_imm(const Instruction *instr) { + const char *form = "'Zd.d, 'Zn.d, z'u1916.d['u2020]"; + Format(instr, mnemonic_.c_str(), form); +} + +void Disassembler::Disassemble_ZdaD_ZnH_ZmH_imm_const( + const Instruction *instr) { + const char *form = "'Zd.d, 'Zn.h, z'u1916.h['u2020], #'u1110*90"; + Format(instr, mnemonic_.c_str(), form); +} + +void Disassembler::Disassemble_ZdaD_ZnS_ZmS_imm(const Instruction *instr) { + const char *form = "'Zd.d, 'Zn.s, z'u1916.s['u2020:1111]"; + Format(instr, mnemonic_.c_str(), form); +} + +void Disassembler::Disassemble_ZdaH_ZnH_ZmH_imm(const Instruction *instr) { + const char *form = "'Zd.h, 'Zn.h, z'u1816.h['u2222:2019]"; + Format(instr, mnemonic_.c_str(), form); +} + +void Disassembler::Disassemble_ZdaH_ZnH_ZmH_imm_const( + const Instruction *instr) { + const char *form = "'Zd.h, 'Zn.h, z'u1816.h['u2019], #'u1110*90"; + Format(instr, mnemonic_.c_str(), form); +} + +void Disassembler::Disassemble_ZdaS_ZnB_ZmB(const Instruction *instr) { + const char *form = "'Zd.s, 'Zn.b, 'Zm.b"; + Format(instr, mnemonic_.c_str(), form); +} + +void Disassembler::Disassemble_ZdaS_ZnB_ZmB_imm_const( + const Instruction *instr) { + const char *form = "'Zd.s, 'Zn.b, z'u1816.b['u2019], #'u1110*90"; + Format(instr, mnemonic_.c_str(), form); +} + +void Disassembler::Disassemble_ZdaS_ZnH_ZmH(const Instruction *instr) { + const char *form = "'Zd.s, 'Zn.h, 'Zm.h"; + Format(instr, mnemonic_.c_str(), form); +} + +void Disassembler::Disassemble_ZdaS_ZnH_ZmH_imm(const Instruction *instr) { + const char *form = "'Zd.s, 'Zn.h, z'u1816.h['u2019:1111]"; + Format(instr, mnemonic_.c_str(), form); +} + +void Disassembler::Disassemble_ZdaS_ZnS_ZmS_imm(const Instruction *instr) { + const char *form = "'Zd.s, 'Zn.s, z'u1816.s['u2019]"; + Format(instr, mnemonic_.c_str(), form); +} + +void Disassembler::Disassemble_ZdaS_ZnS_ZmS_imm_const( + const Instruction *instr) { + const char *form = "'Zd.s, 'Zn.s, z'u1916.s['u2020], #'u1110*90"; + Format(instr, mnemonic_.c_str(), form); +} + +void Disassembler::Disassemble_ZdaT_PgM_ZnTb(const Instruction *instr) { + const 
char *form = "'Zd.'t, 'Pgl/m, 'Zn.'th"; + + if (instr->GetSVESize() == 0) { + // The lowest lane size of the destination vector is H-sized lane. + Format(instr, "unimplemented", "(Disassemble_ZdaT_PgM_ZnTb)"); + return; + } + + Format(instr, mnemonic_.c_str(), form); +} + +void Disassembler::DisassembleSVEAddSubCarry(const Instruction *instr) { + const char *form = "'Zd.'?22:ds, 'Zn.'?22:ds, 'Zm.'?22:ds"; + Format(instr, mnemonic_.c_str(), form); +} + +void Disassembler::Disassemble_ZdaT_ZnT_ZmT(const Instruction *instr) { + const char *form = "'Zd.'t, 'Zn.'t, 'Zm.'t"; + Format(instr, mnemonic_.c_str(), form); +} + +void Disassembler::Disassemble_ZdaT_ZnT_ZmT_const(const Instruction *instr) { + const char *form = "'Zd.'t, 'Zn.'t, 'Zm.'t, #'u1110*90"; + Format(instr, mnemonic_.c_str(), form); +} + +void Disassembler::Disassemble_ZdaT_ZnTb_ZmTb(const Instruction *instr) { + const char *form = "'Zd.'t, 'Zn.'th, 'Zm.'th"; + if (instr->GetSVEVectorFormat() == kFormatVnB) { + Format(instr, "unimplemented", "(ZdaT_ZnTb_ZmTb)"); + } else { + Format(instr, mnemonic_.c_str(), form); + } +} + +void Disassembler::Disassemble_ZdaT_ZnTb_ZmTb_const(const Instruction *instr) { + const char *form = "'Zd.'t, 'Zn.'tq, 'Zm.'tq, #'u1110*90"; + VectorFormat vform = instr->GetSVEVectorFormat(); + + if ((vform == kFormatVnB) || (vform == kFormatVnH)) { + Format(instr, "unimplemented", "(ZdaT_ZnTb_ZmTb_const)"); + } else { + Format(instr, mnemonic_.c_str(), form); + } +} + +void Disassembler::Disassemble_ZdnB_ZdnB(const Instruction *instr) { + const char *form = "'Zd.b, 'Zd.b"; + Format(instr, mnemonic_.c_str(), form); +} + +void Disassembler::Disassemble_ZdnB_ZdnB_ZmB(const Instruction *instr) { + const char *form = "'Zd.b, 'Zd.b, 'Zn.b"; + Format(instr, mnemonic_.c_str(), form); +} + +void Disassembler::DisassembleSVEBitwiseTernary(const Instruction *instr) { + const char *form = "'Zd.d, 'Zd.d, 'Zm.d, 'Zn.d"; + Format(instr, mnemonic_.c_str(), form); +} + +void Disassembler::Disassemble_ZdnS_ZdnS_ZmS(const Instruction *instr) { + const char *form = "'Zd.s, 'Zd.s, 'Zn.s"; + Format(instr, mnemonic_.c_str(), form); +} + +void Disassembler::DisassembleSVEFPPair(const Instruction *instr) { + const char *form = "'Zd.'t, 'Pgl/m, 'Zd.'t, 'Zn.'t"; + if (instr->GetSVEVectorFormat() == kFormatVnB) { + Format(instr, "unimplemented", "(SVEFPPair)"); + } else { + Format(instr, mnemonic_.c_str(), form); + } +} + +void Disassembler::Disassemble_ZdnT_PgM_ZdnT_ZmT(const Instruction *instr) { + const char *form = "'Zd.'t, 'Pgl/m, 'Zd.'t, 'Zn.'t"; + Format(instr, mnemonic_.c_str(), form); +} + +void Disassembler::DisassembleSVEComplexIntAddition(const Instruction *instr) { + const char *form = "'Zd.'t, 'Zd.'t, 'Zn.'t, #"; + const char *suffix = (instr->ExtractBit(10) == 0) ? "90" : "270"; + Format(instr, mnemonic_.c_str(), form, suffix); +} + +void Disassembler::Disassemble_ZdnT_ZdnT_ZmT_const(const Instruction *instr) { + const char *form = "'Zd.'tszs, 'Zd.'tszs, 'Zn.'tszs, 'ITriSves"; + unsigned tsize = + (instr->ExtractBits(23, 22) << 2) | instr->ExtractBits(20, 19); + + if (tsize == 0) { + Format(instr, "unimplemented", "(ZdnT_ZdnT_ZmT_const)"); + } else { + Format(instr, mnemonic_.c_str(), form); + } +} + +void Disassembler::Disassemble_ZtD_PgZ_ZnD_Xm(const Instruction *instr) { + const char *form = "{'Zt.d}, 'Pgl/z, ['Zn.d"; + const char *suffix = instr->GetRm() == 31 ? 
"]" : ", 'Xm]"; + Format(instr, mnemonic_.c_str(), form, suffix); +} + +void Disassembler::Disassemble_ZtD_Pg_ZnD_Xm(const Instruction *instr) { + const char *form = "{'Zt.d}, 'Pgl, ['Zn.d"; + const char *suffix = instr->GetRm() == 31 ? "]" : ", 'Xm]"; + Format(instr, mnemonic_.c_str(), form, suffix); +} + +void Disassembler::Disassemble_ZtS_PgZ_ZnS_Xm(const Instruction *instr) { + const char *form = "{'Zt.s}, 'Pgl/z, ['Zn.s"; + const char *suffix = instr->GetRm() == 31 ? "]" : ", 'Xm]"; + Format(instr, mnemonic_.c_str(), form, suffix); +} + +void Disassembler::Disassemble_ZtS_Pg_ZnS_Xm(const Instruction *instr) { + const char *form = "{'Zt.s}, 'Pgl, ['Zn.s"; + const char *suffix = instr->GetRm() == 31 ? "]" : ", 'Xm]"; + Format(instr, mnemonic_.c_str(), form, suffix); +} + +void Disassembler::Disassemble_XdSP_XnSP_Xm(const Instruction *instr) { + const char *form = "'Xds, 'Xns"; + const char *suffix = instr->GetRm() == 31 ? "" : ", 'Xm"; + Format(instr, mnemonic_.c_str(), form, suffix); +} + +void Disassembler::Disassemble_XdSP_XnSP_uimm6_uimm4(const Instruction *instr) { + VIXL_STATIC_ASSERT(kMTETagGranuleInBytes == 16); + const char *form = "'Xds, 'Xns, #'u2116*16, #'u1310"; + Format(instr, mnemonic_.c_str(), form); +} + +void Disassembler::Disassemble_Xd_XnSP_Xm(const Instruction *instr) { + const char *form = "'Rd, 'Xns, 'Rm"; + Format(instr, mnemonic_.c_str(), form); +} + +void Disassembler::Disassemble_Xd_XnSP_XmSP(const Instruction *instr) { + if ((form_hash_ == Hash("subps_64s_dp_2src")) && (instr->GetRd() == 31)) { + Format(instr, "cmpp", "'Xns, 'Xms"); + } else { + const char *form = "'Xd, 'Xns, 'Xms"; + Format(instr, mnemonic_.c_str(), form); + } +} + +void Disassembler::DisassembleMTEStoreTagPair(const Instruction *instr) { + const char *form = "'Xt, 'Xt2, ['Xns"; + const char *suffix = NULL; + switch (form_hash_) { + case Hash("stgp_64_ldstpair_off"): + suffix = ", #'s2115*16]"; + break; + case Hash("stgp_64_ldstpair_post"): + suffix = "], #'s2115*16"; + break; + case Hash("stgp_64_ldstpair_pre"): + suffix = ", #'s2115*16]!"; + break; + default: + mnemonic_ = "unimplemented"; + break; + } + + if (instr->GetImmLSPair() == 0) { + suffix = "]"; + } + + Format(instr, mnemonic_.c_str(), form, suffix); +} + +void Disassembler::DisassembleMTEStoreTag(const Instruction *instr) { + const char *form = "'Xds, ['Xns"; + const char *suffix = NULL; + switch (form_hash_) { + case Hash("st2g_64soffset_ldsttags"): + case Hash("stg_64soffset_ldsttags"): + case Hash("stz2g_64soffset_ldsttags"): + case Hash("stzg_64soffset_ldsttags"): + suffix = ", #'s2012*16]"; + break; + case Hash("st2g_64spost_ldsttags"): + case Hash("stg_64spost_ldsttags"): + case Hash("stz2g_64spost_ldsttags"): + case Hash("stzg_64spost_ldsttags"): + suffix = "], #'s2012*16"; + break; + case Hash("st2g_64spre_ldsttags"): + case Hash("stg_64spre_ldsttags"): + case Hash("stz2g_64spre_ldsttags"): + case Hash("stzg_64spre_ldsttags"): + suffix = ", #'s2012*16]!"; + break; + default: + mnemonic_ = "unimplemented"; + break; + } + + if (instr->GetImmLS() == 0) { + suffix = "]"; + } + + Format(instr, mnemonic_.c_str(), form, suffix); +} + +void Disassembler::DisassembleMTELoadTag(const Instruction *instr) { + const char *form = + (instr->GetImmLS() == 0) ? 
"'Xt, ['Xns]" : "'Xt, ['Xns, #'s2012*16]"; + Format(instr, mnemonic_.c_str(), form); +} + +void Disassembler::DisassembleCpy(const Instruction *instr) { + const char *form = "['Xd]!, ['Xs]!, 'Xn!"; + + int d = instr->GetRd(); + int n = instr->GetRn(); + int s = instr->GetRs(); + + // Aliased registers and sp/zr are disallowed. + if ((d == n) || (d == s) || (n == s) || (d == 31) || (n == 31) || (s == 31)) { + form = NULL; + } + + // Bits 31 and 30 must be zero. + if (instr->ExtractBits(31, 30)) { + form = NULL; + } + + Format(instr, mnemonic_.c_str(), form); +} + +void Disassembler::DisassembleSet(const Instruction *instr) { + const char *form = "['Xd]!, 'Xn!, 'Xs"; + + int d = instr->GetRd(); + int n = instr->GetRn(); + int s = instr->GetRs(); + + // Aliased registers are disallowed. Only Xs may be xzr. + if ((d == n) || (d == s) || (n == s) || (d == 31) || (n == 31)) { + form = NULL; + } + + // Bits 31 and 30 must be zero. + if (instr->ExtractBits(31, 30)) { + form = NULL; + } + + Format(instr, mnemonic_.c_str(), form); +} + +void Disassembler::ProcessOutput(const Instruction * /*instr*/) { + // The base disasm does nothing more than disassembling into a buffer. +} + + +void Disassembler::AppendRegisterNameToOutput(const Instruction *instr, + const CPURegister ®) { + USE(instr); + VIXL_ASSERT(reg.IsValid()); + char reg_char; + + if (reg.IsRegister()) { + reg_char = reg.Is64Bits() ? 'x' : 'w'; + } else { + VIXL_ASSERT(reg.IsVRegister()); + switch (reg.GetSizeInBits()) { + case kBRegSize: + reg_char = 'b'; + break; + case kHRegSize: + reg_char = 'h'; + break; + case kSRegSize: + reg_char = 's'; + break; + case kDRegSize: + reg_char = 'd'; + break; + default: + VIXL_ASSERT(reg.Is128Bits()); + reg_char = 'q'; + } + } + + if (reg.IsVRegister() || !(reg.Aliases(sp) || reg.Aliases(xzr))) { + // A core or scalar/vector register: [wx]0 - 30, [bhsdq]0 - 31. + AppendToOutput("%c%d", reg_char, reg.GetCode()); + } else if (reg.Aliases(sp)) { + // Disassemble w31/x31 as stack pointer wsp/sp. + AppendToOutput("%s", reg.Is64Bits() ? "sp" : "wsp"); + } else { + // Disassemble w31/x31 as zero register wzr/xzr. + AppendToOutput("%czr", reg_char); + } +} + + +void Disassembler::AppendPCRelativeOffsetToOutput(const Instruction *instr, + int64_t offset) { + USE(instr); + if (offset < 0) { + // Cast to uint64_t so that INT64_MIN is handled in a well-defined way. 
+ uint64_t abs_offset = UnsignedNegate(static_cast<uint64_t>(offset)); + AppendToOutput("#-0x%" PRIx64, abs_offset); + } else { + AppendToOutput("#+0x%" PRIx64, offset); + } +} + +
+void Disassembler::AppendAddressToOutput(const Instruction *instr, + const void *addr) { + USE(instr); + AppendToOutput("(addr 0x%" PRIxPTR ")", reinterpret_cast<uintptr_t>(addr)); +} + +
+void Disassembler::AppendCodeAddressToOutput(const Instruction *instr, + const void *addr) { + AppendAddressToOutput(instr, addr); +} + +
+void Disassembler::AppendDataAddressToOutput(const Instruction *instr, + const void *addr) { + AppendAddressToOutput(instr, addr); +} + +
+void Disassembler::AppendCodeRelativeAddressToOutput(const Instruction *instr, + const void *addr) { + USE(instr); + int64_t rel_addr = CodeRelativeAddress(addr); + if (rel_addr >= 0) { + AppendToOutput("(addr 0x%" PRIx64 ")", rel_addr); + } else { + AppendToOutput("(addr -0x%" PRIx64 ")", -rel_addr); + } +} + +
+void Disassembler::AppendCodeRelativeCodeAddressToOutput( + const Instruction *instr, const void *addr) { + AppendCodeRelativeAddressToOutput(instr, addr); +} + +
+void Disassembler::AppendCodeRelativeDataAddressToOutput( + const Instruction *instr, const void *addr) { + AppendCodeRelativeAddressToOutput(instr, addr); +} + +
+void Disassembler::MapCodeAddress(int64_t base_address, + const Instruction *instr_address) { + set_code_address_offset(base_address - + reinterpret_cast<intptr_t>(instr_address)); +} +
+int64_t Disassembler::CodeRelativeAddress(const void *addr) { + return reinterpret_cast<intptr_t>(addr) + code_address_offset(); +} + +
+void Disassembler::Format(const Instruction *instr, + const char *mnemonic, + const char *format0, + const char *format1) { + if ((mnemonic == NULL) || (format0 == NULL)) { + VisitUnallocated(instr); + } else { + ResetOutput(); + Substitute(instr, mnemonic); + if (format0[0] != 0) { // Not a zero-length string. + VIXL_ASSERT(buffer_pos_ < buffer_size_); + buffer_[buffer_pos_++] = ' '; + Substitute(instr, format0); + // TODO: consider using a zero-length string here, too. + if (format1 != NULL) { + Substitute(instr, format1); + } + } + VIXL_ASSERT(buffer_pos_ < buffer_size_); + buffer_[buffer_pos_] = 0; + ProcessOutput(instr); + } +} +
+void Disassembler::FormatWithDecodedMnemonic(const Instruction *instr, + const char *format0, + const char *format1) { + Format(instr, mnemonic_.c_str(), format0, format1); +} +
+void Disassembler::Substitute(const Instruction *instr, const char *string) { + char chr = *string++; + while (chr != '\0') { + if (chr == '\'') { + string += SubstituteField(instr, string); + } else { + VIXL_ASSERT(buffer_pos_ < buffer_size_); + buffer_[buffer_pos_++] = chr; + } + chr = *string++; + } +} + +
+int Disassembler::SubstituteField(const Instruction *instr, + const char *format) { + switch (format[0]) { + // NB. The remaining substitution prefix upper-case characters are: JU. + case 'R': // Register. X or W, selected by sf (or alternative) bit. + case 'F': // FP register. S or D, selected by type field. + case 'V': // Vector register, V, vector format. + case 'Z': // Scalable vector register.
+ case 'W': + case 'X': + case 'B': + case 'H': + case 'S': + case 'D': + case 'Q': + return SubstituteRegisterField(instr, format); + case 'P': + return SubstitutePredicateRegisterField(instr, format); + case 'I': + return SubstituteImmediateField(instr, format); + case 'L': + return SubstituteLiteralField(instr, format); + case 'N': + return SubstituteShiftField(instr, format); + case 'C': + return SubstituteConditionField(instr, format); + case 'E': + return SubstituteExtendField(instr, format); + case 'A': + return SubstitutePCRelAddressField(instr, format); + case 'T': + return SubstituteBranchTargetField(instr, format); + case 'O': + return SubstituteLSRegOffsetField(instr, format); + case 'M': + return SubstituteBarrierField(instr, format); + case 'K': + return SubstituteCrField(instr, format); + case 'G': + return SubstituteSysOpField(instr, format); + case 'p': + return SubstitutePrefetchField(instr, format); + case 'u': + case 's': + return SubstituteIntField(instr, format); + case 't': + return SubstituteSVESize(instr, format); + case '?': + return SubstituteTernary(instr, format); + default: { + VIXL_UNREACHABLE(); + return 1; + } + } +} + +std::pair Disassembler::GetRegNumForField( + const Instruction *instr, char reg_prefix, const char *field) { + unsigned reg_num = UINT_MAX; + unsigned field_len = 1; + + switch (field[0]) { + case 'd': + reg_num = instr->GetRd(); + break; + case 'n': + reg_num = instr->GetRn(); + break; + case 'm': + reg_num = instr->GetRm(); + break; + case 'e': + // This is register Rm, but using a 4-bit specifier. Used in NEON + // by-element instructions. + reg_num = instr->GetRmLow16(); + break; + case 'f': + // This is register Rm, but using an element size dependent number of bits + // in the register specifier. + reg_num = + (instr->GetNEONSize() < 2) ? instr->GetRmLow16() : instr->GetRm(); + break; + case 'a': + reg_num = instr->GetRa(); + break; + case 's': + reg_num = instr->GetRs(); + break; + case 't': + reg_num = instr->GetRt(); + break; + default: + VIXL_UNREACHABLE(); + } + + switch (field[1]) { + case '2': + case '3': + case '4': + if ((reg_prefix == 'V') || (reg_prefix == 'Z')) { // t2/3/4, n2/3/4 + VIXL_ASSERT((field[0] == 't') || (field[0] == 'n')); + reg_num = (reg_num + field[1] - '1') % 32; + field_len++; + } else { + VIXL_ASSERT((field[0] == 't') && (field[1] == '2')); + reg_num = instr->GetRt2(); + field_len++; + } + break; + case '+': // Rt+, Rs+ (ie. Rt + 1, Rs + 1) + VIXL_ASSERT((reg_prefix == 'W') || (reg_prefix == 'X')); + VIXL_ASSERT((field[0] == 's') || (field[0] == 't')); + reg_num++; + field_len++; + break; + case 's': // Core registers that are (w)sp rather than zr. + VIXL_ASSERT((reg_prefix == 'W') || (reg_prefix == 'X')); + reg_num = (reg_num == kZeroRegCode) ? kSPRegInternalCode : reg_num; + field_len++; + break; + } + + VIXL_ASSERT(reg_num != UINT_MAX); + return std::make_pair(reg_num, field_len); +} + +int Disassembler::SubstituteRegisterField(const Instruction *instr, + const char *format) { + unsigned field_len = 1; // Initially, count only the first character. + + // The first character of the register format field, eg R, X, S, etc. + char reg_prefix = format[0]; + + // Pointer to the character after the prefix. This may be one of the standard + // symbols representing a register encoding, or a two digit bit position, + // handled by the following code. 
+ const char *reg_field = &format[1]; + + if (reg_prefix == 'R') { + bool is_x = instr->GetSixtyFourBits() == 1; + if (strspn(reg_field, "0123456789") == 2) { // r20d, r31n, etc. + // Core W or X registers where the type is determined by a specified bit + // position, eg. 'R20d, 'R05n. This is like the 'Rd syntax, where bit 31 + // is implicitly used to select between W and X. + int bitpos = ((reg_field[0] - '0') * 10) + (reg_field[1] - '0'); + VIXL_ASSERT(bitpos <= 31); + is_x = (instr->ExtractBit(bitpos) == 1); + reg_field = &format[3]; + field_len += 2; + } + reg_prefix = is_x ? 'X' : 'W'; + } + + std::pair<unsigned, unsigned> rn = + GetRegNumForField(instr, reg_prefix, reg_field); + unsigned reg_num = rn.first; + field_len += rn.second; + + if (reg_field[0] == 'm') { + switch (reg_field[1]) { + // Handle registers tagged with b (bytes), z (instruction), or + // r (registers), used for address updates in NEON load/store + // instructions. + case 'r': + case 'b': + case 'z': { + VIXL_ASSERT(reg_prefix == 'X'); + field_len = 3; + char *eimm; + int imm = static_cast<int>(strtol(&reg_field[2], &eimm, 10)); + field_len += static_cast<int>(eimm - &reg_field[2]); + if (reg_num == 31) { + switch (reg_field[1]) { + case 'z': + imm *= (1 << instr->GetNEONLSSize()); + break; + case 'r': + imm *= (instr->GetNEONQ() == 0) ? kDRegSizeInBytes + : kQRegSizeInBytes; + break; + case 'b': + break; + } + AppendToOutput("#%d", imm); + return field_len; + } + break; + } + } + } + + CPURegister::RegisterType reg_type = CPURegister::kRegister; + unsigned reg_size = kXRegSize; + + if (reg_prefix == 'F') { + switch (instr->GetFPType()) { + case 3: + reg_prefix = 'H'; + break; + case 0: + reg_prefix = 'S'; + break; + default: + reg_prefix = 'D'; + } + } + + switch (reg_prefix) { + case 'W': + reg_type = CPURegister::kRegister; + reg_size = kWRegSize; + break; + case 'X': + reg_type = CPURegister::kRegister; + reg_size = kXRegSize; + break; + case 'B': + reg_type = CPURegister::kVRegister; + reg_size = kBRegSize; + break; + case 'H': + reg_type = CPURegister::kVRegister; + reg_size = kHRegSize; + break; + case 'S': + reg_type = CPURegister::kVRegister; + reg_size = kSRegSize; + break; + case 'D': + reg_type = CPURegister::kVRegister; + reg_size = kDRegSize; + break; + case 'Q': + reg_type = CPURegister::kVRegister; + reg_size = kQRegSize; + break; + case 'V': + if (reg_field[1] == 'v') { + reg_type = CPURegister::kVRegister; + reg_size = 1 << (instr->GetSVESize() + 3); + field_len++; + break; + } + AppendToOutput("v%d", reg_num); + return field_len; + case 'Z': + AppendToOutput("z%d", reg_num); + return field_len; + default: + VIXL_UNREACHABLE(); + } + + AppendRegisterNameToOutput(instr, CPURegister(reg_num, reg_size, reg_type)); + + return field_len; +} +
+int Disassembler::SubstitutePredicateRegisterField(const Instruction *instr, + const char *format) { + VIXL_ASSERT(format[0] == 'P'); + switch (format[1]) { + // This field only supports P register that are always encoded in the same + // position. + case 'd': + case 't': + AppendToOutput("p%u", instr->GetPt()); + break; + case 'n': + AppendToOutput("p%u", instr->GetPn()); + break; + case 'm': + AppendToOutput("p%u", instr->GetPm()); + break; + case 'g': + VIXL_ASSERT(format[2] == 'l'); + AppendToOutput("p%u", instr->GetPgLow8()); + return 3; + default: + VIXL_UNREACHABLE(); + } + return 2; +} +
+int Disassembler::SubstituteImmediateField(const Instruction *instr, + const char *format) { + VIXL_ASSERT(format[0] == 'I'); + + switch (format[1]) { + case 'M': { // IMoveImm, IMoveNeg or IMoveLSL.
+ if (format[5] == 'L') { + AppendToOutput("#0x%" PRIx32, instr->GetImmMoveWide()); + if (instr->GetShiftMoveWide() > 0) { + AppendToOutput(", lsl #%" PRId32, 16 * instr->GetShiftMoveWide()); + } + } else { + VIXL_ASSERT((format[5] == 'I') || (format[5] == 'N')); + uint64_t imm = static_cast(instr->GetImmMoveWide()) + << (16 * instr->GetShiftMoveWide()); + if (format[5] == 'N') imm = ~imm; + if (!instr->GetSixtyFourBits()) imm &= UINT64_C(0xffffffff); + AppendToOutput("#0x%" PRIx64, imm); + } + return 8; + } + case 'L': { + switch (format[2]) { + case 'L': { // ILLiteral - Immediate Load Literal. + AppendToOutput("pc%+" PRId32, + instr->GetImmLLiteral() * + static_cast(kLiteralEntrySize)); + return 9; + } + case 'S': { // ILS - Immediate Load/Store. + // ILSi - As above, but an index field which must not be + // omitted even if it is zero. + bool is_index = format[3] == 'i'; + if (is_index || (instr->GetImmLS() != 0)) { + AppendToOutput(", #%" PRId32, instr->GetImmLS()); + } + return is_index ? 4 : 3; + } + case 'P': { // ILPx - Immediate Load/Store Pair, x = access size. + // ILPxi - As above, but an index field which must not be + // omitted even if it is zero. + VIXL_ASSERT((format[3] >= '0') && (format[3] <= '9')); + bool is_index = format[4] == 'i'; + if (is_index || (instr->GetImmLSPair() != 0)) { + // format[3] is the scale value. Convert to a number. + int scale = 1 << (format[3] - '0'); + AppendToOutput(", #%" PRId32, instr->GetImmLSPair() * scale); + } + return is_index ? 5 : 4; + } + case 'U': { // ILU - Immediate Load/Store Unsigned. + if (instr->GetImmLSUnsigned() != 0) { + int shift = instr->GetSizeLS(); + AppendToOutput(", #%" PRId32, instr->GetImmLSUnsigned() << shift); + } + return 3; + } + case 'A': { // ILA - Immediate Load with pointer authentication. + if (instr->GetImmLSPAC() != 0) { + AppendToOutput(", #%" PRId32, instr->GetImmLSPAC()); + } + return 3; + } + default: { + VIXL_UNIMPLEMENTED(); + return 0; + } + } + } + case 'C': { // ICondB - Immediate Conditional Branch. + int64_t offset = instr->GetImmCondBranch() << 2; + AppendPCRelativeOffsetToOutput(instr, offset); + return 6; + } + case 'A': { // IAddSub. + int64_t imm = instr->GetImmAddSub() << (12 * instr->GetImmAddSubShift()); + AppendToOutput("#0x%" PRIx64 " (%" PRId64 ")", imm, imm); + return 7; + } + case 'F': { // IFP, IFPNeon, IFPSve or IFPFBits. + int imm8 = 0; + size_t len = strlen("IFP"); + switch (format[3]) { + case 'F': + VIXL_ASSERT(strncmp(format, "IFPFBits", strlen("IFPFBits")) == 0); + AppendToOutput("#%" PRId32, 64 - instr->GetFPScale()); + return static_cast(strlen("IFPFBits")); + case 'N': + VIXL_ASSERT(strncmp(format, "IFPNeon", strlen("IFPNeon")) == 0); + imm8 = instr->GetImmNEONabcdefgh(); + len += strlen("Neon"); + break; + case 'S': + VIXL_ASSERT(strncmp(format, "IFPSve", strlen("IFPSve")) == 0); + imm8 = instr->ExtractBits(12, 5); + len += strlen("Sve"); + break; + default: + VIXL_ASSERT(strncmp(format, "IFP", strlen("IFP")) == 0); + imm8 = instr->GetImmFP(); + break; + } + AppendToOutput("#0x%" PRIx32 " (%.4f)", + imm8, + Instruction::Imm8ToFP32(imm8)); + return static_cast(len); + } + case 'H': { // IH - ImmHint + AppendToOutput("#%" PRId32, instr->GetImmHint()); + return 2; + } + case 'T': { // ITri - Immediate Triangular Encoded. + if (format[4] == 'S') { + VIXL_ASSERT((format[5] == 'v') && (format[6] == 'e')); + switch (format[7]) { + case 'l': + // SVE logical immediate encoding. 
+ AppendToOutput("#0x%" PRIx64, instr->GetSVEImmLogical()); + return 8; + case 'p': { + // SVE predicated shift immediate encoding, lsl. + std::pair shift_and_lane_size = + instr->GetSVEImmShiftAndLaneSizeLog2( + /* is_predicated = */ true); + int lane_bits = 8 << shift_and_lane_size.second; + AppendToOutput("#%" PRId32, lane_bits - shift_and_lane_size.first); + return 8; + } + case 'q': { + // SVE predicated shift immediate encoding, asr and lsr. + std::pair shift_and_lane_size = + instr->GetSVEImmShiftAndLaneSizeLog2( + /* is_predicated = */ true); + AppendToOutput("#%" PRId32, shift_and_lane_size.first); + return 8; + } + case 'r': { + // SVE unpredicated shift immediate encoding, left shifts. + std::pair shift_and_lane_size = + instr->GetSVEImmShiftAndLaneSizeLog2( + /* is_predicated = */ false); + int lane_bits = 8 << shift_and_lane_size.second; + AppendToOutput("#%" PRId32, lane_bits - shift_and_lane_size.first); + return 8; + } + case 's': { + // SVE unpredicated shift immediate encoding, right shifts. + std::pair shift_and_lane_size = + instr->GetSVEImmShiftAndLaneSizeLog2( + /* is_predicated = */ false); + AppendToOutput("#%" PRId32, shift_and_lane_size.first); + return 8; + } + default: + VIXL_UNREACHABLE(); + return 0; + } + } else { + AppendToOutput("#0x%" PRIx64, instr->GetImmLogical()); + return 4; + } + } + case 'N': { // INzcv. + int nzcv = (instr->GetNzcv() << Flags_offset); + AppendToOutput("#%c%c%c%c", + ((nzcv & NFlag) == 0) ? 'n' : 'N', + ((nzcv & ZFlag) == 0) ? 'z' : 'Z', + ((nzcv & CFlag) == 0) ? 'c' : 'C', + ((nzcv & VFlag) == 0) ? 'v' : 'V'); + return 5; + } + case 'P': { // IP - Conditional compare. + AppendToOutput("#%" PRId32, instr->GetImmCondCmp()); + return 2; + } + case 'B': { // Bitfields. + return SubstituteBitfieldImmediateField(instr, format); + } + case 'E': { // IExtract. + AppendToOutput("#%" PRId32, instr->GetImmS()); + return 8; + } + case 't': { // It - Test and branch bit. + AppendToOutput("#%" PRId32, + (instr->GetImmTestBranchBit5() << 5) | + instr->GetImmTestBranchBit40()); + return 2; + } + case 'S': { // ISveSvl - SVE 'mul vl' immediate for structured ld/st. + VIXL_ASSERT(strncmp(format, "ISveSvl", 7) == 0); + int imm = instr->ExtractSignedBits(19, 16); + if (imm != 0) { + int reg_count = instr->ExtractBits(22, 21) + 1; + AppendToOutput(", #%d, mul vl", imm * reg_count); + } + return 7; + } + case 's': { // Is - Shift (immediate). + switch (format[2]) { + case 'R': { // IsR - right shifts. + int shift = 16 << HighestSetBitPosition(instr->GetImmNEONImmh()); + shift -= instr->GetImmNEONImmhImmb(); + AppendToOutput("#%d", shift); + return 3; + } + case 'L': { // IsL - left shifts. + int shift = instr->GetImmNEONImmhImmb(); + shift -= 8 << HighestSetBitPosition(instr->GetImmNEONImmh()); + AppendToOutput("#%d", shift); + return 3; + } + default: { + VIXL_UNIMPLEMENTED(); + return 0; + } + } + } + case 'D': { // IDebug - HLT and BRK instructions. + AppendToOutput("#0x%" PRIx32, instr->GetImmException()); + return 6; + } + case 'U': { // IUdf - UDF immediate. + AppendToOutput("#0x%" PRIx32, instr->GetImmUdf()); + return 4; + } + case 'V': { // Immediate Vector. + switch (format[2]) { + case 'E': { // IVExtract. + AppendToOutput("#%" PRId32, instr->GetImmNEONExt()); + return 9; + } + case 'B': { // IVByElemIndex. 
+ int ret = static_cast(strlen("IVByElemIndex")); + uint32_t vm_index = instr->GetNEONH() << 2; + vm_index |= instr->GetNEONL() << 1; + vm_index |= instr->GetNEONM(); + + static const char *format_rot = "IVByElemIndexRot"; + static const char *format_fhm = "IVByElemIndexFHM"; + if (strncmp(format, format_rot, strlen(format_rot)) == 0) { + // FCMLA uses 'H' bit index when SIZE is 2, else H:L + VIXL_ASSERT((instr->GetNEONSize() == 1) || + (instr->GetNEONSize() == 2)); + vm_index >>= instr->GetNEONSize(); + ret = static_cast(strlen(format_rot)); + } else if (strncmp(format, format_fhm, strlen(format_fhm)) == 0) { + // Nothing to do - FMLAL and FMLSL use H:L:M. + ret = static_cast(strlen(format_fhm)); + } else { + if (instr->GetNEONSize() == 2) { + // S-sized elements use H:L. + vm_index >>= 1; + } else if (instr->GetNEONSize() == 3) { + // D-sized elements use H. + vm_index >>= 2; + } + } + AppendToOutput("%d", vm_index); + return ret; + } + case 'I': { // INS element. + if (strncmp(format, "IVInsIndex", strlen("IVInsIndex")) == 0) { + unsigned rd_index, rn_index; + unsigned imm5 = instr->GetImmNEON5(); + unsigned imm4 = instr->GetImmNEON4(); + int tz = CountTrailingZeros(imm5, 32); + if (tz <= 3) { // Defined for tz = 0 to 3 only. + rd_index = imm5 >> (tz + 1); + rn_index = imm4 >> tz; + if (strncmp(format, "IVInsIndex1", strlen("IVInsIndex1")) == 0) { + AppendToOutput("%d", rd_index); + return static_cast(strlen("IVInsIndex1")); + } else if (strncmp(format, + "IVInsIndex2", + strlen("IVInsIndex2")) == 0) { + AppendToOutput("%d", rn_index); + return static_cast(strlen("IVInsIndex2")); + } + } + return 0; + } else if (strncmp(format, + "IVInsSVEIndex", + strlen("IVInsSVEIndex")) == 0) { + std::pair index_and_lane_size = + instr->GetSVEPermuteIndexAndLaneSizeLog2(); + AppendToOutput("%d", index_and_lane_size.first); + return static_cast(strlen("IVInsSVEIndex")); + } + VIXL_FALLTHROUGH(); + } + case 'L': { // IVLSLane[0123] - suffix indicates access size shift. + AppendToOutput("%d", instr->GetNEONLSIndex(format[8] - '0')); + return 9; + } + case 'M': { // Modified Immediate cases. + if (strncmp(format, "IVMIImm8", strlen("IVMIImm8")) == 0) { + uint64_t imm8 = instr->GetImmNEONabcdefgh(); + AppendToOutput("#0x%" PRIx64, imm8); + return static_cast(strlen("IVMIImm8")); + } else if (strncmp(format, "IVMIImm", strlen("IVMIImm")) == 0) { + uint64_t imm8 = instr->GetImmNEONabcdefgh(); + uint64_t imm = 0; + for (int i = 0; i < 8; ++i) { + if (imm8 & (UINT64_C(1) << i)) { + imm |= (UINT64_C(0xff) << (8 * i)); + } + } + AppendToOutput("#0x%" PRIx64, imm); + return static_cast(strlen("IVMIImm")); + } else if (strncmp(format, + "IVMIShiftAmt1", + strlen("IVMIShiftAmt1")) == 0) { + int cmode = instr->GetNEONCmode(); + int shift_amount = 8 * ((cmode >> 1) & 3); + AppendToOutput("#%d", shift_amount); + return static_cast(strlen("IVMIShiftAmt1")); + } else if (strncmp(format, + "IVMIShiftAmt2", + strlen("IVMIShiftAmt2")) == 0) { + int cmode = instr->GetNEONCmode(); + int shift_amount = 8 << (cmode & 1); + AppendToOutput("#%d", shift_amount); + return static_cast(strlen("IVMIShiftAmt2")); + } else { + VIXL_UNIMPLEMENTED(); + return 0; + } + } + default: { + VIXL_UNIMPLEMENTED(); + return 0; + } + } + } + case 'X': { // IX - CLREX instruction. + AppendToOutput("#0x%" PRIx32, instr->GetCRm()); + return 2; + } + case 'Y': { // IY - system register immediate. 
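+ // System registers that VIXL knows by name (NZCV, FPCR and the RNDR and
+ // RNDRRS random-number registers) are printed symbolically; any other
+ // encoding falls back to the generic S<op0>_<op1>_c<CRn>_c<CRm>_<op2>
+ // spelling.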
+ switch (instr->GetImmSystemRegister()) { + case NZCV: + AppendToOutput("nzcv"); + break; + case FPCR: + AppendToOutput("fpcr"); + break; + case RNDR: + AppendToOutput("rndr"); + break; + case RNDRRS: + AppendToOutput("rndrrs"); + break; + default: + AppendToOutput("S%d_%d_c%d_c%d_%d", + instr->GetSysOp0(), + instr->GetSysOp1(), + instr->GetCRn(), + instr->GetCRm(), + instr->GetSysOp2()); + break; + } + return 2; + } + case 'R': { // IR - Rotate right into flags. + switch (format[2]) { + case 'r': { // IRr - Rotate amount. + AppendToOutput("#%d", instr->GetImmRMIFRotation()); + return 3; + } + default: { + VIXL_UNIMPLEMENTED(); + return 0; + } + } + } + case 'p': { // Ipc - SVE predicate constraint specifier. + VIXL_ASSERT(format[2] == 'c'); + unsigned pattern = instr->GetImmSVEPredicateConstraint(); + switch (pattern) { + // VL1-VL8 are encoded directly. + case SVE_VL1: + case SVE_VL2: + case SVE_VL3: + case SVE_VL4: + case SVE_VL5: + case SVE_VL6: + case SVE_VL7: + case SVE_VL8: + AppendToOutput("vl%u", pattern); + break; + // VL16-VL256 are encoded as log2(N) + c. + case SVE_VL16: + case SVE_VL32: + case SVE_VL64: + case SVE_VL128: + case SVE_VL256: + AppendToOutput("vl%u", 16 << (pattern - SVE_VL16)); + break; + // Special cases. + case SVE_POW2: + AppendToOutput("pow2"); + break; + case SVE_MUL4: + AppendToOutput("mul4"); + break; + case SVE_MUL3: + AppendToOutput("mul3"); + break; + case SVE_ALL: + AppendToOutput("all"); + break; + default: + AppendToOutput("#0x%x", pattern); + break; + } + return 3; + } + default: { + VIXL_UNIMPLEMENTED(); + return 0; + } + } +} + + +int Disassembler::SubstituteBitfieldImmediateField(const Instruction *instr, + const char *format) { + VIXL_ASSERT((format[0] == 'I') && (format[1] == 'B')); + unsigned r = instr->GetImmR(); + unsigned s = instr->GetImmS(); + + switch (format[2]) { + case 'r': { // IBr. + AppendToOutput("#%d", r); + return 3; + } + case 's': { // IBs+1 or IBs-r+1. + if (format[3] == '+') { + AppendToOutput("#%d", s + 1); + return 5; + } else { + VIXL_ASSERT(format[3] == '-'); + AppendToOutput("#%d", s - r + 1); + return 7; + } + } + case 'Z': { // IBZ-r. + VIXL_ASSERT((format[3] == '-') && (format[4] == 'r')); + unsigned reg_size = + (instr->GetSixtyFourBits() == 1) ? kXRegSize : kWRegSize; + AppendToOutput("#%d", reg_size - r); + return 5; + } + default: { + VIXL_UNREACHABLE(); + return 0; + } + } +} + + +int Disassembler::SubstituteLiteralField(const Instruction *instr, + const char *format) { + VIXL_ASSERT(strncmp(format, "LValue", 6) == 0); + USE(format); + + const void *address = instr->GetLiteralAddress(); + switch (instr->Mask(LoadLiteralMask)) { + case LDR_w_lit: + case LDR_x_lit: + case LDRSW_x_lit: + case LDR_s_lit: + case LDR_d_lit: + case LDR_q_lit: + AppendCodeRelativeDataAddressToOutput(instr, address); + break; + case PRFM_lit: { + // Use the prefetch hint to decide how to print the address. + switch (instr->GetPrefetchHint()) { + case 0x0: // PLD: prefetch for load. + case 0x2: // PST: prepare for store. + AppendCodeRelativeDataAddressToOutput(instr, address); + break; + case 0x1: // PLI: preload instructions. + AppendCodeRelativeCodeAddressToOutput(instr, address); + break; + case 0x3: // Unallocated hint. 
+ AppendCodeRelativeAddressToOutput(instr, address); + break; + } + break; + } + default: + VIXL_UNREACHABLE(); + } + + return 6; +} + + +int Disassembler::SubstituteShiftField(const Instruction *instr, + const char *format) { + VIXL_ASSERT(format[0] == 'N'); + VIXL_ASSERT(instr->GetShiftDP() <= 0x3); + + switch (format[1]) { + case 'D': { // NDP. + VIXL_ASSERT(instr->GetShiftDP() != ROR); + VIXL_FALLTHROUGH(); + } + case 'L': { // NLo. + if (instr->GetImmDPShift() != 0) { + const char *shift_type[] = {"lsl", "lsr", "asr", "ror"}; + AppendToOutput(", %s #%" PRId32, + shift_type[instr->GetShiftDP()], + instr->GetImmDPShift()); + } + return 3; + } + case 'S': { // NSveS (SVE structured load/store indexing shift). + VIXL_ASSERT(strncmp(format, "NSveS", 5) == 0); + int msz = instr->ExtractBits(24, 23); + if (msz > 0) { + AppendToOutput(", lsl #%d", msz); + } + return 5; + } + default: + VIXL_UNIMPLEMENTED(); + return 0; + } +} + + +int Disassembler::SubstituteConditionField(const Instruction *instr, + const char *format) { + VIXL_ASSERT(format[0] == 'C'); + const char *condition_code[] = {"eq", + "ne", + "hs", + "lo", + "mi", + "pl", + "vs", + "vc", + "hi", + "ls", + "ge", + "lt", + "gt", + "le", + "al", + "nv"}; + int cond; + switch (format[1]) { + case 'B': + cond = instr->GetConditionBranch(); + break; + case 'I': { + cond = InvertCondition(static_cast(instr->GetCondition())); + break; + } + default: + cond = instr->GetCondition(); + } + AppendToOutput("%s", condition_code[cond]); + return 4; +} + + +int Disassembler::SubstitutePCRelAddressField(const Instruction *instr, + const char *format) { + VIXL_ASSERT((strcmp(format, "AddrPCRelByte") == 0) || // Used by `adr`. + (strcmp(format, "AddrPCRelPage") == 0)); // Used by `adrp`. + + int64_t offset = instr->GetImmPCRel(); + + // Compute the target address based on the effective address (after applying + // code_address_offset). This is required for correct behaviour of adrp. + const Instruction *base = instr + code_address_offset(); + if (format[9] == 'P') { + offset *= kPageSize; + base = AlignDown(base, kPageSize); + } + // Strip code_address_offset before printing, so we can use the + // semantically-correct AppendCodeRelativeAddressToOutput. + const void *target = + reinterpret_cast(base + offset - code_address_offset()); + + AppendPCRelativeOffsetToOutput(instr, offset); + AppendToOutput(" "); + AppendCodeRelativeAddressToOutput(instr, target); + return 13; +} + + +int Disassembler::SubstituteBranchTargetField(const Instruction *instr, + const char *format) { + VIXL_ASSERT(strncmp(format, "TImm", 4) == 0); + + int64_t offset = 0; + switch (format[5]) { + // BImmUncn - unconditional branch immediate. + case 'n': + offset = instr->GetImmUncondBranch(); + break; + // BImmCond - conditional branch immediate. + case 'o': + offset = instr->GetImmCondBranch(); + break; + // BImmCmpa - compare and branch immediate. + case 'm': + offset = instr->GetImmCmpBranch(); + break; + // BImmTest - test and branch immediate. 
+ case 'e': + offset = instr->GetImmTestBranch(); + break; + default: + VIXL_UNIMPLEMENTED(); + } + offset *= static_cast(kInstructionSize); + const void *target_address = reinterpret_cast(instr + offset); + VIXL_STATIC_ASSERT(sizeof(*instr) == 1); + + AppendPCRelativeOffsetToOutput(instr, offset); + AppendToOutput(" "); + AppendCodeRelativeCodeAddressToOutput(instr, target_address); + + return 8; +} + + +int Disassembler::SubstituteExtendField(const Instruction *instr, + const char *format) { + VIXL_ASSERT(strncmp(format, "Ext", 3) == 0); + VIXL_ASSERT(instr->GetExtendMode() <= 7); + USE(format); + + const char *extend_mode[] = + {"uxtb", "uxth", "uxtw", "uxtx", "sxtb", "sxth", "sxtw", "sxtx"}; + + // If rd or rn is SP, uxtw on 32-bit registers and uxtx on 64-bit + // registers becomes lsl. + if (((instr->GetRd() == kZeroRegCode) || (instr->GetRn() == kZeroRegCode)) && + (((instr->GetExtendMode() == UXTW) && (instr->GetSixtyFourBits() == 0)) || + (instr->GetExtendMode() == UXTX))) { + if (instr->GetImmExtendShift() > 0) { + AppendToOutput(", lsl #%" PRId32, instr->GetImmExtendShift()); + } + } else { + AppendToOutput(", %s", extend_mode[instr->GetExtendMode()]); + if (instr->GetImmExtendShift() > 0) { + AppendToOutput(" #%" PRId32, instr->GetImmExtendShift()); + } + } + return 3; +} + + +int Disassembler::SubstituteLSRegOffsetField(const Instruction *instr, + const char *format) { + VIXL_ASSERT(strncmp(format, "Offsetreg", 9) == 0); + const char *extend_mode[] = {"undefined", + "undefined", + "uxtw", + "lsl", + "undefined", + "undefined", + "sxtw", + "sxtx"}; + USE(format); + + unsigned shift = instr->GetImmShiftLS(); + Extend ext = static_cast(instr->GetExtendMode()); + char reg_type = ((ext == UXTW) || (ext == SXTW)) ? 'w' : 'x'; + + unsigned rm = instr->GetRm(); + if (rm == kZeroRegCode) { + AppendToOutput("%czr", reg_type); + } else { + AppendToOutput("%c%d", reg_type, rm); + } + + // Extend mode UXTX is an alias for shift mode LSL here. + if (!((ext == UXTX) && (shift == 0))) { + AppendToOutput(", %s", extend_mode[ext]); + if (shift != 0) { + AppendToOutput(" #%d", instr->GetSizeLS()); + } + } + return 9; +} + + +int Disassembler::SubstitutePrefetchField(const Instruction *instr, + const char *format) { + VIXL_ASSERT(format[0] == 'p'); + USE(format); + + bool is_sve = + (strncmp(format, "prefSVEOp", strlen("prefSVEOp")) == 0) ? true : false; + int placeholder_length = is_sve ? 9 : 6; + static const char *stream_options[] = {"keep", "strm"}; + + auto get_hints = [](bool want_sve_hint) -> std::vector { + static const std::vector sve_hints = {"ld", "st"}; + static const std::vector core_hints = {"ld", "li", "st"}; + return (want_sve_hint) ? sve_hints : core_hints; + }; + + std::vector hints = get_hints(is_sve); + unsigned hint = + is_sve ? instr->GetSVEPrefetchHint() : instr->GetPrefetchHint(); + unsigned target = instr->GetPrefetchTarget() + 1; + unsigned stream = instr->GetPrefetchStream(); + + if ((hint >= hints.size()) || (target > 3)) { + // Unallocated prefetch operations. 
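+ // There is no named form for these, so the raw operation bits are printed
+ // in binary. Allocated encodings use the p<type>l<level><policy> form
+ // below; for example, hint 0 ("ld"), target 0 (level 1) and stream 0
+ // ("keep") are printed as "pldl1keep".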
+ if (is_sve) { + std::bitset<4> prefetch_mode(instr->GetSVEImmPrefetchOperation()); + AppendToOutput("#0b%s", prefetch_mode.to_string().c_str()); + } else { + std::bitset<5> prefetch_mode(instr->GetImmPrefetchOperation()); + AppendToOutput("#0b%s", prefetch_mode.to_string().c_str()); + } + } else { + VIXL_ASSERT(stream < ArrayLength(stream_options)); + AppendToOutput("p%sl%d%s", + hints[hint].c_str(), + target, + stream_options[stream]); + } + return placeholder_length; +} + +int Disassembler::SubstituteBarrierField(const Instruction *instr, + const char *format) { + VIXL_ASSERT(format[0] == 'M'); + USE(format); + + static const char *options[4][4] = {{"sy (0b0000)", "oshld", "oshst", "osh"}, + {"sy (0b0100)", "nshld", "nshst", "nsh"}, + {"sy (0b1000)", "ishld", "ishst", "ish"}, + {"sy (0b1100)", "ld", "st", "sy"}}; + int domain = instr->GetImmBarrierDomain(); + int type = instr->GetImmBarrierType(); + + AppendToOutput("%s", options[domain][type]); + return 1; +} + +int Disassembler::SubstituteSysOpField(const Instruction *instr, + const char *format) { + VIXL_ASSERT(format[0] == 'G'); + int op = -1; + switch (format[1]) { + case '1': + op = instr->GetSysOp1(); + break; + case '2': + op = instr->GetSysOp2(); + break; + default: + VIXL_UNREACHABLE(); + } + AppendToOutput("#%d", op); + return 2; +} + +int Disassembler::SubstituteCrField(const Instruction *instr, + const char *format) { + VIXL_ASSERT(format[0] == 'K'); + int cr = -1; + switch (format[1]) { + case 'n': + cr = instr->GetCRn(); + break; + case 'm': + cr = instr->GetCRm(); + break; + default: + VIXL_UNREACHABLE(); + } + AppendToOutput("C%d", cr); + return 2; +} + +int Disassembler::SubstituteIntField(const Instruction *instr, + const char *format) { + VIXL_ASSERT((format[0] == 'u') || (format[0] == 's')); + + // A generic signed or unsigned int field uses a placeholder of the form + // 'sAABB and 'uAABB respectively where AA and BB are two digit bit positions + // between 00 and 31, and AA >= BB. The placeholder is substituted with the + // decimal integer represented by the bits in the instruction between + // positions AA and BB inclusive. + // + // In addition, split fields can be represented using 'sAABB:CCDD, where CCDD + // become the least-significant bits of the result, and bit AA is the sign bit + // (if 's is used). + int32_t bits = 0; + int width = 0; + const char *c = format; + do { + c++; // Skip the 'u', 's' or ':'. + VIXL_ASSERT(strspn(c, "0123456789") == 4); + int msb = ((c[0] - '0') * 10) + (c[1] - '0'); + int lsb = ((c[2] - '0') * 10) + (c[3] - '0'); + c += 4; // Skip the characters we just read. + int chunk_width = msb - lsb + 1; + VIXL_ASSERT((chunk_width > 0) && (chunk_width < 32)); + bits = (bits << chunk_width) | (instr->ExtractBits(msb, lsb)); + width += chunk_width; + } while (*c == ':'); + VIXL_ASSERT(IsUintN(width, bits)); + + if (format[0] == 's') { + bits = ExtractSignedBitfield32(width - 1, 0, bits); + } + + if (*c == '+') { + // A "+n" trailing the format specifier indicates the extracted value should + // be incremented by n. This is for cases where the encoding is zero-based, + // but range of values is not, eg. values [1, 16] encoded as [0, 15] + char *new_c; + uint64_t value = strtoul(c + 1, &new_c, 10); + c = new_c; + VIXL_ASSERT(IsInt32(value)); + bits = static_cast(bits + value); + } else if (*c == '*') { + // Similarly, a "*n" trailing the format specifier indicates the extracted + // value should be multiplied by n. 
This is for cases where the encoded + // immediate is scaled, for example by access size. + char *new_c; + uint64_t value = strtoul(c + 1, &new_c, 10); + c = new_c; + VIXL_ASSERT(IsInt32(value)); + bits = static_cast(bits * value); + } + + AppendToOutput("%d", bits); + + return static_cast(c - format); +} + +int Disassembler::SubstituteSVESize(const Instruction *instr, + const char *format) { + USE(format); + VIXL_ASSERT(format[0] == 't'); + + static const char sizes[] = {'b', 'h', 's', 'd', 'q'}; + unsigned size_in_bytes_log2 = instr->GetSVESize(); + int placeholder_length = 1; + switch (format[1]) { + case 'f': // 'tf - FP size encoded in <18:17> + placeholder_length++; + size_in_bytes_log2 = instr->ExtractBits(18, 17); + break; + case 'l': + placeholder_length++; + if (format[2] == 's') { + // 'tls: Loads and stores + size_in_bytes_log2 = instr->ExtractBits(22, 21); + placeholder_length++; + if (format[3] == 's') { + // Sign extension load. + unsigned msize = instr->ExtractBits(24, 23); + if (msize > size_in_bytes_log2) size_in_bytes_log2 ^= 0x3; + placeholder_length++; + } + } else { + // 'tl: Logical operations + size_in_bytes_log2 = instr->GetSVEBitwiseImmLaneSizeInBytesLog2(); + } + break; + case 'm': // 'tmsz + VIXL_ASSERT(strncmp(format, "tmsz", 4) == 0); + placeholder_length += 3; + size_in_bytes_log2 = instr->ExtractBits(24, 23); + break; + case 'i': { // 'ti: indices. + std::pair index_and_lane_size = + instr->GetSVEPermuteIndexAndLaneSizeLog2(); + placeholder_length++; + size_in_bytes_log2 = index_and_lane_size.second; + break; + } + case 's': + if (format[2] == 'z') { + VIXL_ASSERT((format[3] == 'p') || (format[3] == 's') || + (format[3] == 'd')); + bool is_predicated = (format[3] == 'p'); + std::pair shift_and_lane_size = + instr->GetSVEImmShiftAndLaneSizeLog2(is_predicated); + size_in_bytes_log2 = shift_and_lane_size.second; + if (format[3] == 'd') { // Double size lanes. + size_in_bytes_log2++; + } + placeholder_length += 3; // skip "sz(p|s|d)" + } + break; + case 'h': + // Half size of the lane size field. + size_in_bytes_log2 -= 1; + placeholder_length++; + break; + case 'q': + // Quarter size of the lane size field. + size_in_bytes_log2 -= 2; + placeholder_length++; + break; + default: + break; + } + + VIXL_ASSERT(size_in_bytes_log2 < ArrayLength(sizes)); + AppendToOutput("%c", sizes[size_in_bytes_log2]); + + return placeholder_length; +} + +int Disassembler::SubstituteTernary(const Instruction *instr, + const char *format) { + VIXL_ASSERT((format[0] == '?') && (format[3] == ':')); + + // The ternary substitution of the format "'?bb:TF" is replaced by a single + // character, either T or F, depending on the value of the bit at position + // bb in the instruction. For example, "'?31:xw" is substituted with "x" if + // bit 31 is true, and "w" otherwise. + VIXL_ASSERT(strspn(&format[1], "0123456789") == 2); + char *c; + uint64_t value = strtoul(&format[1], &c, 10); + VIXL_ASSERT(value < (kInstructionSize * kBitsPerByte)); + VIXL_ASSERT((*c == ':') && (strlen(c) >= 3)); // Minimum of ":TF" + c++; + AppendToOutput("%c", c[1 - instr->ExtractBit(static_cast(value))]); + return 6; +} + +void Disassembler::ResetOutput() { + buffer_pos_ = 0; + buffer_[buffer_pos_] = 0; +} + + +void Disassembler::AppendToOutput(const char *format, ...) 
{ + va_list args; + va_start(args, format); + buffer_pos_ += vsnprintf(&buffer_[buffer_pos_], + buffer_size_ - buffer_pos_, + format, + args); + va_end(args); +} + + +void PrintDisassembler::Disassemble(const Instruction *instr) { + Decoder decoder; + if (cpu_features_auditor_ != NULL) { + decoder.AppendVisitor(cpu_features_auditor_); + } + decoder.AppendVisitor(this); + decoder.Decode(instr); +} + +void PrintDisassembler::DisassembleBuffer(const Instruction *start, + const Instruction *end) { + Decoder decoder; + if (cpu_features_auditor_ != NULL) { + decoder.AppendVisitor(cpu_features_auditor_); + } + decoder.AppendVisitor(this); + decoder.Decode(start, end); +} + +void PrintDisassembler::DisassembleBuffer(const Instruction *start, + uint64_t size) { + DisassembleBuffer(start, start + size); +} + + +void PrintDisassembler::ProcessOutput(const Instruction *instr) { + int64_t address = CodeRelativeAddress(instr); + + uint64_t abs_address; + const char *sign; + if (signed_addresses_) { + if (address < 0) { + sign = "-"; + abs_address = UnsignedNegate(static_cast(address)); + } else { + // Leave a leading space, to maintain alignment. + sign = " "; + abs_address = address; + } + } else { + sign = ""; + abs_address = address; + } + + int bytes_printed = fprintf(stream_, + "%s0x%016" PRIx64 " %08" PRIx32 "\t\t%s", + sign, + abs_address, + instr->GetInstructionBits(), + GetOutput()); + if (cpu_features_auditor_ != NULL) { + CPUFeatures needs = cpu_features_auditor_->GetInstructionFeatures(); + needs.Remove(cpu_features_auditor_->GetAvailableFeatures()); + if (needs != CPUFeatures::None()) { + // Try to align annotations. This value is arbitrary, but based on looking + // good with most instructions. Note that, for historical reasons, the + // disassembly itself is printed with tab characters, so bytes_printed is + // _not_ equivalent to the number of occupied screen columns. However, the + // prefix before the tabs is always the same length, so the annotation + // indentation does not change from one line to the next. + const int indent_to = 70; + // Always allow some space between the instruction and the annotation. + const int min_pad = 2; + + int pad = std::max(min_pad, (indent_to - bytes_printed)); + fprintf(stream_, "%*s", pad, ""); + + std::stringstream features; + features << needs; + fprintf(stream_, + "%s%s%s", + cpu_features_prefix_, + features.str().c_str(), + cpu_features_suffix_); + } + } + fprintf(stream_, "\n"); +} + +} // namespace aarch64 +} // namespace vixl diff --git a/3rdparty/vixl/src/aarch64/instructions-aarch64.cc b/3rdparty/vixl/src/aarch64/instructions-aarch64.cc new file mode 100644 index 0000000000..a2d0547219 --- /dev/null +++ b/3rdparty/vixl/src/aarch64/instructions-aarch64.cc @@ -0,0 +1,1419 @@ +// Copyright 2015, VIXL authors +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// * Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// * Neither the name of ARM Limited nor the names of its contributors may be +// used to endorse or promote products derived from this software without +// specific prior written permission. 
+// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS CONTRIBUTORS "AS IS" AND +// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE +// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +#include "instructions-aarch64.h" +#include "assembler-aarch64.h" + +namespace vixl { +namespace aarch64 { + +static uint64_t RepeatBitsAcrossReg(unsigned reg_size, + uint64_t value, + unsigned width) { + VIXL_ASSERT((width == 2) || (width == 4) || (width == 8) || (width == 16) || + (width == 32)); + VIXL_ASSERT((reg_size == kBRegSize) || (reg_size == kHRegSize) || + (reg_size == kSRegSize) || (reg_size == kDRegSize)); + uint64_t result = value & ((UINT64_C(1) << width) - 1); + for (unsigned i = width; i < reg_size; i *= 2) { + result |= (result << i); + } + return result; +} + +bool Instruction::CanTakeSVEMovprfx(const char* form, + const Instruction* movprfx) const { + return CanTakeSVEMovprfx(Hash(form), movprfx); +} + +bool Instruction::CanTakeSVEMovprfx(uint32_t form_hash, + const Instruction* movprfx) const { + bool movprfx_is_predicated = movprfx->Mask(SVEMovprfxMask) == MOVPRFX_z_p_z; + bool movprfx_is_unpredicated = + movprfx->Mask(SVEConstructivePrefix_UnpredicatedMask) == MOVPRFX_z_z; + VIXL_ASSERT(movprfx_is_predicated != movprfx_is_unpredicated); + + int movprfx_zd = movprfx->GetRd(); + int movprfx_pg = movprfx_is_predicated ? movprfx->GetPgLow8() : -1; + VectorFormat movprfx_vform = + movprfx_is_predicated ? 
movprfx->GetSVEVectorFormat() : kFormatUndefined; + + bool pg_matches_low8 = movprfx_pg == GetPgLow8(); + bool vform_matches = movprfx_vform == GetSVEVectorFormat(); + bool zd_matches = movprfx_zd == GetRd(); + bool zd_isnt_zn = movprfx_zd != GetRn(); + bool zd_isnt_zm = movprfx_zd != GetRm(); + + switch (form_hash) { + case "cdot_z_zzzi_s"_h: + case "sdot_z_zzzi_s"_h: + case "sudot_z_zzzi_s"_h: + case "udot_z_zzzi_s"_h: + case "usdot_z_zzzi_s"_h: + return (GetRd() != static_cast(ExtractBits(18, 16))) && + movprfx_is_unpredicated && zd_isnt_zn && zd_matches; + + case "cdot_z_zzzi_d"_h: + case "sdot_z_zzzi_d"_h: + case "udot_z_zzzi_d"_h: + return (GetRd() != static_cast(ExtractBits(19, 16))) && + movprfx_is_unpredicated && zd_isnt_zn && zd_matches; + + case "fmlalb_z_zzzi_s"_h: + case "fmlalt_z_zzzi_s"_h: + case "fmlslb_z_zzzi_s"_h: + case "fmlslt_z_zzzi_s"_h: + case "smlalb_z_zzzi_d"_h: + case "smlalb_z_zzzi_s"_h: + case "smlalt_z_zzzi_d"_h: + case "smlalt_z_zzzi_s"_h: + case "smlslb_z_zzzi_d"_h: + case "smlslb_z_zzzi_s"_h: + case "smlslt_z_zzzi_d"_h: + case "smlslt_z_zzzi_s"_h: + case "sqdmlalb_z_zzzi_d"_h: + case "sqdmlalb_z_zzzi_s"_h: + case "sqdmlalt_z_zzzi_d"_h: + case "sqdmlalt_z_zzzi_s"_h: + case "sqdmlslb_z_zzzi_d"_h: + case "sqdmlslb_z_zzzi_s"_h: + case "sqdmlslt_z_zzzi_d"_h: + case "sqdmlslt_z_zzzi_s"_h: + case "umlalb_z_zzzi_d"_h: + case "umlalb_z_zzzi_s"_h: + case "umlalt_z_zzzi_d"_h: + case "umlalt_z_zzzi_s"_h: + case "umlslb_z_zzzi_d"_h: + case "umlslb_z_zzzi_s"_h: + case "umlslt_z_zzzi_d"_h: + case "umlslt_z_zzzi_s"_h: + return (GetRd() != GetSVEMulLongZmAndIndex().first) && + movprfx_is_unpredicated && zd_isnt_zn && zd_matches; + + case "cmla_z_zzzi_h"_h: + case "cmla_z_zzzi_s"_h: + case "fcmla_z_zzzi_h"_h: + case "fcmla_z_zzzi_s"_h: + case "fmla_z_zzzi_d"_h: + case "fmla_z_zzzi_h"_h: + case "fmla_z_zzzi_s"_h: + case "fmls_z_zzzi_d"_h: + case "fmls_z_zzzi_h"_h: + case "fmls_z_zzzi_s"_h: + case "mla_z_zzzi_d"_h: + case "mla_z_zzzi_h"_h: + case "mla_z_zzzi_s"_h: + case "mls_z_zzzi_d"_h: + case "mls_z_zzzi_h"_h: + case "mls_z_zzzi_s"_h: + case "sqrdcmlah_z_zzzi_h"_h: + case "sqrdcmlah_z_zzzi_s"_h: + case "sqrdmlah_z_zzzi_d"_h: + case "sqrdmlah_z_zzzi_h"_h: + case "sqrdmlah_z_zzzi_s"_h: + case "sqrdmlsh_z_zzzi_d"_h: + case "sqrdmlsh_z_zzzi_h"_h: + case "sqrdmlsh_z_zzzi_s"_h: + return (GetRd() != GetSVEMulZmAndIndex().first) && + movprfx_is_unpredicated && zd_isnt_zn && zd_matches; + + case "adclb_z_zzz"_h: + case "adclt_z_zzz"_h: + case "bcax_z_zzz"_h: + case "bsl1n_z_zzz"_h: + case "bsl2n_z_zzz"_h: + case "bsl_z_zzz"_h: + case "cdot_z_zzz"_h: + case "cmla_z_zzz"_h: + case "eor3_z_zzz"_h: + case "eorbt_z_zz"_h: + case "eortb_z_zz"_h: + case "fmlalb_z_zzz"_h: + case "fmlalt_z_zzz"_h: + case "fmlslb_z_zzz"_h: + case "fmlslt_z_zzz"_h: + case "nbsl_z_zzz"_h: + case "saba_z_zzz"_h: + case "sabalb_z_zzz"_h: + case "sabalt_z_zzz"_h: + case "sbclb_z_zzz"_h: + case "sbclt_z_zzz"_h: + case "sdot_z_zzz"_h: + case "smlalb_z_zzz"_h: + case "smlalt_z_zzz"_h: + case "smlslb_z_zzz"_h: + case "smlslt_z_zzz"_h: + case "sqdmlalb_z_zzz"_h: + case "sqdmlalbt_z_zzz"_h: + case "sqdmlalt_z_zzz"_h: + case "sqdmlslb_z_zzz"_h: + case "sqdmlslbt_z_zzz"_h: + case "sqdmlslt_z_zzz"_h: + case "sqrdcmlah_z_zzz"_h: + case "sqrdmlah_z_zzz"_h: + case "sqrdmlsh_z_zzz"_h: + case "uaba_z_zzz"_h: + case "uabalb_z_zzz"_h: + case "uabalt_z_zzz"_h: + case "udot_z_zzz"_h: + case "umlalb_z_zzz"_h: + case "umlalt_z_zzz"_h: + case "umlslb_z_zzz"_h: + case "umlslt_z_zzz"_h: + case "usdot_z_zzz_s"_h: + case "fmmla_z_zzz_s"_h: 
+ case "fmmla_z_zzz_d"_h: + case "smmla_z_zzz"_h: + case "ummla_z_zzz"_h: + case "usmmla_z_zzz"_h: + return movprfx_is_unpredicated && zd_isnt_zm && zd_isnt_zn && zd_matches; + + case "addp_z_p_zz"_h: + case "cadd_z_zz"_h: + case "clasta_z_p_zz"_h: + case "clastb_z_p_zz"_h: + case "decd_z_zs"_h: + case "dech_z_zs"_h: + case "decw_z_zs"_h: + case "ext_z_zi_des"_h: + case "faddp_z_p_zz"_h: + case "fmaxnmp_z_p_zz"_h: + case "fmaxp_z_p_zz"_h: + case "fminnmp_z_p_zz"_h: + case "fminp_z_p_zz"_h: + case "ftmad_z_zzi"_h: + case "incd_z_zs"_h: + case "inch_z_zs"_h: + case "incw_z_zs"_h: + case "insr_z_v"_h: + case "smaxp_z_p_zz"_h: + case "sminp_z_p_zz"_h: + case "splice_z_p_zz_des"_h: + case "sqcadd_z_zz"_h: + case "sqdecd_z_zs"_h: + case "sqdech_z_zs"_h: + case "sqdecw_z_zs"_h: + case "sqincd_z_zs"_h: + case "sqinch_z_zs"_h: + case "sqincw_z_zs"_h: + case "srsra_z_zi"_h: + case "ssra_z_zi"_h: + case "umaxp_z_p_zz"_h: + case "uminp_z_p_zz"_h: + case "uqdecd_z_zs"_h: + case "uqdech_z_zs"_h: + case "uqdecw_z_zs"_h: + case "uqincd_z_zs"_h: + case "uqinch_z_zs"_h: + case "uqincw_z_zs"_h: + case "ursra_z_zi"_h: + case "usra_z_zi"_h: + case "xar_z_zzi"_h: + return movprfx_is_unpredicated && zd_isnt_zn && zd_matches; + + case "add_z_zi"_h: + case "and_z_zi"_h: + case "decp_z_p_z"_h: + case "eor_z_zi"_h: + case "incp_z_p_z"_h: + case "insr_z_r"_h: + case "mul_z_zi"_h: + case "orr_z_zi"_h: + case "smax_z_zi"_h: + case "smin_z_zi"_h: + case "sqadd_z_zi"_h: + case "sqdecp_z_p_z"_h: + case "sqincp_z_p_z"_h: + case "sqsub_z_zi"_h: + case "sub_z_zi"_h: + case "subr_z_zi"_h: + case "umax_z_zi"_h: + case "umin_z_zi"_h: + case "uqadd_z_zi"_h: + case "uqdecp_z_p_z"_h: + case "uqincp_z_p_z"_h: + case "uqsub_z_zi"_h: + return movprfx_is_unpredicated && zd_matches; + + case "cpy_z_p_i"_h: + if (movprfx_is_predicated) { + if (!vform_matches) return false; + if (movprfx_pg != GetRx<19, 16>()) return false; + } + // Only the merging form can take movprfx. 
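+ // Bit 14 distinguishes the merging (/m) form from the zeroing (/z) form
+ // here; a zeroing cpy is not a constructive operation, so it cannot be
+ // prefixed by movprfx.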
+ if (ExtractBit(14) == 0) return false; + return zd_matches; + + case "fcpy_z_p_i"_h: + return (movprfx_is_unpredicated || + ((movprfx_pg == GetRx<19, 16>()) && vform_matches)) && + zd_matches; + + case "flogb_z_p_z"_h: + return (movprfx_is_unpredicated || + ((movprfx_vform == GetSVEVectorFormat(17)) && pg_matches_low8)) && + zd_isnt_zn && zd_matches; + + case "asr_z_p_zi"_h: + case "asrd_z_p_zi"_h: + case "lsl_z_p_zi"_h: + case "lsr_z_p_zi"_h: + case "sqshl_z_p_zi"_h: + case "sqshlu_z_p_zi"_h: + case "srshr_z_p_zi"_h: + case "uqshl_z_p_zi"_h: + case "urshr_z_p_zi"_h: + return (movprfx_is_unpredicated || + ((movprfx_vform == + SVEFormatFromLaneSizeInBytesLog2( + GetSVEImmShiftAndLaneSizeLog2(true).second)) && + pg_matches_low8)) && + zd_matches; + + case "fcvt_z_p_z_d2h"_h: + case "fcvt_z_p_z_d2s"_h: + case "fcvt_z_p_z_h2d"_h: + case "fcvt_z_p_z_s2d"_h: + case "fcvtx_z_p_z_d2s"_h: + case "fcvtzs_z_p_z_d2w"_h: + case "fcvtzs_z_p_z_d2x"_h: + case "fcvtzs_z_p_z_fp162x"_h: + case "fcvtzs_z_p_z_s2x"_h: + case "fcvtzu_z_p_z_d2w"_h: + case "fcvtzu_z_p_z_d2x"_h: + case "fcvtzu_z_p_z_fp162x"_h: + case "fcvtzu_z_p_z_s2x"_h: + case "scvtf_z_p_z_w2d"_h: + case "scvtf_z_p_z_x2d"_h: + case "scvtf_z_p_z_x2fp16"_h: + case "scvtf_z_p_z_x2s"_h: + case "ucvtf_z_p_z_w2d"_h: + case "ucvtf_z_p_z_x2d"_h: + case "ucvtf_z_p_z_x2fp16"_h: + case "ucvtf_z_p_z_x2s"_h: + return (movprfx_is_unpredicated || + ((movprfx_vform == kFormatVnD) && pg_matches_low8)) && + zd_isnt_zn && zd_matches; + + case "fcvtzs_z_p_z_fp162h"_h: + case "fcvtzu_z_p_z_fp162h"_h: + case "scvtf_z_p_z_h2fp16"_h: + case "ucvtf_z_p_z_h2fp16"_h: + return (movprfx_is_unpredicated || + ((movprfx_vform == kFormatVnH) && pg_matches_low8)) && + zd_isnt_zn && zd_matches; + + case "fcvt_z_p_z_h2s"_h: + case "fcvt_z_p_z_s2h"_h: + case "fcvtzs_z_p_z_fp162w"_h: + case "fcvtzs_z_p_z_s2w"_h: + case "fcvtzu_z_p_z_fp162w"_h: + case "fcvtzu_z_p_z_s2w"_h: + case "scvtf_z_p_z_w2fp16"_h: + case "scvtf_z_p_z_w2s"_h: + case "ucvtf_z_p_z_w2fp16"_h: + case "ucvtf_z_p_z_w2s"_h: + return (movprfx_is_unpredicated || + ((movprfx_vform == kFormatVnS) && pg_matches_low8)) && + zd_isnt_zn && zd_matches; + + case "fcmla_z_p_zzz"_h: + case "fmad_z_p_zzz"_h: + case "fmla_z_p_zzz"_h: + case "fmls_z_p_zzz"_h: + case "fmsb_z_p_zzz"_h: + case "fnmad_z_p_zzz"_h: + case "fnmla_z_p_zzz"_h: + case "fnmls_z_p_zzz"_h: + case "fnmsb_z_p_zzz"_h: + case "mad_z_p_zzz"_h: + case "mla_z_p_zzz"_h: + case "mls_z_p_zzz"_h: + case "msb_z_p_zzz"_h: + return (movprfx_is_unpredicated || (pg_matches_low8 && vform_matches)) && + zd_isnt_zm && zd_isnt_zn && zd_matches; + + case "abs_z_p_z"_h: + case "add_z_p_zz"_h: + case "and_z_p_zz"_h: + case "asr_z_p_zw"_h: + case "asr_z_p_zz"_h: + case "asrr_z_p_zz"_h: + case "bic_z_p_zz"_h: + case "cls_z_p_z"_h: + case "clz_z_p_z"_h: + case "cnot_z_p_z"_h: + case "cnt_z_p_z"_h: + case "cpy_z_p_v"_h: + case "eor_z_p_zz"_h: + case "fabd_z_p_zz"_h: + case "fabs_z_p_z"_h: + case "fadd_z_p_zz"_h: + case "fcadd_z_p_zz"_h: + case "fdiv_z_p_zz"_h: + case "fdivr_z_p_zz"_h: + case "fmax_z_p_zz"_h: + case "fmaxnm_z_p_zz"_h: + case "fmin_z_p_zz"_h: + case "fminnm_z_p_zz"_h: + case "fmul_z_p_zz"_h: + case "fmulx_z_p_zz"_h: + case "fneg_z_p_z"_h: + case "frecpx_z_p_z"_h: + case "frinta_z_p_z"_h: + case "frinti_z_p_z"_h: + case "frintm_z_p_z"_h: + case "frintn_z_p_z"_h: + case "frintp_z_p_z"_h: + case "frintx_z_p_z"_h: + case "frintz_z_p_z"_h: + case "fscale_z_p_zz"_h: + case "fsqrt_z_p_z"_h: + case "fsub_z_p_zz"_h: + case "fsubr_z_p_zz"_h: + case "lsl_z_p_zw"_h: + case 
"lsl_z_p_zz"_h: + case "lslr_z_p_zz"_h: + case "lsr_z_p_zw"_h: + case "lsr_z_p_zz"_h: + case "lsrr_z_p_zz"_h: + case "mul_z_p_zz"_h: + case "neg_z_p_z"_h: + case "not_z_p_z"_h: + case "orr_z_p_zz"_h: + case "rbit_z_p_z"_h: + case "revb_z_z"_h: + case "revh_z_z"_h: + case "revw_z_z"_h: + case "sabd_z_p_zz"_h: + case "sadalp_z_p_z"_h: + case "sdiv_z_p_zz"_h: + case "sdivr_z_p_zz"_h: + case "shadd_z_p_zz"_h: + case "shsub_z_p_zz"_h: + case "shsubr_z_p_zz"_h: + case "smax_z_p_zz"_h: + case "smin_z_p_zz"_h: + case "smulh_z_p_zz"_h: + case "sqabs_z_p_z"_h: + case "sqadd_z_p_zz"_h: + case "sqneg_z_p_z"_h: + case "sqrshl_z_p_zz"_h: + case "sqrshlr_z_p_zz"_h: + case "sqshl_z_p_zz"_h: + case "sqshlr_z_p_zz"_h: + case "sqsub_z_p_zz"_h: + case "sqsubr_z_p_zz"_h: + case "srhadd_z_p_zz"_h: + case "srshl_z_p_zz"_h: + case "srshlr_z_p_zz"_h: + case "sub_z_p_zz"_h: + case "subr_z_p_zz"_h: + case "suqadd_z_p_zz"_h: + case "sxtb_z_p_z"_h: + case "sxth_z_p_z"_h: + case "sxtw_z_p_z"_h: + case "uabd_z_p_zz"_h: + case "uadalp_z_p_z"_h: + case "udiv_z_p_zz"_h: + case "udivr_z_p_zz"_h: + case "uhadd_z_p_zz"_h: + case "uhsub_z_p_zz"_h: + case "uhsubr_z_p_zz"_h: + case "umax_z_p_zz"_h: + case "umin_z_p_zz"_h: + case "umulh_z_p_zz"_h: + case "uqadd_z_p_zz"_h: + case "uqrshl_z_p_zz"_h: + case "uqrshlr_z_p_zz"_h: + case "uqshl_z_p_zz"_h: + case "uqshlr_z_p_zz"_h: + case "uqsub_z_p_zz"_h: + case "uqsubr_z_p_zz"_h: + case "urecpe_z_p_z"_h: + case "urhadd_z_p_zz"_h: + case "urshl_z_p_zz"_h: + case "urshlr_z_p_zz"_h: + case "ursqrte_z_p_z"_h: + case "usqadd_z_p_zz"_h: + case "uxtb_z_p_z"_h: + case "uxth_z_p_z"_h: + case "uxtw_z_p_z"_h: + return (movprfx_is_unpredicated || (pg_matches_low8 && vform_matches)) && + zd_isnt_zn && zd_matches; + + case "cpy_z_p_r"_h: + case "fadd_z_p_zs"_h: + case "fmax_z_p_zs"_h: + case "fmaxnm_z_p_zs"_h: + case "fmin_z_p_zs"_h: + case "fminnm_z_p_zs"_h: + case "fmul_z_p_zs"_h: + case "fsub_z_p_zs"_h: + case "fsubr_z_p_zs"_h: + return (movprfx_is_unpredicated || (pg_matches_low8 && vform_matches)) && + zd_matches; + default: + return false; + } +} // NOLINT(readability/fn_size) + +bool Instruction::IsLoad() const { + if (Mask(LoadStoreAnyFMask) != LoadStoreAnyFixed) { + return false; + } + + if (Mask(LoadStorePairAnyFMask) == LoadStorePairAnyFixed) { + return Mask(LoadStorePairLBit) != 0; + } else { + LoadStoreOp op = static_cast(Mask(LoadStoreMask)); + switch (op) { + case LDRB_w: + case LDRH_w: + case LDR_w: + case LDR_x: + case LDRSB_w: + case LDRSB_x: + case LDRSH_w: + case LDRSH_x: + case LDRSW_x: + case LDR_b: + case LDR_h: + case LDR_s: + case LDR_d: + case LDR_q: + return true; + default: + return false; + } + } +} + + +bool Instruction::IsStore() const { + if (Mask(LoadStoreAnyFMask) != LoadStoreAnyFixed) { + return false; + } + + if (Mask(LoadStorePairAnyFMask) == LoadStorePairAnyFixed) { + return Mask(LoadStorePairLBit) == 0; + } else { + LoadStoreOp op = static_cast(Mask(LoadStoreMask)); + switch (op) { + case STRB_w: + case STRH_w: + case STR_w: + case STR_x: + case STR_b: + case STR_h: + case STR_s: + case STR_d: + case STR_q: + return true; + default: + return false; + } + } +} + + +std::pair Instruction::GetSVEPermuteIndexAndLaneSizeLog2() const { + uint32_t imm_2 = ExtractBits<0x00C00000>(); + uint32_t tsz_5 = ExtractBits<0x001F0000>(); + uint32_t imm_7 = (imm_2 << 5) | tsz_5; + int lane_size_in_byte_log_2 = std::min(CountTrailingZeros(tsz_5), 5); + int index = ExtractUnsignedBitfield32(6, lane_size_in_byte_log_2 + 1, imm_7); + return std::make_pair(index, 
lane_size_in_byte_log_2); +} + +// Get the register and index for SVE indexed multiplies encoded in the forms: +// .h : Zm = <18:16>, index = <22><20:19> +// .s : Zm = <18:16>, index = <20:19> +// .d : Zm = <19:16>, index = <20> +std::pair Instruction::GetSVEMulZmAndIndex() const { + int reg_code = GetRmLow16(); + int index = ExtractBits(20, 19); + + // For .h, index uses bit zero of the size field, so kFormatVnB below implies + // half-word lane, with most-significant bit of the index zero. + switch (GetSVEVectorFormat()) { + case kFormatVnD: + index >>= 1; // Only bit 20 in the index for D lanes. + break; + case kFormatVnH: + index += 4; // Bit 22 is the top bit of index. + VIXL_FALLTHROUGH(); + case kFormatVnB: + case kFormatVnS: + reg_code &= 7; // Three bits used for the register. + break; + default: + VIXL_UNIMPLEMENTED(); + break; + } + return std::make_pair(reg_code, index); +} + +// Get the register and index for SVE indexed long multiplies encoded in the +// forms: +// .h : Zm = <18:16>, index = <20:19><11> +// .s : Zm = <19:16>, index = <20><11> +std::pair Instruction::GetSVEMulLongZmAndIndex() const { + int reg_code = GetRmLow16(); + int index = ExtractBit(11); + + // For long multiplies, the SVE size field <23:22> encodes the destination + // element size. The source element size is half the width. + switch (GetSVEVectorFormat()) { + case kFormatVnS: + reg_code &= 7; + index |= ExtractBits(20, 19) << 1; + break; + case kFormatVnD: + index |= ExtractBit(20) << 1; + break; + default: + VIXL_UNIMPLEMENTED(); + break; + } + return std::make_pair(reg_code, index); +} + +// Logical immediates can't encode zero, so a return value of zero is used to +// indicate a failure case. Specifically, where the constraints on imm_s are +// not met. +uint64_t Instruction::GetImmLogical() const { + unsigned reg_size = GetSixtyFourBits() ? kXRegSize : kWRegSize; + int32_t n = GetBitN(); + int32_t imm_s = GetImmSetBits(); + int32_t imm_r = GetImmRotate(); + return DecodeImmBitMask(n, imm_s, imm_r, reg_size); +} + +// Logical immediates can't encode zero, so a return value of zero is used to +// indicate a failure case. Specifically, where the constraints on imm_s are +// not met. +uint64_t Instruction::GetSVEImmLogical() const { + int n = GetSVEBitN(); + int imm_s = GetSVEImmSetBits(); + int imm_r = GetSVEImmRotate(); + int lane_size_in_bytes_log2 = GetSVEBitwiseImmLaneSizeInBytesLog2(); + switch (lane_size_in_bytes_log2) { + case kDRegSizeInBytesLog2: + case kSRegSizeInBytesLog2: + case kHRegSizeInBytesLog2: + case kBRegSizeInBytesLog2: { + int lane_size_in_bits = 1 << (lane_size_in_bytes_log2 + 3); + return DecodeImmBitMask(n, imm_s, imm_r, lane_size_in_bits); + } + default: + return 0; + } +} + +std::pair Instruction::GetSVEImmShiftAndLaneSizeLog2( + bool is_predicated) const { + Instr tsize = + is_predicated ? ExtractBits<0x00C00300>() : ExtractBits<0x00D80000>(); + Instr imm_3 = + is_predicated ? ExtractBits<0x000000E0>() : ExtractBits<0x00070000>(); + if (tsize == 0) { + // The bit field `tsize` means undefined if it is zero, so return a + // convenience value kWMinInt to indicate a failure case. 
+ return std::make_pair(kWMinInt, kWMinInt); + } + + int lane_size_in_bytes_log_2 = 32 - CountLeadingZeros(tsize, 32) - 1; + int esize = (1 << lane_size_in_bytes_log_2) * kBitsPerByte; + int shift = (2 * esize) - ((tsize << 3) | imm_3); + return std::make_pair(shift, lane_size_in_bytes_log_2); +} + +int Instruction::GetSVEMsizeFromDtype(bool is_signed, int dtype_h_lsb) const { + Instr dtype_h = ExtractBits(dtype_h_lsb + 1, dtype_h_lsb); + if (is_signed) { + dtype_h = dtype_h ^ 0x3; + } + return dtype_h; +} + +int Instruction::GetSVEEsizeFromDtype(bool is_signed, int dtype_l_lsb) const { + Instr dtype_l = ExtractBits(dtype_l_lsb + 1, dtype_l_lsb); + if (is_signed) { + dtype_l = dtype_l ^ 0x3; + } + return dtype_l; +} + +int Instruction::GetSVEBitwiseImmLaneSizeInBytesLog2() const { + int n = GetSVEBitN(); + int imm_s = GetSVEImmSetBits(); + unsigned type_bitset = + (n << SVEImmSetBits_width) | (~imm_s & GetUintMask(SVEImmSetBits_width)); + + // An lane size is constructed from the n and imm_s bits according to + // the following table: + // + // N imms size + // 0 0xxxxx 32 + // 0 10xxxx 16 + // 0 110xxx 8 + // 0 1110xx 8 + // 0 11110x 8 + // 1 xxxxxx 64 + + if (type_bitset == 0) { + // Bail out early since `HighestSetBitPosition` doesn't accept zero + // value input. + return -1; + } + + switch (HighestSetBitPosition(type_bitset)) { + case 6: + return kDRegSizeInBytesLog2; + case 5: + return kSRegSizeInBytesLog2; + case 4: + return kHRegSizeInBytesLog2; + case 3: + case 2: + case 1: + return kBRegSizeInBytesLog2; + default: + // RESERVED encoding. + return -1; + } +} + +int Instruction::GetSVEExtractImmediate() const { + const int imm8h_mask = 0x001F0000; + const int imm8l_mask = 0x00001C00; + return ExtractBits(); +} + +uint64_t Instruction::DecodeImmBitMask(int32_t n, + int32_t imm_s, + int32_t imm_r, + int32_t size) const { + // An integer is constructed from the n, imm_s and imm_r bits according to + // the following table: + // + // N imms immr size S R + // 1 ssssss rrrrrr 64 UInt(ssssss) UInt(rrrrrr) + // 0 0sssss xrrrrr 32 UInt(sssss) UInt(rrrrr) + // 0 10ssss xxrrrr 16 UInt(ssss) UInt(rrrr) + // 0 110sss xxxrrr 8 UInt(sss) UInt(rrr) + // 0 1110ss xxxxrr 4 UInt(ss) UInt(rr) + // 0 11110s xxxxxr 2 UInt(s) UInt(r) + // (s bits must not be all set) + // + // A pattern is constructed of size bits, where the least significant S+1 + // bits are set. The pattern is rotated right by R, and repeated across a + // 32 or 64-bit value, depending on destination register width. 
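+ //
+ // For example, N=0, imms=0b000011, immr=0b00001 selects a 32-bit pattern
+ // with the low four bits set (0x0000000f); rotating right by one gives
+ // 0x80000007, which is then repeated: 0x8000000780000007 for a 64-bit
+ // destination, or 0x80000007 for a 32-bit one.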
+ // + + if (n == 1) { + if (imm_s == 0x3f) { + return 0; + } + uint64_t bits = (UINT64_C(1) << (imm_s + 1)) - 1; + return RotateRight(bits, imm_r, 64); + } else { + if ((imm_s >> 1) == 0x1f) { + return 0; + } + for (int width = 0x20; width >= 0x2; width >>= 1) { + if ((imm_s & width) == 0) { + int mask = width - 1; + if ((imm_s & mask) == mask) { + return 0; + } + uint64_t bits = (UINT64_C(1) << ((imm_s & mask) + 1)) - 1; + return RepeatBitsAcrossReg(size, + RotateRight(bits, imm_r & mask, width), + width); + } + } + } + VIXL_UNREACHABLE(); + return 0; +} + + +uint32_t Instruction::GetImmNEONabcdefgh() const { + return GetImmNEONabc() << 5 | GetImmNEONdefgh(); +} + + +Float16 Instruction::Imm8ToFloat16(uint32_t imm8) { + // Imm8: abcdefgh (8 bits) + // Half: aBbb.cdef.gh00.0000 (16 bits) + // where B is b ^ 1 + uint32_t bits = imm8; + uint16_t bit7 = (bits >> 7) & 0x1; + uint16_t bit6 = (bits >> 6) & 0x1; + uint16_t bit5_to_0 = bits & 0x3f; + uint16_t result = (bit7 << 15) | ((4 - bit6) << 12) | (bit5_to_0 << 6); + return RawbitsToFloat16(result); +} + + +float Instruction::Imm8ToFP32(uint32_t imm8) { + // Imm8: abcdefgh (8 bits) + // Single: aBbb.bbbc.defg.h000.0000.0000.0000.0000 (32 bits) + // where B is b ^ 1 + uint32_t bits = imm8; + uint32_t bit7 = (bits >> 7) & 0x1; + uint32_t bit6 = (bits >> 6) & 0x1; + uint32_t bit5_to_0 = bits & 0x3f; + uint32_t result = (bit7 << 31) | ((32 - bit6) << 25) | (bit5_to_0 << 19); + + return RawbitsToFloat(result); +} + + +Float16 Instruction::GetImmFP16() const { return Imm8ToFloat16(GetImmFP()); } + + +float Instruction::GetImmFP32() const { return Imm8ToFP32(GetImmFP()); } + + +double Instruction::Imm8ToFP64(uint32_t imm8) { + // Imm8: abcdefgh (8 bits) + // Double: aBbb.bbbb.bbcd.efgh.0000.0000.0000.0000 + // 0000.0000.0000.0000.0000.0000.0000.0000 (64 bits) + // where B is b ^ 1 + uint32_t bits = imm8; + uint64_t bit7 = (bits >> 7) & 0x1; + uint64_t bit6 = (bits >> 6) & 0x1; + uint64_t bit5_to_0 = bits & 0x3f; + uint64_t result = (bit7 << 63) | ((256 - bit6) << 54) | (bit5_to_0 << 48); + + return RawbitsToDouble(result); +} + + +double Instruction::GetImmFP64() const { return Imm8ToFP64(GetImmFP()); } + + +Float16 Instruction::GetImmNEONFP16() const { + return Imm8ToFloat16(GetImmNEONabcdefgh()); +} + + +float Instruction::GetImmNEONFP32() const { + return Imm8ToFP32(GetImmNEONabcdefgh()); +} + + +double Instruction::GetImmNEONFP64() const { + return Imm8ToFP64(GetImmNEONabcdefgh()); +} + + +unsigned CalcLSDataSize(LoadStoreOp op) { + VIXL_ASSERT((LSSize_offset + LSSize_width) == (kInstructionSize * 8)); + unsigned size = static_cast(op) >> LSSize_offset; + if ((op & LSVector_mask) != 0) { + // Vector register memory operations encode the access size in the "size" + // and "opc" fields. 
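+ // For example, LDR q0, [x0] is encoded with size 0b00 but opc 0b11, so the
+ // access size is widened to a Q register (16 bytes) here.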
+ if ((size == 0) && ((op & LSOpc_mask) >> LSOpc_offset) >= 2) { + size = kQRegSizeInBytesLog2; + } + } + return size; +} + + +unsigned CalcLSPairDataSize(LoadStorePairOp op) { + VIXL_STATIC_ASSERT(kXRegSizeInBytes == kDRegSizeInBytes); + VIXL_STATIC_ASSERT(kWRegSizeInBytes == kSRegSizeInBytes); + switch (op) { + case STP_q: + case LDP_q: + return kQRegSizeInBytesLog2; + case STP_x: + case LDP_x: + case STP_d: + case LDP_d: + return kXRegSizeInBytesLog2; + default: + return kWRegSizeInBytesLog2; + } +} + + +int Instruction::GetImmBranchRangeBitwidth(ImmBranchType branch_type) { + switch (branch_type) { + case UncondBranchType: + return ImmUncondBranch_width; + case CondBranchType: + return ImmCondBranch_width; + case CompareBranchType: + return ImmCmpBranch_width; + case TestBranchType: + return ImmTestBranch_width; + default: + VIXL_UNREACHABLE(); + return 0; + } +} + + +int32_t Instruction::GetImmBranchForwardRange(ImmBranchType branch_type) { + int32_t encoded_max = 1 << (GetImmBranchRangeBitwidth(branch_type) - 1); + return encoded_max * kInstructionSize; +} + + +bool Instruction::IsValidImmPCOffset(ImmBranchType branch_type, + int64_t offset) { + return IsIntN(GetImmBranchRangeBitwidth(branch_type), offset); +} + + +const Instruction* Instruction::GetImmPCOffsetTarget() const { + const Instruction* base = this; + ptrdiff_t offset; + if (IsPCRelAddressing()) { + // ADR and ADRP. + offset = GetImmPCRel(); + if (Mask(PCRelAddressingMask) == ADRP) { + base = AlignDown(base, kPageSize); + offset *= kPageSize; + } else { + VIXL_ASSERT(Mask(PCRelAddressingMask) == ADR); + } + } else { + // All PC-relative branches. + VIXL_ASSERT(GetBranchType() != UnknownBranchType); + // Relative branch offsets are instruction-size-aligned. + offset = GetImmBranch() * static_cast(kInstructionSize); + } + return base + offset; +} + + +int Instruction::GetImmBranch() const { + switch (GetBranchType()) { + case CondBranchType: + return GetImmCondBranch(); + case UncondBranchType: + return GetImmUncondBranch(); + case CompareBranchType: + return GetImmCmpBranch(); + case TestBranchType: + return GetImmTestBranch(); + default: + VIXL_UNREACHABLE(); + } + return 0; +} + + +void Instruction::SetImmPCOffsetTarget(const Instruction* target) { + if (IsPCRelAddressing()) { + SetPCRelImmTarget(target); + } else { + SetBranchImmTarget(target); + } +} + + +void Instruction::SetPCRelImmTarget(const Instruction* target) { + ptrdiff_t imm21; + if ((Mask(PCRelAddressingMask) == ADR)) { + imm21 = target - this; + } else { + VIXL_ASSERT(Mask(PCRelAddressingMask) == ADRP); + uintptr_t this_page = reinterpret_cast(this) / kPageSize; + uintptr_t target_page = reinterpret_cast(target) / kPageSize; + imm21 = target_page - this_page; + } + Instr imm = Assembler::ImmPCRelAddress(static_cast(imm21)); + + SetInstructionBits(Mask(~ImmPCRel_mask) | imm); +} + + +void Instruction::SetBranchImmTarget(const Instruction* target) { + VIXL_ASSERT(((target - this) & 3) == 0); + Instr branch_imm = 0; + uint32_t imm_mask = 0; + int offset = static_cast((target - this) >> kInstructionSizeLog2); + switch (GetBranchType()) { + case CondBranchType: { + branch_imm = Assembler::ImmCondBranch(offset); + imm_mask = ImmCondBranch_mask; + break; + } + case UncondBranchType: { + branch_imm = Assembler::ImmUncondBranch(offset); + imm_mask = ImmUncondBranch_mask; + break; + } + case CompareBranchType: { + branch_imm = Assembler::ImmCmpBranch(offset); + imm_mask = ImmCmpBranch_mask; + break; + } + case TestBranchType: { + branch_imm = 
Assembler::ImmTestBranch(offset); + imm_mask = ImmTestBranch_mask; + break; + } + default: + VIXL_UNREACHABLE(); + } + SetInstructionBits(Mask(~imm_mask) | branch_imm); +} + + +void Instruction::SetImmLLiteral(const Instruction* source) { + VIXL_ASSERT(IsWordAligned(source)); + ptrdiff_t offset = (source - this) >> kLiteralEntrySizeLog2; + Instr imm = Assembler::ImmLLiteral(static_cast(offset)); + Instr mask = ImmLLiteral_mask; + + SetInstructionBits(Mask(~mask) | imm); +} + + +VectorFormat VectorFormatHalfWidth(VectorFormat vform) { + switch (vform) { + case kFormat8H: + return kFormat8B; + case kFormat4S: + return kFormat4H; + case kFormat2D: + return kFormat2S; + case kFormatH: + return kFormatB; + case kFormatS: + return kFormatH; + case kFormatD: + return kFormatS; + case kFormatVnH: + return kFormatVnB; + case kFormatVnS: + return kFormatVnH; + case kFormatVnD: + return kFormatVnS; + default: + VIXL_UNREACHABLE(); + return kFormatUndefined; + } +} + + +VectorFormat VectorFormatDoubleWidth(VectorFormat vform) { + switch (vform) { + case kFormat8B: + return kFormat8H; + case kFormat4H: + return kFormat4S; + case kFormat2S: + return kFormat2D; + case kFormatB: + return kFormatH; + case kFormatH: + return kFormatS; + case kFormatS: + return kFormatD; + case kFormatVnB: + return kFormatVnH; + case kFormatVnH: + return kFormatVnS; + case kFormatVnS: + return kFormatVnD; + default: + VIXL_UNREACHABLE(); + return kFormatUndefined; + } +} + + +VectorFormat VectorFormatFillQ(VectorFormat vform) { + switch (vform) { + case kFormatB: + case kFormat8B: + case kFormat16B: + return kFormat16B; + case kFormatH: + case kFormat4H: + case kFormat8H: + return kFormat8H; + case kFormatS: + case kFormat2S: + case kFormat4S: + return kFormat4S; + case kFormatD: + case kFormat1D: + case kFormat2D: + return kFormat2D; + default: + VIXL_UNREACHABLE(); + return kFormatUndefined; + } +} + +VectorFormat VectorFormatHalfWidthDoubleLanes(VectorFormat vform) { + switch (vform) { + case kFormat4H: + return kFormat8B; + case kFormat8H: + return kFormat16B; + case kFormat2S: + return kFormat4H; + case kFormat4S: + return kFormat8H; + case kFormat1D: + return kFormat2S; + case kFormat2D: + return kFormat4S; + case kFormatVnH: + return kFormatVnB; + case kFormatVnS: + return kFormatVnH; + case kFormatVnD: + return kFormatVnS; + default: + VIXL_UNREACHABLE(); + return kFormatUndefined; + } +} + +VectorFormat VectorFormatDoubleLanes(VectorFormat vform) { + VIXL_ASSERT(vform == kFormat8B || vform == kFormat4H || vform == kFormat2S); + switch (vform) { + case kFormat8B: + return kFormat16B; + case kFormat4H: + return kFormat8H; + case kFormat2S: + return kFormat4S; + default: + VIXL_UNREACHABLE(); + return kFormatUndefined; + } +} + + +VectorFormat VectorFormatHalfLanes(VectorFormat vform) { + VIXL_ASSERT(vform == kFormat16B || vform == kFormat8H || vform == kFormat4S); + switch (vform) { + case kFormat16B: + return kFormat8B; + case kFormat8H: + return kFormat4H; + case kFormat4S: + return kFormat2S; + default: + VIXL_UNREACHABLE(); + return kFormatUndefined; + } +} + + +VectorFormat ScalarFormatFromLaneSize(int lane_size_in_bits) { + switch (lane_size_in_bits) { + case 8: + return kFormatB; + case 16: + return kFormatH; + case 32: + return kFormatS; + case 64: + return kFormatD; + default: + VIXL_UNREACHABLE(); + return kFormatUndefined; + } +} + + +bool IsSVEFormat(VectorFormat vform) { + switch (vform) { + case kFormatVnB: + case kFormatVnH: + case kFormatVnS: + case kFormatVnD: + case kFormatVnQ: + case kFormatVnO: + 
return true; + default: + return false; + } +} + + +VectorFormat SVEFormatFromLaneSizeInBytes(int lane_size_in_bytes) { + switch (lane_size_in_bytes) { + case 1: + return kFormatVnB; + case 2: + return kFormatVnH; + case 4: + return kFormatVnS; + case 8: + return kFormatVnD; + case 16: + return kFormatVnQ; + default: + VIXL_UNREACHABLE(); + return kFormatUndefined; + } +} + + +VectorFormat SVEFormatFromLaneSizeInBits(int lane_size_in_bits) { + switch (lane_size_in_bits) { + case 8: + case 16: + case 32: + case 64: + case 128: + return SVEFormatFromLaneSizeInBytes(lane_size_in_bits / kBitsPerByte); + default: + VIXL_UNREACHABLE(); + return kFormatUndefined; + } +} + + +VectorFormat SVEFormatFromLaneSizeInBytesLog2(int lane_size_in_bytes_log2) { + switch (lane_size_in_bytes_log2) { + case 0: + case 1: + case 2: + case 3: + case 4: + return SVEFormatFromLaneSizeInBytes(1 << lane_size_in_bytes_log2); + default: + VIXL_UNREACHABLE(); + return kFormatUndefined; + } +} + + +VectorFormat ScalarFormatFromFormat(VectorFormat vform) { + return ScalarFormatFromLaneSize(LaneSizeInBitsFromFormat(vform)); +} + + +unsigned RegisterSizeInBitsFromFormat(VectorFormat vform) { + VIXL_ASSERT(vform != kFormatUndefined); + VIXL_ASSERT(!IsSVEFormat(vform)); + switch (vform) { + case kFormatB: + return kBRegSize; + case kFormatH: + return kHRegSize; + case kFormatS: + case kFormat2H: + return kSRegSize; + case kFormatD: + case kFormat8B: + case kFormat4H: + case kFormat2S: + case kFormat1D: + return kDRegSize; + case kFormat16B: + case kFormat8H: + case kFormat4S: + case kFormat2D: + return kQRegSize; + default: + VIXL_UNREACHABLE(); + return 0; + } +} + + +unsigned RegisterSizeInBytesFromFormat(VectorFormat vform) { + return RegisterSizeInBitsFromFormat(vform) / 8; +} + + +unsigned LaneSizeInBitsFromFormat(VectorFormat vform) { + VIXL_ASSERT(vform != kFormatUndefined); + switch (vform) { + case kFormatB: + case kFormat8B: + case kFormat16B: + case kFormatVnB: + return 8; + case kFormatH: + case kFormat2H: + case kFormat4H: + case kFormat8H: + case kFormatVnH: + return 16; + case kFormatS: + case kFormat2S: + case kFormat4S: + case kFormatVnS: + return 32; + case kFormatD: + case kFormat1D: + case kFormat2D: + case kFormatVnD: + return 64; + case kFormatVnQ: + return 128; + case kFormatVnO: + return 256; + default: + VIXL_UNREACHABLE(); + return 0; + } +} + + +int LaneSizeInBytesFromFormat(VectorFormat vform) { + return LaneSizeInBitsFromFormat(vform) / 8; +} + + +int LaneSizeInBytesLog2FromFormat(VectorFormat vform) { + VIXL_ASSERT(vform != kFormatUndefined); + switch (vform) { + case kFormatB: + case kFormat8B: + case kFormat16B: + case kFormatVnB: + return 0; + case kFormatH: + case kFormat2H: + case kFormat4H: + case kFormat8H: + case kFormatVnH: + return 1; + case kFormatS: + case kFormat2S: + case kFormat4S: + case kFormatVnS: + return 2; + case kFormatD: + case kFormat1D: + case kFormat2D: + case kFormatVnD: + return 3; + case kFormatVnQ: + return 4; + default: + VIXL_UNREACHABLE(); + return 0; + } +} + + +int LaneCountFromFormat(VectorFormat vform) { + VIXL_ASSERT(vform != kFormatUndefined); + switch (vform) { + case kFormat16B: + return 16; + case kFormat8B: + case kFormat8H: + return 8; + case kFormat4H: + case kFormat4S: + return 4; + case kFormat2H: + case kFormat2S: + case kFormat2D: + return 2; + case kFormat1D: + case kFormatB: + case kFormatH: + case kFormatS: + case kFormatD: + return 1; + default: + VIXL_UNREACHABLE(); + return 0; + } +} + + +int MaxLaneCountFromFormat(VectorFormat vform) { + 
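+ // The maximum lane count is the Q-register (128-bit) lane count for the
+ // format's lane size: 16 for B, 8 for H, 4 for S and 2 for D lanes.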
VIXL_ASSERT(vform != kFormatUndefined); + switch (vform) { + case kFormatB: + case kFormat8B: + case kFormat16B: + return 16; + case kFormatH: + case kFormat4H: + case kFormat8H: + return 8; + case kFormatS: + case kFormat2S: + case kFormat4S: + return 4; + case kFormatD: + case kFormat1D: + case kFormat2D: + return 2; + default: + VIXL_UNREACHABLE(); + return 0; + } +} + + +// Does 'vform' indicate a vector format or a scalar format? +bool IsVectorFormat(VectorFormat vform) { + VIXL_ASSERT(vform != kFormatUndefined); + switch (vform) { + case kFormatB: + case kFormatH: + case kFormatS: + case kFormatD: + return false; + default: + return true; + } +} + + +int64_t MaxIntFromFormat(VectorFormat vform) { + int lane_size = LaneSizeInBitsFromFormat(vform); + return static_cast(GetUintMask(lane_size) >> 1); +} + + +int64_t MinIntFromFormat(VectorFormat vform) { + return -MaxIntFromFormat(vform) - 1; +} + + +uint64_t MaxUintFromFormat(VectorFormat vform) { + return GetUintMask(LaneSizeInBitsFromFormat(vform)); +} + +} // namespace aarch64 +} // namespace vixl diff --git a/3rdparty/vixl/src/aarch64/logic-aarch64.cc b/3rdparty/vixl/src/aarch64/logic-aarch64.cc new file mode 100644 index 0000000000..11229ad658 --- /dev/null +++ b/3rdparty/vixl/src/aarch64/logic-aarch64.cc @@ -0,0 +1,7854 @@ +// Copyright 2015, VIXL authors +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// * Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// * Neither the name of ARM Limited nor the names of its contributors may be +// used to endorse or promote products derived from this software without +// specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS CONTRIBUTORS "AS IS" AND +// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE +// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
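+
+// Helpers for the AArch64 simulator (fixed-point and floating-point
+// conversions, vector load/store logic and related routines); this file is
+// only compiled when VIXL_INCLUDE_SIMULATOR_AARCH64 is defined (see the
+// guard below).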
+ +#ifdef VIXL_INCLUDE_SIMULATOR_AARCH64 + +#include <cmath> + +#include "simulator-aarch64.h" + +namespace vixl { +namespace aarch64 { + +using vixl::internal::SimFloat16; + +template <typename T> +bool IsFloat64() { + return false; +} +template <> +bool IsFloat64<double>() { + return true; +} + +template <typename T> +bool IsFloat32() { + return false; +} +template <> +bool IsFloat32<float>() { + return true; +} + +template <typename T> +bool IsFloat16() { + return false; +} +template <> +bool IsFloat16<Float16>() { + return true; +} +template <> +bool IsFloat16<SimFloat16>() { + return true; +} + +template <> +double Simulator::FPDefaultNaN<double>() { + return kFP64DefaultNaN; +} + + +template <> +float Simulator::FPDefaultNaN<float>() { + return kFP32DefaultNaN; +} + + +template <> +SimFloat16 Simulator::FPDefaultNaN<SimFloat16>() { + return SimFloat16(kFP16DefaultNaN); +} + + +double Simulator::FixedToDouble(int64_t src, int fbits, FPRounding round) { + if (src >= 0) { + return UFixedToDouble(src, fbits, round); + } else if (src == INT64_MIN) { + return -UFixedToDouble(src, fbits, round); + } else { + return -UFixedToDouble(-src, fbits, round); + } +} + + +double Simulator::UFixedToDouble(uint64_t src, int fbits, FPRounding round) { + // An input of 0 is a special case because the result is effectively + // subnormal: The exponent is encoded as 0 and there is no implicit 1 bit. + if (src == 0) { + return 0.0; + } + + // Calculate the exponent. The highest significant bit will have the value + // 2^exponent. + const int highest_significant_bit = 63 - CountLeadingZeros(src); + const int64_t exponent = highest_significant_bit - fbits; + + return FPRoundToDouble(0, exponent, src, round); +} + + +float Simulator::FixedToFloat(int64_t src, int fbits, FPRounding round) { + if (src >= 0) { + return UFixedToFloat(src, fbits, round); + } else if (src == INT64_MIN) { + return -UFixedToFloat(src, fbits, round); + } else { + return -UFixedToFloat(-src, fbits, round); + } +} + + +float Simulator::UFixedToFloat(uint64_t src, int fbits, FPRounding round) { + // An input of 0 is a special case because the result is effectively + // subnormal: The exponent is encoded as 0 and there is no implicit 1 bit. + if (src == 0) { + return 0.0f; + } + + // Calculate the exponent. The highest significant bit will have the value + // 2^exponent. + const int highest_significant_bit = 63 - CountLeadingZeros(src); + const int32_t exponent = highest_significant_bit - fbits; + + return FPRoundToFloat(0, exponent, src, round); +} + + +SimFloat16 Simulator::FixedToFloat16(int64_t src, int fbits, FPRounding round) { + if (src >= 0) { + return UFixedToFloat16(src, fbits, round); + } else if (src == INT64_MIN) { + return -UFixedToFloat16(src, fbits, round); + } else { + return -UFixedToFloat16(-src, fbits, round); + } +} + + +SimFloat16 Simulator::UFixedToFloat16(uint64_t src, + int fbits, + FPRounding round) { + // An input of 0 is a special case because the result is effectively + // subnormal: The exponent is encoded as 0 and there is no implicit 1 bit. + if (src == 0) { + return 0.0f; + } + + // Calculate the exponent. The highest significant bit will have the value + // 2^exponent. 
+ const int highest_significant_bit = 63 - CountLeadingZeros(src); + const int16_t exponent = highest_significant_bit - fbits; + + return FPRoundToFloat16(0, exponent, src, round); +} + + +uint64_t Simulator::GenerateRandomTag(uint16_t exclude) { + uint64_t rtag = nrand48(rand_state_) >> 28; + VIXL_ASSERT(IsUint4(rtag)); + + if (exclude == 0) { + exclude = nrand48(rand_state_) >> 27; + } + + // TODO: implement this to better match the specification, which calls for a + // true random mode, and a pseudo-random mode with state (EL1.TAG) modified by + // PRNG. + return ChooseNonExcludedTag(rtag, 0, exclude); +} + + +void Simulator::ld1(VectorFormat vform, LogicVRegister dst, uint64_t addr) { + dst.ClearForWrite(vform); + for (int i = 0; i < LaneCountFromFormat(vform); i++) { + LoadLane(dst, vform, i, addr); + addr += LaneSizeInBytesFromFormat(vform); + } +} + + +void Simulator::ld1(VectorFormat vform, + LogicVRegister dst, + int index, + uint64_t addr) { + LoadLane(dst, vform, index, addr); +} + + +void Simulator::ld1r(VectorFormat vform, + VectorFormat unpack_vform, + LogicVRegister dst, + uint64_t addr, + bool is_signed) { + unsigned unpack_size = LaneSizeInBytesFromFormat(unpack_vform); + dst.ClearForWrite(vform); + for (int i = 0; i < LaneCountFromFormat(vform); i++) { + if (is_signed) { + LoadIntToLane(dst, vform, unpack_size, i, addr); + } else { + LoadUintToLane(dst, vform, unpack_size, i, addr); + } + } +} + + +void Simulator::ld1r(VectorFormat vform, LogicVRegister dst, uint64_t addr) { + ld1r(vform, vform, dst, addr); +} + + +void Simulator::ld2(VectorFormat vform, + LogicVRegister dst1, + LogicVRegister dst2, + uint64_t addr1) { + dst1.ClearForWrite(vform); + dst2.ClearForWrite(vform); + int esize = LaneSizeInBytesFromFormat(vform); + uint64_t addr2 = addr1 + esize; + for (int i = 0; i < LaneCountFromFormat(vform); i++) { + LoadLane(dst1, vform, i, addr1); + LoadLane(dst2, vform, i, addr2); + addr1 += 2 * esize; + addr2 += 2 * esize; + } +} + + +void Simulator::ld2(VectorFormat vform, + LogicVRegister dst1, + LogicVRegister dst2, + int index, + uint64_t addr1) { + dst1.ClearForWrite(vform); + dst2.ClearForWrite(vform); + uint64_t addr2 = addr1 + LaneSizeInBytesFromFormat(vform); + LoadLane(dst1, vform, index, addr1); + LoadLane(dst2, vform, index, addr2); +} + + +void Simulator::ld2r(VectorFormat vform, + LogicVRegister dst1, + LogicVRegister dst2, + uint64_t addr) { + dst1.ClearForWrite(vform); + dst2.ClearForWrite(vform); + uint64_t addr2 = addr + LaneSizeInBytesFromFormat(vform); + for (int i = 0; i < LaneCountFromFormat(vform); i++) { + LoadLane(dst1, vform, i, addr); + LoadLane(dst2, vform, i, addr2); + } +} + + +void Simulator::ld3(VectorFormat vform, + LogicVRegister dst1, + LogicVRegister dst2, + LogicVRegister dst3, + uint64_t addr1) { + dst1.ClearForWrite(vform); + dst2.ClearForWrite(vform); + dst3.ClearForWrite(vform); + int esize = LaneSizeInBytesFromFormat(vform); + uint64_t addr2 = addr1 + esize; + uint64_t addr3 = addr2 + esize; + for (int i = 0; i < LaneCountFromFormat(vform); i++) { + LoadLane(dst1, vform, i, addr1); + LoadLane(dst2, vform, i, addr2); + LoadLane(dst3, vform, i, addr3); + addr1 += 3 * esize; + addr2 += 3 * esize; + addr3 += 3 * esize; + } +} + + +void Simulator::ld3(VectorFormat vform, + LogicVRegister dst1, + LogicVRegister dst2, + LogicVRegister dst3, + int index, + uint64_t addr1) { + dst1.ClearForWrite(vform); + dst2.ClearForWrite(vform); + dst3.ClearForWrite(vform); + uint64_t addr2 = addr1 + LaneSizeInBytesFromFormat(vform); + uint64_t addr3 = 
addr2 + LaneSizeInBytesFromFormat(vform); + LoadLane(dst1, vform, index, addr1); + LoadLane(dst2, vform, index, addr2); + LoadLane(dst3, vform, index, addr3); +} + + +void Simulator::ld3r(VectorFormat vform, + LogicVRegister dst1, + LogicVRegister dst2, + LogicVRegister dst3, + uint64_t addr) { + dst1.ClearForWrite(vform); + dst2.ClearForWrite(vform); + dst3.ClearForWrite(vform); + uint64_t addr2 = addr + LaneSizeInBytesFromFormat(vform); + uint64_t addr3 = addr2 + LaneSizeInBytesFromFormat(vform); + for (int i = 0; i < LaneCountFromFormat(vform); i++) { + LoadLane(dst1, vform, i, addr); + LoadLane(dst2, vform, i, addr2); + LoadLane(dst3, vform, i, addr3); + } +} + + +void Simulator::ld4(VectorFormat vform, + LogicVRegister dst1, + LogicVRegister dst2, + LogicVRegister dst3, + LogicVRegister dst4, + uint64_t addr1) { + dst1.ClearForWrite(vform); + dst2.ClearForWrite(vform); + dst3.ClearForWrite(vform); + dst4.ClearForWrite(vform); + int esize = LaneSizeInBytesFromFormat(vform); + uint64_t addr2 = addr1 + esize; + uint64_t addr3 = addr2 + esize; + uint64_t addr4 = addr3 + esize; + for (int i = 0; i < LaneCountFromFormat(vform); i++) { + LoadLane(dst1, vform, i, addr1); + LoadLane(dst2, vform, i, addr2); + LoadLane(dst3, vform, i, addr3); + LoadLane(dst4, vform, i, addr4); + addr1 += 4 * esize; + addr2 += 4 * esize; + addr3 += 4 * esize; + addr4 += 4 * esize; + } +} + + +void Simulator::ld4(VectorFormat vform, + LogicVRegister dst1, + LogicVRegister dst2, + LogicVRegister dst3, + LogicVRegister dst4, + int index, + uint64_t addr1) { + dst1.ClearForWrite(vform); + dst2.ClearForWrite(vform); + dst3.ClearForWrite(vform); + dst4.ClearForWrite(vform); + uint64_t addr2 = addr1 + LaneSizeInBytesFromFormat(vform); + uint64_t addr3 = addr2 + LaneSizeInBytesFromFormat(vform); + uint64_t addr4 = addr3 + LaneSizeInBytesFromFormat(vform); + LoadLane(dst1, vform, index, addr1); + LoadLane(dst2, vform, index, addr2); + LoadLane(dst3, vform, index, addr3); + LoadLane(dst4, vform, index, addr4); +} + + +void Simulator::ld4r(VectorFormat vform, + LogicVRegister dst1, + LogicVRegister dst2, + LogicVRegister dst3, + LogicVRegister dst4, + uint64_t addr) { + dst1.ClearForWrite(vform); + dst2.ClearForWrite(vform); + dst3.ClearForWrite(vform); + dst4.ClearForWrite(vform); + uint64_t addr2 = addr + LaneSizeInBytesFromFormat(vform); + uint64_t addr3 = addr2 + LaneSizeInBytesFromFormat(vform); + uint64_t addr4 = addr3 + LaneSizeInBytesFromFormat(vform); + for (int i = 0; i < LaneCountFromFormat(vform); i++) { + LoadLane(dst1, vform, i, addr); + LoadLane(dst2, vform, i, addr2); + LoadLane(dst3, vform, i, addr3); + LoadLane(dst4, vform, i, addr4); + } +} + + +void Simulator::st1(VectorFormat vform, LogicVRegister src, uint64_t addr) { + for (int i = 0; i < LaneCountFromFormat(vform); i++) { + StoreLane(src, vform, i, addr); + addr += LaneSizeInBytesFromFormat(vform); + } +} + + +void Simulator::st1(VectorFormat vform, + LogicVRegister src, + int index, + uint64_t addr) { + StoreLane(src, vform, index, addr); +} + + +void Simulator::st2(VectorFormat vform, + LogicVRegister src, + LogicVRegister src2, + uint64_t addr) { + int esize = LaneSizeInBytesFromFormat(vform); + uint64_t addr2 = addr + esize; + for (int i = 0; i < LaneCountFromFormat(vform); i++) { + StoreLane(src, vform, i, addr); + StoreLane(src2, vform, i, addr2); + addr += 2 * esize; + addr2 += 2 * esize; + } +} + + +void Simulator::st2(VectorFormat vform, + LogicVRegister src, + LogicVRegister src2, + int index, + uint64_t addr) { + int esize = 
LaneSizeInBytesFromFormat(vform); + StoreLane(src, vform, index, addr); + StoreLane(src2, vform, index, addr + 1 * esize); +} + + +void Simulator::st3(VectorFormat vform, + LogicVRegister src, + LogicVRegister src2, + LogicVRegister src3, + uint64_t addr) { + int esize = LaneSizeInBytesFromFormat(vform); + uint64_t addr2 = addr + esize; + uint64_t addr3 = addr2 + esize; + for (int i = 0; i < LaneCountFromFormat(vform); i++) { + StoreLane(src, vform, i, addr); + StoreLane(src2, vform, i, addr2); + StoreLane(src3, vform, i, addr3); + addr += 3 * esize; + addr2 += 3 * esize; + addr3 += 3 * esize; + } +} + + +void Simulator::st3(VectorFormat vform, + LogicVRegister src, + LogicVRegister src2, + LogicVRegister src3, + int index, + uint64_t addr) { + int esize = LaneSizeInBytesFromFormat(vform); + StoreLane(src, vform, index, addr); + StoreLane(src2, vform, index, addr + 1 * esize); + StoreLane(src3, vform, index, addr + 2 * esize); +} + + +void Simulator::st4(VectorFormat vform, + LogicVRegister src, + LogicVRegister src2, + LogicVRegister src3, + LogicVRegister src4, + uint64_t addr) { + int esize = LaneSizeInBytesFromFormat(vform); + uint64_t addr2 = addr + esize; + uint64_t addr3 = addr2 + esize; + uint64_t addr4 = addr3 + esize; + for (int i = 0; i < LaneCountFromFormat(vform); i++) { + StoreLane(src, vform, i, addr); + StoreLane(src2, vform, i, addr2); + StoreLane(src3, vform, i, addr3); + StoreLane(src4, vform, i, addr4); + addr += 4 * esize; + addr2 += 4 * esize; + addr3 += 4 * esize; + addr4 += 4 * esize; + } +} + + +void Simulator::st4(VectorFormat vform, + LogicVRegister src, + LogicVRegister src2, + LogicVRegister src3, + LogicVRegister src4, + int index, + uint64_t addr) { + int esize = LaneSizeInBytesFromFormat(vform); + StoreLane(src, vform, index, addr); + StoreLane(src2, vform, index, addr + 1 * esize); + StoreLane(src3, vform, index, addr + 2 * esize); + StoreLane(src4, vform, index, addr + 3 * esize); +} + + +LogicVRegister Simulator::cmp(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2, + Condition cond) { + dst.ClearForWrite(vform); + for (int i = 0; i < LaneCountFromFormat(vform); i++) { + int64_t sa = src1.Int(vform, i); + int64_t sb = src2.Int(vform, i); + uint64_t ua = src1.Uint(vform, i); + uint64_t ub = src2.Uint(vform, i); + bool result = false; + switch (cond) { + case eq: + result = (ua == ub); + break; + case ge: + result = (sa >= sb); + break; + case gt: + result = (sa > sb); + break; + case hi: + result = (ua > ub); + break; + case hs: + result = (ua >= ub); + break; + case lt: + result = (sa < sb); + break; + case le: + result = (sa <= sb); + break; + default: + VIXL_UNREACHABLE(); + break; + } + dst.SetUint(vform, i, result ? MaxUintFromFormat(vform) : 0); + } + return dst; +} + + +LogicVRegister Simulator::cmp(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + int imm, + Condition cond) { + SimVRegister temp; + LogicVRegister imm_reg = dup_immediate(vform, temp, imm); + return cmp(vform, dst, src1, imm_reg, cond); +} + + +LogicVRegister Simulator::cmptst(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2) { + dst.ClearForWrite(vform); + for (int i = 0; i < LaneCountFromFormat(vform); i++) { + uint64_t ua = src1.Uint(vform, i); + uint64_t ub = src2.Uint(vform, i); + dst.SetUint(vform, i, ((ua & ub) != 0) ? 
MaxUintFromFormat(vform) : 0); + } + return dst; +} + + +LogicVRegister Simulator::add(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2) { + int lane_size = LaneSizeInBitsFromFormat(vform); + dst.ClearForWrite(vform); + + for (int i = 0; i < LaneCountFromFormat(vform); i++) { + // Test for unsigned saturation. + uint64_t ua = src1.UintLeftJustified(vform, i); + uint64_t ub = src2.UintLeftJustified(vform, i); + uint64_t ur = ua + ub; + if (ur < ua) { + dst.SetUnsignedSat(i, true); + } + + // Test for signed saturation. + bool pos_a = (ua >> 63) == 0; + bool pos_b = (ub >> 63) == 0; + bool pos_r = (ur >> 63) == 0; + // If the signs of the operands are the same, but different from the result, + // there was an overflow. + if ((pos_a == pos_b) && (pos_a != pos_r)) { + dst.SetSignedSat(i, pos_a); + } + dst.SetInt(vform, i, ur >> (64 - lane_size)); + } + return dst; +} + +LogicVRegister Simulator::add_uint(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + uint64_t value) { + int lane_size = LaneSizeInBitsFromFormat(vform); + VIXL_ASSERT(IsUintN(lane_size, value)); + dst.ClearForWrite(vform); + // Left-justify `value`. + uint64_t ub = value << (64 - lane_size); + for (int i = 0; i < LaneCountFromFormat(vform); i++) { + // Test for unsigned saturation. + uint64_t ua = src1.UintLeftJustified(vform, i); + uint64_t ur = ua + ub; + if (ur < ua) { + dst.SetUnsignedSat(i, true); + } + + // Test for signed saturation. + // `value` is always positive, so we have an overflow if the (signed) result + // is smaller than the first operand. + if (RawbitsToInt64(ur) < RawbitsToInt64(ua)) { + dst.SetSignedSat(i, true); + } + + dst.SetInt(vform, i, ur >> (64 - lane_size)); + } + return dst; +} + +LogicVRegister Simulator::addp(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2) { + SimVRegister temp1, temp2; + uzp1(vform, temp1, src1, src2); + uzp2(vform, temp2, src1, src2); + add(vform, dst, temp1, temp2); + if (IsSVEFormat(vform)) { + interleave_top_bottom(vform, dst, dst); + } + return dst; +} + +LogicVRegister Simulator::sdiv(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2) { + VIXL_ASSERT((vform == kFormatVnS) || (vform == kFormatVnD)); + + for (int i = 0; i < LaneCountFromFormat(vform); i++) { + int64_t val1 = src1.Int(vform, i); + int64_t val2 = src2.Int(vform, i); + int64_t min_int = (vform == kFormatVnD) ? 
kXMinInt : kWMinInt; + int64_t quotient = 0; + if ((val1 == min_int) && (val2 == -1)) { + quotient = min_int; + } else if (val2 != 0) { + quotient = val1 / val2; + } + dst.SetInt(vform, i, quotient); + } + + return dst; +} + +LogicVRegister Simulator::udiv(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2) { + VIXL_ASSERT((vform == kFormatVnS) || (vform == kFormatVnD)); + + for (int i = 0; i < LaneCountFromFormat(vform); i++) { + uint64_t val1 = src1.Uint(vform, i); + uint64_t val2 = src2.Uint(vform, i); + uint64_t quotient = 0; + if (val2 != 0) { + quotient = val1 / val2; + } + dst.SetUint(vform, i, quotient); + } + + return dst; +} + + +LogicVRegister Simulator::mla(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& srca, + const LogicVRegister& src1, + const LogicVRegister& src2) { + SimVRegister temp; + mul(vform, temp, src1, src2); + add(vform, dst, srca, temp); + return dst; +} + + +LogicVRegister Simulator::mls(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& srca, + const LogicVRegister& src1, + const LogicVRegister& src2) { + SimVRegister temp; + mul(vform, temp, src1, src2); + sub(vform, dst, srca, temp); + return dst; +} + + +LogicVRegister Simulator::mul(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2) { + dst.ClearForWrite(vform); + + for (int i = 0; i < LaneCountFromFormat(vform); i++) { + dst.SetUint(vform, i, src1.Uint(vform, i) * src2.Uint(vform, i)); + } + return dst; +} + + +LogicVRegister Simulator::mul(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2, + int index) { + SimVRegister temp; + VectorFormat indexform = VectorFormatFillQ(vform); + return mul(vform, dst, src1, dup_element(indexform, temp, src2, index)); +} + + +LogicVRegister Simulator::smulh(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2) { + for (int i = 0; i < LaneCountFromFormat(vform); i++) { + int64_t dst_val = 0xbadbeef; + int64_t val1 = src1.Int(vform, i); + int64_t val2 = src2.Int(vform, i); + switch (LaneSizeInBitsFromFormat(vform)) { + case 8: + dst_val = internal::MultiplyHigh<8>(val1, val2); + break; + case 16: + dst_val = internal::MultiplyHigh<16>(val1, val2); + break; + case 32: + dst_val = internal::MultiplyHigh<32>(val1, val2); + break; + case 64: + dst_val = internal::MultiplyHigh<64>(val1, val2); + break; + default: + VIXL_UNREACHABLE(); + break; + } + dst.SetInt(vform, i, dst_val); + } + return dst; +} + + +LogicVRegister Simulator::umulh(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2) { + for (int i = 0; i < LaneCountFromFormat(vform); i++) { + uint64_t dst_val = 0xbadbeef; + uint64_t val1 = src1.Uint(vform, i); + uint64_t val2 = src2.Uint(vform, i); + switch (LaneSizeInBitsFromFormat(vform)) { + case 8: + dst_val = internal::MultiplyHigh<8>(val1, val2); + break; + case 16: + dst_val = internal::MultiplyHigh<16>(val1, val2); + break; + case 32: + dst_val = internal::MultiplyHigh<32>(val1, val2); + break; + case 64: + dst_val = internal::MultiplyHigh<64>(val1, val2); + break; + default: + VIXL_UNREACHABLE(); + break; + } + dst.SetUint(vform, i, dst_val); + } + return dst; +} + + +LogicVRegister Simulator::mla(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2, + int index) { + SimVRegister temp; + VectorFormat indexform = VectorFormatFillQ(vform); + 
return mla(vform, dst, dst, src1, dup_element(indexform, temp, src2, index)); +} + + +LogicVRegister Simulator::mls(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2, + int index) { + SimVRegister temp; + VectorFormat indexform = VectorFormatFillQ(vform); + return mls(vform, dst, dst, src1, dup_element(indexform, temp, src2, index)); +} + +LogicVRegister Simulator::sqdmull(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2, + int index) { + SimVRegister temp; + VectorFormat indexform = + VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform)); + return sqdmull(vform, dst, src1, dup_element(indexform, temp, src2, index)); +} + +LogicVRegister Simulator::sqdmlal(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2, + int index) { + SimVRegister temp; + VectorFormat indexform = + VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform)); + return sqdmlal(vform, dst, src1, dup_element(indexform, temp, src2, index)); +} + +LogicVRegister Simulator::sqdmlsl(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2, + int index) { + SimVRegister temp; + VectorFormat indexform = + VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform)); + return sqdmlsl(vform, dst, src1, dup_element(indexform, temp, src2, index)); +} + +LogicVRegister Simulator::sqdmulh(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2, + int index) { + SimVRegister temp; + VectorFormat indexform = VectorFormatFillQ(vform); + return sqdmulh(vform, dst, src1, dup_element(indexform, temp, src2, index)); +} + + +LogicVRegister Simulator::sqrdmulh(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2, + int index) { + SimVRegister temp; + VectorFormat indexform = VectorFormatFillQ(vform); + return sqrdmulh(vform, dst, src1, dup_element(indexform, temp, src2, index)); +} + + +LogicVRegister Simulator::sqrdmlah(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2, + int index) { + SimVRegister temp; + VectorFormat indexform = VectorFormatFillQ(vform); + return sqrdmlah(vform, dst, src1, dup_element(indexform, temp, src2, index)); +} + + +LogicVRegister Simulator::sqrdmlsh(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2, + int index) { + SimVRegister temp; + VectorFormat indexform = VectorFormatFillQ(vform); + return sqrdmlsh(vform, dst, src1, dup_element(indexform, temp, src2, index)); +} + + +uint64_t Simulator::PolynomialMult(uint64_t op1, + uint64_t op2, + int lane_size_in_bits) const { + VIXL_ASSERT(static_cast<unsigned>(lane_size_in_bits) <= kSRegSize); + VIXL_ASSERT(IsUintN(lane_size_in_bits, op1)); + VIXL_ASSERT(IsUintN(lane_size_in_bits, op2)); + uint64_t result = 0; + for (int i = 0; i < lane_size_in_bits; ++i) { + if ((op1 >> i) & 1) { + result = result ^ (op2 << i); + } + } + return result; +} + + +LogicVRegister Simulator::pmul(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2) { + dst.ClearForWrite(vform); + for (int i = 0; i < LaneCountFromFormat(vform); i++) { + dst.SetUint(vform, + i, + PolynomialMult(src1.Uint(vform, i), + src2.Uint(vform, i), + LaneSizeInBitsFromFormat(vform))); + } + return dst; +} + + +LogicVRegister Simulator::pmull(VectorFormat vform, + LogicVRegister dst, + const 
LogicVRegister& src1, + const LogicVRegister& src2) { + dst.ClearForWrite(vform); + + VectorFormat vform_src = VectorFormatHalfWidth(vform); + for (int i = 0; i < LaneCountFromFormat(vform); i++) { + dst.SetUint(vform, + i, + PolynomialMult(src1.Uint(vform_src, i), + src2.Uint(vform_src, i), + LaneSizeInBitsFromFormat(vform_src))); + } + + return dst; +} + + +LogicVRegister Simulator::pmull2(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2) { + VectorFormat vform_src = VectorFormatHalfWidthDoubleLanes(vform); + dst.ClearForWrite(vform); + int lane_count = LaneCountFromFormat(vform); + for (int i = 0; i < lane_count; i++) { + dst.SetUint(vform, + i, + PolynomialMult(src1.Uint(vform_src, lane_count + i), + src2.Uint(vform_src, lane_count + i), + LaneSizeInBitsFromFormat(vform_src))); + } + return dst; +} + + +LogicVRegister Simulator::sub(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2) { + int lane_size = LaneSizeInBitsFromFormat(vform); + dst.ClearForWrite(vform); + for (int i = 0; i < LaneCountFromFormat(vform); i++) { + // Test for unsigned saturation. + uint64_t ua = src1.UintLeftJustified(vform, i); + uint64_t ub = src2.UintLeftJustified(vform, i); + uint64_t ur = ua - ub; + if (ub > ua) { + dst.SetUnsignedSat(i, false); + } + + // Test for signed saturation. + bool pos_a = (ua >> 63) == 0; + bool pos_b = (ub >> 63) == 0; + bool pos_r = (ur >> 63) == 0; + // If the signs of the operands are different, and the sign of the first + // operand doesn't match the result, there was an overflow. + if ((pos_a != pos_b) && (pos_a != pos_r)) { + dst.SetSignedSat(i, pos_a); + } + + dst.SetInt(vform, i, ur >> (64 - lane_size)); + } + return dst; +} + +LogicVRegister Simulator::sub_uint(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + uint64_t value) { + int lane_size = LaneSizeInBitsFromFormat(vform); + VIXL_ASSERT(IsUintN(lane_size, value)); + dst.ClearForWrite(vform); + // Left-justify `value`. + uint64_t ub = value << (64 - lane_size); + for (int i = 0; i < LaneCountFromFormat(vform); i++) { + // Test for unsigned saturation. + uint64_t ua = src1.UintLeftJustified(vform, i); + uint64_t ur = ua - ub; + if (ub > ua) { + dst.SetUnsignedSat(i, false); + } + + // Test for signed saturation. + // `value` is always positive, so we have an overflow if the (signed) result + // is greater than the first operand. 
+ if (RawbitsToInt64(ur) > RawbitsToInt64(ua)) { + dst.SetSignedSat(i, false); + } + + dst.SetInt(vform, i, ur >> (64 - lane_size)); + } + return dst; +} + +LogicVRegister Simulator::and_(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2) { + dst.ClearForWrite(vform); + for (int i = 0; i < LaneCountFromFormat(vform); i++) { + dst.SetUint(vform, i, src1.Uint(vform, i) & src2.Uint(vform, i)); + } + return dst; +} + + +LogicVRegister Simulator::orr(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2) { + dst.ClearForWrite(vform); + for (int i = 0; i < LaneCountFromFormat(vform); i++) { + dst.SetUint(vform, i, src1.Uint(vform, i) | src2.Uint(vform, i)); + } + return dst; +} + + +LogicVRegister Simulator::orn(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2) { + dst.ClearForWrite(vform); + for (int i = 0; i < LaneCountFromFormat(vform); i++) { + dst.SetUint(vform, i, src1.Uint(vform, i) | ~src2.Uint(vform, i)); + } + return dst; +} + + +LogicVRegister Simulator::eor(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2) { + dst.ClearForWrite(vform); + for (int i = 0; i < LaneCountFromFormat(vform); i++) { + dst.SetUint(vform, i, src1.Uint(vform, i) ^ src2.Uint(vform, i)); + } + return dst; +} + + +LogicVRegister Simulator::bic(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2) { + dst.ClearForWrite(vform); + for (int i = 0; i < LaneCountFromFormat(vform); i++) { + dst.SetUint(vform, i, src1.Uint(vform, i) & ~src2.Uint(vform, i)); + } + return dst; +} + + +LogicVRegister Simulator::bic(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src, + uint64_t imm) { + uint64_t result[16]; + int lane_count = LaneCountFromFormat(vform); + for (int i = 0; i < lane_count; ++i) { + result[i] = src.Uint(vform, i) & ~imm; + } + dst.ClearForWrite(vform); + for (int i = 0; i < lane_count; ++i) { + dst.SetUint(vform, i, result[i]); + } + return dst; +} + + +LogicVRegister Simulator::bif(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2) { + dst.ClearForWrite(vform); + for (int i = 0; i < LaneCountFromFormat(vform); i++) { + uint64_t operand1 = dst.Uint(vform, i); + uint64_t operand2 = ~src2.Uint(vform, i); + uint64_t operand3 = src1.Uint(vform, i); + uint64_t result = operand1 ^ ((operand1 ^ operand3) & operand2); + dst.SetUint(vform, i, result); + } + return dst; +} + + +LogicVRegister Simulator::bit(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2) { + dst.ClearForWrite(vform); + for (int i = 0; i < LaneCountFromFormat(vform); i++) { + uint64_t operand1 = dst.Uint(vform, i); + uint64_t operand2 = src2.Uint(vform, i); + uint64_t operand3 = src1.Uint(vform, i); + uint64_t result = operand1 ^ ((operand1 ^ operand3) & operand2); + dst.SetUint(vform, i, result); + } + return dst; +} + + +LogicVRegister Simulator::bsl(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src_mask, + const LogicVRegister& src1, + const LogicVRegister& src2) { + dst.ClearForWrite(vform); + for (int i = 0; i < LaneCountFromFormat(vform); i++) { + uint64_t operand1 = src2.Uint(vform, i); + uint64_t operand2 = src_mask.Uint(vform, i); + uint64_t operand3 = src1.Uint(vform, i); + uint64_t result = operand1 ^ ((operand1 ^ operand3) & operand2); + dst.SetUint(vform, i, 
result); + } + return dst; +} + + +LogicVRegister Simulator::sminmax(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2, + bool max) { + dst.ClearForWrite(vform); + for (int i = 0; i < LaneCountFromFormat(vform); i++) { + int64_t src1_val = src1.Int(vform, i); + int64_t src2_val = src2.Int(vform, i); + int64_t dst_val; + if (max) { + dst_val = (src1_val > src2_val) ? src1_val : src2_val; + } else { + dst_val = (src1_val < src2_val) ? src1_val : src2_val; + } + dst.SetInt(vform, i, dst_val); + } + return dst; +} + + +LogicVRegister Simulator::smax(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2) { + return sminmax(vform, dst, src1, src2, true); +} + + +LogicVRegister Simulator::smin(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2) { + return sminmax(vform, dst, src1, src2, false); +} + + +LogicVRegister Simulator::sminmaxp(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2, + bool max) { + unsigned lanes = LaneCountFromFormat(vform); + int64_t result[kZRegMaxSizeInBytes]; + const LogicVRegister* src = &src1; + for (unsigned j = 0; j < 2; j++) { + for (unsigned i = 0; i < lanes; i += 2) { + int64_t first_val = src->Int(vform, i); + int64_t second_val = src->Int(vform, i + 1); + int64_t dst_val; + if (max) { + dst_val = (first_val > second_val) ? first_val : second_val; + } else { + dst_val = (first_val < second_val) ? first_val : second_val; + } + VIXL_ASSERT(((i >> 1) + (j * lanes / 2)) < ArrayLength(result)); + result[(i >> 1) + (j * lanes / 2)] = dst_val; + } + src = &src2; + } + dst.SetIntArray(vform, result); + if (IsSVEFormat(vform)) { + interleave_top_bottom(vform, dst, dst); + } + return dst; +} + + +LogicVRegister Simulator::smaxp(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2) { + return sminmaxp(vform, dst, src1, src2, true); +} + + +LogicVRegister Simulator::sminp(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2) { + return sminmaxp(vform, dst, src1, src2, false); +} + + +LogicVRegister Simulator::addp(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src) { + VIXL_ASSERT(vform == kFormatD); + + uint64_t dst_val = src.Uint(kFormat2D, 0) + src.Uint(kFormat2D, 1); + dst.ClearForWrite(vform); + dst.SetUint(vform, 0, dst_val); + return dst; +} + + +LogicVRegister Simulator::addv(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src) { + VectorFormat vform_dst = + ScalarFormatFromLaneSize(LaneSizeInBitsFromFormat(vform)); + + + int64_t dst_val = 0; + for (int i = 0; i < LaneCountFromFormat(vform); i++) { + dst_val += src.Int(vform, i); + } + + dst.ClearForWrite(vform_dst); + dst.SetInt(vform_dst, 0, dst_val); + return dst; +} + + +LogicVRegister Simulator::saddlv(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src) { + VectorFormat vform_dst = + ScalarFormatFromLaneSize(LaneSizeInBitsFromFormat(vform) * 2); + + int64_t dst_val = 0; + for (int i = 0; i < LaneCountFromFormat(vform); i++) { + dst_val += src.Int(vform, i); + } + + dst.ClearForWrite(vform_dst); + dst.SetInt(vform_dst, 0, dst_val); + return dst; +} + + +LogicVRegister Simulator::uaddlv(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src) { + VectorFormat vform_dst = + ScalarFormatFromLaneSize(LaneSizeInBitsFromFormat(vform) * 2); + + uint64_t dst_val 
= 0; + for (int i = 0; i < LaneCountFromFormat(vform); i++) { + dst_val += src.Uint(vform, i); + } + + dst.ClearForWrite(vform_dst); + dst.SetUint(vform_dst, 0, dst_val); + return dst; +} + + +LogicVRegister Simulator::sminmaxv(VectorFormat vform, + LogicVRegister dst, + const LogicPRegister& pg, + const LogicVRegister& src, + bool max) { + int64_t dst_val = max ? INT64_MIN : INT64_MAX; + for (int i = 0; i < LaneCountFromFormat(vform); i++) { + if (!pg.IsActive(vform, i)) continue; + + int64_t src_val = src.Int(vform, i); + if (max) { + dst_val = (src_val > dst_val) ? src_val : dst_val; + } else { + dst_val = (src_val < dst_val) ? src_val : dst_val; + } + } + dst.ClearForWrite(ScalarFormatFromFormat(vform)); + dst.SetInt(vform, 0, dst_val); + return dst; +} + + +LogicVRegister Simulator::smaxv(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src) { + sminmaxv(vform, dst, GetPTrue(), src, true); + return dst; +} + + +LogicVRegister Simulator::sminv(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src) { + sminmaxv(vform, dst, GetPTrue(), src, false); + return dst; +} + + +LogicVRegister Simulator::smaxv(VectorFormat vform, + LogicVRegister dst, + const LogicPRegister& pg, + const LogicVRegister& src) { + VIXL_ASSERT(IsSVEFormat(vform)); + sminmaxv(vform, dst, pg, src, true); + return dst; +} + + +LogicVRegister Simulator::sminv(VectorFormat vform, + LogicVRegister dst, + const LogicPRegister& pg, + const LogicVRegister& src) { + VIXL_ASSERT(IsSVEFormat(vform)); + sminmaxv(vform, dst, pg, src, false); + return dst; +} + + +LogicVRegister Simulator::uminmax(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2, + bool max) { + dst.ClearForWrite(vform); + for (int i = 0; i < LaneCountFromFormat(vform); i++) { + uint64_t src1_val = src1.Uint(vform, i); + uint64_t src2_val = src2.Uint(vform, i); + uint64_t dst_val; + if (max) { + dst_val = (src1_val > src2_val) ? src1_val : src2_val; + } else { + dst_val = (src1_val < src2_val) ? src1_val : src2_val; + } + dst.SetUint(vform, i, dst_val); + } + return dst; +} + + +LogicVRegister Simulator::umax(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2) { + return uminmax(vform, dst, src1, src2, true); +} + + +LogicVRegister Simulator::umin(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2) { + return uminmax(vform, dst, src1, src2, false); +} + + +LogicVRegister Simulator::uminmaxp(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2, + bool max) { + unsigned lanes = LaneCountFromFormat(vform); + uint64_t result[kZRegMaxSizeInBytes]; + const LogicVRegister* src = &src1; + for (unsigned j = 0; j < 2; j++) { + for (unsigned i = 0; i < lanes; i += 2) { + uint64_t first_val = src->Uint(vform, i); + uint64_t second_val = src->Uint(vform, i + 1); + uint64_t dst_val; + if (max) { + dst_val = (first_val > second_val) ? first_val : second_val; + } else { + dst_val = (first_val < second_val) ? 
first_val : second_val; + } + VIXL_ASSERT(((i >> 1) + (j * lanes / 2)) < ArrayLength(result)); + result[(i >> 1) + (j * lanes / 2)] = dst_val; + } + src = &src2; + } + dst.SetUintArray(vform, result); + if (IsSVEFormat(vform)) { + interleave_top_bottom(vform, dst, dst); + } + return dst; +} + + +LogicVRegister Simulator::umaxp(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2) { + return uminmaxp(vform, dst, src1, src2, true); +} + + +LogicVRegister Simulator::uminp(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2) { + return uminmaxp(vform, dst, src1, src2, false); +} + + +LogicVRegister Simulator::uminmaxv(VectorFormat vform, + LogicVRegister dst, + const LogicPRegister& pg, + const LogicVRegister& src, + bool max) { + uint64_t dst_val = max ? 0 : UINT64_MAX; + for (int i = 0; i < LaneCountFromFormat(vform); i++) { + if (!pg.IsActive(vform, i)) continue; + + uint64_t src_val = src.Uint(vform, i); + if (max) { + dst_val = (src_val > dst_val) ? src_val : dst_val; + } else { + dst_val = (src_val < dst_val) ? src_val : dst_val; + } + } + dst.ClearForWrite(ScalarFormatFromFormat(vform)); + dst.SetUint(vform, 0, dst_val); + return dst; +} + + +LogicVRegister Simulator::umaxv(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src) { + uminmaxv(vform, dst, GetPTrue(), src, true); + return dst; +} + + +LogicVRegister Simulator::uminv(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src) { + uminmaxv(vform, dst, GetPTrue(), src, false); + return dst; +} + + +LogicVRegister Simulator::umaxv(VectorFormat vform, + LogicVRegister dst, + const LogicPRegister& pg, + const LogicVRegister& src) { + VIXL_ASSERT(IsSVEFormat(vform)); + uminmaxv(vform, dst, pg, src, true); + return dst; +} + + +LogicVRegister Simulator::uminv(VectorFormat vform, + LogicVRegister dst, + const LogicPRegister& pg, + const LogicVRegister& src) { + VIXL_ASSERT(IsSVEFormat(vform)); + uminmaxv(vform, dst, pg, src, false); + return dst; +} + + +LogicVRegister Simulator::shl(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src, + int shift) { + VIXL_ASSERT(shift >= 0); + SimVRegister temp; + LogicVRegister shiftreg = dup_immediate(vform, temp, shift); + return ushl(vform, dst, src, shiftreg); +} + + +LogicVRegister Simulator::sshll(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src, + int shift) { + VIXL_ASSERT(shift >= 0); + SimVRegister temp1, temp2; + LogicVRegister shiftreg = dup_immediate(vform, temp1, shift); + LogicVRegister extendedreg = sxtl(vform, temp2, src); + return sshl(vform, dst, extendedreg, shiftreg); +} + + +LogicVRegister Simulator::sshll2(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src, + int shift) { + VIXL_ASSERT(shift >= 0); + SimVRegister temp1, temp2; + LogicVRegister shiftreg = dup_immediate(vform, temp1, shift); + LogicVRegister extendedreg = sxtl2(vform, temp2, src); + return sshl(vform, dst, extendedreg, shiftreg); +} + + +LogicVRegister Simulator::shll(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src) { + int shift = LaneSizeInBitsFromFormat(vform) / 2; + return sshll(vform, dst, src, shift); +} + + +LogicVRegister Simulator::shll2(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src) { + int shift = LaneSizeInBitsFromFormat(vform) / 2; + return sshll2(vform, dst, src, shift); +} + + +LogicVRegister Simulator::ushll(VectorFormat vform, + LogicVRegister dst, + const 
LogicVRegister& src, + int shift) { + VIXL_ASSERT(shift >= 0); + SimVRegister temp1, temp2; + LogicVRegister shiftreg = dup_immediate(vform, temp1, shift); + LogicVRegister extendedreg = uxtl(vform, temp2, src); + return ushl(vform, dst, extendedreg, shiftreg); +} + + +LogicVRegister Simulator::ushll2(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src, + int shift) { + VIXL_ASSERT(shift >= 0); + SimVRegister temp1, temp2; + LogicVRegister shiftreg = dup_immediate(vform, temp1, shift); + LogicVRegister extendedreg = uxtl2(vform, temp2, src); + return ushl(vform, dst, extendedreg, shiftreg); +} + +std::pair<bool, uint64_t> Simulator::clast(VectorFormat vform, + const LogicPRegister& pg, + const LogicVRegister& src, + int offset_from_last_active) { + // Untested for any other values. + VIXL_ASSERT((offset_from_last_active == 0) || (offset_from_last_active == 1)); + + int last_active = GetLastActive(vform, pg); + int lane_count = LaneCountFromFormat(vform); + int index = + ((last_active + offset_from_last_active) + lane_count) % lane_count; + return std::make_pair(last_active >= 0, src.Uint(vform, index)); +} + +LogicVRegister Simulator::compact(VectorFormat vform, + LogicVRegister dst, + const LogicPRegister& pg, + const LogicVRegister& src) { + int j = 0; + for (int i = 0; i < LaneCountFromFormat(vform); i++) { + if (pg.IsActive(vform, i)) { + dst.SetUint(vform, j++, src.Uint(vform, i)); + } + } + for (; j < LaneCountFromFormat(vform); j++) { + dst.SetUint(vform, j, 0); + } + return dst; +} + +LogicVRegister Simulator::splice(VectorFormat vform, + LogicVRegister dst, + const LogicPRegister& pg, + const LogicVRegister& src1, + const LogicVRegister& src2) { + int lane_count = LaneCountFromFormat(vform); + int first_active = GetFirstActive(vform, pg); + int last_active = GetLastActive(vform, pg); + int dst_idx = 0; + uint64_t result[kZRegMaxSizeInBytes]; + + if (first_active >= 0) { + VIXL_ASSERT(last_active >= first_active); + VIXL_ASSERT(last_active < lane_count); + for (int i = first_active; i <= last_active; i++) { + result[dst_idx++] = src1.Uint(vform, i); + } + } + + VIXL_ASSERT(dst_idx <= lane_count); + for (int i = dst_idx; i < lane_count; i++) { + result[i] = src2.Uint(vform, i - dst_idx); + } + + dst.SetUintArray(vform, result); + + return dst; +} + +LogicVRegister Simulator::sel(VectorFormat vform, + LogicVRegister dst, + const SimPRegister& pg, + const LogicVRegister& src1, + const LogicVRegister& src2) { + int p_reg_bits_per_lane = + LaneSizeInBitsFromFormat(vform) / kZRegBitsPerPRegBit; + for (int lane = 0; lane < LaneCountFromFormat(vform); lane++) { + uint64_t lane_value = pg.GetBit(lane * p_reg_bits_per_lane) + ? 
src1.Uint(vform, lane) + : src2.Uint(vform, lane); + dst.SetUint(vform, lane, lane_value); + } + return dst; +} + + +LogicPRegister Simulator::sel(LogicPRegister dst, + const LogicPRegister& pg, + const LogicPRegister& src1, + const LogicPRegister& src2) { + for (int i = 0; i < dst.GetChunkCount(); i++) { + LogicPRegister::ChunkType mask = pg.GetChunk(i); + LogicPRegister::ChunkType result = + (mask & src1.GetChunk(i)) | (~mask & src2.GetChunk(i)); + dst.SetChunk(i, result); + } + return dst; +} + + +LogicVRegister Simulator::sli(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src, + int shift) { + dst.ClearForWrite(vform); + int lane_count = LaneCountFromFormat(vform); + for (int i = 0; i < lane_count; i++) { + uint64_t src_lane = src.Uint(vform, i); + uint64_t dst_lane = dst.Uint(vform, i); + uint64_t shifted = src_lane << shift; + uint64_t mask = MaxUintFromFormat(vform) << shift; + dst.SetUint(vform, i, (dst_lane & ~mask) | shifted); + } + return dst; +} + + +LogicVRegister Simulator::sqshl(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src, + int shift) { + VIXL_ASSERT(shift >= 0); + SimVRegister temp; + LogicVRegister shiftreg = dup_immediate(vform, temp, shift); + return sshl(vform, dst, src, shiftreg).SignedSaturate(vform); +} + + +LogicVRegister Simulator::uqshl(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src, + int shift) { + VIXL_ASSERT(shift >= 0); + SimVRegister temp; + LogicVRegister shiftreg = dup_immediate(vform, temp, shift); + return ushl(vform, dst, src, shiftreg).UnsignedSaturate(vform); +} + + +LogicVRegister Simulator::sqshlu(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src, + int shift) { + VIXL_ASSERT(shift >= 0); + SimVRegister temp; + LogicVRegister shiftreg = dup_immediate(vform, temp, shift); + return sshl(vform, dst, src, shiftreg).UnsignedSaturate(vform); +} + + +LogicVRegister Simulator::sri(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src, + int shift) { + dst.ClearForWrite(vform); + int lane_count = LaneCountFromFormat(vform); + VIXL_ASSERT((shift > 0) && + (shift <= static_cast<int>(LaneSizeInBitsFromFormat(vform)))); + for (int i = 0; i < lane_count; i++) { + uint64_t src_lane = src.Uint(vform, i); + uint64_t dst_lane = dst.Uint(vform, i); + uint64_t shifted; + uint64_t mask; + if (shift == 64) { + shifted = 0; + mask = 0; + } else { + shifted = src_lane >> shift; + mask = MaxUintFromFormat(vform) >> shift; + } + dst.SetUint(vform, i, (dst_lane & ~mask) | shifted); + } + return dst; +} + + +LogicVRegister Simulator::ushr(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src, + int shift) { + VIXL_ASSERT(shift >= 0); + SimVRegister temp; + LogicVRegister shiftreg = dup_immediate(vform, temp, -shift); + return ushl(vform, dst, src, shiftreg); +} + + +LogicVRegister Simulator::sshr(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src, + int shift) { + VIXL_ASSERT(shift >= 0); + SimVRegister temp; + LogicVRegister shiftreg = dup_immediate(vform, temp, -shift); + return sshl(vform, dst, src, shiftreg); +} + + +LogicVRegister Simulator::ssra(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src, + int shift) { + SimVRegister temp; + LogicVRegister shifted_reg = sshr(vform, temp, src, shift); + return add(vform, dst, dst, shifted_reg); +} + + +LogicVRegister Simulator::usra(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src, + int shift) { + SimVRegister temp; + LogicVRegister shifted_reg = 
ushr(vform, temp, src, shift); + return add(vform, dst, dst, shifted_reg); +} + + +LogicVRegister Simulator::srsra(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src, + int shift) { + SimVRegister temp; + LogicVRegister shifted_reg = sshr(vform, temp, src, shift).Round(vform); + return add(vform, dst, dst, shifted_reg); +} + + +LogicVRegister Simulator::ursra(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src, + int shift) { + SimVRegister temp; + LogicVRegister shifted_reg = ushr(vform, temp, src, shift).Round(vform); + return add(vform, dst, dst, shifted_reg); +} + + +LogicVRegister Simulator::cls(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src) { + int lane_size_in_bits = LaneSizeInBitsFromFormat(vform); + int lane_count = LaneCountFromFormat(vform); + + // Ensure that we can store one result per lane. + int result[kZRegMaxSizeInBytes]; + + for (int i = 0; i < lane_count; i++) { + result[i] = CountLeadingSignBits(src.Int(vform, i), lane_size_in_bits); + } + + dst.ClearForWrite(vform); + for (int i = 0; i < lane_count; ++i) { + dst.SetUint(vform, i, result[i]); + } + return dst; +} + + +LogicVRegister Simulator::clz(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src) { + int lane_size_in_bits = LaneSizeInBitsFromFormat(vform); + int lane_count = LaneCountFromFormat(vform); + + // Ensure that we can store one result per lane. + int result[kZRegMaxSizeInBytes]; + + for (int i = 0; i < lane_count; i++) { + result[i] = CountLeadingZeros(src.Uint(vform, i), lane_size_in_bits); + } + + dst.ClearForWrite(vform); + for (int i = 0; i < lane_count; ++i) { + dst.SetUint(vform, i, result[i]); + } + return dst; +} + + +LogicVRegister Simulator::cnot(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src) { + dst.ClearForWrite(vform); + for (int i = 0; i < LaneCountFromFormat(vform); i++) { + uint64_t value = (src.Uint(vform, i) == 0) ? 1 : 0; + dst.SetUint(vform, i, value); + } + return dst; +} + + +LogicVRegister Simulator::cnt(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src) { + int lane_size_in_bits = LaneSizeInBitsFromFormat(vform); + int lane_count = LaneCountFromFormat(vform); + + // Ensure that we can store one result per lane. + int result[kZRegMaxSizeInBytes]; + + for (int i = 0; i < lane_count; i++) { + result[i] = CountSetBits(src.Uint(vform, i), lane_size_in_bits); + } + + dst.ClearForWrite(vform); + for (int i = 0; i < lane_count; ++i) { + dst.SetUint(vform, i, result[i]); + } + return dst; +} + +static int64_t CalculateSignedShiftDistance(int64_t shift_val, + int esize, + bool shift_in_ls_byte) { + if (shift_in_ls_byte) { + // Neon uses the least-significant byte of the lane as the shift distance. + shift_val = ExtractSignedBitfield64(7, 0, shift_val); + } else { + // SVE uses a saturated shift distance in the range + // -(esize + 1) ... (esize + 1). + if (shift_val > (esize + 1)) shift_val = esize + 1; + if (shift_val < -(esize + 1)) shift_val = -(esize + 1); + } + return shift_val; +} + +LogicVRegister Simulator::sshl(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2, + bool shift_in_ls_byte) { + dst.ClearForWrite(vform); + int esize = LaneSizeInBitsFromFormat(vform); + for (int i = 0; i < LaneCountFromFormat(vform); i++) { + int64_t shift_val = CalculateSignedShiftDistance(src2.Int(vform, i), + esize, + shift_in_ls_byte); + + int64_t lj_src_val = src1.IntLeftJustified(vform, i); + + // Set signed saturation state. 
+ if ((shift_val > CountLeadingSignBits(lj_src_val)) && (lj_src_val != 0)) { + dst.SetSignedSat(i, lj_src_val >= 0); + } + + // Set unsigned saturation state. + if (lj_src_val < 0) { + dst.SetUnsignedSat(i, false); + } else if ((shift_val > CountLeadingZeros(lj_src_val)) && + (lj_src_val != 0)) { + dst.SetUnsignedSat(i, true); + } + + int64_t src_val = src1.Int(vform, i); + bool src_is_negative = src_val < 0; + if (shift_val > 63) { + dst.SetInt(vform, i, 0); + } else if (shift_val < -63) { + dst.SetRounding(i, src_is_negative); + dst.SetInt(vform, i, src_is_negative ? -1 : 0); + } else { + // Use unsigned types for shifts, as behaviour is undefined for signed + // lhs. + uint64_t usrc_val = static_cast<uint64_t>(src_val); + + if (shift_val < 0) { + // Convert to right shift. + shift_val = -shift_val; + + // Set rounding state by testing most-significant bit shifted out. + // Rounding only needed on right shifts. + if (((usrc_val >> (shift_val - 1)) & 1) == 1) { + dst.SetRounding(i, true); + } + + usrc_val >>= shift_val; + + if (src_is_negative) { + // Simulate sign-extension. + usrc_val |= (~UINT64_C(0) << (64 - shift_val)); + } + } else { + usrc_val <<= shift_val; + } + dst.SetUint(vform, i, usrc_val); + } + } + return dst; +} + + +LogicVRegister Simulator::ushl(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2, + bool shift_in_ls_byte) { + dst.ClearForWrite(vform); + int esize = LaneSizeInBitsFromFormat(vform); + for (int i = 0; i < LaneCountFromFormat(vform); i++) { + int64_t shift_val = CalculateSignedShiftDistance(src2.Int(vform, i), + esize, + shift_in_ls_byte); + + uint64_t lj_src_val = src1.UintLeftJustified(vform, i); + + // Set saturation state. + if ((shift_val > CountLeadingZeros(lj_src_val)) && (lj_src_val != 0)) { + dst.SetUnsignedSat(i, true); + } + + uint64_t src_val = src1.Uint(vform, i); + if ((shift_val > 63) || (shift_val < -64)) { + dst.SetUint(vform, i, 0); + } else { + if (shift_val < 0) { + // Set rounding state. Rounding only needed on right shifts. + if (((src_val >> (-shift_val - 1)) & 1) == 1) { + dst.SetRounding(i, true); + } + + if (shift_val == -64) { + src_val = 0; + } else { + src_val >>= -shift_val; + } + } else { + src_val <<= shift_val; + } + dst.SetUint(vform, i, src_val); + } + } + return dst; +} + +LogicVRegister Simulator::sshr(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2) { + SimVRegister temp; + // Saturate to sidestep the min-int problem. + neg(vform, temp, src2).SignedSaturate(vform); + sshl(vform, dst, src1, temp, false); + return dst; +} + +LogicVRegister Simulator::ushr(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2) { + SimVRegister temp; + // Saturate to sidestep the min-int problem. + neg(vform, temp, src2).SignedSaturate(vform); + ushl(vform, dst, src1, temp, false); + return dst; +} + +LogicVRegister Simulator::neg(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src) { + dst.ClearForWrite(vform); + for (int i = 0; i < LaneCountFromFormat(vform); i++) { + // Test for signed saturation. + int64_t sa = src.Int(vform, i); + if (sa == MinIntFromFormat(vform)) { + dst.SetSignedSat(i, true); + } + dst.SetInt(vform, i, (sa == INT64_MIN) ? 
sa : -sa); + } + return dst; +} + + +LogicVRegister Simulator::suqadd(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2) { + dst.ClearForWrite(vform); + for (int i = 0; i < LaneCountFromFormat(vform); i++) { + int64_t sa = src1.IntLeftJustified(vform, i); + uint64_t ub = src2.UintLeftJustified(vform, i); + uint64_t ur = sa + ub; + + int64_t sr; + memcpy(&sr, &ur, sizeof(sr)); + if (sr < sa) { // Test for signed positive saturation. + dst.SetInt(vform, i, MaxIntFromFormat(vform)); + } else { + dst.SetUint(vform, i, src1.Int(vform, i) + src2.Uint(vform, i)); + } + } + return dst; +} + + +LogicVRegister Simulator::usqadd(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2) { + dst.ClearForWrite(vform); + for (int i = 0; i < LaneCountFromFormat(vform); i++) { + uint64_t ua = src1.UintLeftJustified(vform, i); + int64_t sb = src2.IntLeftJustified(vform, i); + uint64_t ur = ua + sb; + + if ((sb > 0) && (ur <= ua)) { + dst.SetUint(vform, i, MaxUintFromFormat(vform)); // Positive saturation. + } else if ((sb < 0) && (ur >= ua)) { + dst.SetUint(vform, i, 0); // Negative saturation. + } else { + dst.SetUint(vform, i, src1.Uint(vform, i) + src2.Int(vform, i)); + } + } + return dst; +} + + +LogicVRegister Simulator::abs(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src) { + dst.ClearForWrite(vform); + for (int i = 0; i < LaneCountFromFormat(vform); i++) { + // Test for signed saturation. + int64_t sa = src.Int(vform, i); + if (sa == MinIntFromFormat(vform)) { + dst.SetSignedSat(i, true); + } + if (sa < 0) { + dst.SetInt(vform, i, (sa == INT64_MIN) ? sa : -sa); + } else { + dst.SetInt(vform, i, sa); + } + } + return dst; +} + + +LogicVRegister Simulator::andv(VectorFormat vform, + LogicVRegister dst, + const LogicPRegister& pg, + const LogicVRegister& src) { + VIXL_ASSERT(IsSVEFormat(vform)); + uint64_t result = GetUintMask(LaneSizeInBitsFromFormat(vform)); + for (int i = 0; i < LaneCountFromFormat(vform); i++) { + if (!pg.IsActive(vform, i)) continue; + + result &= src.Uint(vform, i); + } + VectorFormat vform_dst = + ScalarFormatFromLaneSize(LaneSizeInBitsFromFormat(vform)); + dst.ClearForWrite(vform_dst); + dst.SetUint(vform_dst, 0, result); + return dst; +} + + +LogicVRegister Simulator::eorv(VectorFormat vform, + LogicVRegister dst, + const LogicPRegister& pg, + const LogicVRegister& src) { + VIXL_ASSERT(IsSVEFormat(vform)); + uint64_t result = 0; + for (int i = 0; i < LaneCountFromFormat(vform); i++) { + if (!pg.IsActive(vform, i)) continue; + + result ^= src.Uint(vform, i); + } + VectorFormat vform_dst = + ScalarFormatFromLaneSize(LaneSizeInBitsFromFormat(vform)); + dst.ClearForWrite(vform_dst); + dst.SetUint(vform_dst, 0, result); + return dst; +} + + +LogicVRegister Simulator::orv(VectorFormat vform, + LogicVRegister dst, + const LogicPRegister& pg, + const LogicVRegister& src) { + VIXL_ASSERT(IsSVEFormat(vform)); + uint64_t result = 0; + for (int i = 0; i < LaneCountFromFormat(vform); i++) { + if (!pg.IsActive(vform, i)) continue; + + result |= src.Uint(vform, i); + } + VectorFormat vform_dst = + ScalarFormatFromLaneSize(LaneSizeInBitsFromFormat(vform)); + dst.ClearForWrite(vform_dst); + dst.SetUint(vform_dst, 0, result); + return dst; +} + + +LogicVRegister Simulator::saddv(VectorFormat vform, + LogicVRegister dst, + const LogicPRegister& pg, + const LogicVRegister& src) { + VIXL_ASSERT(IsSVEFormat(vform)); + VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) <= kSRegSize); + 
int64_t result = 0; + for (int i = 0; i < LaneCountFromFormat(vform); i++) { + if (!pg.IsActive(vform, i)) continue; + + // The destination register always has D-lane sizes and the source register + // always has S-lanes or smaller, so signed integer overflow -- undefined + // behaviour -- can't occur. + result += src.Int(vform, i); + } + + dst.ClearForWrite(kFormatD); + dst.SetInt(kFormatD, 0, result); + return dst; +} + + +LogicVRegister Simulator::uaddv(VectorFormat vform, + LogicVRegister dst, + const LogicPRegister& pg, + const LogicVRegister& src) { + VIXL_ASSERT(IsSVEFormat(vform)); + uint64_t result = 0; + for (int i = 0; i < LaneCountFromFormat(vform); i++) { + if (!pg.IsActive(vform, i)) continue; + + result += src.Uint(vform, i); + } + + dst.ClearForWrite(kFormatD); + dst.SetUint(kFormatD, 0, result); + return dst; +} + + +LogicVRegister Simulator::extractnarrow(VectorFormat dstform, + LogicVRegister dst, + bool dst_is_signed, + const LogicVRegister& src, + bool src_is_signed) { + bool upperhalf = false; + VectorFormat srcform = dstform; + if ((dstform == kFormat16B) || (dstform == kFormat8H) || + (dstform == kFormat4S)) { + upperhalf = true; + srcform = VectorFormatHalfLanes(srcform); + } + srcform = VectorFormatDoubleWidth(srcform); + + LogicVRegister src_copy = src; + + int offset; + if (upperhalf) { + offset = LaneCountFromFormat(dstform) / 2; + } else { + offset = 0; + } + + for (int i = 0; i < LaneCountFromFormat(srcform); i++) { + int64_t ssrc = src_copy.Int(srcform, i); + uint64_t usrc = src_copy.Uint(srcform, i); + + // Test for signed saturation + if (ssrc > MaxIntFromFormat(dstform)) { + dst.SetSignedSat(offset + i, true); + } else if (ssrc < MinIntFromFormat(dstform)) { + dst.SetSignedSat(offset + i, false); + } + + // Test for unsigned saturation + if (src_is_signed) { + if (ssrc > static_cast<int64_t>(MaxUintFromFormat(dstform))) { + dst.SetUnsignedSat(offset + i, true); + } else if (ssrc < 0) { + dst.SetUnsignedSat(offset + i, false); + } + } else { + if (usrc > MaxUintFromFormat(dstform)) { + dst.SetUnsignedSat(offset + i, true); + } + } + + int64_t result; + if (src_is_signed) { + result = ssrc & MaxUintFromFormat(dstform); + } else { + result = usrc & MaxUintFromFormat(dstform); + } + + if (dst_is_signed) { + dst.SetInt(dstform, offset + i, result); + } else { + dst.SetUint(dstform, offset + i, result); + } + } + + if (!upperhalf) { + dst.ClearForWrite(dstform); + } + return dst; +} + + +LogicVRegister Simulator::xtn(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src) { + return extractnarrow(vform, dst, true, src, true); +} + + +LogicVRegister Simulator::sqxtn(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src) { + return extractnarrow(vform, dst, true, src, true).SignedSaturate(vform); +} + + +LogicVRegister Simulator::sqxtun(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src) { + return extractnarrow(vform, dst, false, src, true).UnsignedSaturate(vform); +} + + +LogicVRegister Simulator::uqxtn(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src) { + return extractnarrow(vform, dst, false, src, false).UnsignedSaturate(vform); +} + + +LogicVRegister Simulator::absdiff(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2, + bool is_signed) { + dst.ClearForWrite(vform); + for (int i = 0; i < LaneCountFromFormat(vform); i++) { + bool src1_gt_src2 = is_signed ? 
(src1.Int(vform, i) > src2.Int(vform, i)) + : (src1.Uint(vform, i) > src2.Uint(vform, i)); + // Always calculate the answer using unsigned arithmetic, to avoid + // implementation-defined signed overflow. + if (src1_gt_src2) { + dst.SetUint(vform, i, src1.Uint(vform, i) - src2.Uint(vform, i)); + } else { + dst.SetUint(vform, i, src2.Uint(vform, i) - src1.Uint(vform, i)); + } + } + return dst; +} + + +LogicVRegister Simulator::saba(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2) { + SimVRegister temp; + dst.ClearForWrite(vform); + absdiff(vform, temp, src1, src2, true); + add(vform, dst, dst, temp); + return dst; +} + + +LogicVRegister Simulator::uaba(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2) { + SimVRegister temp; + dst.ClearForWrite(vform); + absdiff(vform, temp, src1, src2, false); + add(vform, dst, dst, temp); + return dst; +} + + +LogicVRegister Simulator::not_(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src) { + dst.ClearForWrite(vform); + for (int i = 0; i < LaneCountFromFormat(vform); i++) { + dst.SetUint(vform, i, ~src.Uint(vform, i)); + } + return dst; +} + + +LogicVRegister Simulator::rbit(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src) { + uint64_t result[kZRegMaxSizeInBytes]; + int lane_count = LaneCountFromFormat(vform); + int lane_size_in_bits = LaneSizeInBitsFromFormat(vform); + uint64_t reversed_value; + uint64_t value; + for (int i = 0; i < lane_count; i++) { + value = src.Uint(vform, i); + reversed_value = 0; + for (int j = 0; j < lane_size_in_bits; j++) { + reversed_value = (reversed_value << 1) | (value & 1); + value >>= 1; + } + result[i] = reversed_value; + } + + dst.ClearForWrite(vform); + for (int i = 0; i < lane_count; ++i) { + dst.SetUint(vform, i, result[i]); + } + return dst; +} + + +LogicVRegister Simulator::rev(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src) { + VIXL_ASSERT(IsSVEFormat(vform)); + int lane_count = LaneCountFromFormat(vform); + for (int i = 0; i < lane_count / 2; i++) { + uint64_t t = src.Uint(vform, i); + dst.SetUint(vform, i, src.Uint(vform, lane_count - i - 1)); + dst.SetUint(vform, lane_count - i - 1, t); + } + return dst; +} + + +LogicVRegister Simulator::rev_byte(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src, + int rev_size) { + uint64_t result[kZRegMaxSizeInBytes] = {}; + int lane_count = LaneCountFromFormat(vform); + int lane_size = LaneSizeInBytesFromFormat(vform); + int lanes_per_loop = rev_size / lane_size; + for (int i = 0; i < lane_count; i += lanes_per_loop) { + for (int j = 0; j < lanes_per_loop; j++) { + result[i + lanes_per_loop - 1 - j] = src.Uint(vform, i + j); + } + } + dst.ClearForWrite(vform); + for (int i = 0; i < lane_count; ++i) { + dst.SetUint(vform, i, result[i]); + } + return dst; +} + + +LogicVRegister Simulator::rev16(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src) { + return rev_byte(vform, dst, src, 2); +} + + +LogicVRegister Simulator::rev32(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src) { + return rev_byte(vform, dst, src, 4); +} + + +LogicVRegister Simulator::rev64(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src) { + return rev_byte(vform, dst, src, 8); +} + +LogicVRegister Simulator::addlp(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src, + bool is_signed, + bool do_accumulate) { + VectorFormat vformsrc = 
VectorFormatHalfWidthDoubleLanes(vform); + VIXL_ASSERT(LaneSizeInBitsFromFormat(vformsrc) <= kSRegSize); + + uint64_t result[kZRegMaxSizeInBytes]; + int lane_count = LaneCountFromFormat(vform); + for (int i = 0; i < lane_count; i++) { + if (is_signed) { + result[i] = static_cast(src.Int(vformsrc, 2 * i) + + src.Int(vformsrc, 2 * i + 1)); + } else { + result[i] = src.Uint(vformsrc, 2 * i) + src.Uint(vformsrc, 2 * i + 1); + } + } + + dst.ClearForWrite(vform); + for (int i = 0; i < lane_count; ++i) { + if (do_accumulate) { + result[i] += dst.Uint(vform, i); + } + dst.SetUint(vform, i, result[i]); + } + + return dst; +} + + +LogicVRegister Simulator::saddlp(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src) { + return addlp(vform, dst, src, true, false); +} + + +LogicVRegister Simulator::uaddlp(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src) { + return addlp(vform, dst, src, false, false); +} + + +LogicVRegister Simulator::sadalp(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src) { + return addlp(vform, dst, src, true, true); +} + + +LogicVRegister Simulator::uadalp(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src) { + return addlp(vform, dst, src, false, true); +} + +LogicVRegister Simulator::ror(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src, + int rotation) { + int width = LaneSizeInBitsFromFormat(vform); + for (int i = 0; i < LaneCountFromFormat(vform); i++) { + uint64_t value = src.Uint(vform, i); + dst.SetUint(vform, i, RotateRight(value, rotation, width)); + } + return dst; +} + +LogicVRegister Simulator::ext(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2, + int index) { + uint8_t result[kZRegMaxSizeInBytes] = {}; + int lane_count = LaneCountFromFormat(vform); + for (int i = 0; i < lane_count - index; ++i) { + result[i] = src1.Uint(vform, i + index); + } + for (int i = 0; i < index; ++i) { + result[lane_count - index + i] = src2.Uint(vform, i); + } + dst.ClearForWrite(vform); + for (int i = 0; i < lane_count; ++i) { + dst.SetUint(vform, i, result[i]); + } + return dst; +} + +LogicVRegister Simulator::rotate_elements_right(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src, + int index) { + if (index < 0) index += LaneCountFromFormat(vform); + VIXL_ASSERT((index >= 0) && (index < LaneCountFromFormat(vform))); + index *= LaneSizeInBytesFromFormat(vform); + return ext(kFormatVnB, dst, src, src, index); +} + + +template +LogicVRegister Simulator::fadda(VectorFormat vform, + LogicVRegister acc, + const LogicPRegister& pg, + const LogicVRegister& src) { + T result = acc.Float(0); + for (int i = 0; i < LaneCountFromFormat(vform); i++) { + if (!pg.IsActive(vform, i)) continue; + + result = FPAdd(result, src.Float(i)); + } + VectorFormat vform_dst = + ScalarFormatFromLaneSize(LaneSizeInBitsFromFormat(vform)); + acc.ClearForWrite(vform_dst); + acc.SetFloat(0, result); + return acc; +} + +LogicVRegister Simulator::fadda(VectorFormat vform, + LogicVRegister acc, + const LogicPRegister& pg, + const LogicVRegister& src) { + switch (LaneSizeInBitsFromFormat(vform)) { + case kHRegSize: + fadda(vform, acc, pg, src); + break; + case kSRegSize: + fadda(vform, acc, pg, src); + break; + case kDRegSize: + fadda(vform, acc, pg, src); + break; + default: + VIXL_UNREACHABLE(); + } + return acc; +} + +template +LogicVRegister Simulator::fcadd(VectorFormat vform, + LogicVRegister dst, // d + const LogicVRegister& src1, // n + const 
LogicVRegister& src2,  // m
+                                int rot) {
+  int elements = LaneCountFromFormat(vform);
+
+  T element1, element3;
+  rot = (rot == 1) ? 270 : 90;
+
+  // Loop example:
+  // 2S --> (2/2 = 1 - 1 = 0) --> 1 x Complex Number (2x components: r+i)
+  // 4S --> (4/2 = 2 - 1 = 1) --> 2 x Complex Number (2x2 components: r+i)
+
+  for (int e = 0; e <= (elements / 2) - 1; e++) {
+    switch (rot) {
+      case 90:
+        element1 = FPNeg(src2.Float<T>(e * 2 + 1));
+        element3 = src2.Float<T>(e * 2);
+        break;
+      case 270:
+        element1 = src2.Float<T>(e * 2 + 1);
+        element3 = FPNeg(src2.Float<T>(e * 2));
+        break;
+      default:
+        VIXL_UNREACHABLE();
+        return dst;  // prevents "element(n) may be uninitialized" errors
+    }
+    dst.ClearForWrite(vform);
+    dst.SetFloat(e * 2, FPAdd(src1.Float<T>(e * 2), element1));
+    dst.SetFloat(e * 2 + 1, FPAdd(src1.Float<T>(e * 2 + 1), element3));
+  }
+  return dst;
+}
+
+
+LogicVRegister Simulator::fcadd(VectorFormat vform,
+                                LogicVRegister dst,          // d
+                                const LogicVRegister& src1,  // n
+                                const LogicVRegister& src2,  // m
+                                int rot) {
+  if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
+    fcadd<SimFloat16>(vform, dst, src1, src2, rot);
+  } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
+    fcadd<float>(vform, dst, src1, src2, rot);
+  } else {
+    VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
+    fcadd<double>(vform, dst, src1, src2, rot);
+  }
+  return dst;
+}
+
+template <typename T>
+LogicVRegister Simulator::fcmla(VectorFormat vform,
+                                LogicVRegister dst,
+                                const LogicVRegister& src1,
+                                const LogicVRegister& src2,
+                                const LogicVRegister& acc,
+                                int index,
+                                int rot) {
+  int elements = LaneCountFromFormat(vform);
+
+  T element1, element2, element3, element4;
+  rot *= 90;
+
+  // Loop example:
+  // 2S --> (2/2 = 1 - 1 = 0) --> 1 x Complex Number (2x components: r+i)
+  // 4S --> (4/2 = 2 - 1 = 1) --> 2 x Complex Number (2x2 components: r+i)
+
+  for (int e = 0; e <= (elements / 2) - 1; e++) {
+    // Index == -1 indicates a vector/vector rather than vector/indexed-element
+    // operation.
+    int f = (index < 0) ?
e : index; + + switch (rot) { + case 0: + element1 = src2.Float(f * 2); + element2 = src1.Float(e * 2); + element3 = src2.Float(f * 2 + 1); + element4 = src1.Float(e * 2); + break; + case 90: + element1 = FPNeg(src2.Float(f * 2 + 1)); + element2 = src1.Float(e * 2 + 1); + element3 = src2.Float(f * 2); + element4 = src1.Float(e * 2 + 1); + break; + case 180: + element1 = FPNeg(src2.Float(f * 2)); + element2 = src1.Float(e * 2); + element3 = FPNeg(src2.Float(f * 2 + 1)); + element4 = src1.Float(e * 2); + break; + case 270: + element1 = src2.Float(f * 2 + 1); + element2 = src1.Float(e * 2 + 1); + element3 = FPNeg(src2.Float(f * 2)); + element4 = src1.Float(e * 2 + 1); + break; + default: + VIXL_UNREACHABLE(); + return dst; // prevents "element(n) may be unintialized" errors + } + dst.ClearForWrite(vform); + dst.SetFloat(vform, + e * 2, + FPMulAdd(acc.Float(e * 2), element2, element1)); + dst.SetFloat(vform, + e * 2 + 1, + FPMulAdd(acc.Float(e * 2 + 1), element4, element3)); + } + return dst; +} + +LogicVRegister Simulator::fcmla(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2, + const LogicVRegister& acc, + int rot) { + if (LaneSizeInBitsFromFormat(vform) == kHRegSize) { + fcmla(vform, dst, src1, src2, acc, -1, rot); + } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) { + fcmla(vform, dst, src1, src2, acc, -1, rot); + } else { + fcmla(vform, dst, src1, src2, acc, -1, rot); + } + return dst; +} + + +LogicVRegister Simulator::fcmla(VectorFormat vform, + LogicVRegister dst, // d + const LogicVRegister& src1, // n + const LogicVRegister& src2, // m + int index, + int rot) { + if (LaneSizeInBitsFromFormat(vform) == kHRegSize) { + VIXL_UNIMPLEMENTED(); + } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) { + fcmla(vform, dst, src1, src2, dst, index, rot); + } else { + fcmla(vform, dst, src1, src2, dst, index, rot); + } + return dst; +} + +LogicVRegister Simulator::cadd(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2, + int rot, + bool saturate) { + SimVRegister src1_r, src1_i; + SimVRegister src2_r, src2_i; + SimVRegister zero; + zero.Clear(); + uzp1(vform, src1_r, src1, zero); + uzp2(vform, src1_i, src1, zero); + uzp1(vform, src2_r, src2, zero); + uzp2(vform, src2_i, src2, zero); + + if (rot == 90) { + if (saturate) { + sub(vform, src1_r, src1_r, src2_i).SignedSaturate(vform); + add(vform, src1_i, src1_i, src2_r).SignedSaturate(vform); + } else { + sub(vform, src1_r, src1_r, src2_i); + add(vform, src1_i, src1_i, src2_r); + } + } else { + VIXL_ASSERT(rot == 270); + if (saturate) { + add(vform, src1_r, src1_r, src2_i).SignedSaturate(vform); + sub(vform, src1_i, src1_i, src2_r).SignedSaturate(vform); + } else { + add(vform, src1_r, src1_r, src2_i); + sub(vform, src1_i, src1_i, src2_r); + } + } + + zip1(vform, dst, src1_r, src1_i); + return dst; +} + +LogicVRegister Simulator::cmla(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& srca, + const LogicVRegister& src1, + const LogicVRegister& src2, + int rot) { + SimVRegister src1_a; + SimVRegister src2_a, src2_b; + SimVRegister srca_i, srca_r; + SimVRegister zero, temp; + zero.Clear(); + + if ((rot == 0) || (rot == 180)) { + uzp1(vform, src1_a, src1, zero); + uzp1(vform, src2_a, src2, zero); + uzp2(vform, src2_b, src2, zero); + } else { + uzp2(vform, src1_a, src1, zero); + uzp2(vform, src2_a, src2, zero); + uzp1(vform, src2_b, src2, zero); + } + + uzp1(vform, srca_r, srca, zero); + uzp2(vform, srca_i, srca, zero); + + 
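+  // The operands are now de-interleaved: srca_r/srca_i hold the accumulator's
+  // real/imaginary parts, src1_a holds the src1 parts selected by the
+  // rotation, and src2_a/src2_b hold the src2 parts they are multiplied with.
+  // For example, with rot == 90 the code below computes acc_r - s1_i * s2_i
+  // into the real lanes and acc_i + s1_i * s2_r into the imaginary lanes.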
bool sub_r = (rot == 90) || (rot == 180); + bool sub_i = (rot == 180) || (rot == 270); + + mul(vform, temp, src1_a, src2_a); + if (sub_r) { + sub(vform, srca_r, srca_r, temp); + } else { + add(vform, srca_r, srca_r, temp); + } + + mul(vform, temp, src1_a, src2_b); + if (sub_i) { + sub(vform, srca_i, srca_i, temp); + } else { + add(vform, srca_i, srca_i, temp); + } + + zip1(vform, dst, srca_r, srca_i); + return dst; +} + +LogicVRegister Simulator::cmla(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& srca, + const LogicVRegister& src1, + const LogicVRegister& src2, + int index, + int rot) { + SimVRegister temp; + dup_elements_to_segments(VectorFormatDoubleWidth(vform), temp, src2, index); + return cmla(vform, dst, srca, src1, temp, rot); +} + +LogicVRegister Simulator::bgrp(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2, + bool do_bext) { + for (int i = 0; i < LaneCountFromFormat(vform); i++) { + uint64_t value = src1.Uint(vform, i); + uint64_t mask = src2.Uint(vform, i); + int high_pos = 0; + int low_pos = 0; + uint64_t result_high = 0; + uint64_t result_low = 0; + for (unsigned j = 0; j < LaneSizeInBitsFromFormat(vform); j++) { + if ((mask & 1) == 0) { + result_high |= (value & 1) << high_pos; + high_pos++; + } else { + result_low |= (value & 1) << low_pos; + low_pos++; + } + mask >>= 1; + value >>= 1; + } + + if (!do_bext) { + result_low |= result_high << low_pos; + } + + dst.SetUint(vform, i, result_low); + } + return dst; +} + +LogicVRegister Simulator::bdep(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2) { + for (int i = 0; i < LaneCountFromFormat(vform); i++) { + uint64_t value = src1.Uint(vform, i); + uint64_t mask = src2.Uint(vform, i); + uint64_t result = 0; + for (unsigned j = 0; j < LaneSizeInBitsFromFormat(vform); j++) { + if ((mask & 1) == 1) { + result |= (value & 1) << j; + value >>= 1; + } + mask >>= 1; + } + dst.SetUint(vform, i, result); + } + return dst; +} + +LogicVRegister Simulator::histogram(VectorFormat vform, + LogicVRegister dst, + const LogicPRegister& pg, + const LogicVRegister& src1, + const LogicVRegister& src2, + bool do_segmented) { + int elements_per_segment = kQRegSize / LaneSizeInBitsFromFormat(vform); + uint64_t result[kZRegMaxSizeInBytes]; + + for (int i = 0; i < LaneCountFromFormat(vform); i++) { + uint64_t count = 0; + uint64_t value = src1.Uint(vform, i); + + int segment = do_segmented ? (i / elements_per_segment) : 0; + int segment_offset = segment * elements_per_segment; + int hist_limit = do_segmented ? elements_per_segment : (i + 1); + for (int j = 0; j < hist_limit; j++) { + if (pg.IsActive(vform, j) && + (value == src2.Uint(vform, j + segment_offset))) { + count++; + } + } + result[i] = count; + } + dst.SetUintArray(vform, result); + return dst; +} + +LogicVRegister Simulator::dup_element(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src, + int src_index) { + if ((vform == kFormatVnQ) || (vform == kFormatVnO)) { + // When duplicating an element larger than 64 bits, split the element into + // 64-bit parts, and duplicate the parts across the destination. + uint64_t d[4]; + int count = (vform == kFormatVnQ) ? 
2 : 4; + for (int i = 0; i < count; i++) { + d[i] = src.Uint(kFormatVnD, (src_index * count) + i); + } + dst.Clear(); + for (int i = 0; i < LaneCountFromFormat(vform) * count; i++) { + dst.SetUint(kFormatVnD, i, d[i % count]); + } + } else { + int lane_count = LaneCountFromFormat(vform); + uint64_t value = src.Uint(vform, src_index); + dst.ClearForWrite(vform); + for (int i = 0; i < lane_count; ++i) { + dst.SetUint(vform, i, value); + } + } + return dst; +} + +LogicVRegister Simulator::dup_elements_to_segments(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src, + int src_index) { + // In SVE, a segment is a 128-bit portion of a vector, like a Q register, + // whereas in NEON, the size of segment is equal to the size of register + // itself. + int segment_size = std::min(kQRegSize, RegisterSizeInBitsFromFormat(vform)); + VIXL_ASSERT(IsMultiple(segment_size, LaneSizeInBitsFromFormat(vform))); + int lanes_per_segment = segment_size / LaneSizeInBitsFromFormat(vform); + + VIXL_ASSERT(src_index >= 0); + VIXL_ASSERT(src_index < lanes_per_segment); + + dst.ClearForWrite(vform); + for (int j = 0; j < LaneCountFromFormat(vform); j += lanes_per_segment) { + uint64_t value = src.Uint(vform, j + src_index); + for (int i = 0; i < lanes_per_segment; i++) { + dst.SetUint(vform, j + i, value); + } + } + return dst; +} + +LogicVRegister Simulator::dup_elements_to_segments( + VectorFormat vform, + LogicVRegister dst, + const std::pair& src_and_index) { + return dup_elements_to_segments(vform, + dst, + ReadVRegister(src_and_index.first), + src_and_index.second); +} + +LogicVRegister Simulator::dup_immediate(VectorFormat vform, + LogicVRegister dst, + uint64_t imm) { + int lane_count = LaneCountFromFormat(vform); + uint64_t value = imm & MaxUintFromFormat(vform); + dst.ClearForWrite(vform); + for (int i = 0; i < lane_count; ++i) { + dst.SetUint(vform, i, value); + } + return dst; +} + + +LogicVRegister Simulator::ins_element(VectorFormat vform, + LogicVRegister dst, + int dst_index, + const LogicVRegister& src, + int src_index) { + dst.SetUint(vform, dst_index, src.Uint(vform, src_index)); + return dst; +} + + +LogicVRegister Simulator::ins_immediate(VectorFormat vform, + LogicVRegister dst, + int dst_index, + uint64_t imm) { + uint64_t value = imm & MaxUintFromFormat(vform); + dst.SetUint(vform, dst_index, value); + return dst; +} + + +LogicVRegister Simulator::index(VectorFormat vform, + LogicVRegister dst, + uint64_t start, + uint64_t step) { + VIXL_ASSERT(IsSVEFormat(vform)); + uint64_t value = start; + for (int i = 0; i < LaneCountFromFormat(vform); i++) { + dst.SetUint(vform, i, value); + value += step; + } + return dst; +} + + +LogicVRegister Simulator::insr(VectorFormat vform, + LogicVRegister dst, + uint64_t imm) { + VIXL_ASSERT(IsSVEFormat(vform)); + for (int i = LaneCountFromFormat(vform) - 1; i > 0; i--) { + dst.SetUint(vform, i, dst.Uint(vform, i - 1)); + } + dst.SetUint(vform, 0, imm); + return dst; +} + + +LogicVRegister Simulator::mov(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src) { + dst.ClearForWrite(vform); + for (int lane = 0; lane < LaneCountFromFormat(vform); lane++) { + dst.SetUint(vform, lane, src.Uint(vform, lane)); + } + return dst; +} + + +LogicPRegister Simulator::mov(LogicPRegister dst, const LogicPRegister& src) { + // Avoid a copy if the registers already alias. 
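+  // Otherwise the predicate is copied in fixed-size chunks, since predicate
+  // registers are handled as raw bit chunks rather than formatted lanes.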
+ if (dst.Aliases(src)) return dst; + + for (int i = 0; i < dst.GetChunkCount(); i++) { + dst.SetChunk(i, src.GetChunk(i)); + } + return dst; +} + + +LogicVRegister Simulator::mov_merging(VectorFormat vform, + LogicVRegister dst, + const SimPRegister& pg, + const LogicVRegister& src) { + return sel(vform, dst, pg, src, dst); +} + +LogicVRegister Simulator::mov_zeroing(VectorFormat vform, + LogicVRegister dst, + const SimPRegister& pg, + const LogicVRegister& src) { + SimVRegister zero; + dup_immediate(vform, zero, 0); + return sel(vform, dst, pg, src, zero); +} + +LogicVRegister Simulator::mov_alternating(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src, + int start_at) { + VIXL_ASSERT((start_at == 0) || (start_at == 1)); + for (int i = start_at; i < LaneCountFromFormat(vform); i += 2) { + dst.SetUint(vform, i, src.Uint(vform, i)); + } + return dst; +} + +LogicPRegister Simulator::mov_merging(LogicPRegister dst, + const LogicPRegister& pg, + const LogicPRegister& src) { + return sel(dst, pg, src, dst); +} + +LogicPRegister Simulator::mov_zeroing(LogicPRegister dst, + const LogicPRegister& pg, + const LogicPRegister& src) { + SimPRegister all_false; + return sel(dst, pg, src, pfalse(all_false)); +} + +LogicVRegister Simulator::movi(VectorFormat vform, + LogicVRegister dst, + uint64_t imm) { + int lane_count = LaneCountFromFormat(vform); + dst.ClearForWrite(vform); + for (int i = 0; i < lane_count; ++i) { + dst.SetUint(vform, i, imm); + } + return dst; +} + + +LogicVRegister Simulator::mvni(VectorFormat vform, + LogicVRegister dst, + uint64_t imm) { + int lane_count = LaneCountFromFormat(vform); + dst.ClearForWrite(vform); + for (int i = 0; i < lane_count; ++i) { + dst.SetUint(vform, i, ~imm); + } + return dst; +} + + +LogicVRegister Simulator::orr(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src, + uint64_t imm) { + uint64_t result[16]; + int lane_count = LaneCountFromFormat(vform); + for (int i = 0; i < lane_count; ++i) { + result[i] = src.Uint(vform, i) | imm; + } + dst.ClearForWrite(vform); + for (int i = 0; i < lane_count; ++i) { + dst.SetUint(vform, i, result[i]); + } + return dst; +} + + +LogicVRegister Simulator::uxtl(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src, + bool is_2) { + VectorFormat vform_half = VectorFormatHalfWidth(vform); + int lane_count = LaneCountFromFormat(vform); + int src_offset = is_2 ? lane_count : 0; + + dst.ClearForWrite(vform); + for (int i = 0; i < lane_count; i++) { + dst.SetUint(vform, i, src.Uint(vform_half, src_offset + i)); + } + return dst; +} + + +LogicVRegister Simulator::sxtl(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src, + bool is_2) { + VectorFormat vform_half = VectorFormatHalfWidth(vform); + int lane_count = LaneCountFromFormat(vform); + int src_offset = is_2 ? 
lane_count : 0; + + dst.ClearForWrite(vform); + for (int i = 0; i < LaneCountFromFormat(vform); i++) { + dst.SetInt(vform, i, src.Int(vform_half, src_offset + i)); + } + return dst; +} + + +LogicVRegister Simulator::uxtl2(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src) { + return uxtl(vform, dst, src, /* is_2 = */ true); +} + + +LogicVRegister Simulator::sxtl2(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src) { + return sxtl(vform, dst, src, /* is_2 = */ true); +} + + +LogicVRegister Simulator::uxt(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src, + unsigned from_size_in_bits) { + int lane_count = LaneCountFromFormat(vform); + uint64_t mask = GetUintMask(from_size_in_bits); + + dst.ClearForWrite(vform); + for (int i = 0; i < lane_count; i++) { + dst.SetInt(vform, i, src.Uint(vform, i) & mask); + } + return dst; +} + + +LogicVRegister Simulator::sxt(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src, + unsigned from_size_in_bits) { + int lane_count = LaneCountFromFormat(vform); + + dst.ClearForWrite(vform); + for (int i = 0; i < lane_count; i++) { + uint64_t value = + ExtractSignedBitfield64(from_size_in_bits - 1, 0, src.Uint(vform, i)); + dst.SetInt(vform, i, value); + } + return dst; +} + + +LogicVRegister Simulator::shrn(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src, + int shift) { + SimVRegister temp; + VectorFormat vform_src = VectorFormatDoubleWidth(vform); + VectorFormat vform_dst = vform; + LogicVRegister shifted_src = ushr(vform_src, temp, src, shift); + return extractnarrow(vform_dst, dst, false, shifted_src, false); +} + + +LogicVRegister Simulator::shrn2(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src, + int shift) { + SimVRegister temp; + VectorFormat vformsrc = VectorFormatDoubleWidth(VectorFormatHalfLanes(vform)); + VectorFormat vformdst = vform; + LogicVRegister shifted_src = ushr(vformsrc, temp, src, shift); + return extractnarrow(vformdst, dst, false, shifted_src, false); +} + + +LogicVRegister Simulator::rshrn(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src, + int shift) { + SimVRegister temp; + VectorFormat vformsrc = VectorFormatDoubleWidth(vform); + VectorFormat vformdst = vform; + LogicVRegister shifted_src = ushr(vformsrc, temp, src, shift).Round(vformsrc); + return extractnarrow(vformdst, dst, false, shifted_src, false); +} + + +LogicVRegister Simulator::rshrn2(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src, + int shift) { + SimVRegister temp; + VectorFormat vformsrc = VectorFormatDoubleWidth(VectorFormatHalfLanes(vform)); + VectorFormat vformdst = vform; + LogicVRegister shifted_src = ushr(vformsrc, temp, src, shift).Round(vformsrc); + return extractnarrow(vformdst, dst, false, shifted_src, false); +} + +LogicVRegister Simulator::Table(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& ind, + bool zero_out_of_bounds, + const LogicVRegister* tab1, + const LogicVRegister* tab2, + const LogicVRegister* tab3, + const LogicVRegister* tab4) { + VIXL_ASSERT(tab1 != NULL); + int lane_count = LaneCountFromFormat(vform); + VIXL_ASSERT((tab3 == NULL) || (lane_count <= 16)); + uint64_t table[kZRegMaxSizeInBytes * 2]; + uint64_t result[kZRegMaxSizeInBytes]; + + // For Neon, the table source registers are always 16B, and Neon allows only + // 8B or 16B vform for the destination, so infer the table format from the + // destination. + VectorFormat vform_tab = (vform == kFormat8B) ? 
kFormat16B : vform; + + uint64_t tab_size = tab1->UintArray(vform_tab, &table[0]); + if (tab2 != NULL) tab_size += tab2->UintArray(vform_tab, &table[tab_size]); + if (tab3 != NULL) tab_size += tab3->UintArray(vform_tab, &table[tab_size]); + if (tab4 != NULL) tab_size += tab4->UintArray(vform_tab, &table[tab_size]); + + for (int i = 0; i < lane_count; i++) { + uint64_t index = ind.Uint(vform, i); + result[i] = zero_out_of_bounds ? 0 : dst.Uint(vform, i); + if (index < tab_size) result[i] = table[index]; + } + dst.SetUintArray(vform, result); + return dst; +} + +LogicVRegister Simulator::tbl(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& tab, + const LogicVRegister& ind) { + return Table(vform, dst, ind, true, &tab); +} + + +LogicVRegister Simulator::tbl(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& tab, + const LogicVRegister& tab2, + const LogicVRegister& ind) { + return Table(vform, dst, ind, true, &tab, &tab2); +} + + +LogicVRegister Simulator::tbl(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& tab, + const LogicVRegister& tab2, + const LogicVRegister& tab3, + const LogicVRegister& ind) { + return Table(vform, dst, ind, true, &tab, &tab2, &tab3); +} + + +LogicVRegister Simulator::tbl(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& tab, + const LogicVRegister& tab2, + const LogicVRegister& tab3, + const LogicVRegister& tab4, + const LogicVRegister& ind) { + return Table(vform, dst, ind, true, &tab, &tab2, &tab3, &tab4); +} + + +LogicVRegister Simulator::tbx(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& tab, + const LogicVRegister& ind) { + return Table(vform, dst, ind, false, &tab); +} + + +LogicVRegister Simulator::tbx(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& tab, + const LogicVRegister& tab2, + const LogicVRegister& ind) { + return Table(vform, dst, ind, false, &tab, &tab2); +} + + +LogicVRegister Simulator::tbx(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& tab, + const LogicVRegister& tab2, + const LogicVRegister& tab3, + const LogicVRegister& ind) { + return Table(vform, dst, ind, false, &tab, &tab2, &tab3); +} + + +LogicVRegister Simulator::tbx(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& tab, + const LogicVRegister& tab2, + const LogicVRegister& tab3, + const LogicVRegister& tab4, + const LogicVRegister& ind) { + return Table(vform, dst, ind, false, &tab, &tab2, &tab3, &tab4); +} + + +LogicVRegister Simulator::uqshrn(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src, + int shift) { + return shrn(vform, dst, src, shift).UnsignedSaturate(vform); +} + + +LogicVRegister Simulator::uqshrn2(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src, + int shift) { + return shrn2(vform, dst, src, shift).UnsignedSaturate(vform); +} + + +LogicVRegister Simulator::uqrshrn(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src, + int shift) { + return rshrn(vform, dst, src, shift).UnsignedSaturate(vform); +} + + +LogicVRegister Simulator::uqrshrn2(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src, + int shift) { + return rshrn2(vform, dst, src, shift).UnsignedSaturate(vform); +} + + +LogicVRegister Simulator::sqshrn(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src, + int shift) { + SimVRegister temp; + VectorFormat vformsrc = VectorFormatDoubleWidth(vform); + VectorFormat vformdst = vform; + LogicVRegister shifted_src = sshr(vformsrc, temp, src, 
shift); + return sqxtn(vformdst, dst, shifted_src); +} + + +LogicVRegister Simulator::sqshrn2(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src, + int shift) { + SimVRegister temp; + VectorFormat vformsrc = VectorFormatDoubleWidth(VectorFormatHalfLanes(vform)); + VectorFormat vformdst = vform; + LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift); + return sqxtn(vformdst, dst, shifted_src); +} + + +LogicVRegister Simulator::sqrshrn(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src, + int shift) { + SimVRegister temp; + VectorFormat vformsrc = VectorFormatDoubleWidth(vform); + VectorFormat vformdst = vform; + LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift).Round(vformsrc); + return sqxtn(vformdst, dst, shifted_src); +} + + +LogicVRegister Simulator::sqrshrn2(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src, + int shift) { + SimVRegister temp; + VectorFormat vformsrc = VectorFormatDoubleWidth(VectorFormatHalfLanes(vform)); + VectorFormat vformdst = vform; + LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift).Round(vformsrc); + return sqxtn(vformdst, dst, shifted_src); +} + + +LogicVRegister Simulator::sqshrun(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src, + int shift) { + SimVRegister temp; + VectorFormat vformsrc = VectorFormatDoubleWidth(vform); + VectorFormat vformdst = vform; + LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift); + return sqxtun(vformdst, dst, shifted_src); +} + + +LogicVRegister Simulator::sqshrun2(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src, + int shift) { + SimVRegister temp; + VectorFormat vformsrc = VectorFormatDoubleWidth(VectorFormatHalfLanes(vform)); + VectorFormat vformdst = vform; + LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift); + return sqxtun(vformdst, dst, shifted_src); +} + + +LogicVRegister Simulator::sqrshrun(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src, + int shift) { + SimVRegister temp; + VectorFormat vformsrc = VectorFormatDoubleWidth(vform); + VectorFormat vformdst = vform; + LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift).Round(vformsrc); + return sqxtun(vformdst, dst, shifted_src); +} + + +LogicVRegister Simulator::sqrshrun2(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src, + int shift) { + SimVRegister temp; + VectorFormat vformsrc = VectorFormatDoubleWidth(VectorFormatHalfLanes(vform)); + VectorFormat vformdst = vform; + LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift).Round(vformsrc); + return sqxtun(vformdst, dst, shifted_src); +} + + +LogicVRegister Simulator::uaddl(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2) { + SimVRegister temp1, temp2; + uxtl(vform, temp1, src1); + uxtl(vform, temp2, src2); + add(vform, dst, temp1, temp2); + return dst; +} + + +LogicVRegister Simulator::uaddl2(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2) { + SimVRegister temp1, temp2; + uxtl2(vform, temp1, src1); + uxtl2(vform, temp2, src2); + add(vform, dst, temp1, temp2); + return dst; +} + + +LogicVRegister Simulator::uaddw(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2) { + SimVRegister temp; + uxtl(vform, temp, src2); + add(vform, dst, src1, temp); + return dst; +} + + +LogicVRegister Simulator::uaddw2(VectorFormat vform, + LogicVRegister dst, + const 
LogicVRegister& src1, + const LogicVRegister& src2) { + SimVRegister temp; + uxtl2(vform, temp, src2); + add(vform, dst, src1, temp); + return dst; +} + + +LogicVRegister Simulator::saddl(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2) { + SimVRegister temp1, temp2; + sxtl(vform, temp1, src1); + sxtl(vform, temp2, src2); + add(vform, dst, temp1, temp2); + return dst; +} + + +LogicVRegister Simulator::saddl2(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2) { + SimVRegister temp1, temp2; + sxtl2(vform, temp1, src1); + sxtl2(vform, temp2, src2); + add(vform, dst, temp1, temp2); + return dst; +} + + +LogicVRegister Simulator::saddw(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2) { + SimVRegister temp; + sxtl(vform, temp, src2); + add(vform, dst, src1, temp); + return dst; +} + + +LogicVRegister Simulator::saddw2(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2) { + SimVRegister temp; + sxtl2(vform, temp, src2); + add(vform, dst, src1, temp); + return dst; +} + + +LogicVRegister Simulator::usubl(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2) { + SimVRegister temp1, temp2; + uxtl(vform, temp1, src1); + uxtl(vform, temp2, src2); + sub(vform, dst, temp1, temp2); + return dst; +} + + +LogicVRegister Simulator::usubl2(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2) { + SimVRegister temp1, temp2; + uxtl2(vform, temp1, src1); + uxtl2(vform, temp2, src2); + sub(vform, dst, temp1, temp2); + return dst; +} + + +LogicVRegister Simulator::usubw(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2) { + SimVRegister temp; + uxtl(vform, temp, src2); + sub(vform, dst, src1, temp); + return dst; +} + + +LogicVRegister Simulator::usubw2(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2) { + SimVRegister temp; + uxtl2(vform, temp, src2); + sub(vform, dst, src1, temp); + return dst; +} + + +LogicVRegister Simulator::ssubl(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2) { + SimVRegister temp1, temp2; + sxtl(vform, temp1, src1); + sxtl(vform, temp2, src2); + sub(vform, dst, temp1, temp2); + return dst; +} + + +LogicVRegister Simulator::ssubl2(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2) { + SimVRegister temp1, temp2; + sxtl2(vform, temp1, src1); + sxtl2(vform, temp2, src2); + sub(vform, dst, temp1, temp2); + return dst; +} + + +LogicVRegister Simulator::ssubw(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2) { + SimVRegister temp; + sxtl(vform, temp, src2); + sub(vform, dst, src1, temp); + return dst; +} + + +LogicVRegister Simulator::ssubw2(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2) { + SimVRegister temp; + sxtl2(vform, temp, src2); + sub(vform, dst, src1, temp); + return dst; +} + + +LogicVRegister Simulator::uabal(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2) { + SimVRegister temp1, temp2; + uxtl(vform, temp1, src1); + uxtl(vform, temp2, src2); + uaba(vform, dst, temp1, temp2); + return dst; +} + + +LogicVRegister 
Simulator::uabal2(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2) { + SimVRegister temp1, temp2; + uxtl2(vform, temp1, src1); + uxtl2(vform, temp2, src2); + uaba(vform, dst, temp1, temp2); + return dst; +} + + +LogicVRegister Simulator::sabal(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2) { + SimVRegister temp1, temp2; + sxtl(vform, temp1, src1); + sxtl(vform, temp2, src2); + saba(vform, dst, temp1, temp2); + return dst; +} + + +LogicVRegister Simulator::sabal2(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2) { + SimVRegister temp1, temp2; + sxtl2(vform, temp1, src1); + sxtl2(vform, temp2, src2); + saba(vform, dst, temp1, temp2); + return dst; +} + + +LogicVRegister Simulator::uabdl(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2) { + SimVRegister temp1, temp2; + uxtl(vform, temp1, src1); + uxtl(vform, temp2, src2); + absdiff(vform, dst, temp1, temp2, false); + return dst; +} + + +LogicVRegister Simulator::uabdl2(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2) { + SimVRegister temp1, temp2; + uxtl2(vform, temp1, src1); + uxtl2(vform, temp2, src2); + absdiff(vform, dst, temp1, temp2, false); + return dst; +} + + +LogicVRegister Simulator::sabdl(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2) { + SimVRegister temp1, temp2; + sxtl(vform, temp1, src1); + sxtl(vform, temp2, src2); + absdiff(vform, dst, temp1, temp2, true); + return dst; +} + + +LogicVRegister Simulator::sabdl2(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2) { + SimVRegister temp1, temp2; + sxtl2(vform, temp1, src1); + sxtl2(vform, temp2, src2); + absdiff(vform, dst, temp1, temp2, true); + return dst; +} + + +LogicVRegister Simulator::umull(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2, + bool is_2) { + SimVRegister temp1, temp2; + uxtl(vform, temp1, src1, is_2); + uxtl(vform, temp2, src2, is_2); + mul(vform, dst, temp1, temp2); + return dst; +} + + +LogicVRegister Simulator::umull2(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2) { + return umull(vform, dst, src1, src2, /* is_2 = */ true); +} + + +LogicVRegister Simulator::smull(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2, + bool is_2) { + SimVRegister temp1, temp2; + sxtl(vform, temp1, src1, is_2); + sxtl(vform, temp2, src2, is_2); + mul(vform, dst, temp1, temp2); + return dst; +} + + +LogicVRegister Simulator::smull2(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2) { + return smull(vform, dst, src1, src2, /* is_2 = */ true); +} + + +LogicVRegister Simulator::umlsl(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2, + bool is_2) { + SimVRegister temp1, temp2; + uxtl(vform, temp1, src1, is_2); + uxtl(vform, temp2, src2, is_2); + mls(vform, dst, dst, temp1, temp2); + return dst; +} + + +LogicVRegister Simulator::umlsl2(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2) { + return umlsl(vform, dst, src1, src2, /* is_2 = */ true); +} + + +LogicVRegister Simulator::smlsl(VectorFormat vform, + 
LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2, + bool is_2) { + SimVRegister temp1, temp2; + sxtl(vform, temp1, src1, is_2); + sxtl(vform, temp2, src2, is_2); + mls(vform, dst, dst, temp1, temp2); + return dst; +} + + +LogicVRegister Simulator::smlsl2(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2) { + return smlsl(vform, dst, src1, src2, /* is_2 = */ true); +} + + +LogicVRegister Simulator::umlal(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2, + bool is_2) { + SimVRegister temp1, temp2; + uxtl(vform, temp1, src1, is_2); + uxtl(vform, temp2, src2, is_2); + mla(vform, dst, dst, temp1, temp2); + return dst; +} + + +LogicVRegister Simulator::umlal2(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2) { + return umlal(vform, dst, src1, src2, /* is_2 = */ true); +} + + +LogicVRegister Simulator::smlal(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2, + bool is_2) { + SimVRegister temp1, temp2; + sxtl(vform, temp1, src1, is_2); + sxtl(vform, temp2, src2, is_2); + mla(vform, dst, dst, temp1, temp2); + return dst; +} + + +LogicVRegister Simulator::smlal2(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2) { + return smlal(vform, dst, src1, src2, /* is_2 = */ true); +} + + +LogicVRegister Simulator::sqdmlal(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2, + bool is_2) { + SimVRegister temp; + LogicVRegister product = sqdmull(vform, temp, src1, src2, is_2); + return add(vform, dst, dst, product).SignedSaturate(vform); +} + + +LogicVRegister Simulator::sqdmlal2(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2) { + return sqdmlal(vform, dst, src1, src2, /* is_2 = */ true); +} + + +LogicVRegister Simulator::sqdmlsl(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2, + bool is_2) { + SimVRegister temp; + LogicVRegister product = sqdmull(vform, temp, src1, src2, is_2); + return sub(vform, dst, dst, product).SignedSaturate(vform); +} + + +LogicVRegister Simulator::sqdmlsl2(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2) { + return sqdmlsl(vform, dst, src1, src2, /* is_2 = */ true); +} + + +LogicVRegister Simulator::sqdmull(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2, + bool is_2) { + SimVRegister temp; + LogicVRegister product = smull(vform, temp, src1, src2, is_2); + return add(vform, dst, product, product).SignedSaturate(vform); +} + + +LogicVRegister Simulator::sqdmull2(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2) { + return sqdmull(vform, dst, src1, src2, /* is_2 = */ true); +} + +LogicVRegister Simulator::sqrdmulh(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2, + bool round) { + int esize = LaneSizeInBitsFromFormat(vform); + + SimVRegister temp_lo, temp_hi; + + // Compute low and high multiplication results. + mul(vform, temp_lo, src1, src2); + smulh(vform, temp_hi, src1, src2); + + // Double by shifting high half, and adding in most-significant bit of low + // half. 
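+  // For example, with 16-bit lanes the doubled high half 2*(a*b) >> 16 equals
+  // (hi << 1) + (lo >> 15), where hi:lo is the 32-bit product; the usra below
+  // adds in the (lo >> 15) term.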
+ shl(vform, temp_hi, temp_hi, 1); + usra(vform, temp_hi, temp_lo, esize - 1); + + if (round) { + // Add the second (due to doubling) most-significant bit of the low half + // into the result. + shl(vform, temp_lo, temp_lo, 1); + usra(vform, temp_hi, temp_lo, esize - 1); + } + + SimPRegister not_sat; + LogicPRegister ptemp(not_sat); + dst.ClearForWrite(vform); + for (int i = 0; i < LaneCountFromFormat(vform); i++) { + // Saturation only occurs when src1 = src2 = minimum representable value. + // Check this as a special case. + ptemp.SetActive(vform, i, true); + if ((src1.Int(vform, i) == MinIntFromFormat(vform)) && + (src2.Int(vform, i) == MinIntFromFormat(vform))) { + ptemp.SetActive(vform, i, false); + } + dst.SetInt(vform, i, MaxIntFromFormat(vform)); + } + + mov_merging(vform, dst, not_sat, temp_hi); + return dst; +} + + +LogicVRegister Simulator::dot(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2, + bool is_src1_signed, + bool is_src2_signed) { + VectorFormat quarter_vform = + VectorFormatHalfWidthDoubleLanes(VectorFormatHalfWidthDoubleLanes(vform)); + + dst.ClearForWrite(vform); + for (int e = 0; e < LaneCountFromFormat(vform); e++) { + uint64_t result = 0; + int64_t element1, element2; + for (int i = 0; i < 4; i++) { + int index = 4 * e + i; + if (is_src1_signed) { + element1 = src1.Int(quarter_vform, index); + } else { + element1 = src1.Uint(quarter_vform, index); + } + if (is_src2_signed) { + element2 = src2.Int(quarter_vform, index); + } else { + element2 = src2.Uint(quarter_vform, index); + } + result += element1 * element2; + } + dst.SetUint(vform, e, result + dst.Uint(vform, e)); + } + return dst; +} + + +LogicVRegister Simulator::sdot(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2) { + return dot(vform, dst, src1, src2, true, true); +} + + +LogicVRegister Simulator::udot(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2) { + return dot(vform, dst, src1, src2, false, false); +} + +LogicVRegister Simulator::usdot(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2) { + return dot(vform, dst, src1, src2, false, true); +} + +LogicVRegister Simulator::cdot(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& acc, + const LogicVRegister& src1, + const LogicVRegister& src2, + int rot) { + VIXL_ASSERT((rot == 0) || (rot == 90) || (rot == 180) || (rot == 270)); + VectorFormat quarter_vform = + VectorFormatHalfWidthDoubleLanes(VectorFormatHalfWidthDoubleLanes(vform)); + + int sel_a = ((rot == 0) || (rot == 180)) ? 0 : 1; + int sel_b = 1 - sel_a; + int sub_i = ((rot == 90) || (rot == 180)) ? 
1 : -1; + + for (int i = 0; i < LaneCountFromFormat(vform); i++) { + int64_t result = acc.Int(vform, i); + for (int j = 0; j < 2; j++) { + int64_t r1 = src1.Int(quarter_vform, (4 * i) + (2 * j) + 0); + int64_t i1 = src1.Int(quarter_vform, (4 * i) + (2 * j) + 1); + int64_t r2 = src2.Int(quarter_vform, (4 * i) + (2 * j) + sel_a); + int64_t i2 = src2.Int(quarter_vform, (4 * i) + (2 * j) + sel_b); + result += (r1 * r2) + (sub_i * i1 * i2); + } + dst.SetInt(vform, i, result); + } + return dst; +} + +LogicVRegister Simulator::sqrdcmlah(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& srca, + const LogicVRegister& src1, + const LogicVRegister& src2, + int rot) { + SimVRegister src1_a, src1_b; + SimVRegister src2_a, src2_b; + SimVRegister srca_i, srca_r; + SimVRegister zero, temp; + zero.Clear(); + + if ((rot == 0) || (rot == 180)) { + uzp1(vform, src1_a, src1, zero); + uzp1(vform, src2_a, src2, zero); + uzp2(vform, src2_b, src2, zero); + } else { + uzp2(vform, src1_a, src1, zero); + uzp2(vform, src2_a, src2, zero); + uzp1(vform, src2_b, src2, zero); + } + + uzp1(vform, srca_r, srca, zero); + uzp2(vform, srca_i, srca, zero); + + bool sub_r = (rot == 90) || (rot == 180); + bool sub_i = (rot == 180) || (rot == 270); + + const bool round = true; + sqrdmlash(vform, srca_r, src1_a, src2_a, round, sub_r); + sqrdmlash(vform, srca_i, src1_a, src2_b, round, sub_i); + zip1(vform, dst, srca_r, srca_i); + return dst; +} + +LogicVRegister Simulator::sqrdcmlah(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& srca, + const LogicVRegister& src1, + const LogicVRegister& src2, + int index, + int rot) { + SimVRegister temp; + dup_elements_to_segments(VectorFormatDoubleWidth(vform), temp, src2, index); + return sqrdcmlah(vform, dst, srca, src1, temp, rot); +} + +LogicVRegister Simulator::sqrdmlash_d(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2, + bool round, + bool sub_op) { + // 2 * INT_64_MIN * INT_64_MIN causes INT_128 to overflow. + // To avoid this, we use: + // (dst << (esize - 1) + src1 * src2 + 1 << (esize - 2)) >> (esize - 1) + // which is same as: + // (dst << esize + 2 * src1 * src2 + 1 << (esize - 1)) >> esize. + + VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize); + int esize = kDRegSize; + vixl_uint128_t round_const, accum; + round_const.first = 0; + if (round) { + round_const.second = UINT64_C(1) << (esize - 2); + } else { + round_const.second = 0; + } + + dst.ClearForWrite(vform); + for (int i = 0; i < LaneCountFromFormat(vform); i++) { + // Shift the whole value left by `esize - 1` bits. + accum.first = dst.Int(vform, i) >> 1; + accum.second = dst.Int(vform, i) << (esize - 1); + + vixl_uint128_t product = Mul64(src1.Int(vform, i), src2.Int(vform, i)); + + if (sub_op) { + product = Neg128(product); + } + accum = Add128(accum, product); + + // Perform rounding. + accum = Add128(accum, round_const); + + // Arithmetic shift the whole value right by `esize - 1` bits. + accum.second = (accum.first << 1) | (accum.second >> (esize - 1)); + accum.first = -(accum.first >> (esize - 1)); + + // Perform saturation. + bool is_pos = (accum.first == 0) ? 
true : false; + if (is_pos && + (accum.second > static_cast(MaxIntFromFormat(vform)))) { + accum.second = MaxIntFromFormat(vform); + } else if (!is_pos && (accum.second < + static_cast(MinIntFromFormat(vform)))) { + accum.second = MinIntFromFormat(vform); + } + + dst.SetInt(vform, i, accum.second); + } + + return dst; +} + +LogicVRegister Simulator::sqrdmlash(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2, + bool round, + bool sub_op) { + // 2 * INT_32_MIN * INT_32_MIN causes int64_t to overflow. + // To avoid this, we use: + // (dst << (esize - 1) + src1 * src2 + 1 << (esize - 2)) >> (esize - 1) + // which is same as: + // (dst << esize + 2 * src1 * src2 + 1 << (esize - 1)) >> esize. + + if (vform == kFormatVnD) { + return sqrdmlash_d(vform, dst, src1, src2, round, sub_op); + } + + int esize = LaneSizeInBitsFromFormat(vform); + int round_const = round ? (1 << (esize - 2)) : 0; + int64_t accum; + + dst.ClearForWrite(vform); + for (int i = 0; i < LaneCountFromFormat(vform); i++) { + accum = dst.Int(vform, i) << (esize - 1); + if (sub_op) { + accum -= src1.Int(vform, i) * src2.Int(vform, i); + } else { + accum += src1.Int(vform, i) * src2.Int(vform, i); + } + accum += round_const; + accum = accum >> (esize - 1); + + if (accum > MaxIntFromFormat(vform)) { + accum = MaxIntFromFormat(vform); + } else if (accum < MinIntFromFormat(vform)) { + accum = MinIntFromFormat(vform); + } + dst.SetInt(vform, i, accum); + } + return dst; +} + + +LogicVRegister Simulator::sqrdmlah(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2, + bool round) { + return sqrdmlash(vform, dst, src1, src2, round, false); +} + + +LogicVRegister Simulator::sqrdmlsh(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2, + bool round) { + return sqrdmlash(vform, dst, src1, src2, round, true); +} + + +LogicVRegister Simulator::sqdmulh(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2) { + return sqrdmulh(vform, dst, src1, src2, false); +} + + +LogicVRegister Simulator::addhn(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2) { + SimVRegister temp; + add(VectorFormatDoubleWidth(vform), temp, src1, src2); + shrn(vform, dst, temp, LaneSizeInBitsFromFormat(vform)); + return dst; +} + + +LogicVRegister Simulator::addhn2(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2) { + SimVRegister temp; + add(VectorFormatDoubleWidth(VectorFormatHalfLanes(vform)), temp, src1, src2); + shrn2(vform, dst, temp, LaneSizeInBitsFromFormat(vform)); + return dst; +} + + +LogicVRegister Simulator::raddhn(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2) { + SimVRegister temp; + add(VectorFormatDoubleWidth(vform), temp, src1, src2); + rshrn(vform, dst, temp, LaneSizeInBitsFromFormat(vform)); + return dst; +} + + +LogicVRegister Simulator::raddhn2(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2) { + SimVRegister temp; + add(VectorFormatDoubleWidth(VectorFormatHalfLanes(vform)), temp, src1, src2); + rshrn2(vform, dst, temp, LaneSizeInBitsFromFormat(vform)); + return dst; +} + + +LogicVRegister Simulator::subhn(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2) { + SimVRegister temp; + 
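+  // Subtract in the double-width format, then keep only the top half of each
+  // difference: with an 8B destination and 8H sources, each 16-bit difference
+  // is shifted right by 8 before being written to the byte-sized lanes.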
sub(VectorFormatDoubleWidth(vform), temp, src1, src2); + shrn(vform, dst, temp, LaneSizeInBitsFromFormat(vform)); + return dst; +} + + +LogicVRegister Simulator::subhn2(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2) { + SimVRegister temp; + sub(VectorFormatDoubleWidth(VectorFormatHalfLanes(vform)), temp, src1, src2); + shrn2(vform, dst, temp, LaneSizeInBitsFromFormat(vform)); + return dst; +} + + +LogicVRegister Simulator::rsubhn(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2) { + SimVRegister temp; + sub(VectorFormatDoubleWidth(vform), temp, src1, src2); + rshrn(vform, dst, temp, LaneSizeInBitsFromFormat(vform)); + return dst; +} + + +LogicVRegister Simulator::rsubhn2(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2) { + SimVRegister temp; + sub(VectorFormatDoubleWidth(VectorFormatHalfLanes(vform)), temp, src1, src2); + rshrn2(vform, dst, temp, LaneSizeInBitsFromFormat(vform)); + return dst; +} + + +LogicVRegister Simulator::trn1(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2) { + uint64_t result[kZRegMaxSizeInBytes] = {}; + int lane_count = LaneCountFromFormat(vform); + int pairs = lane_count / 2; + for (int i = 0; i < pairs; ++i) { + result[2 * i] = src1.Uint(vform, 2 * i); + result[(2 * i) + 1] = src2.Uint(vform, 2 * i); + } + + dst.ClearForWrite(vform); + for (int i = 0; i < lane_count; ++i) { + dst.SetUint(vform, i, result[i]); + } + return dst; +} + + +LogicVRegister Simulator::trn2(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2) { + uint64_t result[kZRegMaxSizeInBytes] = {}; + int lane_count = LaneCountFromFormat(vform); + int pairs = lane_count / 2; + for (int i = 0; i < pairs; ++i) { + result[2 * i] = src1.Uint(vform, (2 * i) + 1); + result[(2 * i) + 1] = src2.Uint(vform, (2 * i) + 1); + } + + dst.ClearForWrite(vform); + for (int i = 0; i < lane_count; ++i) { + dst.SetUint(vform, i, result[i]); + } + return dst; +} + + +LogicVRegister Simulator::zip1(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2) { + uint64_t result[kZRegMaxSizeInBytes] = {}; + int lane_count = LaneCountFromFormat(vform); + int pairs = lane_count / 2; + for (int i = 0; i < pairs; ++i) { + result[2 * i] = src1.Uint(vform, i); + result[(2 * i) + 1] = src2.Uint(vform, i); + } + + dst.ClearForWrite(vform); + for (int i = 0; i < lane_count; ++i) { + dst.SetUint(vform, i, result[i]); + } + return dst; +} + + +LogicVRegister Simulator::zip2(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2) { + uint64_t result[kZRegMaxSizeInBytes] = {}; + int lane_count = LaneCountFromFormat(vform); + int pairs = lane_count / 2; + for (int i = 0; i < pairs; ++i) { + result[2 * i] = src1.Uint(vform, pairs + i); + result[(2 * i) + 1] = src2.Uint(vform, pairs + i); + } + + dst.ClearForWrite(vform); + for (int i = 0; i < lane_count; ++i) { + dst.SetUint(vform, i, result[i]); + } + return dst; +} + + +LogicVRegister Simulator::uzp1(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2) { + uint64_t result[kZRegMaxSizeInBytes * 2]; + int lane_count = LaneCountFromFormat(vform); + for (int i = 0; i < lane_count; ++i) { + result[i] = src1.Uint(vform, i); + result[lane_count + i] = src2.Uint(vform, i); + } + + 
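+  // result[] now holds the concatenation of src1 and src2; the loop below
+  // keeps the even-numbered elements, so {a0, a1, a2, a3, b0, b1, b2, b3}
+  // becomes {a0, a2, b0, b2}.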
dst.ClearForWrite(vform); + for (int i = 0; i < lane_count; ++i) { + dst.SetUint(vform, i, result[2 * i]); + } + return dst; +} + + +LogicVRegister Simulator::uzp2(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2) { + uint64_t result[kZRegMaxSizeInBytes * 2]; + int lane_count = LaneCountFromFormat(vform); + for (int i = 0; i < lane_count; ++i) { + result[i] = src1.Uint(vform, i); + result[lane_count + i] = src2.Uint(vform, i); + } + + dst.ClearForWrite(vform); + for (int i = 0; i < lane_count; ++i) { + dst.SetUint(vform, i, result[(2 * i) + 1]); + } + return dst; +} + +LogicVRegister Simulator::interleave_top_bottom(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src) { + // Interleave the top and bottom half of a vector, ie. for a vector: + // + // [ ... | F | D | B | ... | E | C | A ] + // + // where B is the first element in the top half of the vector, produce a + // result vector: + // + // [ ... | ... | F | E | D | C | B | A ] + + uint64_t result[kZRegMaxSizeInBytes] = {}; + int lane_count = LaneCountFromFormat(vform); + for (int i = 0; i < lane_count; i += 2) { + result[i] = src.Uint(vform, i / 2); + result[i + 1] = src.Uint(vform, (lane_count / 2) + (i / 2)); + } + dst.SetUintArray(vform, result); + return dst; +} + +template +T Simulator::FPNeg(T op) { + return -op; +} + +template +T Simulator::FPAdd(T op1, T op2) { + T result = FPProcessNaNs(op1, op2); + if (IsNaN(result)) { + return result; + } + + if (IsInf(op1) && IsInf(op2) && (op1 != op2)) { + // inf + -inf returns the default NaN. + FPProcessException(); + return FPDefaultNaN(); + } else { + // Other cases should be handled by standard arithmetic. + return op1 + op2; + } +} + + +template +T Simulator::FPSub(T op1, T op2) { + // NaNs should be handled elsewhere. + VIXL_ASSERT(!IsNaN(op1) && !IsNaN(op2)); + + if (IsInf(op1) && IsInf(op2) && (op1 == op2)) { + // inf - inf returns the default NaN. + FPProcessException(); + return FPDefaultNaN(); + } else { + // Other cases should be handled by standard arithmetic. + return op1 - op2; + } +} + +template +T Simulator::FPMulNaNs(T op1, T op2) { + T result = FPProcessNaNs(op1, op2); + return IsNaN(result) ? result : FPMul(op1, op2); +} + +template +T Simulator::FPMul(T op1, T op2) { + // NaNs should be handled elsewhere. + VIXL_ASSERT(!IsNaN(op1) && !IsNaN(op2)); + + if ((IsInf(op1) && (op2 == 0.0)) || (IsInf(op2) && (op1 == 0.0))) { + // inf * 0.0 returns the default NaN. + FPProcessException(); + return FPDefaultNaN(); + } else { + // Other cases should be handled by standard arithmetic. + return op1 * op2; + } +} + + +template +T Simulator::FPMulx(T op1, T op2) { + if ((IsInf(op1) && (op2 == 0.0)) || (IsInf(op2) && (op1 == 0.0))) { + // inf * 0.0 returns +/-2.0. + T two = 2.0; + return copysign(1.0, op1) * copysign(1.0, op2) * two; + } + return FPMul(op1, op2); +} + + +template +T Simulator::FPMulAdd(T a, T op1, T op2) { + T result = FPProcessNaNs3(a, op1, op2); + + T sign_a = copysign(1.0, a); + T sign_prod = copysign(1.0, op1) * copysign(1.0, op2); + bool isinf_prod = IsInf(op1) || IsInf(op2); + bool operation_generates_nan = + (IsInf(op1) && (op2 == 0.0)) || // inf * 0.0 + (IsInf(op2) && (op1 == 0.0)) || // 0.0 * inf + (IsInf(a) && isinf_prod && (sign_a != sign_prod)); // inf - inf + + if (IsNaN(result)) { + // Generated NaNs override quiet NaNs propagated from a. 
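+    // That is, if the operation itself is invalid (inf * 0, or an infinite
+    // product added to an infinity of the opposite sign) while a is a quiet
+    // NaN, the default NaN wins; otherwise the NaN selected by FPProcessNaNs3
+    // above is returned.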
+ if (operation_generates_nan && IsQuietNaN(a)) { + FPProcessException(); + return FPDefaultNaN(); + } else { + return result; + } + } + + // If the operation would produce a NaN, return the default NaN. + if (operation_generates_nan) { + FPProcessException(); + return FPDefaultNaN(); + } + + // Work around broken fma implementations for exact zero results: The sign of + // exact 0.0 results is positive unless both a and op1 * op2 are negative. + if (((op1 == 0.0) || (op2 == 0.0)) && (a == 0.0)) { + return ((sign_a < T(0.0)) && (sign_prod < T(0.0))) ? -0.0 : 0.0; + } + + result = FusedMultiplyAdd(op1, op2, a); + VIXL_ASSERT(!IsNaN(result)); + + // Work around broken fma implementations for rounded zero results: If a is + // 0.0, the sign of the result is the sign of op1 * op2 before rounding. + if ((a == 0.0) && (result == 0.0)) { + return copysign(0.0, sign_prod); + } + + return result; +} + + +template +T Simulator::FPDiv(T op1, T op2) { + // NaNs should be handled elsewhere. + VIXL_ASSERT(!IsNaN(op1) && !IsNaN(op2)); + + if ((IsInf(op1) && IsInf(op2)) || ((op1 == 0.0) && (op2 == 0.0))) { + // inf / inf and 0.0 / 0.0 return the default NaN. + FPProcessException(); + return FPDefaultNaN(); + } else { + if (op2 == 0.0) { + FPProcessException(); + if (!IsNaN(op1)) { + double op1_sign = copysign(1.0, op1); + double op2_sign = copysign(1.0, op2); + return static_cast(op1_sign * op2_sign * kFP64PositiveInfinity); + } + } + + // Other cases should be handled by standard arithmetic. + return op1 / op2; + } +} + + +template +T Simulator::FPSqrt(T op) { + if (IsNaN(op)) { + return FPProcessNaN(op); + } else if (op < T(0.0)) { + FPProcessException(); + return FPDefaultNaN(); + } else { + return sqrt(op); + } +} + + +template +T Simulator::FPMax(T a, T b) { + T result = FPProcessNaNs(a, b); + if (IsNaN(result)) return result; + + if ((a == 0.0) && (b == 0.0) && (copysign(1.0, a) != copysign(1.0, b))) { + // a and b are zero, and the sign differs: return +0.0. + return 0.0; + } else { + return (a > b) ? a : b; + } +} + + +template +T Simulator::FPMaxNM(T a, T b) { + if (IsQuietNaN(a) && !IsQuietNaN(b)) { + a = kFP64NegativeInfinity; + } else if (!IsQuietNaN(a) && IsQuietNaN(b)) { + b = kFP64NegativeInfinity; + } + + T result = FPProcessNaNs(a, b); + return IsNaN(result) ? result : FPMax(a, b); +} + + +template +T Simulator::FPMin(T a, T b) { + T result = FPProcessNaNs(a, b); + if (IsNaN(result)) return result; + + if ((a == 0.0) && (b == 0.0) && (copysign(1.0, a) != copysign(1.0, b))) { + // a and b are zero, and the sign differs: return -0.0. + return -0.0; + } else { + return (a < b) ? a : b; + } +} + + +template +T Simulator::FPMinNM(T a, T b) { + if (IsQuietNaN(a) && !IsQuietNaN(b)) { + a = kFP64PositiveInfinity; + } else if (!IsQuietNaN(a) && IsQuietNaN(b)) { + b = kFP64PositiveInfinity; + } + + T result = FPProcessNaNs(a, b); + return IsNaN(result) ? result : FPMin(a, b); +} + + +template +T Simulator::FPRecipStepFused(T op1, T op2) { + const T two = 2.0; + if ((IsInf(op1) && (op2 == 0.0)) || ((op1 == 0.0) && (IsInf(op2)))) { + return two; + } else if (IsInf(op1) || IsInf(op2)) { + // Return +inf if signs match, otherwise -inf. + return ((op1 >= 0.0) == (op2 >= 0.0)) ? 
kFP64PositiveInfinity + : kFP64NegativeInfinity; + } else { + return FusedMultiplyAdd(op1, op2, two); + } +} + +template +bool IsNormal(T value) { + return std::isnormal(value); +} + +template <> +bool IsNormal(SimFloat16 value) { + uint16_t rawbits = Float16ToRawbits(value); + uint16_t exp_mask = 0x7c00; + // Check that the exponent is neither all zeroes or all ones. + return ((rawbits & exp_mask) != 0) && ((~rawbits & exp_mask) != 0); +} + + +template +T Simulator::FPRSqrtStepFused(T op1, T op2) { + const T one_point_five = 1.5; + const T two = 2.0; + + if ((IsInf(op1) && (op2 == 0.0)) || ((op1 == 0.0) && (IsInf(op2)))) { + return one_point_five; + } else if (IsInf(op1) || IsInf(op2)) { + // Return +inf if signs match, otherwise -inf. + return ((op1 >= 0.0) == (op2 >= 0.0)) ? kFP64PositiveInfinity + : kFP64NegativeInfinity; + } else { + // The multiply-add-halve operation must be fully fused, so avoid interim + // rounding by checking which operand can be losslessly divided by two + // before doing the multiply-add. + if (IsNormal(op1 / two)) { + return FusedMultiplyAdd(op1 / two, op2, one_point_five); + } else if (IsNormal(op2 / two)) { + return FusedMultiplyAdd(op1, op2 / two, one_point_five); + } else { + // Neither operand is normal after halving: the result is dominated by + // the addition term, so just return that. + return one_point_five; + } + } +} + +int32_t Simulator::FPToFixedJS(double value) { + // The Z-flag is set when the conversion from double precision floating-point + // to 32-bit integer is exact. If the source value is +/-Infinity, -0.0, NaN, + // outside the bounds of a 32-bit integer, or isn't an exact integer then the + // Z-flag is unset. + int Z = 1; + int32_t result; + + if ((value == 0.0) || (value == kFP64PositiveInfinity) || + (value == kFP64NegativeInfinity)) { + // +/- zero and infinity all return zero, however -0 and +/- Infinity also + // unset the Z-flag. + result = 0.0; + if ((value != 0.0) || std::signbit(value)) { + Z = 0; + } + } else if (std::isnan(value)) { + // NaN values unset the Z-flag and set the result to 0. + FPProcessNaN(value); + result = 0; + Z = 0; + } else { + // All other values are converted to an integer representation, rounded + // toward zero. + double int_result = std::floor(value); + double error = value - int_result; + + if ((error != 0.0) && (int_result < 0.0)) { + int_result++; + } + + // Constrain the value into the range [INT32_MIN, INT32_MAX]. We can almost + // write a one-liner with std::round, but the behaviour on ties is incorrect + // for our purposes. + double mod_const = static_cast(UINT64_C(1) << 32); + double mod_error = + (int_result / mod_const) - std::floor(int_result / mod_const); + double constrained; + if (mod_error == 0.5) { + constrained = INT32_MIN; + } else { + constrained = int_result - mod_const * round(int_result / mod_const); + } + + VIXL_ASSERT(std::floor(constrained) == constrained); + VIXL_ASSERT(constrained >= INT32_MIN); + VIXL_ASSERT(constrained <= INT32_MAX); + + // Take the bottom 32 bits of the result as a 32-bit integer. + result = static_cast(constrained); + + if ((int_result < INT32_MIN) || (int_result > INT32_MAX) || + (error != 0.0)) { + // If the integer result is out of range or the conversion isn't exact, + // take exception and unset the Z-flag. 
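+      // Note that `result` still holds the low 32 bits of the truncated
+      // value (the JavaScript-style wrap-around used by FJCVTZS); only the
+      // Z-flag records that the conversion was out of range or inexact.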
+ FPProcessException(); + Z = 0; + } + } + + ReadNzcv().SetN(0); + ReadNzcv().SetZ(Z); + ReadNzcv().SetC(0); + ReadNzcv().SetV(0); + + return result; +} + +double Simulator::FPRoundIntCommon(double value, FPRounding round_mode) { + VIXL_ASSERT((value != kFP64PositiveInfinity) && + (value != kFP64NegativeInfinity)); + VIXL_ASSERT(!IsNaN(value)); + + double int_result = std::floor(value); + double error = value - int_result; + switch (round_mode) { + case FPTieAway: { + // Take care of correctly handling the range ]-0.5, -0.0], which must + // yield -0.0. + if ((-0.5 < value) && (value < 0.0)) { + int_result = -0.0; + + } else if ((error > 0.5) || ((error == 0.5) && (int_result >= 0.0))) { + // If the error is greater than 0.5, or is equal to 0.5 and the integer + // result is positive, round up. + int_result++; + } + break; + } + case FPTieEven: { + // Take care of correctly handling the range [-0.5, -0.0], which must + // yield -0.0. + if ((-0.5 <= value) && (value < 0.0)) { + int_result = -0.0; + + // If the error is greater than 0.5, or is equal to 0.5 and the integer + // result is odd, round up. + } else if ((error > 0.5) || + ((error == 0.5) && (std::fmod(int_result, 2) != 0))) { + int_result++; + } + break; + } + case FPZero: { + // If value>0 then we take floor(value) + // otherwise, ceil(value). + if (value < 0) { + int_result = ceil(value); + } + break; + } + case FPNegativeInfinity: { + // We always use floor(value). + break; + } + case FPPositiveInfinity: { + // Take care of correctly handling the range ]-1.0, -0.0], which must + // yield -0.0. + if ((-1.0 < value) && (value < 0.0)) { + int_result = -0.0; + + // If the error is non-zero, round up. + } else if (error > 0.0) { + int_result++; + } + break; + } + default: + VIXL_UNIMPLEMENTED(); + } + return int_result; +} + +double Simulator::FPRoundInt(double value, FPRounding round_mode) { + if ((value == 0.0) || (value == kFP64PositiveInfinity) || + (value == kFP64NegativeInfinity)) { + return value; + } else if (IsNaN(value)) { + return FPProcessNaN(value); + } + return FPRoundIntCommon(value, round_mode); +} + +double Simulator::FPRoundInt(double value, + FPRounding round_mode, + FrintMode frint_mode) { + if (frint_mode == kFrintToInteger) { + return FPRoundInt(value, round_mode); + } + + VIXL_ASSERT((frint_mode == kFrintToInt32) || (frint_mode == kFrintToInt64)); + + if (value == 0.0) { + return value; + } + + if ((value == kFP64PositiveInfinity) || (value == kFP64NegativeInfinity) || + IsNaN(value)) { + if (frint_mode == kFrintToInt32) { + return INT32_MIN; + } else { + return INT64_MIN; + } + } + + double result = FPRoundIntCommon(value, round_mode); + + // We want to compare `result > INT64_MAX` below, but INT64_MAX isn't exactly + // representable as a double, and is rounded to (INT64_MAX + 1) when + // converted. To avoid this, we compare `result >= int64_max_plus_one` + // instead; this is safe because `result` is known to be integral, and + // `int64_max_plus_one` is exactly representable as a double. 
+ constexpr uint64_t int64_max_plus_one = static_cast(INT64_MAX) + 1; + VIXL_STATIC_ASSERT(static_cast(static_cast( + int64_max_plus_one)) == int64_max_plus_one); + + if (frint_mode == kFrintToInt32) { + if ((result > INT32_MAX) || (result < INT32_MIN)) { + return INT32_MIN; + } + } else if ((result >= int64_max_plus_one) || (result < INT64_MIN)) { + return INT64_MIN; + } + + return result; +} + +int16_t Simulator::FPToInt16(double value, FPRounding rmode) { + value = FPRoundInt(value, rmode); + if (value >= kHMaxInt) { + return kHMaxInt; + } else if (value < kHMinInt) { + return kHMinInt; + } + return IsNaN(value) ? 0 : static_cast(value); +} + + +int32_t Simulator::FPToInt32(double value, FPRounding rmode) { + value = FPRoundInt(value, rmode); + if (value >= kWMaxInt) { + return kWMaxInt; + } else if (value < kWMinInt) { + return kWMinInt; + } + return IsNaN(value) ? 0 : static_cast(value); +} + + +int64_t Simulator::FPToInt64(double value, FPRounding rmode) { + value = FPRoundInt(value, rmode); + // This is equivalent to "if (value >= kXMaxInt)" but avoids rounding issues + // as a result of kMaxInt not being representable as a double. + if (value >= 9223372036854775808.) { + return kXMaxInt; + } else if (value < kXMinInt) { + return kXMinInt; + } + return IsNaN(value) ? 0 : static_cast(value); +} + + +uint16_t Simulator::FPToUInt16(double value, FPRounding rmode) { + value = FPRoundInt(value, rmode); + if (value >= kHMaxUInt) { + return kHMaxUInt; + } else if (value < 0.0) { + return 0; + } + return IsNaN(value) ? 0 : static_cast(value); +} + + +uint32_t Simulator::FPToUInt32(double value, FPRounding rmode) { + value = FPRoundInt(value, rmode); + if (value >= kWMaxUInt) { + return kWMaxUInt; + } else if (value < 0.0) { + return 0; + } + return IsNaN(value) ? 0 : static_cast(value); +} + + +uint64_t Simulator::FPToUInt64(double value, FPRounding rmode) { + value = FPRoundInt(value, rmode); + // This is equivalent to "if (value >= kXMaxUInt)" but avoids rounding issues + // as a result of kMaxUInt not being representable as a double. + if (value >= 18446744073709551616.) { + return kXMaxUInt; + } else if (value < 0.0) { + return 0; + } + return IsNaN(value) ? 
0 : static_cast(value); +} + + +#define DEFINE_NEON_FP_VECTOR_OP(FN, OP, PROCNAN) \ + template \ + LogicVRegister Simulator::FN(VectorFormat vform, \ + LogicVRegister dst, \ + const LogicVRegister& src1, \ + const LogicVRegister& src2) { \ + dst.ClearForWrite(vform); \ + for (int i = 0; i < LaneCountFromFormat(vform); i++) { \ + T op1 = src1.Float(i); \ + T op2 = src2.Float(i); \ + T result; \ + if (PROCNAN) { \ + result = FPProcessNaNs(op1, op2); \ + if (!IsNaN(result)) { \ + result = OP(op1, op2); \ + } \ + } else { \ + result = OP(op1, op2); \ + } \ + dst.SetFloat(vform, i, result); \ + } \ + return dst; \ + } \ + \ + LogicVRegister Simulator::FN(VectorFormat vform, \ + LogicVRegister dst, \ + const LogicVRegister& src1, \ + const LogicVRegister& src2) { \ + if (LaneSizeInBitsFromFormat(vform) == kHRegSize) { \ + FN(vform, dst, src1, src2); \ + } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) { \ + FN(vform, dst, src1, src2); \ + } else { \ + VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize); \ + FN(vform, dst, src1, src2); \ + } \ + return dst; \ + } +NEON_FP3SAME_LIST(DEFINE_NEON_FP_VECTOR_OP) +#undef DEFINE_NEON_FP_VECTOR_OP + + +LogicVRegister Simulator::fnmul(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2) { + SimVRegister temp; + LogicVRegister product = fmul(vform, temp, src1, src2); + return fneg(vform, dst, product); +} + + +template +LogicVRegister Simulator::frecps(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2) { + dst.ClearForWrite(vform); + for (int i = 0; i < LaneCountFromFormat(vform); i++) { + T op1 = -src1.Float(i); + T op2 = src2.Float(i); + T result = FPProcessNaNs(op1, op2); + dst.SetFloat(vform, i, IsNaN(result) ? result : FPRecipStepFused(op1, op2)); + } + return dst; +} + + +LogicVRegister Simulator::frecps(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2) { + if (LaneSizeInBitsFromFormat(vform) == kHRegSize) { + frecps(vform, dst, src1, src2); + } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) { + frecps(vform, dst, src1, src2); + } else { + VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize); + frecps(vform, dst, src1, src2); + } + return dst; +} + + +template +LogicVRegister Simulator::frsqrts(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2) { + dst.ClearForWrite(vform); + for (int i = 0; i < LaneCountFromFormat(vform); i++) { + T op1 = -src1.Float(i); + T op2 = src2.Float(i); + T result = FPProcessNaNs(op1, op2); + dst.SetFloat(vform, i, IsNaN(result) ? 
result : FPRSqrtStepFused(op1, op2)); + } + return dst; +} + + +LogicVRegister Simulator::frsqrts(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2) { + if (LaneSizeInBitsFromFormat(vform) == kHRegSize) { + frsqrts(vform, dst, src1, src2); + } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) { + frsqrts(vform, dst, src1, src2); + } else { + VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize); + frsqrts(vform, dst, src1, src2); + } + return dst; +} + + +template +LogicVRegister Simulator::fcmp(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2, + Condition cond) { + dst.ClearForWrite(vform); + for (int i = 0; i < LaneCountFromFormat(vform); i++) { + bool result = false; + T op1 = src1.Float(i); + T op2 = src2.Float(i); + bool unordered = IsNaN(FPProcessNaNs(op1, op2)); + + switch (cond) { + case eq: + result = (op1 == op2); + break; + case ge: + result = (op1 >= op2); + break; + case gt: + result = (op1 > op2); + break; + case le: + result = (op1 <= op2); + break; + case lt: + result = (op1 < op2); + break; + case ne: + result = (op1 != op2); + break; + case uo: + result = unordered; + break; + default: + // Other conditions are defined in terms of those above. + VIXL_UNREACHABLE(); + break; + } + + if (result && unordered) { + // Only `uo` and `ne` can be true for unordered comparisons. + VIXL_ASSERT((cond == uo) || (cond == ne)); + } + + dst.SetUint(vform, i, result ? MaxUintFromFormat(vform) : 0); + } + return dst; +} + + +LogicVRegister Simulator::fcmp(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2, + Condition cond) { + if (LaneSizeInBitsFromFormat(vform) == kHRegSize) { + fcmp(vform, dst, src1, src2, cond); + } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) { + fcmp(vform, dst, src1, src2, cond); + } else { + VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize); + fcmp(vform, dst, src1, src2, cond); + } + return dst; +} + + +LogicVRegister Simulator::fcmp_zero(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src, + Condition cond) { + SimVRegister temp; + if (LaneSizeInBitsFromFormat(vform) == kHRegSize) { + LogicVRegister zero_reg = + dup_immediate(vform, temp, Float16ToRawbits(SimFloat16(0.0))); + fcmp(vform, dst, src, zero_reg, cond); + } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) { + LogicVRegister zero_reg = dup_immediate(vform, temp, FloatToRawbits(0.0)); + fcmp(vform, dst, src, zero_reg, cond); + } else { + VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize); + LogicVRegister zero_reg = dup_immediate(vform, temp, DoubleToRawbits(0.0)); + fcmp(vform, dst, src, zero_reg, cond); + } + return dst; +} + + +LogicVRegister Simulator::fabscmp(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2, + Condition cond) { + SimVRegister temp1, temp2; + if (LaneSizeInBitsFromFormat(vform) == kHRegSize) { + LogicVRegister abs_src1 = fabs_(vform, temp1, src1); + LogicVRegister abs_src2 = fabs_(vform, temp2, src2); + fcmp(vform, dst, abs_src1, abs_src2, cond); + } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) { + LogicVRegister abs_src1 = fabs_(vform, temp1, src1); + LogicVRegister abs_src2 = fabs_(vform, temp2, src2); + fcmp(vform, dst, abs_src1, abs_src2, cond); + } else { + VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize); + LogicVRegister abs_src1 = fabs_(vform, temp1, src1); + LogicVRegister abs_src2 = fabs_(vform, 
temp2, src2); + fcmp(vform, dst, abs_src1, abs_src2, cond); + } + return dst; +} + + +template +LogicVRegister Simulator::fmla(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& srca, + const LogicVRegister& src1, + const LogicVRegister& src2) { + dst.ClearForWrite(vform); + for (int i = 0; i < LaneCountFromFormat(vform); i++) { + T op1 = src1.Float(i); + T op2 = src2.Float(i); + T acc = srca.Float(i); + T result = FPMulAdd(acc, op1, op2); + dst.SetFloat(vform, i, result); + } + return dst; +} + + +LogicVRegister Simulator::fmla(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& srca, + const LogicVRegister& src1, + const LogicVRegister& src2) { + if (LaneSizeInBitsFromFormat(vform) == kHRegSize) { + fmla(vform, dst, srca, src1, src2); + } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) { + fmla(vform, dst, srca, src1, src2); + } else { + VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize); + fmla(vform, dst, srca, src1, src2); + } + return dst; +} + + +template +LogicVRegister Simulator::fmls(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& srca, + const LogicVRegister& src1, + const LogicVRegister& src2) { + dst.ClearForWrite(vform); + for (int i = 0; i < LaneCountFromFormat(vform); i++) { + T op1 = -src1.Float(i); + T op2 = src2.Float(i); + T acc = srca.Float(i); + T result = FPMulAdd(acc, op1, op2); + dst.SetFloat(i, result); + } + return dst; +} + + +LogicVRegister Simulator::fmls(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& srca, + const LogicVRegister& src1, + const LogicVRegister& src2) { + if (LaneSizeInBitsFromFormat(vform) == kHRegSize) { + fmls(vform, dst, srca, src1, src2); + } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) { + fmls(vform, dst, srca, src1, src2); + } else { + VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize); + fmls(vform, dst, srca, src1, src2); + } + return dst; +} + + +LogicVRegister Simulator::fmlal(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2) { + VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kSRegSize); + dst.ClearForWrite(vform); + for (int i = 0; i < LaneCountFromFormat(vform); i++) { + float op1 = FPToFloat(src1.Float(i), kIgnoreDefaultNaN); + float op2 = FPToFloat(src2.Float(i), kIgnoreDefaultNaN); + float acc = dst.Float(i); + float result = FPMulAdd(acc, op1, op2); + dst.SetFloat(i, result); + } + return dst; +} + + +LogicVRegister Simulator::fmlal2(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2) { + VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kSRegSize); + dst.ClearForWrite(vform); + for (int i = 0; i < LaneCountFromFormat(vform); i++) { + int src = i + LaneCountFromFormat(vform); + float op1 = FPToFloat(src1.Float(src), kIgnoreDefaultNaN); + float op2 = FPToFloat(src2.Float(src), kIgnoreDefaultNaN); + float acc = dst.Float(i); + float result = FPMulAdd(acc, op1, op2); + dst.SetFloat(i, result); + } + return dst; +} + + +LogicVRegister Simulator::fmlsl(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2) { + VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kSRegSize); + dst.ClearForWrite(vform); + for (int i = 0; i < LaneCountFromFormat(vform); i++) { + float op1 = -FPToFloat(src1.Float(i), kIgnoreDefaultNaN); + float op2 = FPToFloat(src2.Float(i), kIgnoreDefaultNaN); + float acc = dst.Float(i); + float result = FPMulAdd(acc, op1, op2); + dst.SetFloat(i, result); + } + return dst; +} + + 
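+// As with fmlal2 above, the '2' variants read their half-precision inputs
+// from the upper half of the source vectors: lane i of the single-precision
+// destination accumulates the widened product of src1 and src2 lane
+// (i + lane_count), negated here for fmlsl2.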
+LogicVRegister Simulator::fmlsl2(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2) { + VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kSRegSize); + dst.ClearForWrite(vform); + for (int i = 0; i < LaneCountFromFormat(vform); i++) { + int src = i + LaneCountFromFormat(vform); + float op1 = -FPToFloat(src1.Float(src), kIgnoreDefaultNaN); + float op2 = FPToFloat(src2.Float(src), kIgnoreDefaultNaN); + float acc = dst.Float(i); + float result = FPMulAdd(acc, op1, op2); + dst.SetFloat(i, result); + } + return dst; +} + + +LogicVRegister Simulator::fmlal(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2, + int index) { + VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kSRegSize); + dst.ClearForWrite(vform); + float op2 = FPToFloat(src2.Float(index), kIgnoreDefaultNaN); + for (int i = 0; i < LaneCountFromFormat(vform); i++) { + float op1 = FPToFloat(src1.Float(i), kIgnoreDefaultNaN); + float acc = dst.Float(i); + float result = FPMulAdd(acc, op1, op2); + dst.SetFloat(i, result); + } + return dst; +} + + +LogicVRegister Simulator::fmlal2(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2, + int index) { + VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kSRegSize); + dst.ClearForWrite(vform); + float op2 = FPToFloat(src2.Float(index), kIgnoreDefaultNaN); + for (int i = 0; i < LaneCountFromFormat(vform); i++) { + int src = i + LaneCountFromFormat(vform); + float op1 = FPToFloat(src1.Float(src), kIgnoreDefaultNaN); + float acc = dst.Float(i); + float result = FPMulAdd(acc, op1, op2); + dst.SetFloat(i, result); + } + return dst; +} + + +LogicVRegister Simulator::fmlsl(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2, + int index) { + VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kSRegSize); + dst.ClearForWrite(vform); + float op2 = FPToFloat(src2.Float(index), kIgnoreDefaultNaN); + for (int i = 0; i < LaneCountFromFormat(vform); i++) { + float op1 = -FPToFloat(src1.Float(i), kIgnoreDefaultNaN); + float acc = dst.Float(i); + float result = FPMulAdd(acc, op1, op2); + dst.SetFloat(i, result); + } + return dst; +} + + +LogicVRegister Simulator::fmlsl2(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2, + int index) { + VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kSRegSize); + dst.ClearForWrite(vform); + float op2 = FPToFloat(src2.Float(index), kIgnoreDefaultNaN); + for (int i = 0; i < LaneCountFromFormat(vform); i++) { + int src = i + LaneCountFromFormat(vform); + float op1 = -FPToFloat(src1.Float(src), kIgnoreDefaultNaN); + float acc = dst.Float(i); + float result = FPMulAdd(acc, op1, op2); + dst.SetFloat(i, result); + } + return dst; +} + + +template +LogicVRegister Simulator::fneg(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src) { + dst.ClearForWrite(vform); + for (int i = 0; i < LaneCountFromFormat(vform); i++) { + T op = src.Float(i); + op = -op; + dst.SetFloat(i, op); + } + return dst; +} + + +LogicVRegister Simulator::fneg(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src) { + if (LaneSizeInBitsFromFormat(vform) == kHRegSize) { + fneg(vform, dst, src); + } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) { + fneg(vform, dst, src); + } else { + VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize); + fneg(vform, dst, src); + } + return dst; +} + + +template +LogicVRegister 
Simulator::fabs_(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src) { + dst.ClearForWrite(vform); + for (int i = 0; i < LaneCountFromFormat(vform); i++) { + T op = src.Float(i); + if (copysign(1.0, op) < 0.0) { + op = -op; + } + dst.SetFloat(i, op); + } + return dst; +} + + +LogicVRegister Simulator::fabs_(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src) { + if (LaneSizeInBitsFromFormat(vform) == kHRegSize) { + fabs_(vform, dst, src); + } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) { + fabs_(vform, dst, src); + } else { + VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize); + fabs_(vform, dst, src); + } + return dst; +} + + +LogicVRegister Simulator::fabd(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2) { + SimVRegister temp; + fsub(vform, temp, src1, src2); + fabs_(vform, dst, temp); + return dst; +} + + +LogicVRegister Simulator::fsqrt(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src) { + dst.ClearForWrite(vform); + if (LaneSizeInBitsFromFormat(vform) == kHRegSize) { + for (int i = 0; i < LaneCountFromFormat(vform); i++) { + SimFloat16 result = FPSqrt(src.Float(i)); + dst.SetFloat(i, result); + } + } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) { + for (int i = 0; i < LaneCountFromFormat(vform); i++) { + float result = FPSqrt(src.Float(i)); + dst.SetFloat(i, result); + } + } else { + VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize); + for (int i = 0; i < LaneCountFromFormat(vform); i++) { + double result = FPSqrt(src.Float(i)); + dst.SetFloat(i, result); + } + } + return dst; +} + + +#define DEFINE_NEON_FP_PAIR_OP(FNP, FN, OP) \ + LogicVRegister Simulator::FNP(VectorFormat vform, \ + LogicVRegister dst, \ + const LogicVRegister& src1, \ + const LogicVRegister& src2) { \ + SimVRegister temp1, temp2; \ + uzp1(vform, temp1, src1, src2); \ + uzp2(vform, temp2, src1, src2); \ + FN(vform, dst, temp1, temp2); \ + if (IsSVEFormat(vform)) { \ + interleave_top_bottom(vform, dst, dst); \ + } \ + return dst; \ + } \ + \ + LogicVRegister Simulator::FNP(VectorFormat vform, \ + LogicVRegister dst, \ + const LogicVRegister& src) { \ + if (vform == kFormatH) { \ + SimFloat16 result(OP(SimFloat16(RawbitsToFloat16(src.Uint(vform, 0))), \ + SimFloat16(RawbitsToFloat16(src.Uint(vform, 1))))); \ + dst.SetUint(vform, 0, Float16ToRawbits(result)); \ + } else if (vform == kFormatS) { \ + float result = OP(src.Float(0), src.Float(1)); \ + dst.SetFloat(0, result); \ + } else { \ + VIXL_ASSERT(vform == kFormatD); \ + double result = OP(src.Float(0), src.Float(1)); \ + dst.SetFloat(0, result); \ + } \ + dst.ClearForWrite(vform); \ + return dst; \ + } +NEON_FPPAIRWISE_LIST(DEFINE_NEON_FP_PAIR_OP) +#undef DEFINE_NEON_FP_PAIR_OP + +template +LogicVRegister Simulator::FPPairedAcrossHelper(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src, + typename TFPPairOp::type fn, + uint64_t inactive_value) { + int lane_count = LaneCountFromFormat(vform); + T result[kZRegMaxSizeInBytes / sizeof(T)]; + // Copy the source vector into a working array. Initialise the unused elements + // at the end of the array to the same value that a false predicate would set. + for (int i = 0; i < static_cast(ArrayLength(result)); i++) { + result[i] = (i < lane_count) + ? src.Float(i) + : RawbitsWithSizeToFP(sizeof(T) * 8, inactive_value); + } + + // Pairwise reduce the elements to a single value, using the pair op function + // argument. 
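+  // For example, with four lanes a, b, c and d, the loop below computes
+  // fn(fn(a, b), fn(c, d)), i.e. a binary tree of pairwise operations rather
+  // than a linear accumulation.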
+ for (int step = 1; step < lane_count; step *= 2) { + for (int i = 0; i < lane_count; i += step * 2) { + result[i] = (this->*fn)(result[i], result[i + step]); + } + } + dst.ClearForWrite(ScalarFormatFromFormat(vform)); + dst.SetFloat(0, result[0]); + return dst; +} + +LogicVRegister Simulator::FPPairedAcrossHelper( + VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src, + typename TFPPairOp::type fn16, + typename TFPPairOp::type fn32, + typename TFPPairOp::type fn64, + uint64_t inactive_value) { + switch (LaneSizeInBitsFromFormat(vform)) { + case kHRegSize: + return FPPairedAcrossHelper(vform, + dst, + src, + fn16, + inactive_value); + case kSRegSize: + return FPPairedAcrossHelper(vform, dst, src, fn32, inactive_value); + default: + VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize); + return FPPairedAcrossHelper(vform, + dst, + src, + fn64, + inactive_value); + } +} + +LogicVRegister Simulator::faddv(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src) { + return FPPairedAcrossHelper(vform, + dst, + src, + &Simulator::FPAdd, + &Simulator::FPAdd, + &Simulator::FPAdd, + 0); +} + +LogicVRegister Simulator::fmaxv(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src) { + int lane_size = LaneSizeInBitsFromFormat(vform); + uint64_t inactive_value = + FPToRawbitsWithSize(lane_size, kFP64NegativeInfinity); + return FPPairedAcrossHelper(vform, + dst, + src, + &Simulator::FPMax, + &Simulator::FPMax, + &Simulator::FPMax, + inactive_value); +} + + +LogicVRegister Simulator::fminv(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src) { + int lane_size = LaneSizeInBitsFromFormat(vform); + uint64_t inactive_value = + FPToRawbitsWithSize(lane_size, kFP64PositiveInfinity); + return FPPairedAcrossHelper(vform, + dst, + src, + &Simulator::FPMin, + &Simulator::FPMin, + &Simulator::FPMin, + inactive_value); +} + + +LogicVRegister Simulator::fmaxnmv(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src) { + int lane_size = LaneSizeInBitsFromFormat(vform); + uint64_t inactive_value = FPToRawbitsWithSize(lane_size, kFP64DefaultNaN); + return FPPairedAcrossHelper(vform, + dst, + src, + &Simulator::FPMaxNM, + &Simulator::FPMaxNM, + &Simulator::FPMaxNM, + inactive_value); +} + + +LogicVRegister Simulator::fminnmv(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src) { + int lane_size = LaneSizeInBitsFromFormat(vform); + uint64_t inactive_value = FPToRawbitsWithSize(lane_size, kFP64DefaultNaN); + return FPPairedAcrossHelper(vform, + dst, + src, + &Simulator::FPMinNM, + &Simulator::FPMinNM, + &Simulator::FPMinNM, + inactive_value); +} + + +LogicVRegister Simulator::fmul(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2, + int index) { + dst.ClearForWrite(vform); + SimVRegister temp; + if (LaneSizeInBitsFromFormat(vform) == kHRegSize) { + LogicVRegister index_reg = dup_element(kFormat8H, temp, src2, index); + fmul(vform, dst, src1, index_reg); + } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) { + LogicVRegister index_reg = dup_element(kFormat4S, temp, src2, index); + fmul(vform, dst, src1, index_reg); + } else { + VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize); + LogicVRegister index_reg = dup_element(kFormat2D, temp, src2, index); + fmul(vform, dst, src1, index_reg); + } + return dst; +} + + +LogicVRegister Simulator::fmla(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2, + int index) { + 
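+  // FMLA (by element): lane `index` of src2 is broadcast to every lane with
+  // dup_element, then multiplied with src1 and accumulated into dst.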
dst.ClearForWrite(vform); + SimVRegister temp; + if (LaneSizeInBitsFromFormat(vform) == kHRegSize) { + LogicVRegister index_reg = dup_element(kFormat8H, temp, src2, index); + fmla(vform, dst, dst, src1, index_reg); + } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) { + LogicVRegister index_reg = dup_element(kFormat4S, temp, src2, index); + fmla(vform, dst, dst, src1, index_reg); + } else { + VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize); + LogicVRegister index_reg = dup_element(kFormat2D, temp, src2, index); + fmla(vform, dst, dst, src1, index_reg); + } + return dst; +} + + +LogicVRegister Simulator::fmls(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2, + int index) { + dst.ClearForWrite(vform); + SimVRegister temp; + if (LaneSizeInBitsFromFormat(vform) == kHRegSize) { + LogicVRegister index_reg = dup_element(kFormat8H, temp, src2, index); + fmls(vform, dst, dst, src1, index_reg); + } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) { + LogicVRegister index_reg = dup_element(kFormat4S, temp, src2, index); + fmls(vform, dst, dst, src1, index_reg); + } else { + VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize); + LogicVRegister index_reg = dup_element(kFormat2D, temp, src2, index); + fmls(vform, dst, dst, src1, index_reg); + } + return dst; +} + + +LogicVRegister Simulator::fmulx(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2, + int index) { + dst.ClearForWrite(vform); + SimVRegister temp; + if (LaneSizeInBitsFromFormat(vform) == kHRegSize) { + LogicVRegister index_reg = dup_element(kFormat8H, temp, src2, index); + fmulx(vform, dst, src1, index_reg); + } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) { + LogicVRegister index_reg = dup_element(kFormat4S, temp, src2, index); + fmulx(vform, dst, src1, index_reg); + } else { + VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize); + LogicVRegister index_reg = dup_element(kFormat2D, temp, src2, index); + fmulx(vform, dst, src1, index_reg); + } + return dst; +} + + +LogicVRegister Simulator::frint(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src, + FPRounding rounding_mode, + bool inexact_exception, + FrintMode frint_mode) { + dst.ClearForWrite(vform); + if (LaneSizeInBitsFromFormat(vform) == kHRegSize) { + VIXL_ASSERT(frint_mode == kFrintToInteger); + for (int i = 0; i < LaneCountFromFormat(vform); i++) { + SimFloat16 input = src.Float(i); + SimFloat16 rounded = FPRoundInt(input, rounding_mode); + if (inexact_exception && !IsNaN(input) && (input != rounded)) { + FPProcessException(); + } + dst.SetFloat(i, rounded); + } + } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) { + for (int i = 0; i < LaneCountFromFormat(vform); i++) { + float input = src.Float(i); + float rounded = FPRoundInt(input, rounding_mode, frint_mode); + + if (inexact_exception && !IsNaN(input) && (input != rounded)) { + FPProcessException(); + } + dst.SetFloat(i, rounded); + } + } else { + VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize); + for (int i = 0; i < LaneCountFromFormat(vform); i++) { + double input = src.Float(i); + double rounded = FPRoundInt(input, rounding_mode, frint_mode); + if (inexact_exception && !IsNaN(input) && (input != rounded)) { + FPProcessException(); + } + dst.SetFloat(i, rounded); + } + } + return dst; +} + +LogicVRegister Simulator::fcvt(VectorFormat dst_vform, + VectorFormat src_vform, + LogicVRegister dst, + const LogicPRegister& pg, + const LogicVRegister& 
src) { + unsigned dst_data_size_in_bits = LaneSizeInBitsFromFormat(dst_vform); + unsigned src_data_size_in_bits = LaneSizeInBitsFromFormat(src_vform); + VectorFormat vform = SVEFormatFromLaneSizeInBits( + std::max(dst_data_size_in_bits, src_data_size_in_bits)); + + for (int i = 0; i < LaneCountFromFormat(vform); i++) { + if (!pg.IsActive(vform, i)) continue; + + uint64_t src_raw_bits = ExtractUnsignedBitfield64(src_data_size_in_bits - 1, + 0, + src.Uint(vform, i)); + double dst_value = + RawbitsWithSizeToFP(src_data_size_in_bits, src_raw_bits); + + uint64_t dst_raw_bits = + FPToRawbitsWithSize(dst_data_size_in_bits, dst_value); + + dst.SetUint(vform, i, dst_raw_bits); + } + + return dst; +} + +LogicVRegister Simulator::fcvts(VectorFormat vform, + unsigned dst_data_size_in_bits, + unsigned src_data_size_in_bits, + LogicVRegister dst, + const LogicPRegister& pg, + const LogicVRegister& src, + FPRounding round, + int fbits) { + VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) >= dst_data_size_in_bits); + VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) >= src_data_size_in_bits); + + for (int i = 0; i < LaneCountFromFormat(vform); i++) { + if (!pg.IsActive(vform, i)) continue; + + uint64_t value = ExtractUnsignedBitfield64(src_data_size_in_bits - 1, + 0, + src.Uint(vform, i)); + double result = RawbitsWithSizeToFP(src_data_size_in_bits, value) * + std::pow(2.0, fbits); + + switch (dst_data_size_in_bits) { + case kHRegSize: + dst.SetInt(vform, i, FPToInt16(result, round)); + break; + case kSRegSize: + dst.SetInt(vform, i, FPToInt32(result, round)); + break; + case kDRegSize: + dst.SetInt(vform, i, FPToInt64(result, round)); + break; + default: + VIXL_UNIMPLEMENTED(); + break; + } + } + + return dst; +} + +LogicVRegister Simulator::fcvts(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src, + FPRounding round, + int fbits) { + dst.ClearForWrite(vform); + return fcvts(vform, + LaneSizeInBitsFromFormat(vform), + LaneSizeInBitsFromFormat(vform), + dst, + GetPTrue(), + src, + round, + fbits); +} + +LogicVRegister Simulator::fcvtu(VectorFormat vform, + unsigned dst_data_size_in_bits, + unsigned src_data_size_in_bits, + LogicVRegister dst, + const LogicPRegister& pg, + const LogicVRegister& src, + FPRounding round, + int fbits) { + VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) >= dst_data_size_in_bits); + VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) >= src_data_size_in_bits); + + for (int i = 0; i < LaneCountFromFormat(vform); i++) { + if (!pg.IsActive(vform, i)) continue; + + uint64_t value = ExtractUnsignedBitfield64(src_data_size_in_bits - 1, + 0, + src.Uint(vform, i)); + double result = RawbitsWithSizeToFP(src_data_size_in_bits, value) * + std::pow(2.0, fbits); + + switch (dst_data_size_in_bits) { + case kHRegSize: + dst.SetUint(vform, i, FPToUInt16(result, round)); + break; + case kSRegSize: + dst.SetUint(vform, i, FPToUInt32(result, round)); + break; + case kDRegSize: + dst.SetUint(vform, i, FPToUInt64(result, round)); + break; + default: + VIXL_UNIMPLEMENTED(); + break; + } + } + + return dst; +} + +LogicVRegister Simulator::fcvtu(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src, + FPRounding round, + int fbits) { + dst.ClearForWrite(vform); + return fcvtu(vform, + LaneSizeInBitsFromFormat(vform), + LaneSizeInBitsFromFormat(vform), + dst, + GetPTrue(), + src, + round, + fbits); +} + +LogicVRegister Simulator::fcvtl(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src) { + if (LaneSizeInBitsFromFormat(vform) == kSRegSize) { + for (int i = 
LaneCountFromFormat(vform) - 1; i >= 0; i--) { + // TODO: Full support for SimFloat16 in SimRegister(s). + dst.SetFloat(i, + FPToFloat(RawbitsToFloat16(src.Float(i)), + ReadDN())); + } + } else { + VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize); + for (int i = LaneCountFromFormat(vform) - 1; i >= 0; i--) { + dst.SetFloat(i, FPToDouble(src.Float(i), ReadDN())); + } + } + return dst; +} + + +LogicVRegister Simulator::fcvtl2(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src) { + int lane_count = LaneCountFromFormat(vform); + if (LaneSizeInBitsFromFormat(vform) == kSRegSize) { + for (int i = 0; i < lane_count; i++) { + // TODO: Full support for SimFloat16 in SimRegister(s). + dst.SetFloat(i, + FPToFloat(RawbitsToFloat16( + src.Float(i + lane_count)), + ReadDN())); + } + } else { + VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize); + for (int i = 0; i < lane_count; i++) { + dst.SetFloat(i, FPToDouble(src.Float(i + lane_count), ReadDN())); + } + } + return dst; +} + + +LogicVRegister Simulator::fcvtn(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src) { + SimVRegister tmp; + LogicVRegister srctmp = mov(kFormat2D, tmp, src); + dst.ClearForWrite(vform); + if (LaneSizeInBitsFromFormat(vform) == kHRegSize) { + for (int i = 0; i < LaneCountFromFormat(vform); i++) { + dst.SetFloat(i, + Float16ToRawbits(FPToFloat16(srctmp.Float(i), + FPTieEven, + ReadDN()))); + } + } else { + VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kSRegSize); + for (int i = 0; i < LaneCountFromFormat(vform); i++) { + dst.SetFloat(i, FPToFloat(srctmp.Float(i), FPTieEven, ReadDN())); + } + } + return dst; +} + + +LogicVRegister Simulator::fcvtn2(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src) { + int lane_count = LaneCountFromFormat(vform) / 2; + if (LaneSizeInBitsFromFormat(vform) == kHRegSize) { + for (int i = lane_count - 1; i >= 0; i--) { + dst.SetFloat(i + lane_count, + Float16ToRawbits( + FPToFloat16(src.Float(i), FPTieEven, ReadDN()))); + } + } else { + VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kSRegSize); + for (int i = lane_count - 1; i >= 0; i--) { + dst.SetFloat(i + lane_count, + FPToFloat(src.Float(i), FPTieEven, ReadDN())); + } + } + return dst; +} + + +LogicVRegister Simulator::fcvtxn(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src) { + SimVRegister tmp; + LogicVRegister srctmp = mov(kFormat2D, tmp, src); + int input_lane_count = LaneCountFromFormat(vform); + if (IsSVEFormat(vform)) { + mov(kFormatVnB, tmp, src); + input_lane_count /= 2; + } + + dst.ClearForWrite(vform); + VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kSRegSize); + + for (int i = 0; i < input_lane_count; i++) { + dst.SetFloat(i, FPToFloat(srctmp.Float(i), FPRoundOdd, ReadDN())); + } + return dst; +} + + +LogicVRegister Simulator::fcvtxn2(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src) { + VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kSRegSize); + int lane_count = LaneCountFromFormat(vform) / 2; + for (int i = lane_count - 1; i >= 0; i--) { + dst.SetFloat(i + lane_count, + FPToFloat(src.Float(i), FPRoundOdd, ReadDN())); + } + return dst; +} + + +// Based on reference C function recip_sqrt_estimate from ARM ARM. 
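+// The scaled input is expected to lie in [0.25, 1.0); the return value is an
+// estimate of 1/sqrt(a) quantised to a multiple of 1/256 (roughly 8 bits of
+// precision), as used by FPRecipSqrtEstimate and ursqrte below.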
+double Simulator::recip_sqrt_estimate(double a) { + int quot0, quot1, s; + double r; + if (a < 0.5) { + quot0 = static_cast(a * 512.0); + r = 1.0 / sqrt((static_cast(quot0) + 0.5) / 512.0); + } else { + quot1 = static_cast(a * 256.0); + r = 1.0 / sqrt((static_cast(quot1) + 0.5) / 256.0); + } + s = static_cast(256.0 * r + 0.5); + return static_cast(s) / 256.0; +} + + +static inline uint64_t Bits(uint64_t val, int start_bit, int end_bit) { + return ExtractUnsignedBitfield64(start_bit, end_bit, val); +} + + +template +T Simulator::FPRecipSqrtEstimate(T op) { + if (IsNaN(op)) { + return FPProcessNaN(op); + } else if (op == 0.0) { + if (copysign(1.0, op) < 0.0) { + return kFP64NegativeInfinity; + } else { + return kFP64PositiveInfinity; + } + } else if (copysign(1.0, op) < 0.0) { + FPProcessException(); + return FPDefaultNaN(); + } else if (IsInf(op)) { + return 0.0; + } else { + uint64_t fraction; + int exp, result_exp; + + if (IsFloat16()) { + exp = Float16Exp(op); + fraction = Float16Mantissa(op); + fraction <<= 42; + } else if (IsFloat32()) { + exp = FloatExp(op); + fraction = FloatMantissa(op); + fraction <<= 29; + } else { + VIXL_ASSERT(IsFloat64()); + exp = DoubleExp(op); + fraction = DoubleMantissa(op); + } + + if (exp == 0) { + while (Bits(fraction, 51, 51) == 0) { + fraction = Bits(fraction, 50, 0) << 1; + exp -= 1; + } + fraction = Bits(fraction, 50, 0) << 1; + } + + double scaled; + if (Bits(exp, 0, 0) == 0) { + scaled = DoublePack(0, 1022, Bits(fraction, 51, 44) << 44); + } else { + scaled = DoublePack(0, 1021, Bits(fraction, 51, 44) << 44); + } + + if (IsFloat16()) { + result_exp = (44 - exp) / 2; + } else if (IsFloat32()) { + result_exp = (380 - exp) / 2; + } else { + VIXL_ASSERT(IsFloat64()); + result_exp = (3068 - exp) / 2; + } + + uint64_t estimate = DoubleToRawbits(recip_sqrt_estimate(scaled)); + + if (IsFloat16()) { + uint16_t exp_bits = static_cast(Bits(result_exp, 4, 0)); + uint16_t est_bits = static_cast(Bits(estimate, 51, 42)); + return Float16Pack(0, exp_bits, est_bits); + } else if (IsFloat32()) { + uint32_t exp_bits = static_cast(Bits(result_exp, 7, 0)); + uint32_t est_bits = static_cast(Bits(estimate, 51, 29)); + return FloatPack(0, exp_bits, est_bits); + } else { + VIXL_ASSERT(IsFloat64()); + return DoublePack(0, Bits(result_exp, 10, 0), Bits(estimate, 51, 0)); + } + } +} + + +LogicVRegister Simulator::frsqrte(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src) { + dst.ClearForWrite(vform); + if (LaneSizeInBitsFromFormat(vform) == kHRegSize) { + for (int i = 0; i < LaneCountFromFormat(vform); i++) { + SimFloat16 input = src.Float(i); + dst.SetFloat(vform, i, FPRecipSqrtEstimate(input)); + } + } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) { + for (int i = 0; i < LaneCountFromFormat(vform); i++) { + float input = src.Float(i); + dst.SetFloat(vform, i, FPRecipSqrtEstimate(input)); + } + } else { + VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize); + for (int i = 0; i < LaneCountFromFormat(vform); i++) { + double input = src.Float(i); + dst.SetFloat(vform, i, FPRecipSqrtEstimate(input)); + } + } + return dst; +} + +template +T Simulator::FPRecipEstimate(T op, FPRounding rounding) { + uint32_t sign; + + if (IsFloat16()) { + sign = Float16Sign(op); + } else if (IsFloat32()) { + sign = FloatSign(op); + } else { + VIXL_ASSERT(IsFloat64()); + sign = DoubleSign(op); + } + + if (IsNaN(op)) { + return FPProcessNaN(op); + } else if (IsInf(op)) { + return (sign == 1) ? 
-0.0 : 0.0; + } else if (op == 0.0) { + FPProcessException(); // FPExc_DivideByZero exception. + return (sign == 1) ? kFP64NegativeInfinity : kFP64PositiveInfinity; + } else if ((IsFloat16() && (std::fabs(op) < std::pow(2.0, -16.0))) || + (IsFloat32() && (std::fabs(op) < std::pow(2.0, -128.0))) || + (IsFloat64() && (std::fabs(op) < std::pow(2.0, -1024.0)))) { + bool overflow_to_inf = false; + switch (rounding) { + case FPTieEven: + overflow_to_inf = true; + break; + case FPPositiveInfinity: + overflow_to_inf = (sign == 0); + break; + case FPNegativeInfinity: + overflow_to_inf = (sign == 1); + break; + case FPZero: + overflow_to_inf = false; + break; + default: + break; + } + FPProcessException(); // FPExc_Overflow and FPExc_Inexact. + if (overflow_to_inf) { + return (sign == 1) ? kFP64NegativeInfinity : kFP64PositiveInfinity; + } else { + // Return FPMaxNormal(sign). + if (IsFloat16()) { + return Float16Pack(sign, 0x1f, 0x3ff); + } else if (IsFloat32()) { + return FloatPack(sign, 0xfe, 0x07fffff); + } else { + VIXL_ASSERT(IsFloat64()); + return DoublePack(sign, 0x7fe, 0x0fffffffffffffl); + } + } + } else { + uint64_t fraction; + int exp, result_exp; + + if (IsFloat16()) { + sign = Float16Sign(op); + exp = Float16Exp(op); + fraction = Float16Mantissa(op); + fraction <<= 42; + } else if (IsFloat32()) { + sign = FloatSign(op); + exp = FloatExp(op); + fraction = FloatMantissa(op); + fraction <<= 29; + } else { + VIXL_ASSERT(IsFloat64()); + sign = DoubleSign(op); + exp = DoubleExp(op); + fraction = DoubleMantissa(op); + } + + if (exp == 0) { + if (Bits(fraction, 51, 51) == 0) { + exp -= 1; + fraction = Bits(fraction, 49, 0) << 2; + } else { + fraction = Bits(fraction, 50, 0) << 1; + } + } + + double scaled = DoublePack(0, 1022, Bits(fraction, 51, 44) << 44); + + if (IsFloat16()) { + result_exp = (29 - exp); // In range 29-30 = -1 to 29+1 = 30. + } else if (IsFloat32()) { + result_exp = (253 - exp); // In range 253-254 = -1 to 253+1 = 254. + } else { + VIXL_ASSERT(IsFloat64()); + result_exp = (2045 - exp); // In range 2045-2046 = -1 to 2045+1 = 2046. 
+ } + + double estimate = recip_estimate(scaled); + + fraction = DoubleMantissa(estimate); + if (result_exp == 0) { + fraction = (UINT64_C(1) << 51) | Bits(fraction, 51, 1); + } else if (result_exp == -1) { + fraction = (UINT64_C(1) << 50) | Bits(fraction, 51, 2); + result_exp = 0; + } + if (IsFloat16()) { + uint16_t exp_bits = static_cast(Bits(result_exp, 4, 0)); + uint16_t frac_bits = static_cast(Bits(fraction, 51, 42)); + return Float16Pack(sign, exp_bits, frac_bits); + } else if (IsFloat32()) { + uint32_t exp_bits = static_cast(Bits(result_exp, 7, 0)); + uint32_t frac_bits = static_cast(Bits(fraction, 51, 29)); + return FloatPack(sign, exp_bits, frac_bits); + } else { + VIXL_ASSERT(IsFloat64()); + return DoublePack(sign, Bits(result_exp, 10, 0), Bits(fraction, 51, 0)); + } + } +} + + +LogicVRegister Simulator::frecpe(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src, + FPRounding round) { + dst.ClearForWrite(vform); + if (LaneSizeInBitsFromFormat(vform) == kHRegSize) { + for (int i = 0; i < LaneCountFromFormat(vform); i++) { + SimFloat16 input = src.Float(i); + dst.SetFloat(vform, i, FPRecipEstimate(input, round)); + } + } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) { + for (int i = 0; i < LaneCountFromFormat(vform); i++) { + float input = src.Float(i); + dst.SetFloat(vform, i, FPRecipEstimate(input, round)); + } + } else { + VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize); + for (int i = 0; i < LaneCountFromFormat(vform); i++) { + double input = src.Float(i); + dst.SetFloat(vform, i, FPRecipEstimate(input, round)); + } + } + return dst; +} + + +LogicVRegister Simulator::ursqrte(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src) { + dst.ClearForWrite(vform); + uint64_t operand; + uint32_t result; + double dp_operand, dp_result; + for (int i = 0; i < LaneCountFromFormat(vform); i++) { + operand = src.Uint(vform, i); + if (operand <= 0x3FFFFFFF) { + result = 0xFFFFFFFF; + } else { + dp_operand = operand * std::pow(2.0, -32); + dp_result = recip_sqrt_estimate(dp_operand) * std::pow(2.0, 31); + result = static_cast(dp_result); + } + dst.SetUint(vform, i, result); + } + return dst; +} + + +// Based on reference C function recip_estimate from ARM ARM. 
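+// The scaled input is expected to lie in [0.5, 1.0); the return value is an
+// estimate of 1/a quantised to a multiple of 1/256, as used by
+// FPRecipEstimate above and urecpe below.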
+double Simulator::recip_estimate(double a) { + int q, s; + double r; + q = static_cast(a * 512.0); + r = 1.0 / ((static_cast(q) + 0.5) / 512.0); + s = static_cast(256.0 * r + 0.5); + return static_cast(s) / 256.0; +} + + +LogicVRegister Simulator::urecpe(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src) { + dst.ClearForWrite(vform); + uint64_t operand; + uint32_t result; + double dp_operand, dp_result; + for (int i = 0; i < LaneCountFromFormat(vform); i++) { + operand = src.Uint(vform, i); + if (operand <= 0x7FFFFFFF) { + result = 0xFFFFFFFF; + } else { + dp_operand = operand * std::pow(2.0, -32); + dp_result = recip_estimate(dp_operand) * std::pow(2.0, 31); + result = static_cast(dp_result); + } + dst.SetUint(vform, i, result); + } + return dst; +} + +LogicPRegister Simulator::pfalse(LogicPRegister dst) { + dst.Clear(); + return dst; +} + +LogicPRegister Simulator::pfirst(LogicPRegister dst, + const LogicPRegister& pg, + const LogicPRegister& src) { + int first_pg = GetFirstActive(kFormatVnB, pg); + VIXL_ASSERT(first_pg < LaneCountFromFormat(kFormatVnB)); + mov(dst, src); + if (first_pg >= 0) dst.SetActive(kFormatVnB, first_pg, true); + return dst; +} + +LogicPRegister Simulator::ptrue(VectorFormat vform, + LogicPRegister dst, + int pattern) { + int count = GetPredicateConstraintLaneCount(vform, pattern); + for (int i = 0; i < LaneCountFromFormat(vform); i++) { + dst.SetActive(vform, i, i < count); + } + return dst; +} + +LogicPRegister Simulator::pnext(VectorFormat vform, + LogicPRegister dst, + const LogicPRegister& pg, + const LogicPRegister& src) { + int next = GetLastActive(vform, src) + 1; + while (next < LaneCountFromFormat(vform)) { + if (pg.IsActive(vform, next)) break; + next++; + } + + for (int i = 0; i < LaneCountFromFormat(vform); i++) { + dst.SetActive(vform, i, (i == next)); + } + return dst; +} + +template +LogicVRegister Simulator::frecpx(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src) { + dst.ClearForWrite(vform); + for (int i = 0; i < LaneCountFromFormat(vform); i++) { + T op = src.Float(i); + T result; + if (IsNaN(op)) { + result = FPProcessNaN(op); + } else { + int exp; + uint32_t sign; + if (IsFloat16()) { + sign = Float16Sign(op); + exp = Float16Exp(op); + exp = (exp == 0) ? (0x1F - 1) : static_cast(Bits(~exp, 4, 0)); + result = Float16Pack(sign, exp, 0); + } else if (IsFloat32()) { + sign = FloatSign(op); + exp = FloatExp(op); + exp = (exp == 0) ? (0xFF - 1) : static_cast(Bits(~exp, 7, 0)); + result = FloatPack(sign, exp, 0); + } else { + VIXL_ASSERT(IsFloat64()); + sign = DoubleSign(op); + exp = DoubleExp(op); + exp = (exp == 0) ? 
(0x7FF - 1) : static_cast(Bits(~exp, 10, 0)); + result = DoublePack(sign, exp, 0); + } + } + dst.SetFloat(i, result); + } + return dst; +} + + +LogicVRegister Simulator::frecpx(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src) { + if (LaneSizeInBitsFromFormat(vform) == kHRegSize) { + frecpx(vform, dst, src); + } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) { + frecpx(vform, dst, src); + } else { + VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize); + frecpx(vform, dst, src); + } + return dst; +} + +LogicVRegister Simulator::flogb(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src) { + for (int i = 0; i < LaneCountFromFormat(vform); i++) { + double op = 0.0; + switch (vform) { + case kFormatVnH: + op = FPToDouble(src.Float(i), kIgnoreDefaultNaN); + break; + case kFormatVnS: + op = src.Float(i); + break; + case kFormatVnD: + op = src.Float(i); + break; + default: + VIXL_UNREACHABLE(); + } + + switch (std::fpclassify(op)) { + case FP_INFINITE: + dst.SetInt(vform, i, MaxIntFromFormat(vform)); + break; + case FP_NAN: + case FP_ZERO: + dst.SetInt(vform, i, MinIntFromFormat(vform)); + break; + case FP_SUBNORMAL: { + // DoubleMantissa returns the mantissa of its input, leaving 12 zero + // bits where the sign and exponent would be. We subtract 12 to + // find the number of leading zero bits in the mantissa itself. + int64_t mant_zero_count = CountLeadingZeros(DoubleMantissa(op)) - 12; + // Log2 of a subnormal is the lowest exponent a normal number can + // represent, together with the zeros in the mantissa. + dst.SetInt(vform, i, -1023 - mant_zero_count); + break; + } + case FP_NORMAL: + // Log2 of a normal number is the exponent minus the bias. + dst.SetInt(vform, i, static_cast(DoubleExp(op)) - 1023); + break; + } + } + return dst; +} + +LogicVRegister Simulator::ftsmul(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2) { + SimVRegister maybe_neg_src1; + + // The bottom bit of src2 controls the sign of the result. Use it to + // conditionally invert the sign of one `fmul` operand. + shl(vform, maybe_neg_src1, src2, LaneSizeInBitsFromFormat(vform) - 1); + eor(vform, maybe_neg_src1, maybe_neg_src1, src1); + + // Multiply src1 by the modified neg_src1, which is potentially its negation. + // In the case of NaNs, NaN * -NaN will return the first NaN intact, so src1, + // rather than neg_src1, must be the first source argument. + fmul(vform, dst, src1, maybe_neg_src1); + + return dst; +} + +LogicVRegister Simulator::ftssel(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2) { + unsigned lane_bits = LaneSizeInBitsFromFormat(vform); + uint64_t sign_bit = UINT64_C(1) << (lane_bits - 1); + uint64_t one; + + if (lane_bits == kHRegSize) { + one = Float16ToRawbits(Float16(1.0)); + } else if (lane_bits == kSRegSize) { + one = FloatToRawbits(1.0); + } else { + VIXL_ASSERT(lane_bits == kDRegSize); + one = DoubleToRawbits(1.0); + } + + for (int i = 0; i < LaneCountFromFormat(vform); i++) { + // Use integer accessors for this operation, as this is a data manipulation + // task requiring no calculation. + uint64_t op = src1.Uint(vform, i); + + // Only the bottom two bits of the src2 register are significant, indicating + // the quadrant. Bit 0 controls whether src1 or 1.0 is written to dst. Bit 1 + // determines the sign of the value written to dst. 
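+    // For example, q == 2 keeps src1's magnitude but flips its sign bit,
+    // while q == 3 produces -1.0.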
+ uint64_t q = src2.Uint(vform, i); + if ((q & 1) == 1) op = one; + if ((q & 2) == 2) op ^= sign_bit; + + dst.SetUint(vform, i, op); + } + + return dst; +} + +template +LogicVRegister Simulator::FTMaddHelper(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2, + uint64_t coeff_pos, + uint64_t coeff_neg) { + SimVRegister zero; + dup_immediate(kFormatVnB, zero, 0); + + SimVRegister cf; + SimVRegister cfn; + dup_immediate(vform, cf, coeff_pos); + dup_immediate(vform, cfn, coeff_neg); + + // The specification requires testing the top bit of the raw value, rather + // than the sign of the floating point number, so use an integer comparison + // here. + SimPRegister is_neg; + SVEIntCompareVectorsHelper(lt, + vform, + is_neg, + GetPTrue(), + src2, + zero, + false, + LeaveFlags); + mov_merging(vform, cf, is_neg, cfn); + + SimVRegister temp; + fabs_(vform, temp, src2); + fmla(vform, cf, cf, src1, temp); + mov(vform, dst, cf); + return dst; +} + + +LogicVRegister Simulator::ftmad(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2, + unsigned index) { + static const uint64_t ftmad_coeff16[] = {0x3c00, + 0xb155, + 0x2030, + 0x0000, + 0x0000, + 0x0000, + 0x0000, + 0x0000, + 0x3c00, + 0xb800, + 0x293a, + 0x0000, + 0x0000, + 0x0000, + 0x0000, + 0x0000}; + + static const uint64_t ftmad_coeff32[] = {0x3f800000, + 0xbe2aaaab, + 0x3c088886, + 0xb95008b9, + 0x36369d6d, + 0x00000000, + 0x00000000, + 0x00000000, + 0x3f800000, + 0xbf000000, + 0x3d2aaaa6, + 0xbab60705, + 0x37cd37cc, + 0x00000000, + 0x00000000, + 0x00000000}; + + static const uint64_t ftmad_coeff64[] = {0x3ff0000000000000, + 0xbfc5555555555543, + 0x3f8111111110f30c, + 0xbf2a01a019b92fc6, + 0x3ec71de351f3d22b, + 0xbe5ae5e2b60f7b91, + 0x3de5d8408868552f, + 0x0000000000000000, + 0x3ff0000000000000, + 0xbfe0000000000000, + 0x3fa5555555555536, + 0xbf56c16c16c13a0b, + 0x3efa01a019b1e8d8, + 0xbe927e4f7282f468, + 0x3e21ee96d2641b13, + 0xbda8f76380fbb401}; + VIXL_ASSERT((index + 8) < ArrayLength(ftmad_coeff64)); + VIXL_ASSERT(ArrayLength(ftmad_coeff16) == ArrayLength(ftmad_coeff64)); + VIXL_ASSERT(ArrayLength(ftmad_coeff32) == ArrayLength(ftmad_coeff64)); + + if (LaneSizeInBitsFromFormat(vform) == kHRegSize) { + FTMaddHelper(vform, + dst, + src1, + src2, + ftmad_coeff16[index], + ftmad_coeff16[index + 8]); + } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) { + FTMaddHelper(vform, + dst, + src1, + src2, + ftmad_coeff32[index], + ftmad_coeff32[index + 8]); + } else { + VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize); + FTMaddHelper(vform, + dst, + src1, + src2, + ftmad_coeff64[index], + ftmad_coeff64[index + 8]); + } + return dst; +} + +LogicVRegister Simulator::fexpa(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src) { + static const uint64_t fexpa_coeff16[] = {0x0000, 0x0016, 0x002d, 0x0045, + 0x005d, 0x0075, 0x008e, 0x00a8, + 0x00c2, 0x00dc, 0x00f8, 0x0114, + 0x0130, 0x014d, 0x016b, 0x0189, + 0x01a8, 0x01c8, 0x01e8, 0x0209, + 0x022b, 0x024e, 0x0271, 0x0295, + 0x02ba, 0x02e0, 0x0306, 0x032e, + 0x0356, 0x037f, 0x03a9, 0x03d4}; + + static const uint64_t fexpa_coeff32[] = + {0x000000, 0x0164d2, 0x02cd87, 0x043a29, 0x05aac3, 0x071f62, 0x08980f, + 0x0a14d5, 0x0b95c2, 0x0d1adf, 0x0ea43a, 0x1031dc, 0x11c3d3, 0x135a2b, + 0x14f4f0, 0x16942d, 0x1837f0, 0x19e046, 0x1b8d3a, 0x1d3eda, 0x1ef532, + 0x20b051, 0x227043, 0x243516, 0x25fed7, 0x27cd94, 0x29a15b, 0x2b7a3a, + 0x2d583f, 0x2f3b79, 0x3123f6, 0x3311c4, 0x3504f3, 0x36fd92, 
0x38fbaf, + 0x3aff5b, 0x3d08a4, 0x3f179a, 0x412c4d, 0x4346cd, 0x45672a, 0x478d75, + 0x49b9be, 0x4bec15, 0x4e248c, 0x506334, 0x52a81e, 0x54f35b, 0x5744fd, + 0x599d16, 0x5bfbb8, 0x5e60f5, 0x60ccdf, 0x633f89, 0x65b907, 0x68396a, + 0x6ac0c7, 0x6d4f30, 0x6fe4ba, 0x728177, 0x75257d, 0x77d0df, 0x7a83b3, + 0x7d3e0c}; + + static const uint64_t fexpa_coeff64[] = + {0X0000000000000, 0X02c9a3e778061, 0X059b0d3158574, 0X0874518759bc8, + 0X0b5586cf9890f, 0X0e3ec32d3d1a2, 0X11301d0125b51, 0X1429aaea92de0, + 0X172b83c7d517b, 0X1a35beb6fcb75, 0X1d4873168b9aa, 0X2063b88628cd6, + 0X2387a6e756238, 0X26b4565e27cdd, 0X29e9df51fdee1, 0X2d285a6e4030b, + 0X306fe0a31b715, 0X33c08b26416ff, 0X371a7373aa9cb, 0X3a7db34e59ff7, + 0X3dea64c123422, 0X4160a21f72e2a, 0X44e086061892d, 0X486a2b5c13cd0, + 0X4bfdad5362a27, 0X4f9b2769d2ca7, 0X5342b569d4f82, 0X56f4736b527da, + 0X5ab07dd485429, 0X5e76f15ad2148, 0X6247eb03a5585, 0X6623882552225, + 0X6a09e667f3bcd, 0X6dfb23c651a2f, 0X71f75e8ec5f74, 0X75feb564267c9, + 0X7a11473eb0187, 0X7e2f336cf4e62, 0X82589994cce13, 0X868d99b4492ed, + 0X8ace5422aa0db, 0X8f1ae99157736, 0X93737b0cdc5e5, 0X97d829fde4e50, + 0X9c49182a3f090, 0Xa0c667b5de565, 0Xa5503b23e255d, 0Xa9e6b5579fdbf, + 0Xae89f995ad3ad, 0Xb33a2b84f15fb, 0Xb7f76f2fb5e47, 0Xbcc1e904bc1d2, + 0Xc199bdd85529c, 0Xc67f12e57d14b, 0Xcb720dcef9069, 0Xd072d4a07897c, + 0Xd5818dcfba487, 0Xda9e603db3285, 0Xdfc97337b9b5f, 0Xe502ee78b3ff6, + 0Xea4afa2a490da, 0Xefa1bee615a27, 0Xf50765b6e4540, 0Xfa7c1819e90d8}; + + unsigned lane_size = LaneSizeInBitsFromFormat(vform); + int index_highbit = 5; + int op_highbit, op_shift; + const uint64_t* fexpa_coeff; + + if (lane_size == kHRegSize) { + index_highbit = 4; + VIXL_ASSERT(ArrayLength(fexpa_coeff16) == (1U << (index_highbit + 1))); + fexpa_coeff = fexpa_coeff16; + op_highbit = 9; + op_shift = 10; + } else if (lane_size == kSRegSize) { + VIXL_ASSERT(ArrayLength(fexpa_coeff32) == (1U << (index_highbit + 1))); + fexpa_coeff = fexpa_coeff32; + op_highbit = 13; + op_shift = 23; + } else { + VIXL_ASSERT(lane_size == kDRegSize); + VIXL_ASSERT(ArrayLength(fexpa_coeff64) == (1U << (index_highbit + 1))); + fexpa_coeff = fexpa_coeff64; + op_highbit = 16; + op_shift = 52; + } + + for (int i = 0; i < LaneCountFromFormat(vform); i++) { + uint64_t op = src.Uint(vform, i); + uint64_t result = fexpa_coeff[Bits(op, index_highbit, 0)]; + result |= (Bits(op, op_highbit, index_highbit + 1) << op_shift); + dst.SetUint(vform, i, result); + } + return dst; +} + +template +LogicVRegister Simulator::fscale(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2) { + T two = T(2.0); + for (int i = 0; i < LaneCountFromFormat(vform); i++) { + T src1_val = src1.Float(i); + if (!IsNaN(src1_val)) { + int64_t scale = src2.Int(vform, i); + // TODO: this is a low-performance implementation, but it's simple and + // less likely to be buggy. Consider replacing it with something faster. + + // Scales outside of these bounds become infinity or zero, so there's no + // point iterating further. + scale = std::min(std::max(scale, -2048), 2048); + + // Compute src1_val * 2 ^ scale. If scale is positive, multiply by two and + // decrement scale until it's zero. + while (scale-- > 0) { + src1_val = FPMul(src1_val, two); + } + + // If scale is negative, divide by two and increment scale until it's + // zero. Initially, scale is (src2 - 1), so we pre-increment. 
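+ // Worked example (illustrative, not from the original sources): for
+ // src1 = 3.0 and scale = -2, the loop above never runs but its failed
+ // test still post-decrements scale to -3; the loop below then divides
+ // twice, giving 3.0 * 2^-2 = 0.75.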
+ while (++scale < 0) { + src1_val = FPDiv(src1_val, two); + } + } + dst.SetFloat(i, src1_val); + } + return dst; +} + +LogicVRegister Simulator::fscale(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2) { + if (LaneSizeInBitsFromFormat(vform) == kHRegSize) { + fscale(vform, dst, src1, src2); + } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) { + fscale(vform, dst, src1, src2); + } else { + VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize); + fscale(vform, dst, src1, src2); + } + return dst; +} + +LogicVRegister Simulator::scvtf(VectorFormat vform, + unsigned dst_data_size_in_bits, + unsigned src_data_size_in_bits, + LogicVRegister dst, + const LogicPRegister& pg, + const LogicVRegister& src, + FPRounding round, + int fbits) { + VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) >= dst_data_size_in_bits); + VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) >= src_data_size_in_bits); + dst.ClearForWrite(vform); + + for (int i = 0; i < LaneCountFromFormat(vform); i++) { + if (!pg.IsActive(vform, i)) continue; + + int64_t value = ExtractSignedBitfield64(src_data_size_in_bits - 1, + 0, + src.Uint(vform, i)); + + switch (dst_data_size_in_bits) { + case kHRegSize: { + SimFloat16 result = FixedToFloat16(value, fbits, round); + dst.SetUint(vform, i, Float16ToRawbits(result)); + break; + } + case kSRegSize: { + float result = FixedToFloat(value, fbits, round); + dst.SetUint(vform, i, FloatToRawbits(result)); + break; + } + case kDRegSize: { + double result = FixedToDouble(value, fbits, round); + dst.SetUint(vform, i, DoubleToRawbits(result)); + break; + } + default: + VIXL_UNIMPLEMENTED(); + break; + } + } + + return dst; +} + +LogicVRegister Simulator::scvtf(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src, + int fbits, + FPRounding round) { + return scvtf(vform, + LaneSizeInBitsFromFormat(vform), + LaneSizeInBitsFromFormat(vform), + dst, + GetPTrue(), + src, + round, + fbits); +} + +LogicVRegister Simulator::ucvtf(VectorFormat vform, + unsigned dst_data_size_in_bits, + unsigned src_data_size_in_bits, + LogicVRegister dst, + const LogicPRegister& pg, + const LogicVRegister& src, + FPRounding round, + int fbits) { + VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) >= dst_data_size_in_bits); + VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) >= src_data_size_in_bits); + dst.ClearForWrite(vform); + + for (int i = 0; i < LaneCountFromFormat(vform); i++) { + if (!pg.IsActive(vform, i)) continue; + + uint64_t value = ExtractUnsignedBitfield64(src_data_size_in_bits - 1, + 0, + src.Uint(vform, i)); + + switch (dst_data_size_in_bits) { + case kHRegSize: { + SimFloat16 result = UFixedToFloat16(value, fbits, round); + dst.SetUint(vform, i, Float16ToRawbits(result)); + break; + } + case kSRegSize: { + float result = UFixedToFloat(value, fbits, round); + dst.SetUint(vform, i, FloatToRawbits(result)); + break; + } + case kDRegSize: { + double result = UFixedToDouble(value, fbits, round); + dst.SetUint(vform, i, DoubleToRawbits(result)); + break; + } + default: + VIXL_UNIMPLEMENTED(); + break; + } + } + + return dst; +} + +LogicVRegister Simulator::ucvtf(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src, + int fbits, + FPRounding round) { + return ucvtf(vform, + LaneSizeInBitsFromFormat(vform), + LaneSizeInBitsFromFormat(vform), + dst, + GetPTrue(), + src, + round, + fbits); +} + +LogicVRegister Simulator::unpk(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src, + UnpackType unpack_type, + ExtendType 
extend_type) { + VectorFormat vform_half = VectorFormatHalfWidth(vform); + const int lane_count = LaneCountFromFormat(vform); + const int src_start_lane = (unpack_type == kLoHalf) ? 0 : lane_count; + + switch (extend_type) { + case kSignedExtend: { + int64_t result[kZRegMaxSizeInBytes]; + for (int i = 0; i < lane_count; ++i) { + result[i] = src.Int(vform_half, i + src_start_lane); + } + for (int i = 0; i < lane_count; ++i) { + dst.SetInt(vform, i, result[i]); + } + break; + } + case kUnsignedExtend: { + uint64_t result[kZRegMaxSizeInBytes]; + for (int i = 0; i < lane_count; ++i) { + result[i] = src.Uint(vform_half, i + src_start_lane); + } + for (int i = 0; i < lane_count; ++i) { + dst.SetUint(vform, i, result[i]); + } + break; + } + default: + VIXL_UNREACHABLE(); + } + return dst; +} + +LogicPRegister Simulator::SVEIntCompareVectorsHelper(Condition cond, + VectorFormat vform, + LogicPRegister dst, + const LogicPRegister& mask, + const LogicVRegister& src1, + const LogicVRegister& src2, + bool is_wide_elements, + FlagsUpdate flags) { + for (int lane = 0; lane < LaneCountFromFormat(vform); lane++) { + bool result = false; + if (mask.IsActive(vform, lane)) { + int64_t op1 = 0xbadbeef; + int64_t op2 = 0xbadbeef; + int d_lane = (lane * LaneSizeInBitsFromFormat(vform)) / kDRegSize; + switch (cond) { + case eq: + case ge: + case gt: + case lt: + case le: + case ne: + op1 = src1.Int(vform, lane); + op2 = is_wide_elements ? src2.Int(kFormatVnD, d_lane) + : src2.Int(vform, lane); + break; + case hi: + case hs: + case ls: + case lo: + op1 = src1.Uint(vform, lane); + op2 = is_wide_elements ? src2.Uint(kFormatVnD, d_lane) + : src2.Uint(vform, lane); + break; + default: + VIXL_UNREACHABLE(); + } + + switch (cond) { + case eq: + result = (op1 == op2); + break; + case ne: + result = (op1 != op2); + break; + case ge: + result = (op1 >= op2); + break; + case gt: + result = (op1 > op2); + break; + case le: + result = (op1 <= op2); + break; + case lt: + result = (op1 < op2); + break; + case hs: + result = (static_cast(op1) >= static_cast(op2)); + break; + case hi: + result = (static_cast(op1) > static_cast(op2)); + break; + case ls: + result = (static_cast(op1) <= static_cast(op2)); + break; + case lo: + result = (static_cast(op1) < static_cast(op2)); + break; + default: + VIXL_UNREACHABLE(); + } + } + dst.SetActive(vform, lane, result); + } + + if (flags == SetFlags) PredTest(vform, mask, dst); + + return dst; +} + +LogicVRegister Simulator::SVEBitwiseShiftHelper(Shift shift_op, + VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2, + bool is_wide_elements) { + unsigned lane_size = LaneSizeInBitsFromFormat(vform); + VectorFormat shift_vform = is_wide_elements ? kFormatVnD : vform; + + for (int lane = 0; lane < LaneCountFromFormat(vform); lane++) { + int shift_src_lane = lane; + if (is_wide_elements) { + // If the shift amount comes from wide elements, select the D-sized lane + // which occupies the corresponding lanes of the value to be shifted. + shift_src_lane = (lane * lane_size) / kDRegSize; + } + uint64_t shift_amount = src2.Uint(shift_vform, shift_src_lane); + + // Saturate shift_amount to the size of the lane that will be shifted. 
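+ // For illustration (editorial note): with B-sized lanes an encoded shift
+ // amount of, say, 200 is clamped to 8 here, so ShiftOperand below is
+ // never asked to shift by more than the width of the lane being shifted.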
+ if (shift_amount > lane_size) shift_amount = lane_size; + + uint64_t value = src1.Uint(vform, lane); + int64_t result = ShiftOperand(lane_size, + value, + shift_op, + static_cast(shift_amount)); + dst.SetUint(vform, lane, result); + } + + return dst; +} + +LogicVRegister Simulator::asrd(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + int shift) { + VIXL_ASSERT((shift > 0) && (static_cast(shift) <= + LaneSizeInBitsFromFormat(vform))); + + for (int i = 0; i < LaneCountFromFormat(vform); i++) { + int64_t value = src1.Int(vform, i); + if (shift <= 63) { + if (value < 0) { + // The max possible mask is 0x7fff'ffff'ffff'ffff, which can be safely + // cast to int64_t, and cannot cause signed overflow in the result. + value = value + GetUintMask(shift); + } + value = ShiftOperand(kDRegSize, value, ASR, shift); + } else { + value = 0; + } + dst.SetInt(vform, i, value); + } + return dst; +} + +LogicVRegister Simulator::SVEBitwiseLogicalUnpredicatedHelper( + LogicalOp logical_op, + VectorFormat vform, + LogicVRegister zd, + const LogicVRegister& zn, + const LogicVRegister& zm) { + VIXL_ASSERT(IsSVEFormat(vform)); + for (int i = 0; i < LaneCountFromFormat(vform); i++) { + uint64_t op1 = zn.Uint(vform, i); + uint64_t op2 = zm.Uint(vform, i); + uint64_t result = 0; + switch (logical_op) { + case AND: + result = op1 & op2; + break; + case BIC: + result = op1 & ~op2; + break; + case EOR: + result = op1 ^ op2; + break; + case ORR: + result = op1 | op2; + break; + default: + VIXL_UNIMPLEMENTED(); + } + zd.SetUint(vform, i, result); + } + + return zd; +} + +LogicPRegister Simulator::SVEPredicateLogicalHelper(SVEPredicateLogicalOp op, + LogicPRegister pd, + const LogicPRegister& pn, + const LogicPRegister& pm) { + for (int i = 0; i < pn.GetChunkCount(); i++) { + LogicPRegister::ChunkType op1 = pn.GetChunk(i); + LogicPRegister::ChunkType op2 = pm.GetChunk(i); + LogicPRegister::ChunkType result = 0; + switch (op) { + case ANDS_p_p_pp_z: + case AND_p_p_pp_z: + result = op1 & op2; + break; + case BICS_p_p_pp_z: + case BIC_p_p_pp_z: + result = op1 & ~op2; + break; + case EORS_p_p_pp_z: + case EOR_p_p_pp_z: + result = op1 ^ op2; + break; + case NANDS_p_p_pp_z: + case NAND_p_p_pp_z: + result = ~(op1 & op2); + break; + case NORS_p_p_pp_z: + case NOR_p_p_pp_z: + result = ~(op1 | op2); + break; + case ORNS_p_p_pp_z: + case ORN_p_p_pp_z: + result = op1 | ~op2; + break; + case ORRS_p_p_pp_z: + case ORR_p_p_pp_z: + result = op1 | op2; + break; + default: + VIXL_UNIMPLEMENTED(); + } + pd.SetChunk(i, result); + } + return pd; +} + +LogicVRegister Simulator::SVEBitwiseImmHelper( + SVEBitwiseLogicalWithImm_UnpredicatedOp op, + VectorFormat vform, + LogicVRegister zd, + uint64_t imm) { + for (int i = 0; i < LaneCountFromFormat(vform); i++) { + uint64_t op1 = zd.Uint(vform, i); + uint64_t result = 0; + switch (op) { + case AND_z_zi: + result = op1 & imm; + break; + case EOR_z_zi: + result = op1 ^ imm; + break; + case ORR_z_zi: + result = op1 | imm; + break; + default: + VIXL_UNIMPLEMENTED(); + } + zd.SetUint(vform, i, result); + } + + return zd; +} + +void Simulator::SVEStructuredStoreHelper(VectorFormat vform, + const LogicPRegister& pg, + unsigned zt_code, + const LogicSVEAddressVector& addr) { + VIXL_ASSERT(zt_code < kNumberOfZRegisters); + + int esize_in_bytes_log2 = LaneSizeInBytesLog2FromFormat(vform); + int msize_in_bytes_log2 = addr.GetMsizeInBytesLog2(); + int msize_in_bytes = addr.GetMsizeInBytes(); + int reg_count = addr.GetRegCount(); + + VIXL_ASSERT(esize_in_bytes_log2 >= 
msize_in_bytes_log2); + VIXL_ASSERT((reg_count >= 1) && (reg_count <= 4)); + + unsigned zt_codes[4] = {zt_code, + (zt_code + 1) % kNumberOfZRegisters, + (zt_code + 2) % kNumberOfZRegisters, + (zt_code + 3) % kNumberOfZRegisters}; + + LogicVRegister zt[4] = { + ReadVRegister(zt_codes[0]), + ReadVRegister(zt_codes[1]), + ReadVRegister(zt_codes[2]), + ReadVRegister(zt_codes[3]), + }; + + // For unpacked forms (e.g. `st1b { z0.h }, ...`, the upper parts of the lanes + // are ignored, so read the source register using the VectorFormat that + // corresponds with the storage format, and multiply the index accordingly. + VectorFormat unpack_vform = + SVEFormatFromLaneSizeInBytesLog2(msize_in_bytes_log2); + int unpack_shift = esize_in_bytes_log2 - msize_in_bytes_log2; + + for (int i = 0; i < LaneCountFromFormat(vform); i++) { + if (!pg.IsActive(vform, i)) continue; + + for (int r = 0; r < reg_count; r++) { + uint64_t element_address = addr.GetElementAddress(i, r); + StoreLane(zt[r], unpack_vform, i << unpack_shift, element_address); + } + } + + if (ShouldTraceWrites()) { + PrintRegisterFormat format = GetPrintRegisterFormat(vform); + if (esize_in_bytes_log2 == msize_in_bytes_log2) { + // Use an FP format where it's likely that we're accessing FP data. + format = GetPrintRegisterFormatTryFP(format); + } + // Stores don't represent a change to the source register's value, so only + // print the relevant part of the value. + format = GetPrintRegPartial(format); + + PrintZStructAccess(zt_code, + reg_count, + pg, + format, + msize_in_bytes, + "->", + addr); + } +} + +void Simulator::SVEStructuredLoadHelper(VectorFormat vform, + const LogicPRegister& pg, + unsigned zt_code, + const LogicSVEAddressVector& addr, + bool is_signed) { + int esize_in_bytes_log2 = LaneSizeInBytesLog2FromFormat(vform); + int msize_in_bytes_log2 = addr.GetMsizeInBytesLog2(); + int msize_in_bytes = addr.GetMsizeInBytes(); + int reg_count = addr.GetRegCount(); + + VIXL_ASSERT(zt_code < kNumberOfZRegisters); + VIXL_ASSERT(esize_in_bytes_log2 >= msize_in_bytes_log2); + VIXL_ASSERT((reg_count >= 1) && (reg_count <= 4)); + + unsigned zt_codes[4] = {zt_code, + (zt_code + 1) % kNumberOfZRegisters, + (zt_code + 2) % kNumberOfZRegisters, + (zt_code + 3) % kNumberOfZRegisters}; + LogicVRegister zt[4] = { + ReadVRegister(zt_codes[0]), + ReadVRegister(zt_codes[1]), + ReadVRegister(zt_codes[2]), + ReadVRegister(zt_codes[3]), + }; + + for (int i = 0; i < LaneCountFromFormat(vform); i++) { + for (int r = 0; r < reg_count; r++) { + uint64_t element_address = addr.GetElementAddress(i, r); + + if (!pg.IsActive(vform, i)) { + zt[r].SetUint(vform, i, 0); + continue; + } + + if (is_signed) { + LoadIntToLane(zt[r], vform, msize_in_bytes, i, element_address); + } else { + LoadUintToLane(zt[r], vform, msize_in_bytes, i, element_address); + } + } + } + + if (ShouldTraceVRegs()) { + PrintRegisterFormat format = GetPrintRegisterFormat(vform); + if ((esize_in_bytes_log2 == msize_in_bytes_log2) && !is_signed) { + // Use an FP format where it's likely that we're accessing FP data. 
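+ // For illustration (editorial note): a same-sized unsigned load such as
+ // ld1w into S lanes is traced with the FP-friendly format below, while
+ // sign-extending or unpacked loads (e.g. ld1sb into H lanes) keep the
+ // integer format.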
+ format = GetPrintRegisterFormatTryFP(format); + } + PrintZStructAccess(zt_code, + reg_count, + pg, + format, + msize_in_bytes, + "<-", + addr); + } +} + +LogicPRegister Simulator::brka(LogicPRegister pd, + const LogicPRegister& pg, + const LogicPRegister& pn) { + bool break_ = false; + for (int i = 0; i < LaneCountFromFormat(kFormatVnB); i++) { + if (pg.IsActive(kFormatVnB, i)) { + pd.SetActive(kFormatVnB, i, !break_); + break_ |= pn.IsActive(kFormatVnB, i); + } + } + + return pd; +} + +LogicPRegister Simulator::brkb(LogicPRegister pd, + const LogicPRegister& pg, + const LogicPRegister& pn) { + bool break_ = false; + for (int i = 0; i < LaneCountFromFormat(kFormatVnB); i++) { + if (pg.IsActive(kFormatVnB, i)) { + break_ |= pn.IsActive(kFormatVnB, i); + pd.SetActive(kFormatVnB, i, !break_); + } + } + + return pd; +} + +LogicPRegister Simulator::brkn(LogicPRegister pdm, + const LogicPRegister& pg, + const LogicPRegister& pn) { + if (!IsLastActive(kFormatVnB, pg, pn)) { + pfalse(pdm); + } + return pdm; +} + +LogicPRegister Simulator::brkpa(LogicPRegister pd, + const LogicPRegister& pg, + const LogicPRegister& pn, + const LogicPRegister& pm) { + bool last_active = IsLastActive(kFormatVnB, pg, pn); + + for (int i = 0; i < LaneCountFromFormat(kFormatVnB); i++) { + bool active = false; + if (pg.IsActive(kFormatVnB, i)) { + active = last_active; + last_active = last_active && !pm.IsActive(kFormatVnB, i); + } + pd.SetActive(kFormatVnB, i, active); + } + + return pd; +} + +LogicPRegister Simulator::brkpb(LogicPRegister pd, + const LogicPRegister& pg, + const LogicPRegister& pn, + const LogicPRegister& pm) { + bool last_active = IsLastActive(kFormatVnB, pg, pn); + + for (int i = 0; i < LaneCountFromFormat(kFormatVnB); i++) { + bool active = false; + if (pg.IsActive(kFormatVnB, i)) { + last_active = last_active && !pm.IsActive(kFormatVnB, i); + active = last_active; + } + pd.SetActive(kFormatVnB, i, active); + } + + return pd; +} + +void Simulator::SVEFaultTolerantLoadHelper(VectorFormat vform, + const LogicPRegister& pg, + unsigned zt_code, + const LogicSVEAddressVector& addr, + SVEFaultTolerantLoadType type, + bool is_signed) { + int esize_in_bytes = LaneSizeInBytesFromFormat(vform); + int msize_in_bits = addr.GetMsizeInBits(); + int msize_in_bytes = addr.GetMsizeInBytes(); + + VIXL_ASSERT(zt_code < kNumberOfZRegisters); + VIXL_ASSERT(esize_in_bytes >= msize_in_bytes); + VIXL_ASSERT(addr.GetRegCount() == 1); + + LogicVRegister zt = ReadVRegister(zt_code); + LogicPRegister ffr = ReadFFR(); + + // Non-faulting loads are allowed to fail arbitrarily. To stress user + // code, fail a random element in roughly one in eight full-vector loads. + uint32_t rnd = static_cast(jrand48(rand_state_)); + int fake_fault_at_lane = rnd % (LaneCountFromFormat(vform) * 8); + + for (int i = 0; i < LaneCountFromFormat(vform); i++) { + uint64_t value = 0; + + if (pg.IsActive(vform, i)) { + uint64_t element_address = addr.GetElementAddress(i, 0); + + if (type == kSVEFirstFaultLoad) { + // First-faulting loads always load the first active element, regardless + // of FFR. The result will be discarded if its FFR lane is inactive, but + // it could still generate a fault. + value = MemReadUint(msize_in_bytes, element_address); + // All subsequent elements have non-fault semantics. + type = kSVENonFaultLoad; + + } else if (ffr.IsActive(vform, i)) { + // Simulation of fault-tolerant loads relies on system calls, and is + // likely to be relatively slow, so we only actually perform the load if + // its FFR lane is active. 
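+ // For illustration (editorial note): `fake_fault_at_lane` was chosen
+ // above so that, in roughly one in eight full-vector loads, some active
+ // lane is treated as faulting even though its memory is readable, which
+ // exercises the FFR-clearing path below.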
+ + bool can_read = (i < fake_fault_at_lane) && + CanReadMemory(element_address, msize_in_bytes); + if (can_read) { + value = MemReadUint(msize_in_bytes, element_address); + } else { + // Propagate the fault to the end of FFR. + for (int j = i; j < LaneCountFromFormat(vform); j++) { + ffr.SetActive(vform, j, false); + } + } + } + } + + // The architecture permits a few possible results for inactive FFR lanes + // (including those caused by a fault in this instruction). We choose to + // leave the register value unchanged (like merging predication) because + // no other input to this instruction can have the same behaviour. + // + // Note that this behaviour takes precedence over pg's zeroing predication. + + if (ffr.IsActive(vform, i)) { + int msb = msize_in_bits - 1; + if (is_signed) { + zt.SetInt(vform, i, ExtractSignedBitfield64(msb, 0, value)); + } else { + zt.SetUint(vform, i, ExtractUnsignedBitfield64(msb, 0, value)); + } + } + } + + if (ShouldTraceVRegs()) { + PrintRegisterFormat format = GetPrintRegisterFormat(vform); + if ((esize_in_bytes == msize_in_bytes) && !is_signed) { + // Use an FP format where it's likely that we're accessing FP data. + format = GetPrintRegisterFormatTryFP(format); + } + // Log accessed lanes that are active in both pg and ffr. PrintZStructAccess + // expects a single mask, so combine the two predicates. + SimPRegister mask; + SVEPredicateLogicalHelper(AND_p_p_pp_z, mask, pg, ffr); + PrintZStructAccess(zt_code, 1, mask, format, msize_in_bytes, "<-", addr); + } +} + +void Simulator::SVEGatherLoadScalarPlusVectorHelper(const Instruction* instr, + VectorFormat vform, + SVEOffsetModifier mod) { + bool is_signed = instr->ExtractBit(14) == 0; + bool is_ff = instr->ExtractBit(13) == 1; + // Note that these instructions don't use the Dtype encoding. + int msize_in_bytes_log2 = instr->ExtractBits(24, 23); + int scale = instr->ExtractBit(21) * msize_in_bytes_log2; + uint64_t base = ReadXRegister(instr->GetRn(), Reg31IsStackPointer); + LogicSVEAddressVector addr(base, + &ReadVRegister(instr->GetRm()), + vform, + mod, + scale); + addr.SetMsizeInBytesLog2(msize_in_bytes_log2); + if (is_ff) { + SVEFaultTolerantLoadHelper(vform, + ReadPRegister(instr->GetPgLow8()), + instr->GetRt(), + addr, + kSVEFirstFaultLoad, + is_signed); + } else { + SVEStructuredLoadHelper(vform, + ReadPRegister(instr->GetPgLow8()), + instr->GetRt(), + addr, + is_signed); + } +} + +int Simulator::GetFirstActive(VectorFormat vform, + const LogicPRegister& pg) const { + for (int i = 0; i < LaneCountFromFormat(vform); i++) { + if (pg.IsActive(vform, i)) return i; + } + return -1; +} + +int Simulator::GetLastActive(VectorFormat vform, + const LogicPRegister& pg) const { + for (int i = LaneCountFromFormat(vform) - 1; i >= 0; i--) { + if (pg.IsActive(vform, i)) return i; + } + return -1; +} + +int Simulator::CountActiveLanes(VectorFormat vform, + const LogicPRegister& pg) const { + int count = 0; + for (int i = 0; i < LaneCountFromFormat(vform); i++) { + count += pg.IsActive(vform, i) ? 1 : 0; + } + return count; +} + +int Simulator::CountActiveAndTrueLanes(VectorFormat vform, + const LogicPRegister& pg, + const LogicPRegister& pn) const { + int count = 0; + for (int i = 0; i < LaneCountFromFormat(vform); i++) { + count += (pg.IsActive(vform, i) && pn.IsActive(vform, i)) ? 
1 : 0; + } + return count; +} + +int Simulator::GetPredicateConstraintLaneCount(VectorFormat vform, + int pattern) const { + VIXL_ASSERT(IsSVEFormat(vform)); + int all = LaneCountFromFormat(vform); + VIXL_ASSERT(all > 0); + + switch (pattern) { + case SVE_VL1: + case SVE_VL2: + case SVE_VL3: + case SVE_VL4: + case SVE_VL5: + case SVE_VL6: + case SVE_VL7: + case SVE_VL8: + // VL1-VL8 are encoded directly. + VIXL_STATIC_ASSERT(SVE_VL1 == 1); + VIXL_STATIC_ASSERT(SVE_VL8 == 8); + return (pattern <= all) ? pattern : 0; + case SVE_VL16: + case SVE_VL32: + case SVE_VL64: + case SVE_VL128: + case SVE_VL256: { + // VL16-VL256 are encoded as log2(N) + c. + int min = 16 << (pattern - SVE_VL16); + return (min <= all) ? min : 0; + } + // Special cases. + case SVE_POW2: + return 1 << HighestSetBitPosition(all); + case SVE_MUL4: + return all - (all % 4); + case SVE_MUL3: + return all - (all % 3); + case SVE_ALL: + return all; + } + // Unnamed cases architecturally return 0. + return 0; +} + +LogicPRegister Simulator::match(VectorFormat vform, + LogicPRegister dst, + const LogicVRegister& haystack, + const LogicVRegister& needles, + bool negate_match) { + SimVRegister ztemp; + SimPRegister ptemp; + + pfalse(dst); + int lanes_per_segment = kQRegSize / LaneSizeInBitsFromFormat(vform); + for (int i = 0; i < lanes_per_segment; i++) { + dup_elements_to_segments(vform, ztemp, needles, i); + SVEIntCompareVectorsHelper(eq, + vform, + ptemp, + GetPTrue(), + haystack, + ztemp, + false, + LeaveFlags); + SVEPredicateLogicalHelper(ORR_p_p_pp_z, dst, dst, ptemp); + } + if (negate_match) { + ptrue(vform, ptemp, SVE_ALL); + SVEPredicateLogicalHelper(EOR_p_p_pp_z, dst, dst, ptemp); + } + return dst; +} + +uint64_t LogicSVEAddressVector::GetStructAddress(int lane) const { + if (IsContiguous()) { + return base_ + (lane * GetRegCount()) * GetMsizeInBytes(); + } + + VIXL_ASSERT(IsScatterGather()); + VIXL_ASSERT(vector_ != NULL); + + // For scatter-gather accesses, we need to extract the offset from vector_, + // and apply modifiers. + + uint64_t offset = 0; + switch (vector_form_) { + case kFormatVnS: + offset = vector_->GetLane(lane); + break; + case kFormatVnD: + offset = vector_->GetLane(lane); + break; + default: + VIXL_UNIMPLEMENTED(); + break; + } + + switch (vector_mod_) { + case SVE_MUL_VL: + VIXL_UNIMPLEMENTED(); + break; + case SVE_LSL: + // We apply the shift below. There's nothing to do here. + break; + case NO_SVE_OFFSET_MODIFIER: + VIXL_ASSERT(vector_shift_ == 0); + break; + case SVE_UXTW: + offset = ExtractUnsignedBitfield64(kWRegSize - 1, 0, offset); + break; + case SVE_SXTW: + offset = ExtractSignedBitfield64(kWRegSize - 1, 0, offset); + break; + } + + return base_ + (offset << vector_shift_); +} + +LogicVRegister Simulator::pack_odd_elements(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src) { + SimVRegister zero; + zero.Clear(); + return uzp2(vform, dst, src, zero); +} + +LogicVRegister Simulator::pack_even_elements(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src) { + SimVRegister zero; + zero.Clear(); + return uzp1(vform, dst, src, zero); +} + +LogicVRegister Simulator::adcl(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2, + bool top) { + unsigned reg_size = LaneSizeInBitsFromFormat(vform); + VIXL_ASSERT((reg_size == kSRegSize) || (reg_size == kDRegSize)); + + for (int i = 0; i < LaneCountFromFormat(vform); i += 2) { + uint64_t left = src1.Uint(vform, i + (top ? 
1 : 0)); + uint64_t right = dst.Uint(vform, i); + unsigned carry_in = src2.Uint(vform, i + 1) & 1; + std::pair val_and_flags = + AddWithCarry(reg_size, left, right, carry_in); + + // Set even lanes to the result of the addition. + dst.SetUint(vform, i, val_and_flags.first); + + // Set odd lanes to the carry flag from the addition. + uint64_t carry_out = (val_and_flags.second >> 1) & 1; + dst.SetUint(vform, i + 1, carry_out); + } + return dst; +} + +// Multiply the 2x8 8-bit matrix in src1 by the 8x2 8-bit matrix in src2, add +// the 2x2 32-bit result to the matrix in srcdst, and write back to srcdst. +// +// Matrices of the form: +// +// src1 = ( a b c d e f g h ) src2 = ( A B ) +// ( i j k l m n o p ) ( C D ) +// ( E F ) +// ( G H ) +// ( I J ) +// ( K L ) +// ( M N ) +// ( O P ) +// +// Are stored in the input vector registers as: +// +// 15 14 13 12 11 10 9 8 7 6 5 4 3 2 1 0 +// src1 = [ p | o | n | m | l | k | j | i | h | g | f | e | d | c | b | a ] +// src2 = [ P | N | L | J | H | F | D | B | O | M | K | I | G | E | C | A ] +// +LogicVRegister Simulator::matmul(VectorFormat vform_dst, + LogicVRegister srcdst, + const LogicVRegister& src1, + const LogicVRegister& src2, + bool src1_signed, + bool src2_signed) { + // Two destination forms are supported: Q register containing four S-sized + // elements (4S) and Z register containing n S-sized elements (VnS). + VIXL_ASSERT((vform_dst == kFormat4S) || (vform_dst == kFormatVnS)); + VectorFormat vform_src = kFormatVnB; + int b_per_segment = kQRegSize / kBRegSize; + int s_per_segment = kQRegSize / kSRegSize; + int64_t result[kZRegMaxSizeInBytes / kSRegSizeInBytes] = {}; + int segment_count = LaneCountFromFormat(vform_dst) / 4; + for (int seg = 0; seg < segment_count; seg++) { + for (int i = 0; i < 2; i++) { + for (int j = 0; j < 2; j++) { + int dstidx = (2 * i) + j + (seg * s_per_segment); + int64_t sum = srcdst.Int(vform_dst, dstidx); + for (int k = 0; k < 8; k++) { + int idx1 = (8 * i) + k + (seg * b_per_segment); + int idx2 = (8 * j) + k + (seg * b_per_segment); + int64_t e1 = src1_signed ? src1.Int(vform_src, idx1) + : src1.Uint(vform_src, idx1); + int64_t e2 = src2_signed ? src2.Int(vform_src, idx2) + : src2.Uint(vform_src, idx2); + sum += e1 * e2; + } + result[dstidx] = sum; + } + } + } + srcdst.SetIntArray(vform_dst, result); + return srcdst; +} + +// Multiply the 2x2 FP matrix in src1 by the 2x2 FP matrix in src2, add the 2x2 +// result to the matrix in srcdst, and write back to srcdst. 
+// +// Matrices of the form: +// +// src1 = ( a b ) src2 = ( A B ) +// ( c d ) ( C D ) +// +// Are stored in the input vector registers as: +// +// 3 2 1 0 +// src1 = [ d | c | b | a ] +// src2 = [ D | B | C | A ] +// +template +LogicVRegister Simulator::fmatmul(VectorFormat vform, + LogicVRegister srcdst, + const LogicVRegister& src1, + const LogicVRegister& src2) { + T result[kZRegMaxSizeInBytes / sizeof(T)]; + int T_per_segment = 4; + int segment_count = GetVectorLengthInBytes() / (T_per_segment * sizeof(T)); + for (int seg = 0; seg < segment_count; seg++) { + int segoff = seg * T_per_segment; + for (int i = 0; i < 2; i++) { + for (int j = 0; j < 2; j++) { + T prod0 = FPMulNaNs(src1.Float(2 * i + 0 + segoff), + src2.Float(2 * j + 0 + segoff)); + T prod1 = FPMulNaNs(src1.Float(2 * i + 1 + segoff), + src2.Float(2 * j + 1 + segoff)); + T sum = FPAdd(srcdst.Float(2 * i + j + segoff), prod0); + result[2 * i + j + segoff] = FPAdd(sum, prod1); + } + } + } + for (int i = 0; i < LaneCountFromFormat(vform); i++) { + // Elements outside a multiple of 4T are set to zero. This happens only + // for double precision operations, when the VL is a multiple of 128 bits, + // but not a multiple of 256 bits. + T value = (i < (T_per_segment * segment_count)) ? result[i] : 0; + srcdst.SetFloat(vform, i, value); + } + return srcdst; +} + +LogicVRegister Simulator::fmatmul(VectorFormat vform, + LogicVRegister dst, + const LogicVRegister& src1, + const LogicVRegister& src2) { + if (LaneSizeInBitsFromFormat(vform) == kSRegSize) { + fmatmul(vform, dst, src1, src2); + } else { + VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize); + fmatmul(vform, dst, src1, src2); + } + return dst; +} + +} // namespace aarch64 +} // namespace vixl + +#endif // VIXL_INCLUDE_SIMULATOR_AARCH64 diff --git a/3rdparty/vixl/src/aarch64/macro-assembler-aarch64.cc b/3rdparty/vixl/src/aarch64/macro-assembler-aarch64.cc new file mode 100644 index 0000000000..cee9218d2d --- /dev/null +++ b/3rdparty/vixl/src/aarch64/macro-assembler-aarch64.cc @@ -0,0 +1,3121 @@ +// Copyright 2015, VIXL authors +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// * Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// * Neither the name of ARM Limited nor the names of its contributors may be +// used to endorse or promote products derived from this software without +// specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS CONTRIBUTORS "AS IS" AND +// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +// DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE +// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +#include + +#include "macro-assembler-aarch64.h" + +namespace vixl { +namespace aarch64 { + + +void Pool::Release() { + if (--monitor_ == 0) { + // Ensure the pool has not been blocked for too long. + VIXL_ASSERT(masm_->GetCursorOffset() < checkpoint_); + } +} + + +void Pool::SetNextCheckpoint(ptrdiff_t checkpoint) { + masm_->checkpoint_ = std::min(masm_->checkpoint_, checkpoint); + checkpoint_ = checkpoint; +} + + +LiteralPool::LiteralPool(MacroAssembler* masm) + : Pool(masm), + size_(0), + first_use_(-1), + recommended_checkpoint_(kNoCheckpointRequired) {} + + +LiteralPool::~LiteralPool() VIXL_NEGATIVE_TESTING_ALLOW_EXCEPTION { + VIXL_ASSERT(IsEmpty()); + VIXL_ASSERT(!IsBlocked()); + for (std::vector::iterator it = deleted_on_destruction_.begin(); + it != deleted_on_destruction_.end(); + it++) { + delete *it; + } +} + + +void LiteralPool::Reset() { + std::vector::iterator it, end; + for (it = entries_.begin(), end = entries_.end(); it != end; ++it) { + RawLiteral* literal = *it; + if (literal->deletion_policy_ == RawLiteral::kDeletedOnPlacementByPool) { + delete literal; + } + } + entries_.clear(); + size_ = 0; + first_use_ = -1; + Pool::Reset(); + recommended_checkpoint_ = kNoCheckpointRequired; +} + + +void LiteralPool::CheckEmitFor(size_t amount, EmitOption option) { + if (IsEmpty() || IsBlocked()) return; + + ptrdiff_t distance = masm_->GetCursorOffset() + amount - first_use_; + if (distance >= kRecommendedLiteralPoolRange) { + Emit(option); + } +} + + +void LiteralPool::CheckEmitForBranch(size_t range) { + if (IsEmpty() || IsBlocked()) return; + if (GetMaxSize() >= range) Emit(); +} + +// We use a subclass to access the protected `ExactAssemblyScope` constructor +// giving us control over the pools. This allows us to use this scope within +// code emitting pools without creating a circular dependency. +// We keep the constructor private to restrict usage of this helper class. +class ExactAssemblyScopeWithoutPoolsCheck : public ExactAssemblyScope { + private: + ExactAssemblyScopeWithoutPoolsCheck(MacroAssembler* masm, size_t size) + : ExactAssemblyScope(masm, + size, + ExactAssemblyScope::kExactSize, + ExactAssemblyScope::kIgnorePools) {} + + friend void LiteralPool::Emit(LiteralPool::EmitOption); + friend void VeneerPool::Emit(VeneerPool::EmitOption, size_t); +}; + + +void LiteralPool::Emit(EmitOption option) { + // There is an issue if we are asked to emit a blocked or empty pool. + VIXL_ASSERT(!IsBlocked()); + VIXL_ASSERT(!IsEmpty()); + + size_t pool_size = GetSize(); + size_t emit_size = pool_size; + if (option == kBranchRequired) emit_size += kInstructionSize; + Label end_of_pool; + + VIXL_ASSERT(emit_size % kInstructionSize == 0); + { + CodeBufferCheckScope guard(masm_, + emit_size, + CodeBufferCheckScope::kCheck, + CodeBufferCheckScope::kExactSize); +#ifdef VIXL_DEBUG + // Also explicitly disallow usage of the `MacroAssembler` here. 
+ masm_->SetAllowMacroInstructions(false); +#endif + if (option == kBranchRequired) { + ExactAssemblyScopeWithoutPoolsCheck eas_guard(masm_, kInstructionSize); + masm_->b(&end_of_pool); + } + + { + // Marker indicating the size of the literal pool in 32-bit words. + VIXL_ASSERT((pool_size % kWRegSizeInBytes) == 0); + ExactAssemblyScopeWithoutPoolsCheck eas_guard(masm_, kInstructionSize); + masm_->ldr(xzr, static_cast(pool_size / kWRegSizeInBytes)); + } + + // Now populate the literal pool. + std::vector::iterator it, end; + for (it = entries_.begin(), end = entries_.end(); it != end; ++it) { + VIXL_ASSERT((*it)->IsUsed()); + masm_->place(*it); + } + + if (option == kBranchRequired) masm_->bind(&end_of_pool); +#ifdef VIXL_DEBUG + masm_->SetAllowMacroInstructions(true); +#endif + } + + Reset(); +} + + +void LiteralPool::AddEntry(RawLiteral* literal) { + // A literal must be registered immediately before its first use. Here we + // cannot control that it is its first use, but we check no code has been + // emitted since its last use. + VIXL_ASSERT(masm_->GetCursorOffset() == literal->GetLastUse()); + + UpdateFirstUse(masm_->GetCursorOffset()); + VIXL_ASSERT(masm_->GetCursorOffset() >= first_use_); + entries_.push_back(literal); + size_ += literal->GetSize(); +} + + +void LiteralPool::UpdateFirstUse(ptrdiff_t use_position) { + first_use_ = std::min(first_use_, use_position); + if (first_use_ == -1) { + first_use_ = use_position; + SetNextRecommendedCheckpoint(GetNextRecommendedCheckpoint()); + SetNextCheckpoint(first_use_ + Instruction::kLoadLiteralRange); + } else { + VIXL_ASSERT(use_position > first_use_); + } +} + + +void VeneerPool::Reset() { + Pool::Reset(); + unresolved_branches_.Reset(); +} + + +void VeneerPool::Release() { + if (--monitor_ == 0) { + VIXL_ASSERT(IsEmpty() || + masm_->GetCursorOffset() < + unresolved_branches_.GetFirstLimit()); + } +} + + +void VeneerPool::RegisterUnresolvedBranch(ptrdiff_t branch_pos, + Label* label, + ImmBranchType branch_type) { + VIXL_ASSERT(!label->IsBound()); + BranchInfo branch_info = BranchInfo(branch_pos, label, branch_type); + unresolved_branches_.insert(branch_info); + UpdateNextCheckPoint(); + // TODO: In debug mode register the label with the assembler to make sure it + // is bound with masm Bind and not asm bind. +} + + +void VeneerPool::DeleteUnresolvedBranchInfoForLabel(Label* label) { + if (IsEmpty()) { + VIXL_ASSERT(checkpoint_ == kNoCheckpointRequired); + return; + } + + if (label->IsLinked()) { + Label::LabelLinksIterator links_it(label); + for (; !links_it.Done(); links_it.Advance()) { + ptrdiff_t link_offset = *links_it.Current(); + Instruction* link = masm_->GetInstructionAt(link_offset); + + // ADR instructions are not handled. 
+ if (BranchTypeUsesVeneers(link->GetBranchType())) { + BranchInfo branch_info(link_offset, label, link->GetBranchType()); + unresolved_branches_.erase(branch_info); + } + } + } + + UpdateNextCheckPoint(); +} + + +bool VeneerPool::ShouldEmitVeneer(int64_t first_unreacheable_pc, + size_t amount) { + ptrdiff_t offset = + kPoolNonVeneerCodeSize + amount + GetMaxSize() + GetOtherPoolsMaxSize(); + return (masm_->GetCursorOffset() + offset) > first_unreacheable_pc; +} + + +void VeneerPool::CheckEmitFor(size_t amount, EmitOption option) { + if (IsEmpty()) return; + + VIXL_ASSERT(masm_->GetCursorOffset() + kPoolNonVeneerCodeSize < + unresolved_branches_.GetFirstLimit()); + + if (IsBlocked()) return; + + if (ShouldEmitVeneers(amount)) { + Emit(option, amount); + } else { + UpdateNextCheckPoint(); + } +} + + +void VeneerPool::Emit(EmitOption option, size_t amount) { + // There is an issue if we are asked to emit a blocked or empty pool. + VIXL_ASSERT(!IsBlocked()); + VIXL_ASSERT(!IsEmpty()); + + Label end; + if (option == kBranchRequired) { + ExactAssemblyScopeWithoutPoolsCheck guard(masm_, kInstructionSize); + masm_->b(&end); + } + + // We want to avoid generating veneer pools too often, so generate veneers for + // branches that don't immediately require a veneer but will soon go out of + // range. + static const size_t kVeneerEmissionMargin = 1 * KBytes; + + for (BranchInfoSetIterator it(&unresolved_branches_); !it.Done();) { + BranchInfo* branch_info = it.Current(); + if (ShouldEmitVeneer(branch_info->first_unreacheable_pc_, + amount + kVeneerEmissionMargin)) { + CodeBufferCheckScope scope(masm_, + kVeneerCodeSize, + CodeBufferCheckScope::kCheck, + CodeBufferCheckScope::kExactSize); + ptrdiff_t branch_pos = branch_info->pc_offset_; + Instruction* branch = masm_->GetInstructionAt(branch_pos); + Label* label = branch_info->label_; + + // Patch the branch to point to the current position, and emit a branch + // to the label. + Instruction* veneer = masm_->GetCursorAddress(); + branch->SetImmPCOffsetTarget(veneer); + { + ExactAssemblyScopeWithoutPoolsCheck guard(masm_, kInstructionSize); + masm_->b(label); + } + + // Update the label. The branch patched does not point to it any longer. + label->DeleteLink(branch_pos); + + it.DeleteCurrentAndAdvance(); + } else { + it.AdvanceToNextType(); + } + } + + UpdateNextCheckPoint(); + + masm_->bind(&end); +} + + +MacroAssembler::MacroAssembler(byte* buffer, + size_t capacity, + PositionIndependentCodeOption pic) + : Assembler(buffer, capacity, pic), +#ifdef VIXL_DEBUG + allow_macro_instructions_(true), +#endif + generate_simulator_code_(VIXL_AARCH64_GENERATE_SIMULATOR_CODE), + sp_(sp), + tmp_list_(ip0, ip1), + v_tmp_list_(d31), + p_tmp_list_(CPURegList::Empty(CPURegister::kPRegister)), + current_scratch_scope_(NULL), + literal_pool_(this), + veneer_pool_(this), + recommended_checkpoint_(Pool::kNoCheckpointRequired), + fp_nan_propagation_(NoFPMacroNaNPropagationSelected) { + checkpoint_ = GetNextCheckPoint(); +} + + +MacroAssembler::~MacroAssembler() {} + + +void MacroAssembler::Reset() { + Assembler::Reset(); + + VIXL_ASSERT(!literal_pool_.IsBlocked()); + literal_pool_.Reset(); + veneer_pool_.Reset(); + + checkpoint_ = GetNextCheckPoint(); +} + + +void MacroAssembler::FinalizeCode(FinalizeOption option) { + if (!literal_pool_.IsEmpty()) { + // The user may decide to emit more code after Finalize, emit a branch if + // that's the case. + literal_pool_.Emit(option == kUnreachable ? 
Pool::kNoBranchRequired + : Pool::kBranchRequired); + } + VIXL_ASSERT(veneer_pool_.IsEmpty()); + + Assembler::FinalizeCode(); +} + + +void MacroAssembler::CheckEmitFor(size_t amount) { + CheckEmitPoolsFor(amount); + VIXL_ASSERT(GetBuffer()->HasSpaceFor(amount)); +} + + +void MacroAssembler::CheckEmitPoolsFor(size_t amount) { + literal_pool_.CheckEmitFor(amount); + veneer_pool_.CheckEmitFor(amount); + checkpoint_ = GetNextCheckPoint(); +} + + +int MacroAssembler::MoveImmediateHelper(MacroAssembler* masm, + const Register& rd, + uint64_t imm) { + bool emit_code = (masm != NULL); + VIXL_ASSERT(IsUint32(imm) || IsInt32(imm) || rd.Is64Bits()); + // The worst case for size is mov 64-bit immediate to sp: + // * up to 4 instructions to materialise the constant + // * 1 instruction to move to sp + MacroEmissionCheckScope guard(masm); + + // Immediates on Aarch64 can be produced using an initial value, and zero to + // three move keep operations. + // + // Initial values can be generated with: + // 1. 64-bit move zero (movz). + // 2. 32-bit move inverted (movn). + // 3. 64-bit move inverted. + // 4. 32-bit orr immediate. + // 5. 64-bit orr immediate. + // Move-keep may then be used to modify each of the 16-bit half words. + // + // The code below supports all five initial value generators, and + // applying move-keep operations to move-zero and move-inverted initial + // values. + + // Try to move the immediate in one instruction, and if that fails, switch to + // using multiple instructions. + if (OneInstrMoveImmediateHelper(masm, rd, imm)) { + return 1; + } else { + int instruction_count = 0; + unsigned reg_size = rd.GetSizeInBits(); + + // Generic immediate case. Imm will be represented by + // [imm3, imm2, imm1, imm0], where each imm is 16 bits. + // A move-zero or move-inverted is generated for the first non-zero or + // non-0xffff immX, and a move-keep for subsequent non-zero immX. + + uint64_t ignored_halfword = 0; + bool invert_move = false; + // If the number of 0xffff halfwords is greater than the number of 0x0000 + // halfwords, it's more efficient to use move-inverted. + if (CountClearHalfWords(~imm, reg_size) > + CountClearHalfWords(imm, reg_size)) { + ignored_halfword = 0xffff; + invert_move = true; + } + + // Mov instructions can't move values into the stack pointer, so set up a + // temporary register, if needed. + UseScratchRegisterScope temps; + Register temp; + if (emit_code) { + temps.Open(masm); + temp = rd.IsSP() ? temps.AcquireSameSizeAs(rd) : rd; + } + + // Iterate through the halfwords. Use movn/movz for the first non-ignored + // halfword, and movk for subsequent halfwords. + VIXL_ASSERT((reg_size % 16) == 0); + bool first_mov_done = false; + for (unsigned i = 0; i < (reg_size / 16); i++) { + uint64_t imm16 = (imm >> (16 * i)) & 0xffff; + if (imm16 != ignored_halfword) { + if (!first_mov_done) { + if (invert_move) { + if (emit_code) masm->movn(temp, ~imm16 & 0xffff, 16 * i); + instruction_count++; + } else { + if (emit_code) masm->movz(temp, imm16, 16 * i); + instruction_count++; + } + first_mov_done = true; + } else { + // Construct a wider constant. + if (emit_code) masm->movk(temp, imm16, 16 * i); + instruction_count++; + } + } + } + + VIXL_ASSERT(first_mov_done); + + // Move the temporary if the original destination register was the stack + // pointer. 
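+ // For illustration (editorial note): a call such as
+ // Mov(sp, 0x123456789abc) therefore expands to a movz/movk sequence into
+ // a scratch register followed by a final `mov sp, <scratch>`, because
+ // movz/movk cannot write to the stack pointer.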
+ if (rd.IsSP()) { + if (emit_code) masm->mov(rd, temp); + instruction_count++; + } + return instruction_count; + } +} + + +void MacroAssembler::B(Label* label, BranchType type, Register reg, int bit) { + VIXL_ASSERT((reg.Is(NoReg) || (type >= kBranchTypeFirstUsingReg)) && + ((bit == -1) || (type >= kBranchTypeFirstUsingBit))); + if (kBranchTypeFirstCondition <= type && type <= kBranchTypeLastCondition) { + B(static_cast(type), label); + } else { + switch (type) { + case always: + B(label); + break; + case never: + break; + case reg_zero: + Cbz(reg, label); + break; + case reg_not_zero: + Cbnz(reg, label); + break; + case reg_bit_clear: + Tbz(reg, bit, label); + break; + case reg_bit_set: + Tbnz(reg, bit, label); + break; + default: + VIXL_UNREACHABLE(); + } + } +} + + +void MacroAssembler::B(Label* label) { + // We don't need to check the size of the literal pool, because the size of + // the literal pool is already bounded by the literal range, which is smaller + // than the range of this branch. + VIXL_ASSERT(Instruction::GetImmBranchForwardRange(UncondBranchType) > + Instruction::kLoadLiteralRange); + SingleEmissionCheckScope guard(this); + b(label); +} + + +void MacroAssembler::B(Label* label, Condition cond) { + // We don't need to check the size of the literal pool, because the size of + // the literal pool is already bounded by the literal range, which is smaller + // than the range of this branch. + VIXL_ASSERT(Instruction::GetImmBranchForwardRange(CondBranchType) > + Instruction::kLoadLiteralRange); + VIXL_ASSERT(allow_macro_instructions_); + VIXL_ASSERT((cond != al) && (cond != nv)); + EmissionCheckScope guard(this, 2 * kInstructionSize); + + if (label->IsBound() && LabelIsOutOfRange(label, CondBranchType)) { + Label done; + b(&done, InvertCondition(cond)); + b(label); + bind(&done); + } else { + if (!label->IsBound()) { + veneer_pool_.RegisterUnresolvedBranch(GetCursorOffset(), + label, + CondBranchType); + } + b(label, cond); + } +} + + +void MacroAssembler::Cbnz(const Register& rt, Label* label) { + // We don't need to check the size of the literal pool, because the size of + // the literal pool is already bounded by the literal range, which is smaller + // than the range of this branch. + VIXL_ASSERT(Instruction::GetImmBranchForwardRange(CompareBranchType) > + Instruction::kLoadLiteralRange); + VIXL_ASSERT(allow_macro_instructions_); + VIXL_ASSERT(!rt.IsZero()); + EmissionCheckScope guard(this, 2 * kInstructionSize); + + if (label->IsBound() && LabelIsOutOfRange(label, CondBranchType)) { + Label done; + cbz(rt, &done); + b(label); + bind(&done); + } else { + if (!label->IsBound()) { + veneer_pool_.RegisterUnresolvedBranch(GetCursorOffset(), + label, + CompareBranchType); + } + cbnz(rt, label); + } +} + + +void MacroAssembler::Cbz(const Register& rt, Label* label) { + // We don't need to check the size of the literal pool, because the size of + // the literal pool is already bounded by the literal range, which is smaller + // than the range of this branch. 
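+ // For illustration (editorial note): as in Cbnz above, if `label` is
+ // already bound but lies outside the roughly +/-1MB compare-and-branch
+ // range, the code below inverts the test (cbnz to a local `done` label)
+ // and reaches `label` with an unconditional b, whose range is roughly
+ // +/-128MB.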
+ VIXL_ASSERT(Instruction::GetImmBranchForwardRange(CompareBranchType) > + Instruction::kLoadLiteralRange); + VIXL_ASSERT(allow_macro_instructions_); + VIXL_ASSERT(!rt.IsZero()); + EmissionCheckScope guard(this, 2 * kInstructionSize); + + if (label->IsBound() && LabelIsOutOfRange(label, CondBranchType)) { + Label done; + cbnz(rt, &done); + b(label); + bind(&done); + } else { + if (!label->IsBound()) { + veneer_pool_.RegisterUnresolvedBranch(GetCursorOffset(), + label, + CompareBranchType); + } + cbz(rt, label); + } +} + + +void MacroAssembler::Tbnz(const Register& rt, unsigned bit_pos, Label* label) { + // This is to avoid a situation where emitting a veneer for a TBZ/TBNZ branch + // can become impossible because we emit the literal pool first. + literal_pool_.CheckEmitForBranch( + Instruction::GetImmBranchForwardRange(TestBranchType)); + VIXL_ASSERT(allow_macro_instructions_); + VIXL_ASSERT(!rt.IsZero()); + EmissionCheckScope guard(this, 2 * kInstructionSize); + + if (label->IsBound() && LabelIsOutOfRange(label, TestBranchType)) { + Label done; + tbz(rt, bit_pos, &done); + b(label); + bind(&done); + } else { + if (!label->IsBound()) { + veneer_pool_.RegisterUnresolvedBranch(GetCursorOffset(), + label, + TestBranchType); + } + tbnz(rt, bit_pos, label); + } +} + + +void MacroAssembler::Tbz(const Register& rt, unsigned bit_pos, Label* label) { + // This is to avoid a situation where emitting a veneer for a TBZ/TBNZ branch + // can become impossible because we emit the literal pool first. + literal_pool_.CheckEmitForBranch( + Instruction::GetImmBranchForwardRange(TestBranchType)); + VIXL_ASSERT(allow_macro_instructions_); + VIXL_ASSERT(!rt.IsZero()); + EmissionCheckScope guard(this, 2 * kInstructionSize); + + if (label->IsBound() && LabelIsOutOfRange(label, TestBranchType)) { + Label done; + tbnz(rt, bit_pos, &done); + b(label); + bind(&done); + } else { + if (!label->IsBound()) { + veneer_pool_.RegisterUnresolvedBranch(GetCursorOffset(), + label, + TestBranchType); + } + tbz(rt, bit_pos, label); + } +} + +void MacroAssembler::Bind(Label* label, BranchTargetIdentifier id) { + VIXL_ASSERT(allow_macro_instructions_); + veneer_pool_.DeleteUnresolvedBranchInfoForLabel(label); + if (id == EmitBTI_none) { + bind(label); + } else { + // Emit this inside an ExactAssemblyScope to ensure there are no extra + // instructions between the bind and the target identifier instruction. + ExactAssemblyScope scope(this, kInstructionSize); + bind(label); + if (id == EmitPACIASP) { + paciasp(); + } else if (id == EmitPACIBSP) { + pacibsp(); + } else { + bti(id); + } + } +} + +// Bind a label to a specified offset from the start of the buffer. 
+void MacroAssembler::BindToOffset(Label* label, ptrdiff_t offset) { + VIXL_ASSERT(allow_macro_instructions_); + veneer_pool_.DeleteUnresolvedBranchInfoForLabel(label); + Assembler::BindToOffset(label, offset); +} + + +void MacroAssembler::And(const Register& rd, + const Register& rn, + const Operand& operand) { + VIXL_ASSERT(allow_macro_instructions_); + LogicalMacro(rd, rn, operand, AND); +} + + +void MacroAssembler::Ands(const Register& rd, + const Register& rn, + const Operand& operand) { + VIXL_ASSERT(allow_macro_instructions_); + LogicalMacro(rd, rn, operand, ANDS); +} + + +void MacroAssembler::Tst(const Register& rn, const Operand& operand) { + VIXL_ASSERT(allow_macro_instructions_); + Ands(AppropriateZeroRegFor(rn), rn, operand); +} + + +void MacroAssembler::Bic(const Register& rd, + const Register& rn, + const Operand& operand) { + VIXL_ASSERT(allow_macro_instructions_); + LogicalMacro(rd, rn, operand, BIC); +} + + +void MacroAssembler::Bics(const Register& rd, + const Register& rn, + const Operand& operand) { + VIXL_ASSERT(allow_macro_instructions_); + LogicalMacro(rd, rn, operand, BICS); +} + + +void MacroAssembler::Orr(const Register& rd, + const Register& rn, + const Operand& operand) { + VIXL_ASSERT(allow_macro_instructions_); + LogicalMacro(rd, rn, operand, ORR); +} + + +void MacroAssembler::Orn(const Register& rd, + const Register& rn, + const Operand& operand) { + VIXL_ASSERT(allow_macro_instructions_); + LogicalMacro(rd, rn, operand, ORN); +} + + +void MacroAssembler::Eor(const Register& rd, + const Register& rn, + const Operand& operand) { + VIXL_ASSERT(allow_macro_instructions_); + LogicalMacro(rd, rn, operand, EOR); +} + + +void MacroAssembler::Eon(const Register& rd, + const Register& rn, + const Operand& operand) { + VIXL_ASSERT(allow_macro_instructions_); + LogicalMacro(rd, rn, operand, EON); +} + + +void MacroAssembler::LogicalMacro(const Register& rd, + const Register& rn, + const Operand& operand, + LogicalOp op) { + // The worst case for size is logical immediate to sp: + // * up to 4 instructions to materialise the constant + // * 1 instruction to do the operation + // * 1 instruction to move to sp + MacroEmissionCheckScope guard(this); + UseScratchRegisterScope temps(this); + // Use `rd` as a temp, if we can. + temps.Include(rd); + // We read `rn` after evaluating `operand`. + temps.Exclude(rn); + // It doesn't matter if `operand` is in `temps` (e.g. because it alises `rd`) + // because we don't need it after it is evaluated. + + if (operand.IsImmediate()) { + uint64_t immediate = operand.GetImmediate(); + unsigned reg_size = rd.GetSizeInBits(); + + // If the operation is NOT, invert the operation and immediate. + if ((op & NOT) == NOT) { + op = static_cast(op & ~NOT); + immediate = ~immediate; + } + + // Ignore the top 32 bits of an immediate if we're moving to a W register. + if (rd.Is32Bits()) { + // Check that the top 32 bits are consistent. + VIXL_ASSERT(((immediate >> kWRegSize) == 0) || + ((immediate >> kWRegSize) == 0xffffffff)); + immediate &= kWRegMask; + } + + VIXL_ASSERT(rd.Is64Bits() || IsUint32(immediate)); + + // Special cases for all set or all clear immediates. 
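+ // For illustration (editorial note): And(x0, x1, 0) reduces to
+ // `mov x0, #0`, and Orr(x0, x1, 0xffffffffffffffff) reduces to a move of
+ // the all-ones immediate, so no logical instruction is emitted for these
+ // degenerate masks.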
+ if (immediate == 0) { + switch (op) { + case AND: + Mov(rd, 0); + return; + case ORR: + VIXL_FALLTHROUGH(); + case EOR: + Mov(rd, rn); + return; + case ANDS: + VIXL_FALLTHROUGH(); + case BICS: + break; + default: + VIXL_UNREACHABLE(); + } + } else if ((rd.Is64Bits() && (immediate == UINT64_C(0xffffffffffffffff))) || + (rd.Is32Bits() && (immediate == UINT64_C(0x00000000ffffffff)))) { + switch (op) { + case AND: + Mov(rd, rn); + return; + case ORR: + Mov(rd, immediate); + return; + case EOR: + Mvn(rd, rn); + return; + case ANDS: + VIXL_FALLTHROUGH(); + case BICS: + break; + default: + VIXL_UNREACHABLE(); + } + } + + unsigned n, imm_s, imm_r; + if (IsImmLogical(immediate, reg_size, &n, &imm_s, &imm_r)) { + // Immediate can be encoded in the instruction. + LogicalImmediate(rd, rn, n, imm_s, imm_r, op); + } else { + // Immediate can't be encoded: synthesize using move immediate. + Register temp = temps.AcquireSameSizeAs(rn); + VIXL_ASSERT(!temp.Aliases(rn)); + + // If the left-hand input is the stack pointer, we can't pre-shift the + // immediate, as the encoding won't allow the subsequent post shift. + PreShiftImmMode mode = rn.IsSP() ? kNoShift : kAnyShift; + Operand imm_operand = MoveImmediateForShiftedOp(temp, immediate, mode); + + if (rd.Is(sp) || rd.Is(wsp)) { + // If rd is the stack pointer we cannot use it as the destination + // register so we use the temp register as an intermediate again. + Logical(temp, rn, imm_operand, op); + Mov(rd, temp); + } else { + Logical(rd, rn, imm_operand, op); + } + } + } else if (operand.IsExtendedRegister()) { + VIXL_ASSERT(operand.GetRegister().GetSizeInBits() <= rd.GetSizeInBits()); + // Add/sub extended supports shift <= 4. We want to support exactly the + // same modes here. + VIXL_ASSERT(operand.GetShiftAmount() <= 4); + VIXL_ASSERT( + operand.GetRegister().Is64Bits() || + ((operand.GetExtend() != UXTX) && (operand.GetExtend() != SXTX))); + + Register temp = temps.AcquireSameSizeAs(rn); + VIXL_ASSERT(!temp.Aliases(rn)); + EmitExtendShift(temp, + operand.GetRegister(), + operand.GetExtend(), + operand.GetShiftAmount()); + Logical(rd, rn, Operand(temp), op); + } else { + // The operand can be encoded in the instruction. + VIXL_ASSERT(operand.IsShiftedRegister()); + Logical(rd, rn, operand, op); + } +} + + +void MacroAssembler::Mov(const Register& rd, + const Operand& operand, + DiscardMoveMode discard_mode) { + VIXL_ASSERT(allow_macro_instructions_); + // The worst case for size is mov immediate with up to 4 instructions. + MacroEmissionCheckScope guard(this); + + if (operand.IsImmediate()) { + // Call the macro assembler for generic immediates. + Mov(rd, operand.GetImmediate()); + } else if (operand.IsShiftedRegister() && (operand.GetShiftAmount() != 0)) { + // Emit a shift instruction if moving a shifted register. This operation + // could also be achieved using an orr instruction (like orn used by Mvn), + // but using a shift instruction makes the disassembly clearer. + EmitShift(rd, + operand.GetRegister(), + operand.GetShift(), + operand.GetShiftAmount()); + } else if (operand.IsExtendedRegister()) { + // Emit an extend instruction if moving an extended register. This handles + // extend with post-shift operations, too. 
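+ // For illustration (editorial note): Mov(x0, Operand(w1, SXTW)) takes
+ // this path and is emitted as a single sign-extending instruction rather
+ // than an orr-based register move.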
+ EmitExtendShift(rd, + operand.GetRegister(), + operand.GetExtend(), + operand.GetShiftAmount()); + } else { + Mov(rd, operand.GetRegister(), discard_mode); + } +} + + +void MacroAssembler::Movi16bitHelper(const VRegister& vd, uint64_t imm) { + VIXL_ASSERT(IsUint16(imm)); + int byte1 = (imm & 0xff); + int byte2 = ((imm >> 8) & 0xff); + if (byte1 == byte2) { + movi(vd.Is64Bits() ? vd.V8B() : vd.V16B(), byte1); + } else if (byte1 == 0) { + movi(vd, byte2, LSL, 8); + } else if (byte2 == 0) { + movi(vd, byte1); + } else if (byte1 == 0xff) { + mvni(vd, ~byte2 & 0xff, LSL, 8); + } else if (byte2 == 0xff) { + mvni(vd, ~byte1 & 0xff); + } else { + UseScratchRegisterScope temps(this); + Register temp = temps.AcquireW(); + movz(temp, imm); + dup(vd, temp); + } +} + + +void MacroAssembler::Movi32bitHelper(const VRegister& vd, uint64_t imm) { + VIXL_ASSERT(IsUint32(imm)); + + uint8_t bytes[sizeof(imm)]; + memcpy(bytes, &imm, sizeof(imm)); + + // All bytes are either 0x00 or 0xff. + { + bool all0orff = true; + for (int i = 0; i < 4; ++i) { + if ((bytes[i] != 0) && (bytes[i] != 0xff)) { + all0orff = false; + break; + } + } + + if (all0orff == true) { + movi(vd.Is64Bits() ? vd.V1D() : vd.V2D(), ((imm << 32) | imm)); + return; + } + } + + // Of the 4 bytes, only one byte is non-zero. + for (int i = 0; i < 4; i++) { + if ((imm & (0xff << (i * 8))) == imm) { + movi(vd, bytes[i], LSL, i * 8); + return; + } + } + + // Of the 4 bytes, only one byte is not 0xff. + for (int i = 0; i < 4; i++) { + uint32_t mask = ~(0xff << (i * 8)); + if ((imm & mask) == mask) { + mvni(vd, ~bytes[i] & 0xff, LSL, i * 8); + return; + } + } + + // Immediate is of the form 0x00MMFFFF. + if ((imm & 0xff00ffff) == 0x0000ffff) { + movi(vd, bytes[2], MSL, 16); + return; + } + + // Immediate is of the form 0x0000MMFF. + if ((imm & 0xffff00ff) == 0x000000ff) { + movi(vd, bytes[1], MSL, 8); + return; + } + + // Immediate is of the form 0xFFMM0000. + if ((imm & 0xff00ffff) == 0xff000000) { + mvni(vd, ~bytes[2] & 0xff, MSL, 16); + return; + } + // Immediate is of the form 0xFFFFMM00. + if ((imm & 0xffff00ff) == 0xffff0000) { + mvni(vd, ~bytes[1] & 0xff, MSL, 8); + return; + } + + // Top and bottom 16-bits are equal. + if (((imm >> 16) & 0xffff) == (imm & 0xffff)) { + Movi16bitHelper(vd.Is64Bits() ? vd.V4H() : vd.V8H(), imm & 0xffff); + return; + } + + // Default case. + { + UseScratchRegisterScope temps(this); + Register temp = temps.AcquireW(); + Mov(temp, imm); + dup(vd, temp); + } +} + + +void MacroAssembler::Movi64bitHelper(const VRegister& vd, uint64_t imm) { + // All bytes are either 0x00 or 0xff. + { + bool all0orff = true; + for (int i = 0; i < 8; ++i) { + int byteval = (imm >> (i * 8)) & 0xff; + if (byteval != 0 && byteval != 0xff) { + all0orff = false; + break; + } + } + if (all0orff == true) { + movi(vd, imm); + return; + } + } + + // Top and bottom 32-bits are equal. + if (((imm >> 32) & 0xffffffff) == (imm & 0xffffffff)) { + Movi32bitHelper(vd.Is64Bits() ? vd.V2S() : vd.V4S(), imm & 0xffffffff); + return; + } + + // Default case. 
+ { + UseScratchRegisterScope temps(this); + Register temp = temps.AcquireX(); + Mov(temp, imm); + if (vd.Is1D()) { + fmov(vd.D(), temp); + } else { + dup(vd.V2D(), temp); + } + } +} + + +void MacroAssembler::Movi(const VRegister& vd, + uint64_t imm, + Shift shift, + int shift_amount) { + VIXL_ASSERT(allow_macro_instructions_); + MacroEmissionCheckScope guard(this); + if (shift_amount != 0 || shift != LSL) { + movi(vd, imm, shift, shift_amount); + } else if (vd.Is8B() || vd.Is16B()) { + // 8-bit immediate. + VIXL_ASSERT(IsUint8(imm)); + movi(vd, imm); + } else if (vd.Is4H() || vd.Is8H()) { + // 16-bit immediate. + Movi16bitHelper(vd, imm); + } else if (vd.Is2S() || vd.Is4S()) { + // 32-bit immediate. + Movi32bitHelper(vd, imm); + } else { + // 64-bit immediate. + Movi64bitHelper(vd, imm); + } +} + + +void MacroAssembler::Movi(const VRegister& vd, uint64_t hi, uint64_t lo) { + // TODO: Move 128-bit values in a more efficient way. + VIXL_ASSERT(vd.Is128Bits()); + if (hi == lo) { + Movi(vd.V2D(), lo); + return; + } + + Movi(vd.V1D(), lo); + + if (hi != 0) { + UseScratchRegisterScope temps(this); + // TODO: Figure out if using a temporary V register to materialise the + // immediate is better. + Register temp = temps.AcquireX(); + Mov(temp, hi); + Ins(vd.V2D(), 1, temp); + } +} + + +void MacroAssembler::Mvn(const Register& rd, const Operand& operand) { + VIXL_ASSERT(allow_macro_instructions_); + // The worst case for size is mvn immediate with up to 4 instructions. + MacroEmissionCheckScope guard(this); + + if (operand.IsImmediate()) { + // Call the macro assembler for generic immediates. + Mvn(rd, operand.GetImmediate()); + } else if (operand.IsExtendedRegister()) { + // Emit two instructions for the extend case. This differs from Mov, as + // the extend and invert can't be achieved in one instruction. + EmitExtendShift(rd, + operand.GetRegister(), + operand.GetExtend(), + operand.GetShiftAmount()); + mvn(rd, rd); + } else { + // Otherwise, register and shifted register cases can be handled by the + // assembler directly, using orn. 
+ mvn(rd, operand); + } +} + + +void MacroAssembler::Mov(const Register& rd, uint64_t imm) { + VIXL_ASSERT(allow_macro_instructions_); + MoveImmediateHelper(this, rd, imm); +} + + +void MacroAssembler::Ccmp(const Register& rn, + const Operand& operand, + StatusFlags nzcv, + Condition cond) { + VIXL_ASSERT(allow_macro_instructions_); + if (operand.IsImmediate() && (operand.GetImmediate() < 0)) { + ConditionalCompareMacro(rn, -operand.GetImmediate(), nzcv, cond, CCMN); + } else { + ConditionalCompareMacro(rn, operand, nzcv, cond, CCMP); + } +} + + +void MacroAssembler::Ccmn(const Register& rn, + const Operand& operand, + StatusFlags nzcv, + Condition cond) { + VIXL_ASSERT(allow_macro_instructions_); + if (operand.IsImmediate() && (operand.GetImmediate() < 0)) { + ConditionalCompareMacro(rn, -operand.GetImmediate(), nzcv, cond, CCMP); + } else { + ConditionalCompareMacro(rn, operand, nzcv, cond, CCMN); + } +} + + +void MacroAssembler::ConditionalCompareMacro(const Register& rn, + const Operand& operand, + StatusFlags nzcv, + Condition cond, + ConditionalCompareOp op) { + VIXL_ASSERT((cond != al) && (cond != nv)); + // The worst case for size is ccmp immediate: + // * up to 4 instructions to materialise the constant + // * 1 instruction for ccmp + MacroEmissionCheckScope guard(this); + + if ((operand.IsShiftedRegister() && (operand.GetShiftAmount() == 0)) || + (operand.IsImmediate() && + IsImmConditionalCompare(operand.GetImmediate()))) { + // The immediate can be encoded in the instruction, or the operand is an + // unshifted register: call the assembler. + ConditionalCompare(rn, operand, nzcv, cond, op); + } else { + UseScratchRegisterScope temps(this); + // The operand isn't directly supported by the instruction: perform the + // operation on a temporary register. + Register temp = temps.AcquireSameSizeAs(rn); + Mov(temp, operand); + ConditionalCompare(rn, temp, nzcv, cond, op); + } +} + + +void MacroAssembler::CselHelper(MacroAssembler* masm, + const Register& rd, + Operand left, + Operand right, + Condition cond, + bool* should_synthesise_left, + bool* should_synthesise_right) { + bool emit_code = (masm != NULL); + + VIXL_ASSERT(!emit_code || masm->allow_macro_instructions_); + VIXL_ASSERT((cond != al) && (cond != nv)); + VIXL_ASSERT(!rd.IsZero() && !rd.IsSP()); + VIXL_ASSERT(left.IsImmediate() || !left.GetRegister().IsSP()); + VIXL_ASSERT(right.IsImmediate() || !right.GetRegister().IsSP()); + + if (should_synthesise_left != NULL) *should_synthesise_left = false; + if (should_synthesise_right != NULL) *should_synthesise_right = false; + + // The worst case for size occurs when the inputs are two non encodable + // constants: + // * up to 4 instructions to materialise the left constant + // * up to 4 instructions to materialise the right constant + // * 1 instruction for csel + EmissionCheckScope guard(masm, 9 * kInstructionSize); + UseScratchRegisterScope temps; + if (masm != NULL) { + temps.Open(masm); + } + + // Try to handle cases where both inputs are immediates. + bool left_is_immediate = left.IsImmediate() || left.IsZero(); + bool right_is_immediate = right.IsImmediate() || right.IsZero(); + if (left_is_immediate && right_is_immediate && + CselSubHelperTwoImmediates(masm, + rd, + left.GetEquivalentImmediate(), + right.GetEquivalentImmediate(), + cond, + should_synthesise_left, + should_synthesise_right)) { + return; + } + + // Handle cases where one of the two inputs is -1, 0, or 1. 
+ bool left_is_small_immediate = + left_is_immediate && ((-1 <= left.GetEquivalentImmediate()) && + (left.GetEquivalentImmediate() <= 1)); + bool right_is_small_immediate = + right_is_immediate && ((-1 <= right.GetEquivalentImmediate()) && + (right.GetEquivalentImmediate() <= 1)); + if (right_is_small_immediate || left_is_small_immediate) { + bool swapped_inputs = false; + if (!right_is_small_immediate) { + std::swap(left, right); + cond = InvertCondition(cond); + swapped_inputs = true; + } + CselSubHelperRightSmallImmediate(masm, + &temps, + rd, + left, + right, + cond, + swapped_inputs ? should_synthesise_right + : should_synthesise_left); + return; + } + + // Otherwise both inputs need to be available in registers. Synthesise them + // if necessary and emit the `csel`. + if (!left.IsPlainRegister()) { + if (emit_code) { + Register temp = temps.AcquireSameSizeAs(rd); + masm->Mov(temp, left); + left = temp; + } + if (should_synthesise_left != NULL) *should_synthesise_left = true; + } + if (!right.IsPlainRegister()) { + if (emit_code) { + Register temp = temps.AcquireSameSizeAs(rd); + masm->Mov(temp, right); + right = temp; + } + if (should_synthesise_right != NULL) *should_synthesise_right = true; + } + if (emit_code) { + VIXL_ASSERT(left.IsPlainRegister() && right.IsPlainRegister()); + if (left.GetRegister().Is(right.GetRegister())) { + masm->Mov(rd, left.GetRegister()); + } else { + masm->csel(rd, left.GetRegister(), right.GetRegister(), cond); + } + } +} + + +bool MacroAssembler::CselSubHelperTwoImmediates(MacroAssembler* masm, + const Register& rd, + int64_t left, + int64_t right, + Condition cond, + bool* should_synthesise_left, + bool* should_synthesise_right) { + bool emit_code = (masm != NULL); + if (should_synthesise_left != NULL) *should_synthesise_left = false; + if (should_synthesise_right != NULL) *should_synthesise_right = false; + + if (left == right) { + if (emit_code) masm->Mov(rd, left); + return true; + } else if (left == -right) { + if (should_synthesise_right != NULL) *should_synthesise_right = true; + if (emit_code) { + masm->Mov(rd, right); + masm->Cneg(rd, rd, cond); + } + return true; + } + + if (CselSubHelperTwoOrderedImmediates(masm, rd, left, right, cond)) { + return true; + } else { + std::swap(left, right); + if (CselSubHelperTwoOrderedImmediates(masm, + rd, + left, + right, + InvertCondition(cond))) { + return true; + } + } + + // TODO: Handle more situations. For example handle `csel rd, #5, #6, cond` + // with `cinc`. 
+ return false; +} + + +bool MacroAssembler::CselSubHelperTwoOrderedImmediates(MacroAssembler* masm, + const Register& rd, + int64_t left, + int64_t right, + Condition cond) { + bool emit_code = (masm != NULL); + + if ((left == 1) && (right == 0)) { + if (emit_code) masm->cset(rd, cond); + return true; + } else if ((left == -1) && (right == 0)) { + if (emit_code) masm->csetm(rd, cond); + return true; + } + return false; +} + + +void MacroAssembler::CselSubHelperRightSmallImmediate( + MacroAssembler* masm, + UseScratchRegisterScope* temps, + const Register& rd, + const Operand& left, + const Operand& right, + Condition cond, + bool* should_synthesise_left) { + bool emit_code = (masm != NULL); + VIXL_ASSERT((right.IsImmediate() || right.IsZero()) && + (-1 <= right.GetEquivalentImmediate()) && + (right.GetEquivalentImmediate() <= 1)); + Register left_register; + + if (left.IsPlainRegister()) { + left_register = left.GetRegister(); + } else { + if (emit_code) { + left_register = temps->AcquireSameSizeAs(rd); + masm->Mov(left_register, left); + } + if (should_synthesise_left != NULL) *should_synthesise_left = true; + } + if (emit_code) { + int64_t imm = right.GetEquivalentImmediate(); + Register zr = AppropriateZeroRegFor(rd); + if (imm == 0) { + masm->csel(rd, left_register, zr, cond); + } else if (imm == 1) { + masm->csinc(rd, left_register, zr, cond); + } else { + VIXL_ASSERT(imm == -1); + masm->csinv(rd, left_register, zr, cond); + } + } +} + + +void MacroAssembler::Add(const Register& rd, + const Register& rn, + const Operand& operand, + FlagsUpdate S) { + VIXL_ASSERT(allow_macro_instructions_); + if (operand.IsImmediate()) { + int64_t imm = operand.GetImmediate(); + if ((imm < 0) && (imm != std::numeric_limits<int64_t>::min()) && + IsImmAddSub(-imm)) { + AddSubMacro(rd, rn, -imm, S, SUB); + return; + } + } + AddSubMacro(rd, rn, operand, S, ADD); +} + + +void MacroAssembler::Adds(const Register& rd, + const Register& rn, + const Operand& operand) { + Add(rd, rn, operand, SetFlags); +} + +#define MINMAX(V) \ + V(Smax, smax, IsInt8) \ + V(Smin, smin, IsInt8) \ + V(Umax, umax, IsUint8) \ + V(Umin, umin, IsUint8) + +#define VIXL_DEFINE_MASM_FUNC(MASM, ASM, RANGE) \ + void MacroAssembler::MASM(const Register& rd, \ + const Register& rn, \ + const Operand& op) { \ + VIXL_ASSERT(allow_macro_instructions_); \ + if (op.IsImmediate()) { \ + int64_t imm = op.GetImmediate(); \ + if (!RANGE(imm)) { \ + UseScratchRegisterScope temps(this); \ + Register temp = temps.AcquireSameSizeAs(rd); \ + Mov(temp, imm); \ + MASM(rd, rn, temp); \ + return; \ + } \ + } \ + SingleEmissionCheckScope guard(this); \ + ASM(rd, rn, op); \ + } +MINMAX(VIXL_DEFINE_MASM_FUNC) +#undef VIXL_DEFINE_MASM_FUNC + +void MacroAssembler::St2g(const Register& rt, const MemOperand& addr) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + st2g(rt, addr); +} + +void MacroAssembler::Stg(const Register& rt, const MemOperand& addr) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + stg(rt, addr); +} + +void MacroAssembler::Stgp(const Register& rt1, + const Register& rt2, + const MemOperand& addr) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + stgp(rt1, rt2, addr); +} + +void MacroAssembler::Stz2g(const Register& rt, const MemOperand& addr) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + stz2g(rt, addr); +} + +void MacroAssembler::Stzg(const Register& rt, const MemOperand& addr) { +
VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + stzg(rt, addr); +} + +void MacroAssembler::Ldg(const Register& rt, const MemOperand& addr) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + ldg(rt, addr); +} + +void MacroAssembler::Sub(const Register& rd, + const Register& rn, + const Operand& operand, + FlagsUpdate S) { + VIXL_ASSERT(allow_macro_instructions_); + if (operand.IsImmediate()) { + int64_t imm = operand.GetImmediate(); + if ((imm < 0) && (imm != std::numeric_limits<int64_t>::min()) && + IsImmAddSub(-imm)) { + AddSubMacro(rd, rn, -imm, S, ADD); + return; + } + } + AddSubMacro(rd, rn, operand, S, SUB); +} + + +void MacroAssembler::Subs(const Register& rd, + const Register& rn, + const Operand& operand) { + Sub(rd, rn, operand, SetFlags); +} + + +void MacroAssembler::Cmn(const Register& rn, const Operand& operand) { + VIXL_ASSERT(allow_macro_instructions_); + Adds(AppropriateZeroRegFor(rn), rn, operand); +} + + +void MacroAssembler::Cmp(const Register& rn, const Operand& operand) { + VIXL_ASSERT(allow_macro_instructions_); + Subs(AppropriateZeroRegFor(rn), rn, operand); +} + + +void MacroAssembler::Fcmp(const VRegister& fn, double value, FPTrapFlags trap) { + VIXL_ASSERT(allow_macro_instructions_); + // The worst case for size is: + // * 1 to materialise the constant, using literal pool if necessary + // * 1 instruction for fcmp{e} + MacroEmissionCheckScope guard(this); + if (value != 0.0) { + UseScratchRegisterScope temps(this); + VRegister tmp = temps.AcquireSameSizeAs(fn); + Fmov(tmp, value); + FPCompareMacro(fn, tmp, trap); + } else { + FPCompareMacro(fn, value, trap); + } +} + + +void MacroAssembler::Fcmpe(const VRegister& fn, double value) { + Fcmp(fn, value, EnableTrap); +} + + +void MacroAssembler::Fmov(VRegister vd, double imm) { + VIXL_ASSERT(allow_macro_instructions_); + // Floating point immediates are loaded through the literal pool. + MacroEmissionCheckScope guard(this); + uint64_t rawbits = DoubleToRawbits(imm); + + if (rawbits == 0) { + fmov(vd.D(), xzr); + return; + } + + if (vd.Is1H() || vd.Is4H() || vd.Is8H()) { + Fmov(vd, Float16(imm)); + return; + } + + if (vd.Is1S() || vd.Is2S() || vd.Is4S()) { + Fmov(vd, static_cast<float>(imm)); + return; + } + + VIXL_ASSERT(vd.Is1D() || vd.Is2D()); + if (IsImmFP64(rawbits)) { + fmov(vd, imm); + } else if (vd.IsScalar()) { + ldr(vd, + new Literal<double>(imm, + &literal_pool_, + RawLiteral::kDeletedOnPlacementByPool)); + } else { + // TODO: consider NEON support for load literal. + Movi(vd, rawbits); + } +} + + +void MacroAssembler::Fmov(VRegister vd, float imm) { + VIXL_ASSERT(allow_macro_instructions_); + // Floating point immediates are loaded through the literal pool. + MacroEmissionCheckScope guard(this); + uint32_t rawbits = FloatToRawbits(imm); + + if (rawbits == 0) { + fmov(vd.S(), wzr); + return; + } + + if (vd.Is1H() || vd.Is4H() || vd.Is8H()) { + Fmov(vd, Float16(imm)); + return; + } + + if (vd.Is1D() || vd.Is2D()) { + Fmov(vd, static_cast<double>(imm)); + return; + } + + VIXL_ASSERT(vd.Is1S() || vd.Is2S() || vd.Is4S()); + if (IsImmFP32(rawbits)) { + fmov(vd, imm); + } else if (vd.IsScalar()) { + ldr(vd, + new Literal<float>(imm, + &literal_pool_, + RawLiteral::kDeletedOnPlacementByPool)); + } else { + // TODO: consider NEON support for load literal.
+ Movi(vd, rawbits); + } +} + + +void MacroAssembler::Fmov(VRegister vd, Float16 imm) { + VIXL_ASSERT(allow_macro_instructions_); + MacroEmissionCheckScope guard(this); + + if (vd.Is1S() || vd.Is2S() || vd.Is4S()) { + Fmov(vd, FPToFloat(imm, kIgnoreDefaultNaN)); + return; + } + + if (vd.Is1D() || vd.Is2D()) { + Fmov(vd, FPToDouble(imm, kIgnoreDefaultNaN)); + return; + } + + VIXL_ASSERT(vd.Is1H() || vd.Is4H() || vd.Is8H()); + uint16_t rawbits = Float16ToRawbits(imm); + if (IsImmFP16(imm)) { + fmov(vd, imm); + } else { + if (vd.IsScalar()) { + if (rawbits == 0x0) { + fmov(vd, wzr); + } else { + // We can use movz instead of the literal pool. + UseScratchRegisterScope temps(this); + Register temp = temps.AcquireW(); + Mov(temp, rawbits); + Fmov(vd, temp); + } + } else { + // TODO: consider NEON support for load literal. + Movi(vd, static_cast<uint64_t>(rawbits)); + } + } +} + + +void MacroAssembler::Neg(const Register& rd, const Operand& operand) { + VIXL_ASSERT(allow_macro_instructions_); + if (operand.IsImmediate()) { + Mov(rd, -operand.GetImmediate()); + } else { + Sub(rd, AppropriateZeroRegFor(rd), operand); + } +} + + +void MacroAssembler::Negs(const Register& rd, const Operand& operand) { + VIXL_ASSERT(allow_macro_instructions_); + Subs(rd, AppropriateZeroRegFor(rd), operand); +} + + +bool MacroAssembler::TryOneInstrMoveImmediate(const Register& dst, + uint64_t imm) { + return OneInstrMoveImmediateHelper(this, dst, imm); +} + + +Operand MacroAssembler::MoveImmediateForShiftedOp(const Register& dst, + uint64_t imm, + PreShiftImmMode mode) { + int reg_size = dst.GetSizeInBits(); + + // Encode the immediate in a single move instruction, if possible. + if (TryOneInstrMoveImmediate(dst, imm)) { + // The move was successful; nothing to do here. + } else { + // Pre-shift the immediate to the least-significant bits of the register. + int shift_low = CountTrailingZeros(imm, reg_size); + if (mode == kLimitShiftForSP) { + // When applied to the stack pointer, the subsequent arithmetic operation + // can use the extend form to shift left by a maximum of four bits. Right + // shifts are not allowed, so we filter them out later before the new + // immediate is tested. + shift_low = std::min(shift_low, 4); + } + // TryOneInstrMoveImmediate handles `imm` with a value of zero, so shift_low + // must lie in the range [0, 63], and the shifts below are well-defined. + VIXL_ASSERT((shift_low >= 0) && (shift_low < 64)); + // imm_low = imm >> shift_low (with sign extension) + uint64_t imm_low = ExtractSignedBitfield64(63, shift_low, imm); + + // Pre-shift the immediate to the most-significant bits of the register, + // inserting set bits in the least-significant bits. + int shift_high = CountLeadingZeros(imm, reg_size); + VIXL_ASSERT((shift_high >= 0) && (shift_high < 64)); + uint64_t imm_high = (imm << shift_high) | GetUintMask(shift_high); + + if ((mode != kNoShift) && TryOneInstrMoveImmediate(dst, imm_low)) { + // The new immediate has been moved into the destination's low bits: + // return a new leftward-shifting operand. + return Operand(dst, LSL, shift_low); + } else if ((mode == kAnyShift) && TryOneInstrMoveImmediate(dst, imm_high)) { + // The new immediate has been moved into the destination's high bits: + // return a new rightward-shifting operand.
+ return Operand(dst, LSR, shift_high); + } else { + Mov(dst, imm); + } + } + return Operand(dst); +} + + +void MacroAssembler::Move(const GenericOperand& dst, + const GenericOperand& src) { + if (dst.Equals(src)) { + return; + } + + VIXL_ASSERT(dst.IsValid() && src.IsValid()); + + // The sizes of the operands must match exactly. + VIXL_ASSERT(dst.GetSizeInBits() == src.GetSizeInBits()); + VIXL_ASSERT(dst.GetSizeInBits() <= kXRegSize); + int operand_size = static_cast<int>(dst.GetSizeInBits()); + + if (dst.IsCPURegister() && src.IsCPURegister()) { + CPURegister dst_reg = dst.GetCPURegister(); + CPURegister src_reg = src.GetCPURegister(); + if (dst_reg.IsRegister() && src_reg.IsRegister()) { + Mov(Register(dst_reg), Register(src_reg)); + } else if (dst_reg.IsVRegister() && src_reg.IsVRegister()) { + Fmov(VRegister(dst_reg), VRegister(src_reg)); + } else { + if (dst_reg.IsRegister()) { + Fmov(Register(dst_reg), VRegister(src_reg)); + } else { + Fmov(VRegister(dst_reg), Register(src_reg)); + } + } + return; + } + + if (dst.IsMemOperand() && src.IsMemOperand()) { + UseScratchRegisterScope temps(this); + CPURegister temp = temps.AcquireCPURegisterOfSize(operand_size); + Ldr(temp, src.GetMemOperand()); + Str(temp, dst.GetMemOperand()); + return; + } + + if (dst.IsCPURegister()) { + Ldr(dst.GetCPURegister(), src.GetMemOperand()); + } else { + Str(src.GetCPURegister(), dst.GetMemOperand()); + } +} + + +void MacroAssembler::ComputeAddress(const Register& dst, + const MemOperand& mem_op) { + // We cannot handle pre-indexing or post-indexing. + VIXL_ASSERT(mem_op.GetAddrMode() == Offset); + Register base = mem_op.GetBaseRegister(); + if (mem_op.IsImmediateOffset()) { + Add(dst, base, mem_op.GetOffset()); + } else { + VIXL_ASSERT(mem_op.IsRegisterOffset()); + Register reg_offset = mem_op.GetRegisterOffset(); + Shift shift = mem_op.GetShift(); + Extend extend = mem_op.GetExtend(); + if (shift == NO_SHIFT) { + VIXL_ASSERT(extend != NO_EXTEND); + Add(dst, base, Operand(reg_offset, extend, mem_op.GetShiftAmount())); + } else { + VIXL_ASSERT(extend == NO_EXTEND); + Add(dst, base, Operand(reg_offset, shift, mem_op.GetShiftAmount())); + } + } +} + + +void MacroAssembler::AddSubMacro(const Register& rd, + const Register& rn, + const Operand& operand, + FlagsUpdate S, + AddSubOp op) { + // Worst case is add/sub immediate: + // * up to 4 instructions to materialise the constant + // * 1 instruction for add/sub + MacroEmissionCheckScope guard(this); + + if (operand.IsZero() && rd.Is(rn) && rd.Is64Bits() && rn.Is64Bits() && + (S == LeaveFlags)) { + // The instruction would be a nop. Avoid generating useless code. + return; + } + + if ((operand.IsImmediate() && !IsImmAddSub(operand.GetImmediate())) || + (rn.IsZero() && !operand.IsShiftedRegister()) || + (operand.IsShiftedRegister() && (operand.GetShift() == ROR))) { + UseScratchRegisterScope temps(this); + // Use `rd` as a temp, if we can. + temps.Include(rd); + // We read `rn` after evaluating `operand`. + temps.Exclude(rn); + // It doesn't matter if `operand` is in `temps` (e.g. because it alises + // `rd`) because we don't need it after it is evaluated. + Register temp = temps.AcquireSameSizeAs(rn); + if (operand.IsImmediate()) { + PreShiftImmMode mode = kAnyShift; + + // If the destination or source register is the stack pointer, we can + // only pre-shift the immediate right by values supported in the add/sub + // extend encoding. + if (rd.IsSP()) { + // If the destination is SP and flags will be set, we can't pre-shift + // the immediate at all.
+ mode = (S == SetFlags) ? kNoShift : kLimitShiftForSP; + } else if (rn.IsSP()) { + mode = kLimitShiftForSP; + } + + Operand imm_operand = + MoveImmediateForShiftedOp(temp, operand.GetImmediate(), mode); + AddSub(rd, rn, imm_operand, S, op); + } else { + Mov(temp, operand); + AddSub(rd, rn, temp, S, op); + } + } else { + AddSub(rd, rn, operand, S, op); + } +} + + +void MacroAssembler::Adc(const Register& rd, + const Register& rn, + const Operand& operand) { + VIXL_ASSERT(allow_macro_instructions_); + AddSubWithCarryMacro(rd, rn, operand, LeaveFlags, ADC); +} + + +void MacroAssembler::Adcs(const Register& rd, + const Register& rn, + const Operand& operand) { + VIXL_ASSERT(allow_macro_instructions_); + AddSubWithCarryMacro(rd, rn, operand, SetFlags, ADC); +} + + +void MacroAssembler::Sbc(const Register& rd, + const Register& rn, + const Operand& operand) { + VIXL_ASSERT(allow_macro_instructions_); + AddSubWithCarryMacro(rd, rn, operand, LeaveFlags, SBC); +} + + +void MacroAssembler::Sbcs(const Register& rd, + const Register& rn, + const Operand& operand) { + VIXL_ASSERT(allow_macro_instructions_); + AddSubWithCarryMacro(rd, rn, operand, SetFlags, SBC); +} + + +void MacroAssembler::Ngc(const Register& rd, const Operand& operand) { + VIXL_ASSERT(allow_macro_instructions_); + Register zr = AppropriateZeroRegFor(rd); + Sbc(rd, zr, operand); +} + + +void MacroAssembler::Ngcs(const Register& rd, const Operand& operand) { + VIXL_ASSERT(allow_macro_instructions_); + Register zr = AppropriateZeroRegFor(rd); + Sbcs(rd, zr, operand); +} + + +void MacroAssembler::AddSubWithCarryMacro(const Register& rd, + const Register& rn, + const Operand& operand, + FlagsUpdate S, + AddSubWithCarryOp op) { + VIXL_ASSERT(rd.GetSizeInBits() == rn.GetSizeInBits()); + // Worst case is addc/subc immediate: + // * up to 4 instructions to materialise the constant + // * 1 instruction for add/sub + MacroEmissionCheckScope guard(this); + UseScratchRegisterScope temps(this); + // Use `rd` as a temp, if we can. + temps.Include(rd); + // We read `rn` after evaluating `operand`. + temps.Exclude(rn); + // It doesn't matter if `operand` is in `temps` (e.g. because it alises `rd`) + // because we don't need it after it is evaluated. + + if (operand.IsImmediate() || + (operand.IsShiftedRegister() && (operand.GetShift() == ROR))) { + // Add/sub with carry (immediate or ROR shifted register.) + Register temp = temps.AcquireSameSizeAs(rn); + Mov(temp, operand); + AddSubWithCarry(rd, rn, Operand(temp), S, op); + } else if (operand.IsShiftedRegister() && (operand.GetShiftAmount() != 0)) { + // Add/sub with carry (shifted register). + VIXL_ASSERT(operand.GetRegister().GetSizeInBits() == rd.GetSizeInBits()); + VIXL_ASSERT(operand.GetShift() != ROR); + VIXL_ASSERT( + IsUintN(rd.GetSizeInBits() == kXRegSize ? kXRegSizeLog2 : kWRegSizeLog2, + operand.GetShiftAmount())); + Register temp = temps.AcquireSameSizeAs(rn); + EmitShift(temp, + operand.GetRegister(), + operand.GetShift(), + operand.GetShiftAmount()); + AddSubWithCarry(rd, rn, Operand(temp), S, op); + } else if (operand.IsExtendedRegister()) { + // Add/sub with carry (extended register). + VIXL_ASSERT(operand.GetRegister().GetSizeInBits() <= rd.GetSizeInBits()); + // Add/sub extended supports a shift <= 4. We want to support exactly the + // same modes. 
+ VIXL_ASSERT(operand.GetShiftAmount() <= 4); + VIXL_ASSERT( + operand.GetRegister().Is64Bits() || + ((operand.GetExtend() != UXTX) && (operand.GetExtend() != SXTX))); + Register temp = temps.AcquireSameSizeAs(rn); + EmitExtendShift(temp, + operand.GetRegister(), + operand.GetExtend(), + operand.GetShiftAmount()); + AddSubWithCarry(rd, rn, Operand(temp), S, op); + } else { + // The addressing mode is directly supported by the instruction. + AddSubWithCarry(rd, rn, operand, S, op); + } +} + + +void MacroAssembler::Rmif(const Register& xn, + unsigned shift, + StatusFlags flags) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + rmif(xn, shift, flags); +} + + +void MacroAssembler::Setf8(const Register& wn) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + setf8(wn); +} + + +void MacroAssembler::Setf16(const Register& wn) { + VIXL_ASSERT(allow_macro_instructions_); + SingleEmissionCheckScope guard(this); + setf16(wn); +} + + +#define DEFINE_FUNCTION(FN, REGTYPE, REG, OP) \ + void MacroAssembler::FN(const REGTYPE REG, const MemOperand& addr) { \ + VIXL_ASSERT(allow_macro_instructions_); \ + LoadStoreMacro(REG, addr, OP); \ + } +LS_MACRO_LIST(DEFINE_FUNCTION) +#undef DEFINE_FUNCTION + + +void MacroAssembler::LoadStoreMacro(const CPURegister& rt, + const MemOperand& addr, + LoadStoreOp op) { + VIXL_ASSERT(addr.IsImmediateOffset() || addr.IsImmediatePostIndex() || + addr.IsImmediatePreIndex() || addr.IsRegisterOffset()); + + // Worst case is ldr/str pre/post index: + // * 1 instruction for ldr/str + // * up to 4 instructions to materialise the constant + // * 1 instruction to update the base + MacroEmissionCheckScope guard(this); + + int64_t offset = addr.GetOffset(); + unsigned access_size = CalcLSDataSize(op); + + // Check if an immediate offset fits in the immediate field of the + // appropriate instruction. If not, emit two instructions to perform + // the operation. + if (addr.IsImmediateOffset() && !IsImmLSScaled(offset, access_size) && + !IsImmLSUnscaled(offset)) { + // Immediate offset that can't be encoded using unsigned or unscaled + // addressing modes. + UseScratchRegisterScope temps(this); + Register temp = temps.AcquireSameSizeAs(addr.GetBaseRegister()); + Mov(temp, addr.GetOffset()); + LoadStore(rt, MemOperand(addr.GetBaseRegister(), temp), op); + } else if (addr.IsImmediatePostIndex() && !IsImmLSUnscaled(offset)) { + // Post-index beyond unscaled addressing range. + LoadStore(rt, MemOperand(addr.GetBaseRegister()), op); + Add(addr.GetBaseRegister(), addr.GetBaseRegister(), Operand(offset)); + } else if (addr.IsImmediatePreIndex() && !IsImmLSUnscaled(offset)) { + // Pre-index beyond unscaled addressing range. + Add(addr.GetBaseRegister(), addr.GetBaseRegister(), Operand(offset)); + LoadStore(rt, MemOperand(addr.GetBaseRegister()), op); + } else { + // Encodable in one load/store instruction. + LoadStore(rt, addr, op); + } +} + + +#define DEFINE_FUNCTION(FN, REGTYPE, REG, REG2, OP) \ + void MacroAssembler::FN(const REGTYPE REG, \ + const REGTYPE REG2, \ + const MemOperand& addr) { \ + VIXL_ASSERT(allow_macro_instructions_); \ + LoadStorePairMacro(REG, REG2, addr, OP); \ + } +LSPAIR_MACRO_LIST(DEFINE_FUNCTION) +#undef DEFINE_FUNCTION + +void MacroAssembler::LoadStorePairMacro(const CPURegister& rt, + const CPURegister& rt2, + const MemOperand& addr, + LoadStorePairOp op) { + // TODO(all): Should we support register offset for load-store-pair? 
+ VIXL_ASSERT(!addr.IsRegisterOffset()); + // Worst case is ldp/stp immediate: + // * 1 instruction for ldp/stp + // * up to 4 instructions to materialise the constant + // * 1 instruction to update the base + MacroEmissionCheckScope guard(this); + + int64_t offset = addr.GetOffset(); + unsigned access_size = CalcLSPairDataSize(op); + + // Check if the offset fits in the immediate field of the appropriate + // instruction. If not, emit two instructions to perform the operation. + if (IsImmLSPair(offset, access_size)) { + // Encodable in one load/store pair instruction. + LoadStorePair(rt, rt2, addr, op); + } else { + Register base = addr.GetBaseRegister(); + if (addr.IsImmediateOffset()) { + UseScratchRegisterScope temps(this); + Register temp = temps.AcquireSameSizeAs(base); + Add(temp, base, offset); + LoadStorePair(rt, rt2, MemOperand(temp), op); + } else if (addr.IsImmediatePostIndex()) { + LoadStorePair(rt, rt2, MemOperand(base), op); + Add(base, base, offset); + } else { + VIXL_ASSERT(addr.IsImmediatePreIndex()); + Add(base, base, offset); + LoadStorePair(rt, rt2, MemOperand(base), op); + } + } +} + + +void MacroAssembler::Prfm(PrefetchOperation op, const MemOperand& addr) { + MacroEmissionCheckScope guard(this); + + // There are no pre- or post-index modes for prfm. + VIXL_ASSERT(addr.IsImmediateOffset() || addr.IsRegisterOffset()); + + // The access size is implicitly 8 bytes for all prefetch operations. + unsigned size = kXRegSizeInBytesLog2; + + // Check if an immediate offset fits in the immediate field of the + // appropriate instruction. If not, emit two instructions to perform + // the operation. + if (addr.IsImmediateOffset() && !IsImmLSScaled(addr.GetOffset(), size) && + !IsImmLSUnscaled(addr.GetOffset())) { + // Immediate offset that can't be encoded using unsigned or unscaled + // addressing modes. + UseScratchRegisterScope temps(this); + Register temp = temps.AcquireSameSizeAs(addr.GetBaseRegister()); + Mov(temp, addr.GetOffset()); + Prefetch(op, MemOperand(addr.GetBaseRegister(), temp)); + } else { + // Simple register-offsets are encodable in one instruction. + Prefetch(op, addr); + } +} + + +void MacroAssembler::Push(const CPURegister& src0, + const CPURegister& src1, + const CPURegister& src2, + const CPURegister& src3) { + VIXL_ASSERT(allow_macro_instructions_); + VIXL_ASSERT(AreSameSizeAndType(src0, src1, src2, src3)); + VIXL_ASSERT(src0.IsValid()); + + int count = 1 + src1.IsValid() + src2.IsValid() + src3.IsValid(); + int size = src0.GetSizeInBytes(); + + PrepareForPush(count, size); + PushHelper(count, size, src0, src1, src2, src3); +} + + +void MacroAssembler::Pop(const CPURegister& dst0, + const CPURegister& dst1, + const CPURegister& dst2, + const CPURegister& dst3) { + // It is not valid to pop into the same register more than once in one + // instruction, not even into the zero register. 
+ VIXL_ASSERT(allow_macro_instructions_); + VIXL_ASSERT(!AreAliased(dst0, dst1, dst2, dst3)); + VIXL_ASSERT(AreSameSizeAndType(dst0, dst1, dst2, dst3)); + VIXL_ASSERT(dst0.IsValid()); + + int count = 1 + dst1.IsValid() + dst2.IsValid() + dst3.IsValid(); + int size = dst0.GetSizeInBytes(); + + PrepareForPop(count, size); + PopHelper(count, size, dst0, dst1, dst2, dst3); +} + + +void MacroAssembler::PushCPURegList(CPURegList registers) { + VIXL_ASSERT(!registers.Overlaps(*GetScratchRegisterList())); + VIXL_ASSERT(!registers.Overlaps(*GetScratchVRegisterList())); + VIXL_ASSERT(allow_macro_instructions_); + + int reg_size = registers.GetRegisterSizeInBytes(); + PrepareForPush(registers.GetCount(), reg_size); + + // Bump the stack pointer and store two registers at the bottom. + int size = registers.GetTotalSizeInBytes(); + const CPURegister& bottom_0 = registers.PopLowestIndex(); + const CPURegister& bottom_1 = registers.PopLowestIndex(); + if (bottom_0.IsValid() && bottom_1.IsValid()) { + Stp(bottom_0, bottom_1, MemOperand(StackPointer(), -size, PreIndex)); + } else if (bottom_0.IsValid()) { + Str(bottom_0, MemOperand(StackPointer(), -size, PreIndex)); + } + + int offset = 2 * reg_size; + while (!registers.IsEmpty()) { + const CPURegister& src0 = registers.PopLowestIndex(); + const CPURegister& src1 = registers.PopLowestIndex(); + if (src1.IsValid()) { + Stp(src0, src1, MemOperand(StackPointer(), offset)); + } else { + Str(src0, MemOperand(StackPointer(), offset)); + } + offset += 2 * reg_size; + } +} + + +void MacroAssembler::PopCPURegList(CPURegList registers) { + VIXL_ASSERT(!registers.Overlaps(*GetScratchRegisterList())); + VIXL_ASSERT(!registers.Overlaps(*GetScratchVRegisterList())); + VIXL_ASSERT(allow_macro_instructions_); + + int reg_size = registers.GetRegisterSizeInBytes(); + PrepareForPop(registers.GetCount(), reg_size); + + + int size = registers.GetTotalSizeInBytes(); + const CPURegister& bottom_0 = registers.PopLowestIndex(); + const CPURegister& bottom_1 = registers.PopLowestIndex(); + + int offset = 2 * reg_size; + while (!registers.IsEmpty()) { + const CPURegister& dst0 = registers.PopLowestIndex(); + const CPURegister& dst1 = registers.PopLowestIndex(); + if (dst1.IsValid()) { + Ldp(dst0, dst1, MemOperand(StackPointer(), offset)); + } else { + Ldr(dst0, MemOperand(StackPointer(), offset)); + } + offset += 2 * reg_size; + } + + // Load the two registers at the bottom and drop the stack pointer. + if (bottom_0.IsValid() && bottom_1.IsValid()) { + Ldp(bottom_0, bottom_1, MemOperand(StackPointer(), size, PostIndex)); + } else if (bottom_0.IsValid()) { + Ldr(bottom_0, MemOperand(StackPointer(), size, PostIndex)); + } +} + + +void MacroAssembler::PushMultipleTimes(int count, Register src) { + VIXL_ASSERT(allow_macro_instructions_); + int size = src.GetSizeInBytes(); + + PrepareForPush(count, size); + // Push up to four registers at a time if possible because if the current + // stack pointer is sp and the register size is 32, registers must be pushed + // in blocks of four in order to maintain the 16-byte alignment for sp. 
+ while (count >= 4) { + PushHelper(4, size, src, src, src, src); + count -= 4; + } + if (count >= 2) { + PushHelper(2, size, src, src, NoReg, NoReg); + count -= 2; + } + if (count == 1) { + PushHelper(1, size, src, NoReg, NoReg, NoReg); + count -= 1; + } + VIXL_ASSERT(count == 0); +} + + +void MacroAssembler::PushHelper(int count, + int size, + const CPURegister& src0, + const CPURegister& src1, + const CPURegister& src2, + const CPURegister& src3) { + // Ensure that we don't unintentionally modify scratch or debug registers. + // Worst case for size is 2 stp. + ExactAssemblyScope scope(this, + 2 * kInstructionSize, + ExactAssemblyScope::kMaximumSize); + + VIXL_ASSERT(AreSameSizeAndType(src0, src1, src2, src3)); + VIXL_ASSERT(size == src0.GetSizeInBytes()); + + // When pushing multiple registers, the store order is chosen such that + // Push(a, b) is equivalent to Push(a) followed by Push(b). + switch (count) { + case 1: + VIXL_ASSERT(src1.IsNone() && src2.IsNone() && src3.IsNone()); + str(src0, MemOperand(StackPointer(), -1 * size, PreIndex)); + break; + case 2: + VIXL_ASSERT(src2.IsNone() && src3.IsNone()); + stp(src1, src0, MemOperand(StackPointer(), -2 * size, PreIndex)); + break; + case 3: + VIXL_ASSERT(src3.IsNone()); + stp(src2, src1, MemOperand(StackPointer(), -3 * size, PreIndex)); + str(src0, MemOperand(StackPointer(), 2 * size)); + break; + case 4: + // Skip over 4 * size, then fill in the gap. This allows four W registers + // to be pushed using sp, whilst maintaining 16-byte alignment for sp at + // all times. + stp(src3, src2, MemOperand(StackPointer(), -4 * size, PreIndex)); + stp(src1, src0, MemOperand(StackPointer(), 2 * size)); + break; + default: + VIXL_UNREACHABLE(); + } +} + + +void MacroAssembler::PopHelper(int count, + int size, + const CPURegister& dst0, + const CPURegister& dst1, + const CPURegister& dst2, + const CPURegister& dst3) { + // Ensure that we don't unintentionally modify scratch or debug registers. + // Worst case for size is 2 ldp. + ExactAssemblyScope scope(this, + 2 * kInstructionSize, + ExactAssemblyScope::kMaximumSize); + + VIXL_ASSERT(AreSameSizeAndType(dst0, dst1, dst2, dst3)); + VIXL_ASSERT(size == dst0.GetSizeInBytes()); + + // When popping multiple registers, the load order is chosen such that + // Pop(a, b) is equivalent to Pop(a) followed by Pop(b). + switch (count) { + case 1: + VIXL_ASSERT(dst1.IsNone() && dst2.IsNone() && dst3.IsNone()); + ldr(dst0, MemOperand(StackPointer(), 1 * size, PostIndex)); + break; + case 2: + VIXL_ASSERT(dst2.IsNone() && dst3.IsNone()); + ldp(dst0, dst1, MemOperand(StackPointer(), 2 * size, PostIndex)); + break; + case 3: + VIXL_ASSERT(dst3.IsNone()); + ldr(dst2, MemOperand(StackPointer(), 2 * size)); + ldp(dst0, dst1, MemOperand(StackPointer(), 3 * size, PostIndex)); + break; + case 4: + // Load the higher addresses first, then load the lower addresses and skip + // the whole block in the second instruction. This allows four W registers + // to be popped using sp, whilst maintaining 16-byte alignment for sp at + // all times. + ldp(dst2, dst3, MemOperand(StackPointer(), 2 * size)); + ldp(dst0, dst1, MemOperand(StackPointer(), 4 * size, PostIndex)); + break; + default: + VIXL_UNREACHABLE(); + } +} + + +void MacroAssembler::PrepareForPush(int count, int size) { + if (sp.Is(StackPointer())) { + // If the current stack pointer is sp, then it must be aligned to 16 bytes + // on entry and the total size of the specified registers must also be a + // multiple of 16 bytes. 
+ VIXL_ASSERT((count * size) % 16 == 0); + } else { + // Even if the current stack pointer is not the system stack pointer (sp), + // the system stack pointer will still be modified in order to comply with + // ABI rules about accessing memory below the system stack pointer. + BumpSystemStackPointer(count * size); + } +} + + +void MacroAssembler::PrepareForPop(int count, int size) { + USE(count, size); + if (sp.Is(StackPointer())) { + // If the current stack pointer is sp, then it must be aligned to 16 bytes + // on entry and the total size of the specified registers must also be a + // multiple of 16 bytes. + VIXL_ASSERT((count * size) % 16 == 0); + } +} + +void MacroAssembler::Poke(const Register& src, const Operand& offset) { + VIXL_ASSERT(allow_macro_instructions_); + if (offset.IsImmediate()) { + VIXL_ASSERT(offset.GetImmediate() >= 0); + } + + Str(src, MemOperand(StackPointer(), offset)); +} + + +void MacroAssembler::Peek(const Register& dst, const Operand& offset) { + VIXL_ASSERT(allow_macro_instructions_); + if (offset.IsImmediate()) { + VIXL_ASSERT(offset.GetImmediate() >= 0); + } + + Ldr(dst, MemOperand(StackPointer(), offset)); +} + + +void MacroAssembler::Claim(const Operand& size) { + VIXL_ASSERT(allow_macro_instructions_); + + if (size.IsZero()) { + return; + } + + if (size.IsImmediate()) { + VIXL_ASSERT(size.GetImmediate() > 0); + if (sp.Is(StackPointer())) { + VIXL_ASSERT((size.GetImmediate() % 16) == 0); + } + } + + if (!sp.Is(StackPointer())) { + BumpSystemStackPointer(size); + } + + Sub(StackPointer(), StackPointer(), size); +} + + +void MacroAssembler::Drop(const Operand& size) { + VIXL_ASSERT(allow_macro_instructions_); + + if (size.IsZero()) { + return; + } + + if (size.IsImmediate()) { + VIXL_ASSERT(size.GetImmediate() > 0); + if (sp.Is(StackPointer())) { + VIXL_ASSERT((size.GetImmediate() % 16) == 0); + } + } + + Add(StackPointer(), StackPointer(), size); +} + + +void MacroAssembler::PushCalleeSavedRegisters() { + // Ensure that the macro-assembler doesn't use any scratch registers. + // 10 stp will be emitted. + // TODO(all): Should we use GetCalleeSaved and SavedFP. + ExactAssemblyScope scope(this, 10 * kInstructionSize); + + // This method must not be called unless the current stack pointer is sp. + VIXL_ASSERT(sp.Is(StackPointer())); + + MemOperand tos(sp, -2 * static_cast<int>(kXRegSizeInBytes), PreIndex); + + stp(x29, x30, tos); + stp(x27, x28, tos); + stp(x25, x26, tos); + stp(x23, x24, tos); + stp(x21, x22, tos); + stp(x19, x20, tos); + + stp(d14, d15, tos); + stp(d12, d13, tos); + stp(d10, d11, tos); + stp(d8, d9, tos); +} + + +void MacroAssembler::PopCalleeSavedRegisters() { + // Ensure that the macro-assembler doesn't use any scratch registers. + // 10 ldp will be emitted. + // TODO(all): Should we use GetCalleeSaved and SavedFP. + ExactAssemblyScope scope(this, 10 * kInstructionSize); + + // This method must not be called unless the current stack pointer is sp.
+ VIXL_ASSERT(sp.Is(StackPointer())); + + MemOperand tos(sp, 2 * kXRegSizeInBytes, PostIndex); + + ldp(d8, d9, tos); + ldp(d10, d11, tos); + ldp(d12, d13, tos); + ldp(d14, d15, tos); + + ldp(x19, x20, tos); + ldp(x21, x22, tos); + ldp(x23, x24, tos); + ldp(x25, x26, tos); + ldp(x27, x28, tos); + ldp(x29, x30, tos); +} + +void MacroAssembler::LoadCPURegList(CPURegList registers, + const MemOperand& src) { + LoadStoreCPURegListHelper(kLoad, registers, src); +} + +void MacroAssembler::StoreCPURegList(CPURegList registers, + const MemOperand& dst) { + LoadStoreCPURegListHelper(kStore, registers, dst); +} + + +void MacroAssembler::LoadStoreCPURegListHelper(LoadStoreCPURegListAction op, + CPURegList registers, + const MemOperand& mem) { + // We do not handle pre-indexing or post-indexing. + VIXL_ASSERT(!(mem.IsPreIndex() || mem.IsPostIndex())); + VIXL_ASSERT(!registers.Overlaps(tmp_list_)); + VIXL_ASSERT(!registers.Overlaps(v_tmp_list_)); + VIXL_ASSERT(!registers.Overlaps(p_tmp_list_)); + VIXL_ASSERT(!registers.IncludesAliasOf(sp)); + + UseScratchRegisterScope temps(this); + + MemOperand loc = BaseMemOperandForLoadStoreCPURegList(registers, mem, &temps); + const int reg_size = registers.GetRegisterSizeInBytes(); + + VIXL_ASSERT(IsPowerOf2(reg_size)); + + // Since we are operating on register pairs, we would like to align on double + // the standard size; on the other hand, we don't want to insert an extra + // operation, which will happen if the number of registers is even. Note that + // the alignment of the base pointer is unknown here, but we assume that it + // is more likely to be aligned. + if (((loc.GetOffset() & (2 * reg_size - 1)) != 0) && + ((registers.GetCount() % 2) != 0)) { + if (op == kStore) { + Str(registers.PopLowestIndex(), loc); + } else { + VIXL_ASSERT(op == kLoad); + Ldr(registers.PopLowestIndex(), loc); + } + loc.AddOffset(reg_size); + } + while (registers.GetCount() >= 2) { + const CPURegister& dst0 = registers.PopLowestIndex(); + const CPURegister& dst1 = registers.PopLowestIndex(); + if (op == kStore) { + Stp(dst0, dst1, loc); + } else { + VIXL_ASSERT(op == kLoad); + Ldp(dst0, dst1, loc); + } + loc.AddOffset(2 * reg_size); + } + if (!registers.IsEmpty()) { + if (op == kStore) { + Str(registers.PopLowestIndex(), loc); + } else { + VIXL_ASSERT(op == kLoad); + Ldr(registers.PopLowestIndex(), loc); + } + } +} + +MemOperand MacroAssembler::BaseMemOperandForLoadStoreCPURegList( + const CPURegList& registers, + const MemOperand& mem, + UseScratchRegisterScope* scratch_scope) { + // If necessary, pre-compute the base address for the accesses. + if (mem.IsRegisterOffset()) { + Register reg_base = scratch_scope->AcquireX(); + ComputeAddress(reg_base, mem); + return MemOperand(reg_base); + + } else if (mem.IsImmediateOffset()) { + int reg_size = registers.GetRegisterSizeInBytes(); + int total_size = registers.GetTotalSizeInBytes(); + int64_t min_offset = mem.GetOffset(); + int64_t max_offset = + mem.GetOffset() + std::max(0, total_size - 2 * reg_size); + if ((registers.GetCount() >= 2) && + (!Assembler::IsImmLSPair(min_offset, WhichPowerOf2(reg_size)) || + !Assembler::IsImmLSPair(max_offset, WhichPowerOf2(reg_size)))) { + Register reg_base = scratch_scope->AcquireX(); + ComputeAddress(reg_base, mem); + return MemOperand(reg_base); + } + } + + return mem; +} + +void MacroAssembler::BumpSystemStackPointer(const Operand& space) { + VIXL_ASSERT(!sp.Is(StackPointer())); + // TODO: Several callers rely on this not using scratch registers, so we use + // the assembler directly here. 
However, this means that large immediate + // values of 'space' cannot be handled. + ExactAssemblyScope scope(this, kInstructionSize); + sub(sp, StackPointer(), space); +} + + +// TODO(all): Fix printf for NEON and SVE registers. + +// This is the main Printf implementation. All callee-saved registers are +// preserved, but NZCV and the caller-saved registers may be clobbered. +void MacroAssembler::PrintfNoPreserve(const char* format, + const CPURegister& arg0, + const CPURegister& arg1, + const CPURegister& arg2, + const CPURegister& arg3) { + // We cannot handle a caller-saved stack pointer. It doesn't make much sense + // in most cases anyway, so this restriction shouldn't be too serious. + VIXL_ASSERT(!kCallerSaved.IncludesAliasOf(StackPointer())); + + // The provided arguments, and their proper PCS registers. + CPURegister args[kPrintfMaxArgCount] = {arg0, arg1, arg2, arg3}; + CPURegister pcs[kPrintfMaxArgCount]; + + int arg_count = kPrintfMaxArgCount; + + // The PCS varargs registers for printf. Note that x0 is used for the printf + // format string. + static const CPURegList kPCSVarargs = + CPURegList(CPURegister::kRegister, kXRegSize, 1, arg_count); + static const CPURegList kPCSVarargsV = + CPURegList(CPURegister::kVRegister, kDRegSize, 0, arg_count - 1); + + // We can use caller-saved registers as scratch values, except for the + // arguments and the PCS registers where they might need to go. + UseScratchRegisterScope temps(this); + temps.Include(kCallerSaved); + temps.Include(kCallerSavedV); + temps.Exclude(kPCSVarargs); + temps.Exclude(kPCSVarargsV); + temps.Exclude(arg0, arg1, arg2, arg3); + + // Copies of the arg lists that we can iterate through. + CPURegList pcs_varargs = kPCSVarargs; + CPURegList pcs_varargs_fp = kPCSVarargsV; + + // Place the arguments. There are lots of clever tricks and optimizations we + // could use here, but Printf is a debug tool so instead we just try to keep + // it simple: Move each input that isn't already in the right place to a + // scratch register, then move everything back. + for (unsigned i = 0; i < kPrintfMaxArgCount; i++) { + // Work out the proper PCS register for this argument. + if (args[i].IsRegister()) { + pcs[i] = pcs_varargs.PopLowestIndex().X(); + // We might only need a W register here. We need to know the size of the + // argument so we can properly encode it for the simulator call. + if (args[i].Is32Bits()) pcs[i] = pcs[i].W(); + } else if (args[i].IsVRegister()) { + // In C, floats are always cast to doubles for varargs calls. + pcs[i] = pcs_varargs_fp.PopLowestIndex().D(); + } else { + VIXL_ASSERT(args[i].IsNone()); + arg_count = i; + break; + } + + // If the argument is already in the right place, leave it where it is. + if (args[i].Aliases(pcs[i])) continue; + + // Otherwise, if the argument is in a PCS argument register, allocate an + // appropriate scratch register and then move it out of the way. + if (kPCSVarargs.IncludesAliasOf(args[i]) || + kPCSVarargsV.IncludesAliasOf(args[i])) { + if (args[i].IsRegister()) { + Register old_arg = Register(args[i]); + Register new_arg = temps.AcquireSameSizeAs(old_arg); + Mov(new_arg, old_arg); + args[i] = new_arg; + } else { + VRegister old_arg(args[i]); + VRegister new_arg = temps.AcquireSameSizeAs(old_arg); + Fmov(new_arg, old_arg); + args[i] = new_arg; + } + } + } + + // Do a second pass to move values into their final positions and perform any + // conversions that may be required. 
+ for (int i = 0; i < arg_count; i++) { + VIXL_ASSERT(pcs[i].GetType() == args[i].GetType()); + if (pcs[i].IsRegister()) { + Mov(Register(pcs[i]), Register(args[i]), kDiscardForSameWReg); + } else { + VIXL_ASSERT(pcs[i].IsVRegister()); + if (pcs[i].GetSizeInBits() == args[i].GetSizeInBits()) { + Fmov(VRegister(pcs[i]), VRegister(args[i])); + } else { + Fcvt(VRegister(pcs[i]), VRegister(args[i])); + } + } + } + + // Load the format string into x0, as per the procedure-call standard. + // + // To make the code as portable as possible, the format string is encoded + // directly in the instruction stream. It might be cleaner to encode it in a + // literal pool, but since Printf is usually used for debugging, it is + // beneficial for it to be minimally dependent on other features. + temps.Exclude(x0); + Label format_address; + Adr(x0, &format_address); + + // Emit the format string directly in the instruction stream. + { + BlockPoolsScope scope(this); + // Data emitted: + // branch + // strlen(format) + 1 (includes null termination) + // padding to next instruction + // unreachable + EmissionCheckScope guard(this, + AlignUp(strlen(format) + 1, kInstructionSize) + + 2 * kInstructionSize); + Label after_data; + B(&after_data); + Bind(&format_address); + EmitString(format); + Unreachable(); + Bind(&after_data); + } + + // We don't pass any arguments on the stack, but we still need to align the C + // stack pointer to a 16-byte boundary for PCS compliance. + if (!sp.Is(StackPointer())) { + Bic(sp, StackPointer(), 0xf); + } + + // Actually call printf. This part needs special handling for the simulator, + // since the system printf function will use a different instruction set and + // the procedure-call standard will not be compatible. + if (generate_simulator_code_) { + ExactAssemblyScope scope(this, kPrintfLength); + hlt(kPrintfOpcode); + dc32(arg_count); // kPrintfArgCountOffset + + // Determine the argument pattern. + uint32_t arg_pattern_list = 0; + for (int i = 0; i < arg_count; i++) { + uint32_t arg_pattern; + if (pcs[i].IsRegister()) { + arg_pattern = pcs[i].Is32Bits() ? kPrintfArgW : kPrintfArgX; + } else { + VIXL_ASSERT(pcs[i].Is64Bits()); + arg_pattern = kPrintfArgD; + } + VIXL_ASSERT(arg_pattern < (1 << kPrintfArgPatternBits)); + arg_pattern_list |= (arg_pattern << (kPrintfArgPatternBits * i)); + } + dc32(arg_pattern_list); // kPrintfArgPatternListOffset + } else { + Register tmp = temps.AcquireX(); + Mov(tmp, reinterpret_cast<uintptr_t>(printf)); + Blr(tmp); + } +} + + +void MacroAssembler::Printf(const char* format, + CPURegister arg0, + CPURegister arg1, + CPURegister arg2, + CPURegister arg3) { + // We can only print sp if it is the current stack pointer. + if (!sp.Is(StackPointer())) { + VIXL_ASSERT(!sp.Aliases(arg0)); + VIXL_ASSERT(!sp.Aliases(arg1)); + VIXL_ASSERT(!sp.Aliases(arg2)); + VIXL_ASSERT(!sp.Aliases(arg3)); + } + + // Make sure that the macro assembler doesn't try to use any of our arguments + // as scratch registers. + UseScratchRegisterScope exclude_all(this); + exclude_all.ExcludeAll(); + + // Preserve all caller-saved registers as well as NZCV. + // If sp is the stack pointer, PushCPURegList asserts that the size of each + // list is a multiple of 16 bytes. + PushCPURegList(kCallerSaved); + PushCPURegList(kCallerSavedV); + + { + UseScratchRegisterScope temps(this); + // We can use caller-saved registers as scratch values (except for argN).
+ temps.Include(kCallerSaved); + temps.Include(kCallerSavedV); + temps.Exclude(arg0, arg1, arg2, arg3); + + // If any of the arguments are the current stack pointer, allocate a new + // register for them, and adjust the value to compensate for pushing the + // caller-saved registers. + bool arg0_sp = StackPointer().Aliases(arg0); + bool arg1_sp = StackPointer().Aliases(arg1); + bool arg2_sp = StackPointer().Aliases(arg2); + bool arg3_sp = StackPointer().Aliases(arg3); + if (arg0_sp || arg1_sp || arg2_sp || arg3_sp) { + // Allocate a register to hold the original stack pointer value, to pass + // to PrintfNoPreserve as an argument. + Register arg_sp = temps.AcquireX(); + Add(arg_sp, + StackPointer(), + kCallerSaved.GetTotalSizeInBytes() + + kCallerSavedV.GetTotalSizeInBytes()); + if (arg0_sp) arg0 = Register(arg_sp.GetCode(), arg0.GetSizeInBits()); + if (arg1_sp) arg1 = Register(arg_sp.GetCode(), arg1.GetSizeInBits()); + if (arg2_sp) arg2 = Register(arg_sp.GetCode(), arg2.GetSizeInBits()); + if (arg3_sp) arg3 = Register(arg_sp.GetCode(), arg3.GetSizeInBits()); + } + + // Preserve NZCV. + Register tmp = temps.AcquireX(); + Mrs(tmp, NZCV); + Push(tmp, xzr); + temps.Release(tmp); + + PrintfNoPreserve(format, arg0, arg1, arg2, arg3); + + // Restore NZCV. + tmp = temps.AcquireX(); + Pop(xzr, tmp); + Msr(NZCV, tmp); + temps.Release(tmp); + } + + PopCPURegList(kCallerSavedV); + PopCPURegList(kCallerSaved); +} + +void MacroAssembler::Trace(TraceParameters parameters, TraceCommand command) { + VIXL_ASSERT(allow_macro_instructions_); + + if (generate_simulator_code_) { + // The arguments to the trace pseudo instruction need to be contiguous in + // memory, so make sure we don't try to emit a literal pool. + ExactAssemblyScope scope(this, kTraceLength); + + Label start; + bind(&start); + + // Refer to simulator-aarch64.h for a description of the marker and its + // arguments. + hlt(kTraceOpcode); + + VIXL_ASSERT(GetSizeOfCodeGeneratedSince(&start) == kTraceParamsOffset); + dc32(parameters); + + VIXL_ASSERT(GetSizeOfCodeGeneratedSince(&start) == kTraceCommandOffset); + dc32(command); + } else { + // Emit nothing on real hardware. + USE(parameters, command); + } +} + + +void MacroAssembler::Log(TraceParameters parameters) { + VIXL_ASSERT(allow_macro_instructions_); + + if (generate_simulator_code_) { + // The arguments to the log pseudo instruction need to be contiguous in + // memory, so make sure we don't try to emit a literal pool. + ExactAssemblyScope scope(this, kLogLength); + + Label start; + bind(&start); + + // Refer to simulator-aarch64.h for a description of the marker and its + // arguments. + hlt(kLogOpcode); + + VIXL_ASSERT(GetSizeOfCodeGeneratedSince(&start) == kLogParamsOffset); + dc32(parameters); + } else { + // Emit nothing on real hardware. 
+ USE(parameters); + } +} + + +void MacroAssembler::SetSimulatorCPUFeatures(const CPUFeatures& features) { + ConfigureSimulatorCPUFeaturesHelper(features, kSetCPUFeaturesOpcode); +} + + +void MacroAssembler::EnableSimulatorCPUFeatures(const CPUFeatures& features) { + ConfigureSimulatorCPUFeaturesHelper(features, kEnableCPUFeaturesOpcode); +} + + +void MacroAssembler::DisableSimulatorCPUFeatures(const CPUFeatures& features) { + ConfigureSimulatorCPUFeaturesHelper(features, kDisableCPUFeaturesOpcode); +} + + +void MacroAssembler::ConfigureSimulatorCPUFeaturesHelper( + const CPUFeatures& features, DebugHltOpcode action) { + VIXL_ASSERT(allow_macro_instructions_); + VIXL_ASSERT(generate_simulator_code_); + + typedef ConfigureCPUFeaturesElementType ElementType; + VIXL_ASSERT(CPUFeatures::kNumberOfFeatures <= + std::numeric_limits::max()); + + size_t count = features.Count(); + + size_t preamble_length = kConfigureCPUFeaturesListOffset; + size_t list_length = (count + 1) * sizeof(ElementType); + size_t padding_length = AlignUp(list_length, kInstructionSize) - list_length; + + size_t total_length = preamble_length + list_length + padding_length; + + // Check the overall code size as well as the size of each component. + ExactAssemblyScope guard_total(this, total_length); + + { // Preamble: the opcode itself. + ExactAssemblyScope guard_preamble(this, preamble_length); + hlt(action); + } + { // A kNone-terminated list of features. + ExactAssemblyScope guard_list(this, list_length); + for (CPUFeatures::const_iterator it = features.begin(); + it != features.end(); + ++it) { + dc(static_cast(*it)); + } + dc(static_cast(CPUFeatures::kNone)); + } + { // Padding for instruction alignment. + ExactAssemblyScope guard_padding(this, padding_length); + for (size_t size = 0; size < padding_length; size += sizeof(ElementType)) { + // The exact value is arbitrary. + dc(static_cast(CPUFeatures::kNone)); + } + } +} + +void MacroAssembler::SaveSimulatorCPUFeatures() { + VIXL_ASSERT(allow_macro_instructions_); + VIXL_ASSERT(generate_simulator_code_); + SingleEmissionCheckScope guard(this); + hlt(kSaveCPUFeaturesOpcode); +} + + +void MacroAssembler::RestoreSimulatorCPUFeatures() { + VIXL_ASSERT(allow_macro_instructions_); + VIXL_ASSERT(generate_simulator_code_); + SingleEmissionCheckScope guard(this); + hlt(kRestoreCPUFeaturesOpcode); +} + + +void UseScratchRegisterScope::Open(MacroAssembler* masm) { + VIXL_ASSERT(masm_ == NULL); + VIXL_ASSERT(masm != NULL); + masm_ = masm; + + CPURegList* available = masm->GetScratchRegisterList(); + CPURegList* available_v = masm->GetScratchVRegisterList(); + CPURegList* available_p = masm->GetScratchPRegisterList(); + old_available_ = available->GetList(); + old_available_v_ = available_v->GetList(); + old_available_p_ = available_p->GetList(); + VIXL_ASSERT(available->GetType() == CPURegister::kRegister); + VIXL_ASSERT(available_v->GetType() == CPURegister::kVRegister); + VIXL_ASSERT(available_p->GetType() == CPURegister::kPRegister); + + parent_ = masm->GetCurrentScratchRegisterScope(); + masm->SetCurrentScratchRegisterScope(this); +} + + +void UseScratchRegisterScope::Close() { + if (masm_ != NULL) { + // Ensure that scopes nest perfectly, and do not outlive their parents. + // This is a run-time check because the order of destruction of objects in + // the _same_ scope is implementation-defined, and is likely to change in + // optimised builds. 
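+    // For reference, the intended usage pattern is strictly nested scopes
+    // (a sketch; `masm` is an assumed MacroAssembler and the acquired
+    // register depends on the current scratch list, typically ip0/ip1):
+    //
+    //   {
+    //     UseScratchRegisterScope temps(&masm);
+    //     Register tmp = temps.AcquireX();
+    //     masm.Mov(tmp, 0x1234);
+    //   }  // `tmp` is returned to the scratch list here.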
+ VIXL_CHECK(masm_->GetCurrentScratchRegisterScope() == this); + masm_->SetCurrentScratchRegisterScope(parent_); + + masm_->GetScratchRegisterList()->SetList(old_available_); + masm_->GetScratchVRegisterList()->SetList(old_available_v_); + masm_->GetScratchPRegisterList()->SetList(old_available_p_); + + masm_ = NULL; + } +} + + +bool UseScratchRegisterScope::IsAvailable(const CPURegister& reg) const { + return masm_->GetScratchRegisterList()->IncludesAliasOf(reg) || + masm_->GetScratchVRegisterList()->IncludesAliasOf(reg) || + masm_->GetScratchPRegisterList()->IncludesAliasOf(reg); +} + +Register UseScratchRegisterScope::AcquireRegisterOfSize(int size_in_bits) { + int code = AcquireFrom(masm_->GetScratchRegisterList()).GetCode(); + return Register(code, size_in_bits); +} + + +VRegister UseScratchRegisterScope::AcquireVRegisterOfSize(int size_in_bits) { + int code = AcquireFrom(masm_->GetScratchVRegisterList()).GetCode(); + return VRegister(code, size_in_bits); +} + + +void UseScratchRegisterScope::Release(const CPURegister& reg) { + VIXL_ASSERT(masm_ != NULL); + + // Release(NoReg) has no effect. + if (reg.IsNone()) return; + + ReleaseByCode(GetAvailableListFor(reg.GetBank()), reg.GetCode()); +} + + +void UseScratchRegisterScope::Include(const CPURegList& list) { + VIXL_ASSERT(masm_ != NULL); + + // Including an empty list has no effect. + if (list.IsEmpty()) return; + VIXL_ASSERT(list.GetType() != CPURegister::kNoRegister); + + RegList reg_list = list.GetList(); + if (list.GetType() == CPURegister::kRegister) { + // Make sure that neither sp nor xzr are included the list. + reg_list &= ~(xzr.GetBit() | sp.GetBit()); + } + + IncludeByRegList(GetAvailableListFor(list.GetBank()), reg_list); +} + + +void UseScratchRegisterScope::Include(const Register& reg1, + const Register& reg2, + const Register& reg3, + const Register& reg4) { + VIXL_ASSERT(masm_ != NULL); + RegList include = + reg1.GetBit() | reg2.GetBit() | reg3.GetBit() | reg4.GetBit(); + // Make sure that neither sp nor xzr are included the list. + include &= ~(xzr.GetBit() | sp.GetBit()); + + IncludeByRegList(masm_->GetScratchRegisterList(), include); +} + + +void UseScratchRegisterScope::Include(const VRegister& reg1, + const VRegister& reg2, + const VRegister& reg3, + const VRegister& reg4) { + RegList include = + reg1.GetBit() | reg2.GetBit() | reg3.GetBit() | reg4.GetBit(); + IncludeByRegList(masm_->GetScratchVRegisterList(), include); +} + + +void UseScratchRegisterScope::Include(const CPURegister& reg1, + const CPURegister& reg2, + const CPURegister& reg3, + const CPURegister& reg4) { + RegList include = 0; + RegList include_v = 0; + RegList include_p = 0; + + const CPURegister regs[] = {reg1, reg2, reg3, reg4}; + + for (size_t i = 0; i < ArrayLength(regs); i++) { + RegList bit = regs[i].GetBit(); + switch (regs[i].GetBank()) { + case CPURegister::kNoRegisterBank: + // Include(NoReg) has no effect. 
+ VIXL_ASSERT(regs[i].IsNone()); + break; + case CPURegister::kRRegisterBank: + include |= bit; + break; + case CPURegister::kVRegisterBank: + include_v |= bit; + break; + case CPURegister::kPRegisterBank: + include_p |= bit; + break; + } + } + + IncludeByRegList(masm_->GetScratchRegisterList(), include); + IncludeByRegList(masm_->GetScratchVRegisterList(), include_v); + IncludeByRegList(masm_->GetScratchPRegisterList(), include_p); +} + + +void UseScratchRegisterScope::Exclude(const CPURegList& list) { + ExcludeByRegList(GetAvailableListFor(list.GetBank()), list.GetList()); +} + + +void UseScratchRegisterScope::Exclude(const Register& reg1, + const Register& reg2, + const Register& reg3, + const Register& reg4) { + RegList exclude = + reg1.GetBit() | reg2.GetBit() | reg3.GetBit() | reg4.GetBit(); + ExcludeByRegList(masm_->GetScratchRegisterList(), exclude); +} + + +void UseScratchRegisterScope::Exclude(const VRegister& reg1, + const VRegister& reg2, + const VRegister& reg3, + const VRegister& reg4) { + RegList exclude_v = + reg1.GetBit() | reg2.GetBit() | reg3.GetBit() | reg4.GetBit(); + ExcludeByRegList(masm_->GetScratchVRegisterList(), exclude_v); +} + + +void UseScratchRegisterScope::Exclude(const CPURegister& reg1, + const CPURegister& reg2, + const CPURegister& reg3, + const CPURegister& reg4) { + RegList exclude = 0; + RegList exclude_v = 0; + RegList exclude_p = 0; + + const CPURegister regs[] = {reg1, reg2, reg3, reg4}; + + for (size_t i = 0; i < ArrayLength(regs); i++) { + RegList bit = regs[i].GetBit(); + switch (regs[i].GetBank()) { + case CPURegister::kNoRegisterBank: + // Exclude(NoReg) has no effect. + VIXL_ASSERT(regs[i].IsNone()); + break; + case CPURegister::kRRegisterBank: + exclude |= bit; + break; + case CPURegister::kVRegisterBank: + exclude_v |= bit; + break; + case CPURegister::kPRegisterBank: + exclude_p |= bit; + break; + } + } + + ExcludeByRegList(masm_->GetScratchRegisterList(), exclude); + ExcludeByRegList(masm_->GetScratchVRegisterList(), exclude_v); + ExcludeByRegList(masm_->GetScratchPRegisterList(), exclude_p); +} + + +void UseScratchRegisterScope::ExcludeAll() { + ExcludeByRegList(masm_->GetScratchRegisterList(), + masm_->GetScratchRegisterList()->GetList()); + ExcludeByRegList(masm_->GetScratchVRegisterList(), + masm_->GetScratchVRegisterList()->GetList()); + ExcludeByRegList(masm_->GetScratchPRegisterList(), + masm_->GetScratchPRegisterList()->GetList()); +} + + +CPURegister UseScratchRegisterScope::AcquireFrom(CPURegList* available, + RegList mask) { + VIXL_CHECK((available->GetList() & mask) != 0); + CPURegister result = available->PopLowestIndex(mask); + VIXL_ASSERT(!AreAliased(result, xzr, sp)); + return result; +} + + +void UseScratchRegisterScope::ReleaseByCode(CPURegList* available, int code) { + ReleaseByRegList(available, static_cast(1) << code); +} + + +void UseScratchRegisterScope::ReleaseByRegList(CPURegList* available, + RegList regs) { + available->SetList(available->GetList() | regs); +} + + +void UseScratchRegisterScope::IncludeByRegList(CPURegList* available, + RegList regs) { + available->SetList(available->GetList() | regs); +} + + +void UseScratchRegisterScope::ExcludeByRegList(CPURegList* available, + RegList exclude) { + available->SetList(available->GetList() & ~exclude); +} + +CPURegList* UseScratchRegisterScope::GetAvailableListFor( + CPURegister::RegisterBank bank) { + switch (bank) { + case CPURegister::kNoRegisterBank: + return NULL; + case CPURegister::kRRegisterBank: + return masm_->GetScratchRegisterList(); + case 
CPURegister::kVRegisterBank: + return masm_->GetScratchVRegisterList(); + case CPURegister::kPRegisterBank: + return masm_->GetScratchPRegisterList(); + } + VIXL_UNREACHABLE(); + return NULL; +} + +} // namespace aarch64 +} // namespace vixl diff --git a/3rdparty/vixl/src/aarch64/macro-assembler-sve-aarch64.cc b/3rdparty/vixl/src/aarch64/macro-assembler-sve-aarch64.cc new file mode 100644 index 0000000000..56a504cfd0 --- /dev/null +++ b/3rdparty/vixl/src/aarch64/macro-assembler-sve-aarch64.cc @@ -0,0 +1,2288 @@ +// Copyright 2019, VIXL authors +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// * Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// * Neither the name of ARM Limited nor the names of its contributors may be +// used to endorse or promote products derived from this software without +// specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS CONTRIBUTORS "AS IS" AND +// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE +// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +#include "macro-assembler-aarch64.h" + +namespace vixl { +namespace aarch64 { + +void MacroAssembler::AddSubHelper(AddSubHelperOption option, + const ZRegister& zd, + const ZRegister& zn, + IntegerOperand imm) { + VIXL_ASSERT(imm.FitsInLane(zd)); + + // Simple, encodable cases. + if (TrySingleAddSub(option, zd, zn, imm)) return; + + VIXL_ASSERT((option == kAddImmediate) || (option == kSubImmediate)); + bool add_imm = (option == kAddImmediate); + + // Try to translate Add(..., -imm) to Sub(..., imm) if we can encode it in one + // instruction. Also interpret the immediate as signed, so we can convert + // Add(zd.VnH(), zn.VnH(), 0xffff...) to Sub(..., 1), etc. + IntegerOperand signed_imm(imm.AsIntN(zd.GetLaneSizeInBits())); + if (signed_imm.IsNegative()) { + AddSubHelperOption n_option = add_imm ? kSubImmediate : kAddImmediate; + IntegerOperand n_imm(signed_imm.GetMagnitude()); + // IntegerOperand can represent -INT_MIN, so this is always safe. + VIXL_ASSERT(n_imm.IsPositiveOrZero()); + if (TrySingleAddSub(n_option, zd, zn, n_imm)) return; + } + + // Otherwise, fall back to dup + ADD_z_z/SUB_z_z. 
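+  // Worked example of the signed reinterpretation above (illustrative only):
+  // Add(z0.VnH(), z0.VnH(), 0xffff) sees 0xffff as -1 in a 16-bit lane, so it
+  // is emitted as a single "sub z0.h, z0.h, #1" rather than falling through to
+  // the dup-based sequence below.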
+ UseScratchRegisterScope temps(this); + ZRegister scratch = temps.AcquireZ().WithLaneSize(zn.GetLaneSizeInBits()); + Dup(scratch, imm); + + SingleEmissionCheckScope guard(this); + if (add_imm) { + add(zd, zn, scratch); + } else { + sub(zd, zn, scratch); + } +} + +bool MacroAssembler::TrySingleAddSub(AddSubHelperOption option, + const ZRegister& zd, + const ZRegister& zn, + IntegerOperand imm) { + VIXL_ASSERT(imm.FitsInLane(zd)); + + int imm8; + int shift = -1; + if (imm.TryEncodeAsShiftedUintNForLane<8, 0>(zd, &imm8, &shift) || + imm.TryEncodeAsShiftedUintNForLane<8, 8>(zd, &imm8, &shift)) { + MovprfxHelperScope guard(this, zd, zn); + switch (option) { + case kAddImmediate: + add(zd, zd, imm8, shift); + return true; + case kSubImmediate: + sub(zd, zd, imm8, shift); + return true; + } + } + return false; +} + +void MacroAssembler::IntWideImmHelper(IntArithImmFn imm_fn, + SVEArithPredicatedFn reg_macro, + const ZRegister& zd, + const ZRegister& zn, + IntegerOperand imm, + bool is_signed) { + if (is_signed) { + // E.g. MUL_z_zi, SMIN_z_zi, SMAX_z_zi + if (imm.IsInt8()) { + MovprfxHelperScope guard(this, zd, zn); + (this->*imm_fn)(zd, zd, imm.AsInt8()); + return; + } + } else { + // E.g. UMIN_z_zi, UMAX_z_zi + if (imm.IsUint8()) { + MovprfxHelperScope guard(this, zd, zn); + (this->*imm_fn)(zd, zd, imm.AsUint8()); + return; + } + } + + UseScratchRegisterScope temps(this); + PRegister pg = temps.AcquireGoverningP(); + Ptrue(pg.WithSameLaneSizeAs(zd)); + + // Try to re-use zd if we can, so we can avoid a movprfx. + ZRegister scratch = + zd.Aliases(zn) ? temps.AcquireZ().WithLaneSize(zn.GetLaneSizeInBits()) + : zd; + Dup(scratch, imm); + + // The vector-form macro for commutative operations will swap the arguments to + // avoid movprfx, if necessary. + (this->*reg_macro)(zd, pg.Merging(), zn, scratch); +} + +void MacroAssembler::Mul(const ZRegister& zd, + const ZRegister& zn, + IntegerOperand imm) { + VIXL_ASSERT(allow_macro_instructions_); + IntArithImmFn imm_fn = &Assembler::mul; + SVEArithPredicatedFn reg_fn = &MacroAssembler::Mul; + IntWideImmHelper(imm_fn, reg_fn, zd, zn, imm, true); +} + +void MacroAssembler::Smin(const ZRegister& zd, + const ZRegister& zn, + IntegerOperand imm) { + VIXL_ASSERT(allow_macro_instructions_); + VIXL_ASSERT(imm.FitsInSignedLane(zd)); + IntArithImmFn imm_fn = &Assembler::smin; + SVEArithPredicatedFn reg_fn = &MacroAssembler::Smin; + IntWideImmHelper(imm_fn, reg_fn, zd, zn, imm, true); +} + +void MacroAssembler::Smax(const ZRegister& zd, + const ZRegister& zn, + IntegerOperand imm) { + VIXL_ASSERT(allow_macro_instructions_); + VIXL_ASSERT(imm.FitsInSignedLane(zd)); + IntArithImmFn imm_fn = &Assembler::smax; + SVEArithPredicatedFn reg_fn = &MacroAssembler::Smax; + IntWideImmHelper(imm_fn, reg_fn, zd, zn, imm, true); +} + +void MacroAssembler::Umax(const ZRegister& zd, + const ZRegister& zn, + IntegerOperand imm) { + VIXL_ASSERT(allow_macro_instructions_); + VIXL_ASSERT(imm.FitsInUnsignedLane(zd)); + IntArithImmFn imm_fn = &Assembler::umax; + SVEArithPredicatedFn reg_fn = &MacroAssembler::Umax; + IntWideImmHelper(imm_fn, reg_fn, zd, zn, imm, false); +} + +void MacroAssembler::Umin(const ZRegister& zd, + const ZRegister& zn, + IntegerOperand imm) { + VIXL_ASSERT(allow_macro_instructions_); + VIXL_ASSERT(imm.FitsInUnsignedLane(zd)); + IntArithImmFn imm_fn = &Assembler::umin; + SVEArithPredicatedFn reg_fn = &MacroAssembler::Umin; + IntWideImmHelper(imm_fn, reg_fn, zd, zn, imm, false); +} + +void MacroAssembler::Addpl(const Register& xd, + const Register& xn, + int64_t 
multiplier) {
+  VIXL_ASSERT(allow_macro_instructions_);
+
+  // This macro relies on `Rdvl` to handle some out-of-range cases. Check that
+  // `VL * multiplier` cannot overflow, for any possible value of VL.
+  VIXL_ASSERT(multiplier <= (INT64_MAX / kZRegMaxSizeInBytes));
+  VIXL_ASSERT(multiplier >= (INT64_MIN / kZRegMaxSizeInBytes));
+
+  if (xd.IsZero()) return;
+  if (xn.IsZero() && xd.IsSP()) {
+    // TODO: This operation doesn't make much sense, but we could support it
+    // with a scratch register if necessary.
+    VIXL_UNIMPLEMENTED();
+  }
+
+  // Handling xzr requires an extra move, so defer it until later so we can try
+  // to use `rdvl` instead (via `Addvl`).
+  if (IsInt6(multiplier) && !xn.IsZero()) {
+    SingleEmissionCheckScope guard(this);
+    addpl(xd, xn, static_cast<int>(multiplier));
+    return;
+  }
+
+  // If `multiplier` is a multiple of 8, we can use `Addvl` instead.
+  if ((multiplier % kZRegBitsPerPRegBit) == 0) {
+    Addvl(xd, xn, multiplier / kZRegBitsPerPRegBit);
+    return;
+  }
+
+  if (IsInt6(multiplier)) {
+    VIXL_ASSERT(xn.IsZero());  // Other cases were handled with `addpl`.
+    // There is no simple `rdpl` instruction, and `addpl` cannot accept xzr, so
+    // materialise a zero.
+    MacroEmissionCheckScope guard(this);
+    movz(xd, 0);
+    addpl(xd, xd, static_cast<int>(multiplier));
+    return;
+  }
+
+  // TODO: Some probable cases result in rather long sequences. For example,
+  // `Addpl(sp, sp, 33)` requires five instructions, even though it's only just
+  // outside the encodable range. We should look for ways to cover such cases
+  // without drastically increasing the complexity of this logic.
+
+  // For other cases, calculate xn + (PL * multiplier) using discrete
+  // instructions. This requires two scratch registers in the general case, so
+  // try to re-use the destination as a scratch register.
+  UseScratchRegisterScope temps(this);
+  temps.Include(xd);
+  temps.Exclude(xn);
+
+  Register scratch = temps.AcquireX();
+  // There is no `rdpl`, so we have to calculate PL from VL. We can't scale the
+  // multiplier because (we already know) it isn't a multiple of 8.
+  Rdvl(scratch, multiplier);
+
+  MacroEmissionCheckScope guard(this);
+  if (xn.IsZero()) {
+    asr(xd, scratch, kZRegBitsPerPRegBitLog2);
+  } else if (xd.IsSP() || xn.IsSP()) {
+    // TODO: MacroAssembler::Add should be able to handle this.
+    asr(scratch, scratch, kZRegBitsPerPRegBitLog2);
+    add(xd, xn, scratch);
+  } else {
+    add(xd, xn, Operand(scratch, ASR, kZRegBitsPerPRegBitLog2));
+  }
+}
+
+void MacroAssembler::Addvl(const Register& xd,
+                           const Register& xn,
+                           int64_t multiplier) {
+  VIXL_ASSERT(allow_macro_instructions_);
+  VIXL_ASSERT(xd.IsX());
+  VIXL_ASSERT(xn.IsX());
+
+  // Check that `VL * multiplier` cannot overflow, for any possible value of VL.
+  VIXL_ASSERT(multiplier <= (INT64_MAX / kZRegMaxSizeInBytes));
+  VIXL_ASSERT(multiplier >= (INT64_MIN / kZRegMaxSizeInBytes));
+
+  if (xd.IsZero()) return;
+  if (xn.IsZero() && xd.IsSP()) {
+    // TODO: This operation doesn't make much sense, but we could support it
+    // with a scratch register if necessary. `rdvl` cannot write into `sp`.
+    VIXL_UNIMPLEMENTED();
+  }
+
+  if (IsInt6(multiplier)) {
+    SingleEmissionCheckScope guard(this);
+    if (xn.IsZero()) {
+      rdvl(xd, static_cast<int>(multiplier));
+    } else {
+      addvl(xd, xn, static_cast<int>(multiplier));
+    }
+    return;
+  }
+
+  // TODO: Some probable cases result in rather long sequences. For example,
+  // `Addvl(sp, sp, 42)` requires four instructions, even though it's only just
+  // outside the encodable range.
We should look for ways to cover such cases + // without drastically increasing the complexity of this logic. + + // For other cases, calculate xn + (VL * multiplier) using discrete + // instructions. This requires two scratch registers in the general case, so + // we try to re-use the destination as a scratch register. + UseScratchRegisterScope temps(this); + temps.Include(xd); + temps.Exclude(xn); + + Register a = temps.AcquireX(); + Mov(a, multiplier); + + MacroEmissionCheckScope guard(this); + Register b = temps.AcquireX(); + rdvl(b, 1); + if (xn.IsZero()) { + mul(xd, a, b); + } else if (xd.IsSP() || xn.IsSP()) { + mul(a, a, b); + add(xd, xn, a); + } else { + madd(xd, a, b, xn); + } +} + +void MacroAssembler::CalculateSVEAddress(const Register& xd, + const SVEMemOperand& addr, + int vl_divisor_log2) { + VIXL_ASSERT(allow_macro_instructions_); + VIXL_ASSERT(!addr.IsScatterGather()); + VIXL_ASSERT(xd.IsX()); + + // The lower bound is where a whole Z register is accessed. + VIXL_ASSERT(!addr.IsMulVl() || (vl_divisor_log2 >= 0)); + // The upper bound is for P register accesses, and for instructions like + // "st1b { z0.d } [...]", where one byte is accessed for every D-sized lane. + VIXL_ASSERT(vl_divisor_log2 <= static_cast(kZRegBitsPerPRegBitLog2)); + + SVEOffsetModifier mod = addr.GetOffsetModifier(); + Register base = addr.GetScalarBase(); + + if (addr.IsEquivalentToScalar()) { + // For example: + // [x0] + // [x0, #0] + // [x0, xzr, LSL 2] + Mov(xd, base); + } else if (addr.IsScalarPlusImmediate()) { + // For example: + // [x0, #42] + // [x0, #42, MUL VL] + int64_t offset = addr.GetImmediateOffset(); + VIXL_ASSERT(offset != 0); // Handled by IsEquivalentToScalar. + if (addr.IsMulVl()) { + int vl_divisor = 1 << vl_divisor_log2; + // For all possible values of vl_divisor, we can simply use `Addpl`. This + // will select `addvl` if necessary. + VIXL_ASSERT((kZRegBitsPerPRegBit % vl_divisor) == 0); + Addpl(xd, base, offset * (kZRegBitsPerPRegBit / vl_divisor)); + } else { + // IsScalarPlusImmediate() ensures that no other modifiers can occur. + VIXL_ASSERT(mod == NO_SVE_OFFSET_MODIFIER); + Add(xd, base, offset); + } + } else if (addr.IsScalarPlusScalar()) { + // For example: + // [x0, x1] + // [x0, x1, LSL #4] + Register offset = addr.GetScalarOffset(); + VIXL_ASSERT(!offset.IsZero()); // Handled by IsEquivalentToScalar. + if (mod == SVE_LSL) { + Add(xd, base, Operand(offset, LSL, addr.GetShiftAmount())); + } else { + // IsScalarPlusScalar() ensures that no other modifiers can occur. + VIXL_ASSERT(mod == NO_SVE_OFFSET_MODIFIER); + Add(xd, base, offset); + } + } else { + // All other forms are scatter-gather addresses, which cannot be evaluated + // into an X register. + VIXL_UNREACHABLE(); + } +} + +void MacroAssembler::Cpy(const ZRegister& zd, + const PRegister& pg, + IntegerOperand imm) { + VIXL_ASSERT(allow_macro_instructions_); + VIXL_ASSERT(imm.FitsInLane(zd)); + int imm8; + int shift; + if (imm.TryEncodeAsShiftedIntNForLane<8, 0>(zd, &imm8, &shift) || + imm.TryEncodeAsShiftedIntNForLane<8, 8>(zd, &imm8, &shift)) { + SingleEmissionCheckScope guard(this); + cpy(zd, pg, imm8, shift); + return; + } + + // The fallbacks rely on `cpy` variants that only support merging predication. + // If zeroing predication was requested, zero the destination first. + if (pg.IsZeroing()) { + SingleEmissionCheckScope guard(this); + dup(zd, 0); + } + PRegisterM pg_m = pg.Merging(); + + // Try to encode the immediate using fcpy. 
+ VIXL_ASSERT(imm.FitsInLane(zd)); + if (zd.GetLaneSizeInBits() >= kHRegSize) { + double fp_imm = 0.0; + switch (zd.GetLaneSizeInBits()) { + case kHRegSize: + fp_imm = + FPToDouble(RawbitsToFloat16(imm.AsUint16()), kIgnoreDefaultNaN); + break; + case kSRegSize: + fp_imm = RawbitsToFloat(imm.AsUint32()); + break; + case kDRegSize: + fp_imm = RawbitsToDouble(imm.AsUint64()); + break; + default: + VIXL_UNREACHABLE(); + break; + } + // IsImmFP64 is equivalent to IsImmFP for the same arithmetic value, so + // we can use IsImmFP64 for all lane sizes. + if (IsImmFP64(fp_imm)) { + SingleEmissionCheckScope guard(this); + fcpy(zd, pg_m, fp_imm); + return; + } + } + + // Fall back to using a scratch register. + UseScratchRegisterScope temps(this); + Register scratch = temps.AcquireRegisterToHoldLane(zd); + Mov(scratch, imm); + + SingleEmissionCheckScope guard(this); + cpy(zd, pg_m, scratch); +} + +// TODO: We implement Fcpy (amongst other things) for all FP types because it +// allows us to preserve user-specified NaNs. We should come up with some +// FPImmediate type to abstract this, and avoid all the duplication below (and +// elsewhere). + +void MacroAssembler::Fcpy(const ZRegister& zd, + const PRegisterM& pg, + double imm) { + VIXL_ASSERT(allow_macro_instructions_); + VIXL_ASSERT(pg.IsMerging()); + + if (IsImmFP64(imm)) { + SingleEmissionCheckScope guard(this); + fcpy(zd, pg, imm); + return; + } + + // As a fall-back, cast the immediate to the required lane size, and try to + // encode the bit pattern using `Cpy`. + Cpy(zd, pg, FPToRawbitsWithSize(zd.GetLaneSizeInBits(), imm)); +} + +void MacroAssembler::Fcpy(const ZRegister& zd, + const PRegisterM& pg, + float imm) { + VIXL_ASSERT(allow_macro_instructions_); + VIXL_ASSERT(pg.IsMerging()); + + if (IsImmFP32(imm)) { + SingleEmissionCheckScope guard(this); + fcpy(zd, pg, imm); + return; + } + + // As a fall-back, cast the immediate to the required lane size, and try to + // encode the bit pattern using `Cpy`. + Cpy(zd, pg, FPToRawbitsWithSize(zd.GetLaneSizeInBits(), imm)); +} + +void MacroAssembler::Fcpy(const ZRegister& zd, + const PRegisterM& pg, + Float16 imm) { + VIXL_ASSERT(allow_macro_instructions_); + VIXL_ASSERT(pg.IsMerging()); + + if (IsImmFP16(imm)) { + SingleEmissionCheckScope guard(this); + fcpy(zd, pg, imm); + return; + } + + // As a fall-back, cast the immediate to the required lane size, and try to + // encode the bit pattern using `Cpy`. + Cpy(zd, pg, FPToRawbitsWithSize(zd.GetLaneSizeInBits(), imm)); +} + +void MacroAssembler::Dup(const ZRegister& zd, IntegerOperand imm) { + VIXL_ASSERT(allow_macro_instructions_); + VIXL_ASSERT(imm.FitsInLane(zd)); + unsigned lane_size = zd.GetLaneSizeInBits(); + int imm8; + int shift; + if (imm.TryEncodeAsShiftedIntNForLane<8, 0>(zd, &imm8, &shift) || + imm.TryEncodeAsShiftedIntNForLane<8, 8>(zd, &imm8, &shift)) { + SingleEmissionCheckScope guard(this); + dup(zd, imm8, shift); + } else if (IsImmLogical(imm.AsUintN(lane_size), lane_size)) { + SingleEmissionCheckScope guard(this); + dupm(zd, imm.AsUintN(lane_size)); + } else { + UseScratchRegisterScope temps(this); + Register scratch = temps.AcquireRegisterToHoldLane(zd); + Mov(scratch, imm); + + SingleEmissionCheckScope guard(this); + dup(zd, scratch); + } +} + +void MacroAssembler::NoncommutativeArithmeticHelper( + const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm, + SVEArithPredicatedFn fn, + SVEArithPredicatedFn rev_fn) { + if (zd.Aliases(zn)) { + // E.g. 
zd = zd / zm + SingleEmissionCheckScope guard(this); + (this->*fn)(zd, pg, zn, zm); + } else if (zd.Aliases(zm)) { + // E.g. zd = zn / zd + SingleEmissionCheckScope guard(this); + (this->*rev_fn)(zd, pg, zm, zn); + } else { + // E.g. zd = zn / zm + MovprfxHelperScope guard(this, zd, pg, zn); + (this->*fn)(zd, pg, zd, zm); + } +} + +void MacroAssembler::FPCommutativeArithmeticHelper( + const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm, + SVEArithPredicatedFn fn, + FPMacroNaNPropagationOption nan_option) { + ResolveFPNaNPropagationOption(&nan_option); + + if (zd.Aliases(zn)) { + SingleEmissionCheckScope guard(this); + (this->*fn)(zd, pg, zd, zm); + } else if (zd.Aliases(zm)) { + switch (nan_option) { + case FastNaNPropagation: { + // Swap the arguments. + SingleEmissionCheckScope guard(this); + (this->*fn)(zd, pg, zd, zn); + return; + } + case StrictNaNPropagation: { + UseScratchRegisterScope temps(this); + // Use a scratch register to keep the argument order exactly as + // specified. + ZRegister scratch = temps.AcquireZ().WithSameLaneSizeAs(zn); + { + MovprfxHelperScope guard(this, scratch, pg, zn); + (this->*fn)(scratch, pg, scratch, zm); + } + Mov(zd, scratch); + return; + } + case NoFPMacroNaNPropagationSelected: + VIXL_UNREACHABLE(); + return; + } + } else { + MovprfxHelperScope guard(this, zd, pg, zn); + (this->*fn)(zd, pg, zd, zm); + } +} + +// Instructions of the form "inst zda, zn, zm, #num", where they are +// non-commutative and no reversed form is provided. +#define VIXL_SVE_NONCOMM_ARITH_ZZZZI_LIST(V) \ + V(Cmla, cmla) \ + V(Sqrdcmlah, sqrdcmlah) + +#define VIXL_DEFINE_MASM_FUNC(MASMFN, ASMFN) \ + void MacroAssembler::MASMFN(const ZRegister& zd, \ + const ZRegister& za, \ + const ZRegister& zn, \ + const ZRegister& zm, \ + int imm) { \ + if ((zd.Aliases(zn) || zd.Aliases(zm)) && !zd.Aliases(za)) { \ + UseScratchRegisterScope temps(this); \ + VIXL_ASSERT(AreSameLaneSize(zn, zm)); \ + ZRegister ztmp = temps.AcquireZ().WithSameLaneSizeAs(zn); \ + Mov(ztmp, zd.Aliases(zn) ? zn : zm); \ + MovprfxHelperScope guard(this, zd, za); \ + ASMFN(zd, \ + (zd.Aliases(zn) ? ztmp : zn), \ + (zd.Aliases(zm) ? ztmp : zm), \ + imm); \ + } else { \ + MovprfxHelperScope guard(this, zd, za); \ + ASMFN(zd, zn, zm, imm); \ + } \ + } +VIXL_SVE_NONCOMM_ARITH_ZZZZI_LIST(VIXL_DEFINE_MASM_FUNC) +#undef VIXL_DEFINE_MASM_FUNC + +// Instructions of the form "inst zda, zn, zm, #num, #num", where they are +// non-commutative and no reversed form is provided. +#define VIXL_SVE_NONCOMM_ARITH_ZZZZII_LIST(V) \ + V(Cmla, cmla) \ + V(Sqrdcmlah, sqrdcmlah) + +// This doesn't handle zm when it's out of the range that can be encoded in +// instruction. The range depends on element size: z0-z7 for H, z0-15 for S. 
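+// For example (a sketch with illustrative register choices; the aliasing
+// support is provided by the macro below, not assumed elsewhere):
+//
+//   masm.Cmla(z0.VnS(), z0.VnS(), z1.VnS(), z2.VnS(), 1, 90);
+//
+// expands to a single "cmla z0.s, z1.s, z2.s[1], #90" because the destination
+// already aliases the addend, whereas an out-of-range zm (e.g. z20 for S
+// lanes) is the caller's responsibility, as noted above.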
+#define VIXL_DEFINE_MASM_FUNC(MASMFN, ASMFN) \ + void MacroAssembler::MASMFN(const ZRegister& zd, \ + const ZRegister& za, \ + const ZRegister& zn, \ + const ZRegister& zm, \ + int index, \ + int rot) { \ + if ((zd.Aliases(zn) || zd.Aliases(zm)) && !zd.Aliases(za)) { \ + UseScratchRegisterScope temps(this); \ + ZRegister ztmp = temps.AcquireZ().WithSameLaneSizeAs(zd); \ + { \ + MovprfxHelperScope guard(this, ztmp, za); \ + ASMFN(ztmp, zn, zm, index, rot); \ + } \ + Mov(zd, ztmp); \ + } else { \ + MovprfxHelperScope guard(this, zd, za); \ + ASMFN(zd, zn, zm, index, rot); \ + } \ + } +VIXL_SVE_NONCOMM_ARITH_ZZZZII_LIST(VIXL_DEFINE_MASM_FUNC) +#undef VIXL_DEFINE_MASM_FUNC + +// Instructions of the form "inst zda, pg, zda, zn", where they are +// non-commutative and no reversed form is provided. +#define VIXL_SVE_NONCOMM_ARITH_ZPZZ_LIST(V) \ + V(Addp, addp) \ + V(Bic, bic) \ + V(Faddp, faddp) \ + V(Fmaxnmp, fmaxnmp) \ + V(Fminnmp, fminnmp) \ + V(Fmaxp, fmaxp) \ + V(Fminp, fminp) \ + V(Fscale, fscale) \ + V(Smaxp, smaxp) \ + V(Sminp, sminp) \ + V(Suqadd, suqadd) \ + V(Umaxp, umaxp) \ + V(Uminp, uminp) \ + V(Usqadd, usqadd) + +#define VIXL_DEFINE_MASM_FUNC(MASMFN, ASMFN) \ + void MacroAssembler::MASMFN(const ZRegister& zd, \ + const PRegisterM& pg, \ + const ZRegister& zn, \ + const ZRegister& zm) { \ + VIXL_ASSERT(allow_macro_instructions_); \ + if (zd.Aliases(zm) && !zd.Aliases(zn)) { \ + UseScratchRegisterScope temps(this); \ + ZRegister scratch = temps.AcquireZ().WithSameLaneSizeAs(zm); \ + Mov(scratch, zm); \ + MovprfxHelperScope guard(this, zd, pg, zn); \ + ASMFN(zd, pg, zd, scratch); \ + } else { \ + MovprfxHelperScope guard(this, zd, pg, zn); \ + ASMFN(zd, pg, zd, zm); \ + } \ + } +VIXL_SVE_NONCOMM_ARITH_ZPZZ_LIST(VIXL_DEFINE_MASM_FUNC) +#undef VIXL_DEFINE_MASM_FUNC + +// Instructions of the form "inst zda, pg, zda, zn", where they are +// non-commutative and a reversed form is provided. 
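+// For example, the reversed forms let the helper keep the required operand
+// order without a scratch register (a sketch; registers are illustrative):
+//
+//   masm.Fsub(z2.VnD(), p0.Merging(), z1.VnD(), z2.VnD());
+//
+// Here zd aliases zm, so NoncommutativeArithmeticHelper emits
+// "fsubr z2.d, p0/m, z2.d, z1.d" instead of spilling z2 or adding a movprfx.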
+#define VIXL_SVE_NONCOMM_ARITH_REVERSE_ZPZZ_LIST(V) \ + V(Asr, asr) \ + V(Fdiv, fdiv) \ + V(Fsub, fsub) \ + V(Lsl, lsl) \ + V(Lsr, lsr) \ + V(Sdiv, sdiv) \ + V(Shsub, shsub) \ + V(Sqrshl, sqrshl) \ + V(Sqshl, sqshl) \ + V(Sqsub, sqsub) \ + V(Srshl, srshl) \ + V(Sub, sub) \ + V(Udiv, udiv) \ + V(Uhsub, uhsub) \ + V(Uqrshl, uqrshl) \ + V(Uqshl, uqshl) \ + V(Uqsub, uqsub) \ + V(Urshl, urshl) + +#define VIXL_DEFINE_MASM_FUNC(MASMFN, ASMFN) \ + void MacroAssembler::MASMFN(const ZRegister& zd, \ + const PRegisterM& pg, \ + const ZRegister& zn, \ + const ZRegister& zm) { \ + VIXL_ASSERT(allow_macro_instructions_); \ + NoncommutativeArithmeticHelper(zd, \ + pg, \ + zn, \ + zm, \ + static_cast( \ + &Assembler::ASMFN), \ + static_cast( \ + &Assembler::ASMFN##r)); \ + } +VIXL_SVE_NONCOMM_ARITH_REVERSE_ZPZZ_LIST(VIXL_DEFINE_MASM_FUNC) +#undef VIXL_DEFINE_MASM_FUNC + +void MacroAssembler::Fadd(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm, + FPMacroNaNPropagationOption nan_option) { + VIXL_ASSERT(allow_macro_instructions_); + FPCommutativeArithmeticHelper(zd, + pg, + zn, + zm, + static_cast( + &Assembler::fadd), + nan_option); +} + +void MacroAssembler::Fabd(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm, + FPMacroNaNPropagationOption nan_option) { + VIXL_ASSERT(allow_macro_instructions_); + FPCommutativeArithmeticHelper(zd, + pg, + zn, + zm, + static_cast( + &Assembler::fabd), + nan_option); +} + +void MacroAssembler::Fmul(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm, + FPMacroNaNPropagationOption nan_option) { + VIXL_ASSERT(allow_macro_instructions_); + FPCommutativeArithmeticHelper(zd, + pg, + zn, + zm, + static_cast( + &Assembler::fmul), + nan_option); +} + +void MacroAssembler::Fmulx(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm, + FPMacroNaNPropagationOption nan_option) { + VIXL_ASSERT(allow_macro_instructions_); + FPCommutativeArithmeticHelper(zd, + pg, + zn, + zm, + static_cast( + &Assembler::fmulx), + nan_option); +} + +void MacroAssembler::Fmax(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm, + FPMacroNaNPropagationOption nan_option) { + VIXL_ASSERT(allow_macro_instructions_); + FPCommutativeArithmeticHelper(zd, + pg, + zn, + zm, + static_cast( + &Assembler::fmax), + nan_option); +} + +void MacroAssembler::Fmin(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm, + FPMacroNaNPropagationOption nan_option) { + VIXL_ASSERT(allow_macro_instructions_); + FPCommutativeArithmeticHelper(zd, + pg, + zn, + zm, + static_cast( + &Assembler::fmin), + nan_option); +} + +void MacroAssembler::Fmaxnm(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm, + FPMacroNaNPropagationOption nan_option) { + VIXL_ASSERT(allow_macro_instructions_); + FPCommutativeArithmeticHelper(zd, + pg, + zn, + zm, + static_cast( + &Assembler::fmaxnm), + nan_option); +} + +void MacroAssembler::Fminnm(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm, + FPMacroNaNPropagationOption nan_option) { + VIXL_ASSERT(allow_macro_instructions_); + FPCommutativeArithmeticHelper(zd, + pg, + zn, + zm, + static_cast( + &Assembler::fminnm), + nan_option); +} + +void MacroAssembler::Fdup(const ZRegister& zd, double imm) { + VIXL_ASSERT(allow_macro_instructions_); + + switch (zd.GetLaneSizeInBits()) { + case kHRegSize: + 
Fdup(zd, Float16(imm)); + break; + case kSRegSize: + Fdup(zd, static_cast(imm)); + break; + case kDRegSize: + uint64_t bits = DoubleToRawbits(imm); + if (IsImmFP64(bits)) { + SingleEmissionCheckScope guard(this); + fdup(zd, imm); + } else { + Dup(zd, bits); + } + break; + } +} + +void MacroAssembler::Fdup(const ZRegister& zd, float imm) { + VIXL_ASSERT(allow_macro_instructions_); + + switch (zd.GetLaneSizeInBits()) { + case kHRegSize: + Fdup(zd, Float16(imm)); + break; + case kSRegSize: + if (IsImmFP32(imm)) { + SingleEmissionCheckScope guard(this); + fdup(zd, imm); + } else { + Dup(zd, FloatToRawbits(imm)); + } + break; + case kDRegSize: + Fdup(zd, static_cast(imm)); + break; + } +} + +void MacroAssembler::Fdup(const ZRegister& zd, Float16 imm) { + VIXL_ASSERT(allow_macro_instructions_); + + switch (zd.GetLaneSizeInBits()) { + case kHRegSize: + if (IsImmFP16(imm)) { + SingleEmissionCheckScope guard(this); + fdup(zd, imm); + } else { + Dup(zd, Float16ToRawbits(imm)); + } + break; + case kSRegSize: + Fdup(zd, FPToFloat(imm, kIgnoreDefaultNaN)); + break; + case kDRegSize: + Fdup(zd, FPToDouble(imm, kIgnoreDefaultNaN)); + break; + } +} + +void MacroAssembler::Index(const ZRegister& zd, + const Operand& start, + const Operand& step) { + class IndexOperand : public Operand { + public: + static IndexOperand Prepare(MacroAssembler* masm, + UseScratchRegisterScope* temps, + const Operand& op, + const ZRegister& zd_inner) { + // Look for encodable immediates. + int imm; + if (op.IsImmediate()) { + if (IntegerOperand(op).TryEncodeAsIntNForLane<5>(zd_inner, &imm)) { + return IndexOperand(imm); + } + Register scratch = temps->AcquireRegisterToHoldLane(zd_inner); + masm->Mov(scratch, op); + return IndexOperand(scratch); + } else { + // Plain registers can be encoded directly. + VIXL_ASSERT(op.IsPlainRegister()); + return IndexOperand(op.GetRegister()); + } + } + + int GetImm5() const { + int64_t imm = GetImmediate(); + VIXL_ASSERT(IsInt5(imm)); + return static_cast(imm); + } + + private: + explicit IndexOperand(const Register& reg) : Operand(reg) {} + explicit IndexOperand(int64_t imm) : Operand(imm) {} + }; + + UseScratchRegisterScope temps(this); + IndexOperand start_enc = IndexOperand::Prepare(this, &temps, start, zd); + IndexOperand step_enc = IndexOperand::Prepare(this, &temps, step, zd); + + SingleEmissionCheckScope guard(this); + if (start_enc.IsImmediate()) { + if (step_enc.IsImmediate()) { + index(zd, start_enc.GetImm5(), step_enc.GetImm5()); + } else { + index(zd, start_enc.GetImm5(), step_enc.GetRegister()); + } + } else { + if (step_enc.IsImmediate()) { + index(zd, start_enc.GetRegister(), step_enc.GetImm5()); + } else { + index(zd, start_enc.GetRegister(), step_enc.GetRegister()); + } + } +} + +void MacroAssembler::Insr(const ZRegister& zdn, IntegerOperand imm) { + VIXL_ASSERT(allow_macro_instructions_); + VIXL_ASSERT(imm.FitsInLane(zdn)); + + if (imm.IsZero()) { + SingleEmissionCheckScope guard(this); + insr(zdn, xzr); + return; + } + + UseScratchRegisterScope temps(this); + Register scratch = temps.AcquireRegisterToHoldLane(zdn); + + // TODO: There are many cases where we could optimise immediates, such as by + // detecting repeating patterns or FP immediates. We should optimise and + // abstract this for use in other SVE mov-immediate-like macros. 
+ Mov(scratch, imm); + + SingleEmissionCheckScope guard(this); + insr(zdn, scratch); +} + +void MacroAssembler::Mla(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& za, + const ZRegister& zn, + const ZRegister& zm) { + VIXL_ASSERT(allow_macro_instructions_); + if (zd.Aliases(za)) { + // zda = zda + (zn * zm) + SingleEmissionCheckScope guard(this); + mla(zd, pg, zn, zm); + } else if (zd.Aliases(zn)) { + // zdn = za + (zdn * zm) + SingleEmissionCheckScope guard(this); + mad(zd, pg, zm, za); + } else if (zd.Aliases(zm)) { + // Multiplication is commutative, so we can swap zn and zm. + // zdm = za + (zdm * zn) + SingleEmissionCheckScope guard(this); + mad(zd, pg, zn, za); + } else { + // zd = za + (zn * zm) + ExactAssemblyScope guard(this, 2 * kInstructionSize); + movprfx(zd, pg, za); + mla(zd, pg, zn, zm); + } +} + +void MacroAssembler::Mls(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& za, + const ZRegister& zn, + const ZRegister& zm) { + VIXL_ASSERT(allow_macro_instructions_); + if (zd.Aliases(za)) { + // zda = zda - (zn * zm) + SingleEmissionCheckScope guard(this); + mls(zd, pg, zn, zm); + } else if (zd.Aliases(zn)) { + // zdn = za - (zdn * zm) + SingleEmissionCheckScope guard(this); + msb(zd, pg, zm, za); + } else if (zd.Aliases(zm)) { + // Multiplication is commutative, so we can swap zn and zm. + // zdm = za - (zdm * zn) + SingleEmissionCheckScope guard(this); + msb(zd, pg, zn, za); + } else { + // zd = za - (zn * zm) + ExactAssemblyScope guard(this, 2 * kInstructionSize); + movprfx(zd, pg, za); + mls(zd, pg, zn, zm); + } +} + +void MacroAssembler::CompareHelper(Condition cond, + const PRegisterWithLaneSize& pd, + const PRegisterZ& pg, + const ZRegister& zn, + IntegerOperand imm) { + UseScratchRegisterScope temps(this); + ZRegister zm = temps.AcquireZ().WithLaneSize(zn.GetLaneSizeInBits()); + Dup(zm, imm); + SingleEmissionCheckScope guard(this); + cmp(cond, pd, pg, zn, zm); +} + +void MacroAssembler::Pfirst(const PRegisterWithLaneSize& pd, + const PRegister& pg, + const PRegisterWithLaneSize& pn) { + VIXL_ASSERT(allow_macro_instructions_); + VIXL_ASSERT(pd.IsLaneSizeB()); + VIXL_ASSERT(pn.IsLaneSizeB()); + if (pd.Is(pn)) { + SingleEmissionCheckScope guard(this); + pfirst(pd, pg, pn); + } else { + UseScratchRegisterScope temps(this); + PRegister temp_pg = pg; + if (pd.Aliases(pg)) { + temp_pg = temps.AcquireP(); + Mov(temp_pg.VnB(), pg.VnB()); + } + Mov(pd, pn); + SingleEmissionCheckScope guard(this); + pfirst(pd, temp_pg, pd); + } +} + +void MacroAssembler::Pnext(const PRegisterWithLaneSize& pd, + const PRegister& pg, + const PRegisterWithLaneSize& pn) { + VIXL_ASSERT(allow_macro_instructions_); + VIXL_ASSERT(AreSameFormat(pd, pn)); + if (pd.Is(pn)) { + SingleEmissionCheckScope guard(this); + pnext(pd, pg, pn); + } else { + UseScratchRegisterScope temps(this); + PRegister temp_pg = pg; + if (pd.Aliases(pg)) { + temp_pg = temps.AcquireP(); + Mov(temp_pg.VnB(), pg.VnB()); + } + Mov(pd.VnB(), pn.VnB()); + SingleEmissionCheckScope guard(this); + pnext(pd, temp_pg, pd); + } +} + +void MacroAssembler::Ptrue(const PRegisterWithLaneSize& pd, + SVEPredicateConstraint pattern, + FlagsUpdate s) { + VIXL_ASSERT(allow_macro_instructions_); + switch (s) { + case LeaveFlags: + Ptrue(pd, pattern); + return; + case SetFlags: + Ptrues(pd, pattern); + return; + } + VIXL_UNREACHABLE(); +} + +void MacroAssembler::Sub(const ZRegister& zd, + IntegerOperand imm, + const ZRegister& zm) { + VIXL_ASSERT(allow_macro_instructions_); + + int imm8; + int shift = -1; + if 
(imm.TryEncodeAsShiftedUintNForLane<8, 0>(zd, &imm8, &shift) || + imm.TryEncodeAsShiftedUintNForLane<8, 8>(zd, &imm8, &shift)) { + MovprfxHelperScope guard(this, zd, zm); + subr(zd, zd, imm8, shift); + } else { + UseScratchRegisterScope temps(this); + ZRegister scratch = temps.AcquireZ().WithLaneSize(zm.GetLaneSizeInBits()); + Dup(scratch, imm); + + SingleEmissionCheckScope guard(this); + sub(zd, scratch, zm); + } +} + +void MacroAssembler::SVELoadBroadcastImmHelper(const ZRegister& zt, + const PRegisterZ& pg, + const SVEMemOperand& addr, + SVELoadBroadcastFn fn, + int divisor) { + VIXL_ASSERT(addr.IsScalarPlusImmediate()); + int64_t imm = addr.GetImmediateOffset(); + if ((imm % divisor == 0) && IsUint6(imm / divisor)) { + SingleEmissionCheckScope guard(this); + (this->*fn)(zt, pg, addr); + } else { + UseScratchRegisterScope temps(this); + Register scratch = temps.AcquireX(); + CalculateSVEAddress(scratch, addr, zt); + SingleEmissionCheckScope guard(this); + (this->*fn)(zt, pg, SVEMemOperand(scratch)); + } +} + +void MacroAssembler::SVELoadStoreScalarImmHelper(const CPURegister& rt, + const SVEMemOperand& addr, + SVELoadStoreFn fn) { + VIXL_ASSERT(allow_macro_instructions_); + VIXL_ASSERT(rt.IsZRegister() || rt.IsPRegister()); + + if (addr.IsPlainScalar() || + (addr.IsScalarPlusImmediate() && IsInt9(addr.GetImmediateOffset()) && + addr.IsMulVl())) { + SingleEmissionCheckScope guard(this); + (this->*fn)(rt, addr); + return; + } + + if (addr.IsEquivalentToScalar()) { + SingleEmissionCheckScope guard(this); + (this->*fn)(rt, SVEMemOperand(addr.GetScalarBase())); + return; + } + + UseScratchRegisterScope temps(this); + Register scratch = temps.AcquireX(); + CalculateSVEAddress(scratch, addr, rt); + SingleEmissionCheckScope guard(this); + (this->*fn)(rt, SVEMemOperand(scratch)); +} + +template +void MacroAssembler::SVELoadStoreNTBroadcastQOHelper( + const ZRegister& zt, + const Tg& pg, + const SVEMemOperand& addr, + Tf fn, + int imm_bits, + int shift_amount, + SVEOffsetModifier supported_modifier, + int vl_divisor_log2) { + VIXL_ASSERT(allow_macro_instructions_); + int imm_divisor = 1 << shift_amount; + + if (addr.IsPlainScalar() || + (addr.IsScalarPlusImmediate() && + IsIntN(imm_bits, addr.GetImmediateOffset() / imm_divisor) && + ((addr.GetImmediateOffset() % imm_divisor) == 0) && + (addr.GetOffsetModifier() == supported_modifier))) { + SingleEmissionCheckScope guard(this); + (this->*fn)(zt, pg, addr); + return; + } + + if (addr.IsScalarPlusScalar() && !addr.GetScalarOffset().IsZero() && + addr.IsEquivalentToLSL(zt.GetLaneSizeInBytesLog2())) { + SingleEmissionCheckScope guard(this); + (this->*fn)(zt, pg, addr); + return; + } + + if (addr.IsEquivalentToScalar()) { + SingleEmissionCheckScope guard(this); + (this->*fn)(zt, pg, SVEMemOperand(addr.GetScalarBase())); + return; + } + + if (addr.IsMulVl() && (supported_modifier != SVE_MUL_VL) && + (vl_divisor_log2 == -1)) { + // We don't handle [x0, #imm, MUL VL] if the in-memory access size is not VL + // dependent. 
+ VIXL_UNIMPLEMENTED(); + } + + UseScratchRegisterScope temps(this); + Register scratch = temps.AcquireX(); + CalculateSVEAddress(scratch, addr, vl_divisor_log2); + SingleEmissionCheckScope guard(this); + (this->*fn)(zt, pg, SVEMemOperand(scratch)); +} + +template +void MacroAssembler::SVELoadStore1Helper(int msize_in_bytes_log2, + const ZRegister& zt, + const Tg& pg, + const SVEMemOperand& addr, + Tf fn) { + if (addr.IsPlainScalar() || + (addr.IsScalarPlusScalar() && !addr.GetScalarOffset().IsZero() && + addr.IsEquivalentToLSL(msize_in_bytes_log2)) || + (addr.IsScalarPlusImmediate() && IsInt4(addr.GetImmediateOffset()) && + addr.IsMulVl())) { + SingleEmissionCheckScope guard(this); + (this->*fn)(zt, pg, addr); + return; + } + + if (addr.IsEquivalentToScalar()) { + SingleEmissionCheckScope guard(this); + (this->*fn)(zt, pg, SVEMemOperand(addr.GetScalarBase())); + return; + } + + if (addr.IsVectorPlusImmediate()) { + uint64_t offset = addr.GetImmediateOffset(); + if (IsMultiple(offset, (1 << msize_in_bytes_log2)) && + IsUint5(offset >> msize_in_bytes_log2)) { + SingleEmissionCheckScope guard(this); + (this->*fn)(zt, pg, addr); + return; + } + } + + if (addr.IsScalarPlusVector()) { + VIXL_ASSERT(addr.IsScatterGather()); + SingleEmissionCheckScope guard(this); + (this->*fn)(zt, pg, addr); + return; + } + + UseScratchRegisterScope temps(this); + if (addr.IsScatterGather()) { + // In scatter-gather modes, zt and zn/zm have the same lane size. However, + // for 32-bit accesses, the result of each lane's address calculation still + // requires 64 bits; we can't naively use `Adr` for the address calculation + // because it would truncate each address to 32 bits. + + if (addr.IsVectorPlusImmediate()) { + // Synthesise the immediate in an X register, then use a + // scalar-plus-vector access with the original vector. + Register scratch = temps.AcquireX(); + Mov(scratch, addr.GetImmediateOffset()); + SingleEmissionCheckScope guard(this); + SVEOffsetModifier om = + zt.IsLaneSizeS() ? SVE_UXTW : NO_SVE_OFFSET_MODIFIER; + (this->*fn)(zt, pg, SVEMemOperand(scratch, addr.GetVectorBase(), om)); + return; + } + + VIXL_UNIMPLEMENTED(); + } else { + Register scratch = temps.AcquireX(); + // TODO: If we have an immediate offset that is a multiple of + // msize_in_bytes, we can use Rdvl/Rdpl and a scalar-plus-scalar form to + // save an instruction. + int vl_divisor_log2 = zt.GetLaneSizeInBytesLog2() - msize_in_bytes_log2; + CalculateSVEAddress(scratch, addr, vl_divisor_log2); + SingleEmissionCheckScope guard(this); + (this->*fn)(zt, pg, SVEMemOperand(scratch)); + } +} + +template +void MacroAssembler::SVELoadFFHelper(int msize_in_bytes_log2, + const ZRegister& zt, + const PRegisterZ& pg, + const SVEMemOperand& addr, + Tf fn) { + if (addr.IsScatterGather()) { + // Scatter-gather first-fault loads share encodings with normal loads. + SVELoadStore1Helper(msize_in_bytes_log2, zt, pg, addr, fn); + return; + } + + // Contiguous first-faulting loads have no scalar-plus-immediate form at all, + // so we don't do immediate synthesis. + + // We cannot currently distinguish "[x0]" from "[x0, #0]", and this + // is not "scalar-plus-scalar", so we have to permit `IsPlainScalar()` here. 
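+  // In practice only the plain-scalar and scalar-plus-scalar forms are
+  // accepted here, e.g. (a sketch; `masm`, the registers and the
+  // SVEMemOperand constructor arguments are illustrative):
+  //
+  //   masm.Ldff1w(z0.VnS(), p0.Zeroing(), SVEMemOperand(x0, x1, LSL, 2));
+  //
+  // whereas a non-zero scalar-plus-immediate offset has no first-faulting
+  // encoding and is not synthesised, so it falls through to
+  // VIXL_UNIMPLEMENTED() below.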
+  if (addr.IsPlainScalar() || (addr.IsScalarPlusScalar() &&
+                               addr.IsEquivalentToLSL(msize_in_bytes_log2))) {
+    SingleEmissionCheckScope guard(this);
+    (this->*fn)(zt, pg, addr);
+    return;
+  }
+
+  VIXL_UNIMPLEMENTED();
+}
+
+void MacroAssembler::Ld1b(const ZRegister& zt,
+                          const PRegisterZ& pg,
+                          const SVEMemOperand& addr) {
+  VIXL_ASSERT(allow_macro_instructions_);
+  SVELoadStore1Helper(kBRegSizeInBytesLog2,
+                      zt,
+                      pg,
+                      addr,
+                      static_cast<SVELoad1Fn>(&Assembler::ld1b));
+}
+
+void MacroAssembler::Ld1h(const ZRegister& zt,
+                          const PRegisterZ& pg,
+                          const SVEMemOperand& addr) {
+  VIXL_ASSERT(allow_macro_instructions_);
+  SVELoadStore1Helper(kHRegSizeInBytesLog2,
+                      zt,
+                      pg,
+                      addr,
+                      static_cast<SVELoad1Fn>(&Assembler::ld1h));
+}
+
+void MacroAssembler::Ld1w(const ZRegister& zt,
+                          const PRegisterZ& pg,
+                          const SVEMemOperand& addr) {
+  VIXL_ASSERT(allow_macro_instructions_);
+  SVELoadStore1Helper(kWRegSizeInBytesLog2,
+                      zt,
+                      pg,
+                      addr,
+                      static_cast<SVELoad1Fn>(&Assembler::ld1w));
+}
+
+void MacroAssembler::Ld1d(const ZRegister& zt,
+                          const PRegisterZ& pg,
+                          const SVEMemOperand& addr) {
+  VIXL_ASSERT(allow_macro_instructions_);
+  SVELoadStore1Helper(kDRegSizeInBytesLog2,
+                      zt,
+                      pg,
+                      addr,
+                      static_cast<SVELoad1Fn>(&Assembler::ld1d));
+}
+
+void MacroAssembler::Ld1sb(const ZRegister& zt,
+                           const PRegisterZ& pg,
+                           const SVEMemOperand& addr) {
+  VIXL_ASSERT(allow_macro_instructions_);
+  SVELoadStore1Helper(kBRegSizeInBytesLog2,
+                      zt,
+                      pg,
+                      addr,
+                      static_cast<SVELoad1Fn>(&Assembler::ld1sb));
+}
+
+void MacroAssembler::Ld1sh(const ZRegister& zt,
+                           const PRegisterZ& pg,
+                           const SVEMemOperand& addr) {
+  VIXL_ASSERT(allow_macro_instructions_);
+  SVELoadStore1Helper(kHRegSizeInBytesLog2,
+                      zt,
+                      pg,
+                      addr,
+                      static_cast<SVELoad1Fn>(&Assembler::ld1sh));
+}
+
+void MacroAssembler::Ld1sw(const ZRegister& zt,
+                           const PRegisterZ& pg,
+                           const SVEMemOperand& addr) {
+  VIXL_ASSERT(allow_macro_instructions_);
+  SVELoadStore1Helper(kSRegSizeInBytesLog2,
+                      zt,
+                      pg,
+                      addr,
+                      static_cast<SVELoad1Fn>(&Assembler::ld1sw));
+}
+
+void MacroAssembler::St1b(const ZRegister& zt,
+                          const PRegister& pg,
+                          const SVEMemOperand& addr) {
+  VIXL_ASSERT(allow_macro_instructions_);
+  SVELoadStore1Helper(kBRegSizeInBytesLog2,
+                      zt,
+                      pg,
+                      addr,
+                      static_cast<SVEStore1Fn>(&Assembler::st1b));
+}
+
+void MacroAssembler::St1h(const ZRegister& zt,
+                          const PRegister& pg,
+                          const SVEMemOperand& addr) {
+  VIXL_ASSERT(allow_macro_instructions_);
+  SVELoadStore1Helper(kHRegSizeInBytesLog2,
+                      zt,
+                      pg,
+                      addr,
+                      static_cast<SVEStore1Fn>(&Assembler::st1h));
+}
+
+void MacroAssembler::St1w(const ZRegister& zt,
+                          const PRegister& pg,
+                          const SVEMemOperand& addr) {
+  VIXL_ASSERT(allow_macro_instructions_);
+  SVELoadStore1Helper(kSRegSizeInBytesLog2,
+                      zt,
+                      pg,
+                      addr,
+                      static_cast<SVEStore1Fn>(&Assembler::st1w));
+}
+
+void MacroAssembler::St1d(const ZRegister& zt,
+                          const PRegister& pg,
+                          const SVEMemOperand& addr) {
+  VIXL_ASSERT(allow_macro_instructions_);
+  SVELoadStore1Helper(kDRegSizeInBytesLog2,
+                      zt,
+                      pg,
+                      addr,
+                      static_cast<SVEStore1Fn>(&Assembler::st1d));
+}
+
+void MacroAssembler::Ldff1b(const ZRegister& zt,
+                            const PRegisterZ& pg,
+                            const SVEMemOperand& addr) {
+  VIXL_ASSERT(allow_macro_instructions_);
+  SVELoadFFHelper(kBRegSizeInBytesLog2,
+                  zt,
+                  pg,
+                  addr,
+                  static_cast<SVELoad1Fn>(&Assembler::ldff1b));
+}
+
+void MacroAssembler::Ldff1h(const ZRegister& zt,
+                            const PRegisterZ& pg,
+                            const SVEMemOperand& addr) {
+  VIXL_ASSERT(allow_macro_instructions_);
+  SVELoadFFHelper(kHRegSizeInBytesLog2,
+                  zt,
+                  pg,
+                  addr,
+                  static_cast<SVELoad1Fn>(&Assembler::ldff1h));
+}
+
+void MacroAssembler::Ldff1w(const ZRegister& zt,
+                            const PRegisterZ& pg,
+                            const SVEMemOperand& addr) {
+ VIXL_ASSERT(allow_macro_instructions_); + SVELoadFFHelper(kSRegSizeInBytesLog2, + zt, + pg, + addr, + static_cast(&Assembler::ldff1w)); +} + +void MacroAssembler::Ldff1d(const ZRegister& zt, + const PRegisterZ& pg, + const SVEMemOperand& addr) { + VIXL_ASSERT(allow_macro_instructions_); + SVELoadFFHelper(kDRegSizeInBytesLog2, + zt, + pg, + addr, + static_cast(&Assembler::ldff1d)); +} + +void MacroAssembler::Ldff1sb(const ZRegister& zt, + const PRegisterZ& pg, + const SVEMemOperand& addr) { + VIXL_ASSERT(allow_macro_instructions_); + SVELoadFFHelper(kBRegSizeInBytesLog2, + zt, + pg, + addr, + static_cast(&Assembler::ldff1sb)); +} + +void MacroAssembler::Ldff1sh(const ZRegister& zt, + const PRegisterZ& pg, + const SVEMemOperand& addr) { + VIXL_ASSERT(allow_macro_instructions_); + SVELoadFFHelper(kHRegSizeInBytesLog2, + zt, + pg, + addr, + static_cast(&Assembler::ldff1sh)); +} + +void MacroAssembler::Ldff1sw(const ZRegister& zt, + const PRegisterZ& pg, + const SVEMemOperand& addr) { + VIXL_ASSERT(allow_macro_instructions_); + SVELoadFFHelper(kSRegSizeInBytesLog2, + zt, + pg, + addr, + static_cast(&Assembler::ldff1sw)); +} + +#define VIXL_SVE_LD1R_LIST(V) \ + V(qb, 4) V(qh, 4) V(qw, 4) V(qd, 4) V(ob, 5) V(oh, 5) V(ow, 5) V(od, 5) + +#define VIXL_DEFINE_MASM_FUNC(SZ, SH) \ + void MacroAssembler::Ld1r##SZ(const ZRegister& zt, \ + const PRegisterZ& pg, \ + const SVEMemOperand& addr) { \ + VIXL_ASSERT(allow_macro_instructions_); \ + SVELoadStoreNTBroadcastQOHelper(zt, \ + pg, \ + addr, \ + &MacroAssembler::ld1r##SZ, \ + 4, \ + SH, \ + NO_SVE_OFFSET_MODIFIER, \ + -1); \ + } + +VIXL_SVE_LD1R_LIST(VIXL_DEFINE_MASM_FUNC) + +#undef VIXL_DEFINE_MASM_FUNC +#undef VIXL_SVE_LD1R_LIST + +void MacroAssembler::Ldnt1b(const ZRegister& zt, + const PRegisterZ& pg, + const SVEMemOperand& addr) { + VIXL_ASSERT(allow_macro_instructions_); + if (addr.IsVectorPlusScalar()) { + SingleEmissionCheckScope guard(this); + ldnt1b(zt, pg, addr); + } else { + SVELoadStoreNTBroadcastQOHelper(zt, + pg, + addr, + &MacroAssembler::ldnt1b, + 4, + 0, + SVE_MUL_VL); + } +} + +void MacroAssembler::Ldnt1d(const ZRegister& zt, + const PRegisterZ& pg, + const SVEMemOperand& addr) { + VIXL_ASSERT(allow_macro_instructions_); + if (addr.IsVectorPlusScalar()) { + SingleEmissionCheckScope guard(this); + ldnt1d(zt, pg, addr); + } else { + SVELoadStoreNTBroadcastQOHelper(zt, + pg, + addr, + &MacroAssembler::ldnt1d, + 4, + 0, + SVE_MUL_VL); + } +} + +void MacroAssembler::Ldnt1h(const ZRegister& zt, + const PRegisterZ& pg, + const SVEMemOperand& addr) { + VIXL_ASSERT(allow_macro_instructions_); + if (addr.IsVectorPlusScalar()) { + SingleEmissionCheckScope guard(this); + ldnt1h(zt, pg, addr); + } else { + SVELoadStoreNTBroadcastQOHelper(zt, + pg, + addr, + &MacroAssembler::ldnt1h, + 4, + 0, + SVE_MUL_VL); + } +} + +void MacroAssembler::Ldnt1w(const ZRegister& zt, + const PRegisterZ& pg, + const SVEMemOperand& addr) { + VIXL_ASSERT(allow_macro_instructions_); + if (addr.IsVectorPlusScalar()) { + SingleEmissionCheckScope guard(this); + ldnt1w(zt, pg, addr); + } else { + SVELoadStoreNTBroadcastQOHelper(zt, + pg, + addr, + &MacroAssembler::ldnt1w, + 4, + 0, + SVE_MUL_VL); + } +} + +void MacroAssembler::Stnt1b(const ZRegister& zt, + const PRegister& pg, + const SVEMemOperand& addr) { + VIXL_ASSERT(allow_macro_instructions_); + if (addr.IsVectorPlusScalar()) { + SingleEmissionCheckScope guard(this); + stnt1b(zt, pg, addr); + } else { + SVELoadStoreNTBroadcastQOHelper(zt, + pg, + addr, + &MacroAssembler::stnt1b, + 4, + 0, + SVE_MUL_VL); + } +} +void 
MacroAssembler::Stnt1d(const ZRegister& zt, + const PRegister& pg, + const SVEMemOperand& addr) { + VIXL_ASSERT(allow_macro_instructions_); + if (addr.IsVectorPlusScalar()) { + SingleEmissionCheckScope guard(this); + stnt1d(zt, pg, addr); + } else { + SVELoadStoreNTBroadcastQOHelper(zt, + pg, + addr, + &MacroAssembler::stnt1d, + 4, + 0, + SVE_MUL_VL); + } +} +void MacroAssembler::Stnt1h(const ZRegister& zt, + const PRegister& pg, + const SVEMemOperand& addr) { + VIXL_ASSERT(allow_macro_instructions_); + if (addr.IsVectorPlusScalar()) { + SingleEmissionCheckScope guard(this); + stnt1h(zt, pg, addr); + } else { + SVELoadStoreNTBroadcastQOHelper(zt, + pg, + addr, + &MacroAssembler::stnt1h, + 4, + 0, + SVE_MUL_VL); + } +} +void MacroAssembler::Stnt1w(const ZRegister& zt, + const PRegister& pg, + const SVEMemOperand& addr) { + VIXL_ASSERT(allow_macro_instructions_); + if (addr.IsVectorPlusScalar()) { + SingleEmissionCheckScope guard(this); + stnt1w(zt, pg, addr); + } else { + SVELoadStoreNTBroadcastQOHelper(zt, + pg, + addr, + &MacroAssembler::stnt1w, + 4, + 0, + SVE_MUL_VL); + } +} + +void MacroAssembler::SVEDotIndexHelper(ZZZImmFn fn, + const ZRegister& zd, + const ZRegister& za, + const ZRegister& zn, + const ZRegister& zm, + int index) { + if (zd.Aliases(za)) { + // zda = zda + (zn . zm) + SingleEmissionCheckScope guard(this); + (this->*fn)(zd, zn, zm, index); + + } else if (zd.Aliases(zn) || zd.Aliases(zm)) { + // zdn = za + (zdn . zm[index]) + // zdm = za + (zn . zdm[index]) + // zdnm = za + (zdnm . zdnm[index]) + UseScratchRegisterScope temps(this); + ZRegister scratch = temps.AcquireZ().WithSameLaneSizeAs(zd); + { + MovprfxHelperScope guard(this, scratch, za); + (this->*fn)(scratch, zn, zm, index); + } + + Mov(zd, scratch); + } else { + // zd = za + (zn . zm) + MovprfxHelperScope guard(this, zd, za); + (this->*fn)(zd, zn, zm, index); + } +} + +void MacroAssembler::FourRegDestructiveHelper(Int3ArithFn fn, + const ZRegister& zd, + const ZRegister& za, + const ZRegister& zn, + const ZRegister& zm) { + if (!zd.Aliases(za) && (zd.Aliases(zn) || zd.Aliases(zm))) { + // zd = za . zd . zm + // zd = za . zn . zd + // zd = za . zd . zd + UseScratchRegisterScope temps(this); + ZRegister scratch = temps.AcquireZ().WithSameLaneSizeAs(zd); + { + MovprfxHelperScope guard(this, scratch, za); + (this->*fn)(scratch, zn, zm); + } + + Mov(zd, scratch); + } else { + MovprfxHelperScope guard(this, zd, za); + (this->*fn)(zd, zn, zm); + } +} + +void MacroAssembler::FourRegDestructiveHelper(Int4ArithFn fn, + const ZRegister& zd, + const ZRegister& za, + const ZRegister& zn, + const ZRegister& zm) { + if (!zd.Aliases(za) && (zd.Aliases(zn) || zd.Aliases(zm))) { + // zd = za . zd . zm + // zd = za . zn . zd + // zd = za . zd . zd + UseScratchRegisterScope temps(this); + ZRegister scratch = temps.AcquireZ().WithSameLaneSizeAs(zd); + { + MovprfxHelperScope guard(this, scratch, za); + (this->*fn)(scratch, scratch, zn, zm); + } + + Mov(zd, scratch); + } else { + MovprfxHelperScope guard(this, zd, za); + (this->*fn)(zd, zd, zn, zm); + } +} + +void MacroAssembler::FourRegOneImmDestructiveHelper(ZZZImmFn fn, + const ZRegister& zd, + const ZRegister& za, + const ZRegister& zn, + const ZRegister& zm, + int imm) { + if (!zd.Aliases(za) && (zd.Aliases(zn) || zd.Aliases(zm))) { + // zd = za . zd . zm[i] + // zd = za . zn . zd[i] + // zd = za . zd . 
zd[i] + UseScratchRegisterScope temps(this); + ZRegister scratch = temps.AcquireZ().WithSameLaneSizeAs(zd); + { + MovprfxHelperScope guard(this, scratch, za); + (this->*fn)(scratch, zn, zm, imm); + } + + Mov(zd, scratch); + } else { + // zd = za . zn . zm[i] + MovprfxHelperScope guard(this, zd, za); + (this->*fn)(zd, zn, zm, imm); + } +} + +void MacroAssembler::AbsoluteDifferenceAccumulate(Int3ArithFn fn, + const ZRegister& zd, + const ZRegister& za, + const ZRegister& zn, + const ZRegister& zm) { + if (zn.Aliases(zm)) { + // If zn == zm, the difference is zero. + if (!zd.Aliases(za)) { + Mov(zd, za); + } + } else if (zd.Aliases(za)) { + SingleEmissionCheckScope guard(this); + (this->*fn)(zd, zn, zm); + } else if (zd.Aliases(zn)) { + UseScratchRegisterScope temps(this); + ZRegister ztmp = temps.AcquireZ().WithLaneSize(zn.GetLaneSizeInBits()); + Mov(ztmp, zn); + MovprfxHelperScope guard(this, zd, za); + (this->*fn)(zd, ztmp, zm); + } else if (zd.Aliases(zm)) { + UseScratchRegisterScope temps(this); + ZRegister ztmp = temps.AcquireZ().WithLaneSize(zn.GetLaneSizeInBits()); + Mov(ztmp, zm); + MovprfxHelperScope guard(this, zd, za); + (this->*fn)(zd, zn, ztmp); + } else { + MovprfxHelperScope guard(this, zd, za); + (this->*fn)(zd, zn, zm); + } +} + +#define VIXL_SVE_4REG_LIST(V) \ + V(Saba, saba, AbsoluteDifferenceAccumulate) \ + V(Uaba, uaba, AbsoluteDifferenceAccumulate) \ + V(Sabalb, sabalb, AbsoluteDifferenceAccumulate) \ + V(Sabalt, sabalt, AbsoluteDifferenceAccumulate) \ + V(Uabalb, uabalb, AbsoluteDifferenceAccumulate) \ + V(Uabalt, uabalt, AbsoluteDifferenceAccumulate) \ + V(Sdot, sdot, FourRegDestructiveHelper) \ + V(Udot, udot, FourRegDestructiveHelper) \ + V(Adclb, adclb, FourRegDestructiveHelper) \ + V(Adclt, adclt, FourRegDestructiveHelper) \ + V(Sbclb, sbclb, FourRegDestructiveHelper) \ + V(Sbclt, sbclt, FourRegDestructiveHelper) \ + V(Smlalb, smlalb, FourRegDestructiveHelper) \ + V(Smlalt, smlalt, FourRegDestructiveHelper) \ + V(Smlslb, smlslb, FourRegDestructiveHelper) \ + V(Smlslt, smlslt, FourRegDestructiveHelper) \ + V(Umlalb, umlalb, FourRegDestructiveHelper) \ + V(Umlalt, umlalt, FourRegDestructiveHelper) \ + V(Umlslb, umlslb, FourRegDestructiveHelper) \ + V(Umlslt, umlslt, FourRegDestructiveHelper) \ + V(Bcax, bcax, FourRegDestructiveHelper) \ + V(Bsl, bsl, FourRegDestructiveHelper) \ + V(Bsl1n, bsl1n, FourRegDestructiveHelper) \ + V(Bsl2n, bsl2n, FourRegDestructiveHelper) \ + V(Eor3, eor3, FourRegDestructiveHelper) \ + V(Nbsl, nbsl, FourRegDestructiveHelper) \ + V(Fmlalb, fmlalb, FourRegDestructiveHelper) \ + V(Fmlalt, fmlalt, FourRegDestructiveHelper) \ + V(Fmlslb, fmlslb, FourRegDestructiveHelper) \ + V(Fmlslt, fmlslt, FourRegDestructiveHelper) \ + V(Sqdmlalb, sqdmlalb, FourRegDestructiveHelper) \ + V(Sqdmlalbt, sqdmlalbt, FourRegDestructiveHelper) \ + V(Sqdmlalt, sqdmlalt, FourRegDestructiveHelper) \ + V(Sqdmlslb, sqdmlslb, FourRegDestructiveHelper) \ + V(Sqdmlslbt, sqdmlslbt, FourRegDestructiveHelper) \ + V(Sqdmlslt, sqdmlslt, FourRegDestructiveHelper) \ + V(Sqrdmlah, sqrdmlah, FourRegDestructiveHelper) \ + V(Sqrdmlsh, sqrdmlsh, FourRegDestructiveHelper) \ + V(Fmmla, fmmla, FourRegDestructiveHelper) \ + V(Smmla, smmla, FourRegDestructiveHelper) \ + V(Ummla, ummla, FourRegDestructiveHelper) \ + V(Usmmla, usmmla, FourRegDestructiveHelper) \ + V(Usdot, usdot, FourRegDestructiveHelper) + +#define VIXL_DEFINE_MASM_FUNC(MASMFN, ASMFN, HELPER) \ + void MacroAssembler::MASMFN(const ZRegister& zd, \ + const ZRegister& za, \ + const ZRegister& zn, \ + const ZRegister& zm) { \ + 
VIXL_ASSERT(allow_macro_instructions_); \ + HELPER(&Assembler::ASMFN, zd, za, zn, zm); \ + } +VIXL_SVE_4REG_LIST(VIXL_DEFINE_MASM_FUNC) +#undef VIXL_DEFINE_MASM_FUNC + +#define VIXL_SVE_4REG_1IMM_LIST(V) \ + V(Fmla, fmla, FourRegOneImmDestructiveHelper) \ + V(Fmls, fmls, FourRegOneImmDestructiveHelper) \ + V(Fmlalb, fmlalb, FourRegOneImmDestructiveHelper) \ + V(Fmlalt, fmlalt, FourRegOneImmDestructiveHelper) \ + V(Fmlslb, fmlslb, FourRegOneImmDestructiveHelper) \ + V(Fmlslt, fmlslt, FourRegOneImmDestructiveHelper) \ + V(Mla, mla, FourRegOneImmDestructiveHelper) \ + V(Mls, mls, FourRegOneImmDestructiveHelper) \ + V(Smlalb, smlalb, FourRegOneImmDestructiveHelper) \ + V(Smlalt, smlalt, FourRegOneImmDestructiveHelper) \ + V(Smlslb, smlslb, FourRegOneImmDestructiveHelper) \ + V(Smlslt, smlslt, FourRegOneImmDestructiveHelper) \ + V(Sqdmlalb, sqdmlalb, FourRegOneImmDestructiveHelper) \ + V(Sqdmlalt, sqdmlalt, FourRegOneImmDestructiveHelper) \ + V(Sqdmlslb, sqdmlslb, FourRegOneImmDestructiveHelper) \ + V(Sqdmlslt, sqdmlslt, FourRegOneImmDestructiveHelper) \ + V(Sqrdmlah, sqrdmlah, FourRegOneImmDestructiveHelper) \ + V(Sqrdmlsh, sqrdmlsh, FourRegOneImmDestructiveHelper) \ + V(Umlalb, umlalb, FourRegOneImmDestructiveHelper) \ + V(Umlalt, umlalt, FourRegOneImmDestructiveHelper) \ + V(Umlslb, umlslb, FourRegOneImmDestructiveHelper) \ + V(Umlslt, umlslt, FourRegOneImmDestructiveHelper) + +#define VIXL_DEFINE_MASM_FUNC(MASMFN, ASMFN, HELPER) \ + void MacroAssembler::MASMFN(const ZRegister& zd, \ + const ZRegister& za, \ + const ZRegister& zn, \ + const ZRegister& zm, \ + int imm) { \ + VIXL_ASSERT(allow_macro_instructions_); \ + HELPER(&Assembler::ASMFN, zd, za, zn, zm, imm); \ + } +VIXL_SVE_4REG_1IMM_LIST(VIXL_DEFINE_MASM_FUNC) +#undef VIXL_DEFINE_MASM_FUNC + +void MacroAssembler::Sdot(const ZRegister& zd, + const ZRegister& za, + const ZRegister& zn, + const ZRegister& zm, + int index) { + VIXL_ASSERT(allow_macro_instructions_); + SVEDotIndexHelper(&Assembler::sdot, zd, za, zn, zm, index); +} + +void MacroAssembler::Udot(const ZRegister& zd, + const ZRegister& za, + const ZRegister& zn, + const ZRegister& zm, + int index) { + VIXL_ASSERT(allow_macro_instructions_); + SVEDotIndexHelper(&Assembler::udot, zd, za, zn, zm, index); +} + +void MacroAssembler::Sudot(const ZRegister& zd, + const ZRegister& za, + const ZRegister& zn, + const ZRegister& zm, + int index) { + VIXL_ASSERT(allow_macro_instructions_); + SVEDotIndexHelper(&Assembler::sudot, zd, za, zn, zm, index); +} + +void MacroAssembler::Usdot(const ZRegister& zd, + const ZRegister& za, + const ZRegister& zn, + const ZRegister& zm, + int index) { + VIXL_ASSERT(allow_macro_instructions_); + SVEDotIndexHelper(&Assembler::usdot, zd, za, zn, zm, index); +} + +void MacroAssembler::Cdot(const ZRegister& zd, + const ZRegister& za, + const ZRegister& zn, + const ZRegister& zm, + int index, + int rot) { + // This doesn't handle zm when it's out of the range that can be encoded in + // instruction. The range depends on element size: z0-z7 for B, z0-15 for H. 
+ if ((zd.Aliases(zn) || zd.Aliases(zm)) && !zd.Aliases(za)) { + UseScratchRegisterScope temps(this); + ZRegister ztmp = temps.AcquireZ().WithSameLaneSizeAs(zd); + { + MovprfxHelperScope guard(this, ztmp, za); + cdot(ztmp, zn, zm, index, rot); + } + Mov(zd, ztmp); + } else { + MovprfxHelperScope guard(this, zd, za); + cdot(zd, zn, zm, index, rot); + } +} + +void MacroAssembler::Cdot(const ZRegister& zd, + const ZRegister& za, + const ZRegister& zn, + const ZRegister& zm, + int rot) { + if ((zd.Aliases(zn) || zd.Aliases(zm)) && !zd.Aliases(za)) { + UseScratchRegisterScope temps(this); + VIXL_ASSERT(AreSameLaneSize(zn, zm)); + ZRegister ztmp = temps.AcquireZ().WithSameLaneSizeAs(zn); + Mov(ztmp, zd.Aliases(zn) ? zn : zm); + MovprfxHelperScope guard(this, zd, za); + cdot(zd, (zd.Aliases(zn) ? ztmp : zn), (zd.Aliases(zm) ? ztmp : zm), rot); + } else { + MovprfxHelperScope guard(this, zd, za); + cdot(zd, zn, zm, rot); + } +} + +void MacroAssembler::FPMulAddHelper(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& za, + const ZRegister& zn, + const ZRegister& zm, + SVEMulAddPredicatedZdaFn fn_zda, + SVEMulAddPredicatedZdnFn fn_zdn, + FPMacroNaNPropagationOption nan_option) { + ResolveFPNaNPropagationOption(&nan_option); + + if (zd.Aliases(za)) { + // zda = (-)zda + ((-)zn * zm) for fmla, fmls, fnmla and fnmls. + SingleEmissionCheckScope guard(this); + (this->*fn_zda)(zd, pg, zn, zm); + } else if (zd.Aliases(zn)) { + // zdn = (-)za + ((-)zdn * zm) for fmad, fmsb, fnmad and fnmsb. + SingleEmissionCheckScope guard(this); + (this->*fn_zdn)(zd, pg, zm, za); + } else if (zd.Aliases(zm)) { + switch (nan_option) { + case FastNaNPropagation: { + // We treat multiplication as commutative in the fast mode, so we can + // swap zn and zm. + // zdm = (-)za + ((-)zdm * zn) for fmad, fmsb, fnmad and fnmsb. + SingleEmissionCheckScope guard(this); + (this->*fn_zdn)(zd, pg, zn, za); + return; + } + case StrictNaNPropagation: { + UseScratchRegisterScope temps(this); + // Use a scratch register to keep the argument order exactly as + // specified. + ZRegister scratch = temps.AcquireZ().WithSameLaneSizeAs(zn); + { + MovprfxHelperScope guard(this, scratch, pg, za); + // scratch = (-)za + ((-)zn * zm) + (this->*fn_zda)(scratch, pg, zn, zm); + } + Mov(zd, scratch); + return; + } + case NoFPMacroNaNPropagationSelected: + VIXL_UNREACHABLE(); + return; + } + } else { + // zd = (-)za + ((-)zn * zm) for fmla, fmls, fnmla and fnmls. 
+ MovprfxHelperScope guard(this, zd, pg, za); + (this->*fn_zda)(zd, pg, zn, zm); + } +} + +void MacroAssembler::Fmla(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& za, + const ZRegister& zn, + const ZRegister& zm, + FPMacroNaNPropagationOption nan_option) { + VIXL_ASSERT(allow_macro_instructions_); + FPMulAddHelper(zd, + pg, + za, + zn, + zm, + &Assembler::fmla, + &Assembler::fmad, + nan_option); +} + +void MacroAssembler::Fmls(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& za, + const ZRegister& zn, + const ZRegister& zm, + FPMacroNaNPropagationOption nan_option) { + VIXL_ASSERT(allow_macro_instructions_); + FPMulAddHelper(zd, + pg, + za, + zn, + zm, + &Assembler::fmls, + &Assembler::fmsb, + nan_option); +} + +void MacroAssembler::Fnmla(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& za, + const ZRegister& zn, + const ZRegister& zm, + FPMacroNaNPropagationOption nan_option) { + VIXL_ASSERT(allow_macro_instructions_); + FPMulAddHelper(zd, + pg, + za, + zn, + zm, + &Assembler::fnmla, + &Assembler::fnmad, + nan_option); +} + +void MacroAssembler::Fnmls(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& za, + const ZRegister& zn, + const ZRegister& zm, + FPMacroNaNPropagationOption nan_option) { + VIXL_ASSERT(allow_macro_instructions_); + FPMulAddHelper(zd, + pg, + za, + zn, + zm, + &Assembler::fnmls, + &Assembler::fnmsb, + nan_option); +} + +void MacroAssembler::Ftmad(const ZRegister& zd, + const ZRegister& zn, + const ZRegister& zm, + int imm3) { + VIXL_ASSERT(allow_macro_instructions_); + if (zd.Aliases(zm) && !zd.Aliases(zn)) { + UseScratchRegisterScope temps(this); + ZRegister scratch = temps.AcquireZ().WithSameLaneSizeAs(zm); + Mov(scratch, zm); + MovprfxHelperScope guard(this, zd, zn); + ftmad(zd, zd, scratch, imm3); + } else { + MovprfxHelperScope guard(this, zd, zn); + ftmad(zd, zd, zm, imm3); + } +} + +void MacroAssembler::Fcadd(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& zn, + const ZRegister& zm, + int rot) { + VIXL_ASSERT(allow_macro_instructions_); + if (zd.Aliases(zm) && !zd.Aliases(zn)) { + UseScratchRegisterScope temps(this); + ZRegister scratch = temps.AcquireZ().WithSameLaneSizeAs(zd); + { + MovprfxHelperScope guard(this, scratch, pg, zn); + fcadd(scratch, pg, scratch, zm, rot); + } + Mov(zd, scratch); + } else { + MovprfxHelperScope guard(this, zd, pg, zn); + fcadd(zd, pg, zd, zm, rot); + } +} + +void MacroAssembler::Fcmla(const ZRegister& zd, + const PRegisterM& pg, + const ZRegister& za, + const ZRegister& zn, + const ZRegister& zm, + int rot) { + VIXL_ASSERT(allow_macro_instructions_); + if ((zd.Aliases(zn) || zd.Aliases(zm)) && !zd.Aliases(za)) { + UseScratchRegisterScope temps(this); + ZRegister ztmp = temps.AcquireZ().WithSameLaneSizeAs(zd); + { + MovprfxHelperScope guard(this, ztmp, za); + fcmla(ztmp, pg, zn, zm, rot); + } + Mov(zd, pg, ztmp); + } else { + MovprfxHelperScope guard(this, zd, pg, za); + fcmla(zd, pg, zn, zm, rot); + } +} + +void MacroAssembler::Splice(const ZRegister& zd, + const PRegister& pg, + const ZRegister& zn, + const ZRegister& zm) { + VIXL_ASSERT(allow_macro_instructions_); + if (CPUHas(CPUFeatures::kSVE2) && AreConsecutive(zn, zm) && !zd.Aliases(zn)) { + SingleEmissionCheckScope guard(this); + splice(zd, pg, zn, zm); + } else if (zd.Aliases(zm) && !zd.Aliases(zn)) { + UseScratchRegisterScope temps(this); + ZRegister scratch = temps.AcquireZ().WithSameLaneSizeAs(zd); + { + MovprfxHelperScope guard(this, scratch, zn); + splice(scratch, pg, scratch, zm); + } + 
Mov(zd, scratch); + } else { + MovprfxHelperScope guard(this, zd, zn); + splice(zd, pg, zd, zm); + } +} + +void MacroAssembler::Clasta(const ZRegister& zd, + const PRegister& pg, + const ZRegister& zn, + const ZRegister& zm) { + VIXL_ASSERT(allow_macro_instructions_); + if (zd.Aliases(zm) && !zd.Aliases(zn)) { + UseScratchRegisterScope temps(this); + ZRegister scratch = temps.AcquireZ().WithSameLaneSizeAs(zd); + { + MovprfxHelperScope guard(this, scratch, zn); + clasta(scratch, pg, scratch, zm); + } + Mov(zd, scratch); + } else { + MovprfxHelperScope guard(this, zd, zn); + clasta(zd, pg, zd, zm); + } +} + +void MacroAssembler::Clastb(const ZRegister& zd, + const PRegister& pg, + const ZRegister& zn, + const ZRegister& zm) { + VIXL_ASSERT(allow_macro_instructions_); + if (zd.Aliases(zm) && !zd.Aliases(zn)) { + UseScratchRegisterScope temps(this); + ZRegister scratch = temps.AcquireZ().WithSameLaneSizeAs(zd); + { + MovprfxHelperScope guard(this, scratch, zn); + clastb(scratch, pg, scratch, zm); + } + Mov(zd, scratch); + } else { + MovprfxHelperScope guard(this, zd, zn); + clastb(zd, pg, zd, zm); + } +} + +void MacroAssembler::ShiftRightAccumulate(IntArithImmFn fn, + const ZRegister& zd, + const ZRegister& za, + const ZRegister& zn, + int shift) { + VIXL_ASSERT(allow_macro_instructions_); + if (!zd.Aliases(za) && zd.Aliases(zn)) { + UseScratchRegisterScope temps(this); + ZRegister ztmp = temps.AcquireZ().WithSameLaneSizeAs(zn); + Mov(ztmp, zn); + { + MovprfxHelperScope guard(this, zd, za); + (this->*fn)(zd, ztmp, shift); + } + } else { + MovprfxHelperScope guard(this, zd, za); + (this->*fn)(zd, zn, shift); + } +} + +void MacroAssembler::Srsra(const ZRegister& zd, + const ZRegister& za, + const ZRegister& zn, + int shift) { + ShiftRightAccumulate(&Assembler::srsra, zd, za, zn, shift); +} + +void MacroAssembler::Ssra(const ZRegister& zd, + const ZRegister& za, + const ZRegister& zn, + int shift) { + ShiftRightAccumulate(&Assembler::ssra, zd, za, zn, shift); +} + +void MacroAssembler::Ursra(const ZRegister& zd, + const ZRegister& za, + const ZRegister& zn, + int shift) { + ShiftRightAccumulate(&Assembler::ursra, zd, za, zn, shift); +} + +void MacroAssembler::Usra(const ZRegister& zd, + const ZRegister& za, + const ZRegister& zn, + int shift) { + ShiftRightAccumulate(&Assembler::usra, zd, za, zn, shift); +} + +void MacroAssembler::ComplexAddition(ZZZImmFn fn, + const ZRegister& zd, + const ZRegister& zn, + const ZRegister& zm, + int rot) { + VIXL_ASSERT(allow_macro_instructions_); + if (!zd.Aliases(zn) && zd.Aliases(zm)) { + UseScratchRegisterScope temps(this); + ZRegister ztmp = temps.AcquireZ().WithSameLaneSizeAs(zm); + Mov(ztmp, zm); + { + MovprfxHelperScope guard(this, zd, zn); + (this->*fn)(zd, zd, ztmp, rot); + } + } else { + MovprfxHelperScope guard(this, zd, zn); + (this->*fn)(zd, zd, zm, rot); + } +} + +void MacroAssembler::Cadd(const ZRegister& zd, + const ZRegister& zn, + const ZRegister& zm, + int rot) { + ComplexAddition(&Assembler::cadd, zd, zn, zm, rot); +} + +void MacroAssembler::Sqcadd(const ZRegister& zd, + const ZRegister& zn, + const ZRegister& zm, + int rot) { + ComplexAddition(&Assembler::sqcadd, zd, zn, zm, rot); +} + +} // namespace aarch64 +} // namespace vixl diff --git a/3rdparty/vixl/src/aarch64/operands-aarch64.cc b/3rdparty/vixl/src/aarch64/operands-aarch64.cc new file mode 100644 index 0000000000..e01d19074a --- /dev/null +++ b/3rdparty/vixl/src/aarch64/operands-aarch64.cc @@ -0,0 +1,469 @@ +// Copyright 2016, VIXL authors +// All rights reserved. 
+// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// * Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// * Neither the name of ARM Limited nor the names of its contributors may be +// used to endorse or promote products derived from this software without +// specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS CONTRIBUTORS "AS IS" AND +// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE +// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +#include "operands-aarch64.h" + +namespace vixl { +namespace aarch64 { + +// CPURegList utilities. +CPURegister CPURegList::PopLowestIndex(RegList mask) { + RegList list = list_ & mask; + if (list == 0) return NoCPUReg; + int index = CountTrailingZeros(list); + VIXL_ASSERT(((static_cast(1) << index) & list) != 0); + Remove(index); + return CPURegister(index, size_, type_); +} + + +CPURegister CPURegList::PopHighestIndex(RegList mask) { + RegList list = list_ & mask; + if (list == 0) return NoCPUReg; + int index = CountLeadingZeros(list); + index = kRegListSizeInBits - 1 - index; + VIXL_ASSERT(((static_cast(1) << index) & list) != 0); + Remove(index); + return CPURegister(index, size_, type_); +} + + +bool CPURegList::IsValid() const { + if (type_ == CPURegister::kNoRegister) { + // We can't use IsEmpty here because that asserts IsValid(). + return list_ == 0; + } else { + bool is_valid = true; + // Try to create a CPURegister for each element in the list. + for (int i = 0; i < kRegListSizeInBits; i++) { + if (((list_ >> i) & 1) != 0) { + is_valid &= CPURegister(i, size_, type_).IsValid(); + } + } + return is_valid; + } +} + + +void CPURegList::RemoveCalleeSaved() { + if (GetType() == CPURegister::kRegister) { + Remove(GetCalleeSaved(GetRegisterSizeInBits())); + } else if (GetType() == CPURegister::kVRegister) { + Remove(GetCalleeSavedV(GetRegisterSizeInBits())); + } else { + VIXL_ASSERT(GetType() == CPURegister::kNoRegister); + VIXL_ASSERT(IsEmpty()); + // The list must already be empty, so do nothing. 
+ } +} + + +CPURegList CPURegList::Union(const CPURegList& list_1, + const CPURegList& list_2, + const CPURegList& list_3) { + return Union(list_1, Union(list_2, list_3)); +} + + +CPURegList CPURegList::Union(const CPURegList& list_1, + const CPURegList& list_2, + const CPURegList& list_3, + const CPURegList& list_4) { + return Union(Union(list_1, list_2), Union(list_3, list_4)); +} + + +CPURegList CPURegList::Intersection(const CPURegList& list_1, + const CPURegList& list_2, + const CPURegList& list_3) { + return Intersection(list_1, Intersection(list_2, list_3)); +} + + +CPURegList CPURegList::Intersection(const CPURegList& list_1, + const CPURegList& list_2, + const CPURegList& list_3, + const CPURegList& list_4) { + return Intersection(Intersection(list_1, list_2), + Intersection(list_3, list_4)); +} + + +CPURegList CPURegList::GetCalleeSaved(unsigned size) { + return CPURegList(CPURegister::kRegister, size, 19, 29); +} + + +CPURegList CPURegList::GetCalleeSavedV(unsigned size) { + return CPURegList(CPURegister::kVRegister, size, 8, 15); +} + + +CPURegList CPURegList::GetCallerSaved(unsigned size) { + // Registers x0-x18 and lr (x30) are caller-saved. + CPURegList list = CPURegList(CPURegister::kRegister, size, 0, 18); + // Do not use lr directly to avoid initialisation order fiasco bugs for users. + list.Combine(Register(30, kXRegSize)); + return list; +} + + +CPURegList CPURegList::GetCallerSavedV(unsigned size) { + // Registers d0-d7 and d16-d31 are caller-saved. + CPURegList list = CPURegList(CPURegister::kVRegister, size, 0, 7); + list.Combine(CPURegList(CPURegister::kVRegister, size, 16, 31)); + return list; +} + + +const CPURegList kCalleeSaved = CPURegList::GetCalleeSaved(); +const CPURegList kCalleeSavedV = CPURegList::GetCalleeSavedV(); +const CPURegList kCallerSaved = CPURegList::GetCallerSaved(); +const CPURegList kCallerSavedV = CPURegList::GetCallerSavedV(); + +// Operand. +Operand::Operand(int64_t immediate) + : immediate_(immediate), + reg_(NoReg), + shift_(NO_SHIFT), + extend_(NO_EXTEND), + shift_amount_(0) {} + +Operand::Operand(IntegerOperand immediate) + : immediate_(immediate.AsIntN(64)), + reg_(NoReg), + shift_(NO_SHIFT), + extend_(NO_EXTEND), + shift_amount_(0) {} + +Operand::Operand(Register reg, Shift shift, unsigned shift_amount) + : reg_(reg), + shift_(shift), + extend_(NO_EXTEND), + shift_amount_(shift_amount) { + VIXL_ASSERT(shift != MSL); + VIXL_ASSERT(reg.Is64Bits() || (shift_amount < kWRegSize)); + VIXL_ASSERT(reg.Is32Bits() || (shift_amount < kXRegSize)); + VIXL_ASSERT(!reg.IsSP()); +} + + +Operand::Operand(Register reg, Extend extend, unsigned shift_amount) + : reg_(reg), + shift_(NO_SHIFT), + extend_(extend), + shift_amount_(shift_amount) { + VIXL_ASSERT(reg.IsValid()); + VIXL_ASSERT(shift_amount <= 4); + VIXL_ASSERT(!reg.IsSP()); + + // Extend modes SXTX and UXTX require a 64-bit register. + VIXL_ASSERT(reg.Is64Bits() || ((extend != SXTX) && (extend != UXTX))); +} + + +bool Operand::IsImmediate() const { return reg_.Is(NoReg); } + + +bool Operand::IsPlainRegister() const { + return reg_.IsValid() && + (((shift_ == NO_SHIFT) && (extend_ == NO_EXTEND)) || + // No-op shifts. + ((shift_ != NO_SHIFT) && (shift_amount_ == 0)) || + // No-op extend operations. + // We can't include [US]XTW here without knowing more about the + // context; they are only no-ops for 32-bit operations. 
+ // + // For example, this operand could be replaced with w1: + // __ Add(w0, w0, Operand(w1, UXTW)); + // However, no plain register can replace it in this context: + // __ Add(x0, x0, Operand(w1, UXTW)); + (((extend_ == UXTX) || (extend_ == SXTX)) && (shift_amount_ == 0))); +} + + +bool Operand::IsShiftedRegister() const { + return reg_.IsValid() && (shift_ != NO_SHIFT); +} + + +bool Operand::IsExtendedRegister() const { + return reg_.IsValid() && (extend_ != NO_EXTEND); +} + + +bool Operand::IsZero() const { + if (IsImmediate()) { + return GetImmediate() == 0; + } else { + return GetRegister().IsZero(); + } +} + + +Operand Operand::ToExtendedRegister() const { + VIXL_ASSERT(IsShiftedRegister()); + VIXL_ASSERT((shift_ == LSL) && (shift_amount_ <= 4)); + return Operand(reg_, reg_.Is64Bits() ? UXTX : UXTW, shift_amount_); +} + + +// MemOperand +MemOperand::MemOperand() + : base_(NoReg), + regoffset_(NoReg), + offset_(0), + addrmode_(Offset), + shift_(NO_SHIFT), + extend_(NO_EXTEND) {} + + +MemOperand::MemOperand(Register base, int64_t offset, AddrMode addrmode) + : base_(base), + regoffset_(NoReg), + offset_(offset), + addrmode_(addrmode), + shift_(NO_SHIFT), + extend_(NO_EXTEND), + shift_amount_(0) { + VIXL_ASSERT(base.Is64Bits() && !base.IsZero()); +} + + +MemOperand::MemOperand(Register base, + Register regoffset, + Extend extend, + unsigned shift_amount) + : base_(base), + regoffset_(regoffset), + offset_(0), + addrmode_(Offset), + shift_(NO_SHIFT), + extend_(extend), + shift_amount_(shift_amount) { + VIXL_ASSERT(base.Is64Bits() && !base.IsZero()); + VIXL_ASSERT(!regoffset.IsSP()); + VIXL_ASSERT((extend == UXTW) || (extend == SXTW) || (extend == SXTX)); + + // SXTX extend mode requires a 64-bit offset register. + VIXL_ASSERT(regoffset.Is64Bits() || (extend != SXTX)); +} + + +MemOperand::MemOperand(Register base, + Register regoffset, + Shift shift, + unsigned shift_amount) + : base_(base), + regoffset_(regoffset), + offset_(0), + addrmode_(Offset), + shift_(shift), + extend_(NO_EXTEND), + shift_amount_(shift_amount) { + VIXL_ASSERT(base.Is64Bits() && !base.IsZero()); + VIXL_ASSERT(regoffset.Is64Bits() && !regoffset.IsSP()); + VIXL_ASSERT(shift == LSL); +} + + +MemOperand::MemOperand(Register base, const Operand& offset, AddrMode addrmode) + : base_(base), + regoffset_(NoReg), + addrmode_(addrmode), + shift_(NO_SHIFT), + extend_(NO_EXTEND), + shift_amount_(0) { + VIXL_ASSERT(base.Is64Bits() && !base.IsZero()); + + if (offset.IsImmediate()) { + offset_ = offset.GetImmediate(); + } else if (offset.IsShiftedRegister()) { + VIXL_ASSERT((addrmode == Offset) || (addrmode == PostIndex)); + + regoffset_ = offset.GetRegister(); + shift_ = offset.GetShift(); + shift_amount_ = offset.GetShiftAmount(); + + extend_ = NO_EXTEND; + offset_ = 0; + + // These assertions match those in the shifted-register constructor. + VIXL_ASSERT(regoffset_.Is64Bits() && !regoffset_.IsSP()); + VIXL_ASSERT(shift_ == LSL); + } else { + VIXL_ASSERT(offset.IsExtendedRegister()); + VIXL_ASSERT(addrmode == Offset); + + regoffset_ = offset.GetRegister(); + extend_ = offset.GetExtend(); + shift_amount_ = offset.GetShiftAmount(); + + shift_ = NO_SHIFT; + offset_ = 0; + + // These assertions match those in the extended-register constructor. 
+ VIXL_ASSERT(!regoffset_.IsSP()); + VIXL_ASSERT((extend_ == UXTW) || (extend_ == SXTW) || (extend_ == SXTX)); + VIXL_ASSERT((regoffset_.Is64Bits() || (extend_ != SXTX))); + } +} + + +bool MemOperand::IsPlainRegister() const { + return IsImmediateOffset() && (GetOffset() == 0); +} + + +bool MemOperand::IsEquivalentToPlainRegister() const { + if (regoffset_.Is(NoReg)) { + // Immediate offset, pre-index or post-index. + return GetOffset() == 0; + } else if (GetRegisterOffset().IsZero()) { + // Zero register offset, pre-index or post-index. + // We can ignore shift and extend options because they all result in zero. + return true; + } + return false; +} + + +bool MemOperand::IsImmediateOffset() const { + return (addrmode_ == Offset) && regoffset_.Is(NoReg); +} + + +bool MemOperand::IsRegisterOffset() const { + return (addrmode_ == Offset) && !regoffset_.Is(NoReg); +} + +bool MemOperand::IsPreIndex() const { return addrmode_ == PreIndex; } +bool MemOperand::IsPostIndex() const { return addrmode_ == PostIndex; } + +bool MemOperand::IsImmediatePreIndex() const { + return IsPreIndex() && regoffset_.Is(NoReg); +} + +bool MemOperand::IsImmediatePostIndex() const { + return IsPostIndex() && regoffset_.Is(NoReg); +} + +void MemOperand::AddOffset(int64_t offset) { + VIXL_ASSERT(IsImmediateOffset()); + offset_ += offset; +} + + +bool SVEMemOperand::IsValid() const { +#ifdef VIXL_DEBUG + { + // It should not be possible for an SVEMemOperand to match multiple types. + int count = 0; + if (IsScalarPlusImmediate()) count++; + if (IsScalarPlusScalar()) count++; + if (IsScalarPlusVector()) count++; + if (IsVectorPlusImmediate()) count++; + if (IsVectorPlusScalar()) count++; + if (IsVectorPlusVector()) count++; + VIXL_ASSERT(count <= 1); + } +#endif + + // We can't have a register _and_ an immediate offset. + if ((offset_ != 0) && (!regoffset_.IsNone())) return false; + + if (shift_amount_ != 0) { + // Only shift and extend modifiers can take a shift amount. + switch (mod_) { + case NO_SVE_OFFSET_MODIFIER: + case SVE_MUL_VL: + return false; + case SVE_LSL: + case SVE_UXTW: + case SVE_SXTW: + // Fall through. + break; + } + } + + return IsScalarPlusImmediate() || IsScalarPlusScalar() || + IsScalarPlusVector() || IsVectorPlusImmediate() || + IsVectorPlusScalar() || IsVectorPlusVector(); +} + + +bool SVEMemOperand::IsEquivalentToScalar() const { + if (IsScalarPlusImmediate()) { + return GetImmediateOffset() == 0; + } + if (IsScalarPlusScalar()) { + // We can ignore the shift because it will still result in zero. + return GetScalarOffset().IsZero(); + } + // Forms involving vectors are never equivalent to a single scalar. + return false; +} + +bool SVEMemOperand::IsPlainRegister() const { + if (IsScalarPlusImmediate()) { + return GetImmediateOffset() == 0; + } + return false; +} + +GenericOperand::GenericOperand(const CPURegister& reg) + : cpu_register_(reg), mem_op_size_(0) { + if (reg.IsQ()) { + VIXL_ASSERT(reg.GetSizeInBits() > static_cast(kXRegSize)); + // Support for Q registers is not implemented yet. + VIXL_UNIMPLEMENTED(); + } +} + + +GenericOperand::GenericOperand(const MemOperand& mem_op, size_t mem_op_size) + : cpu_register_(NoReg), mem_op_(mem_op), mem_op_size_(mem_op_size) { + if (mem_op_size_ > kXRegSizeInBytes) { + // We only support generic operands up to the size of X registers. + VIXL_UNIMPLEMENTED(); + } +} + +bool GenericOperand::Equals(const GenericOperand& other) const { + if (!IsValid() || !other.IsValid()) { + // Two invalid generic operands are considered equal. 
+ return !IsValid() && !other.IsValid(); + } + if (IsCPURegister() && other.IsCPURegister()) { + return GetCPURegister().Is(other.GetCPURegister()); + } else if (IsMemOperand() && other.IsMemOperand()) { + return GetMemOperand().Equals(other.GetMemOperand()) && + (GetMemOperandSizeInBytes() == other.GetMemOperandSizeInBytes()); + } + return false; +} +} +} // namespace vixl::aarch64 diff --git a/3rdparty/vixl/src/aarch64/pointer-auth-aarch64.cc b/3rdparty/vixl/src/aarch64/pointer-auth-aarch64.cc new file mode 100644 index 0000000000..55cf4ca592 --- /dev/null +++ b/3rdparty/vixl/src/aarch64/pointer-auth-aarch64.cc @@ -0,0 +1,197 @@ +// Copyright 2018, VIXL authors +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// * Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// * Neither the name of ARM Limited nor the names of its contributors may be +// used to endorse or promote products derived from this software without +// specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS CONTRIBUTORS "AS IS" AND +// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE +// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +#ifdef VIXL_INCLUDE_SIMULATOR_AARCH64 + +#include "simulator-aarch64.h" + +#include "utils-vixl.h" + +namespace vixl { +namespace aarch64 { + +// Randomly generated example keys for simulating only. +const Simulator::PACKey Simulator::kPACKeyIA = {0xc31718727de20f71, + 0xab9fd4e14b2fec51, + 0}; +const Simulator::PACKey Simulator::kPACKeyIB = {0xeebb163b474e04c8, + 0x5267ac6fc280fb7c, + 1}; +const Simulator::PACKey Simulator::kPACKeyDA = {0x5caef808deb8b1e2, + 0xd347cbc06b7b0f77, + 0}; +const Simulator::PACKey Simulator::kPACKeyDB = {0xe06aa1a949ba8cc7, + 0xcfde69e3db6d0432, + 1}; + +// The general PAC key isn't intended to be used with AuthPAC so we ensure the +// key number is invalid and asserts if used incorrectly. +const Simulator::PACKey Simulator::kPACKeyGA = {0xfcd98a44d564b3d5, + 0x6c56df1904bf0ddc, + -1}; + +static uint64_t GetNibble(uint64_t in_data, int position) { + return (in_data >> position) & 0xf; +} + +static uint64_t ShuffleNibbles(uint64_t in_data) { + static int in_positions[16] = + {4, 36, 52, 40, 44, 0, 24, 12, 56, 60, 8, 32, 16, 28, 20, 48}; + uint64_t out_data = 0; + for (int i = 0; i < 16; i++) { + out_data |= GetNibble(in_data, in_positions[i]) << (4 * i); + } + return out_data; +} + +static uint64_t SubstituteNibbles(uint64_t in_data) { + // Randomly chosen substitutes. 
+ static uint64_t subs[16] = + {4, 7, 3, 9, 10, 14, 0, 1, 15, 2, 8, 6, 12, 5, 11, 13}; + uint64_t out_data = 0; + for (int i = 0; i < 16; i++) { + int index = (in_data >> (4 * i)) & 0xf; + out_data |= subs[index] << (4 * i); + } + return out_data; +} + +// Rotate nibble to the left by the amount specified. +static uint64_t RotNibble(uint64_t in_cell, int amount) { + VIXL_ASSERT((amount >= 0) && (amount <= 3)); + + in_cell &= 0xf; + uint64_t temp = (in_cell << 4) | in_cell; + return (temp >> (4 - amount)) & 0xf; +} + +static uint64_t BigShuffle(uint64_t in_data) { + uint64_t out_data = 0; + for (int i = 0; i < 4; i++) { + uint64_t n12 = GetNibble(in_data, 4 * (i + 12)); + uint64_t n8 = GetNibble(in_data, 4 * (i + 8)); + uint64_t n4 = GetNibble(in_data, 4 * (i + 4)); + uint64_t n0 = GetNibble(in_data, 4 * (i + 0)); + + uint64_t t0 = RotNibble(n8, 2) ^ RotNibble(n4, 1) ^ RotNibble(n0, 1); + uint64_t t1 = RotNibble(n12, 1) ^ RotNibble(n4, 2) ^ RotNibble(n0, 1); + uint64_t t2 = RotNibble(n12, 2) ^ RotNibble(n8, 1) ^ RotNibble(n0, 1); + uint64_t t3 = RotNibble(n12, 1) ^ RotNibble(n8, 1) ^ RotNibble(n4, 2); + + out_data |= t3 << (4 * (i + 0)); + out_data |= t2 << (4 * (i + 4)); + out_data |= t1 << (4 * (i + 8)); + out_data |= t0 << (4 * (i + 12)); + } + return out_data; +} + +// A simple, non-standard hash function invented for simulating. It mixes +// reasonably well, however it is unlikely to be cryptographically secure and +// may have a higher collision chance than other hashing algorithms. +uint64_t Simulator::ComputePAC(uint64_t data, uint64_t context, PACKey key) { + uint64_t working_value = data ^ key.high; + working_value = BigShuffle(working_value); + working_value = ShuffleNibbles(working_value); + working_value ^= key.low; + working_value = ShuffleNibbles(working_value); + working_value = BigShuffle(working_value); + working_value ^= context; + working_value = SubstituteNibbles(working_value); + working_value = BigShuffle(working_value); + working_value = SubstituteNibbles(working_value); + + return working_value; +} + +// The TTBR is selected by bit 63 or 55 depending on TBI for pointers without +// codes, but is always 55 once a PAC code is added to a pointer. For this +// reason, it must be calculated at the call site. +uint64_t Simulator::CalculatePACMask(uint64_t ptr, PointerType type, int ttbr) { + int bottom_pac_bit = GetBottomPACBit(ptr, ttbr); + int top_pac_bit = GetTopPACBit(ptr, type); + return ExtractUnsignedBitfield64(top_pac_bit, + bottom_pac_bit, + 0xffffffffffffffff & ~kTTBRMask) + << bottom_pac_bit; +} + +uint64_t Simulator::AuthPAC(uint64_t ptr, + uint64_t context, + PACKey key, + PointerType type) { + VIXL_ASSERT((key.number == 0) || (key.number == 1)); + + uint64_t pac_mask = CalculatePACMask(ptr, type, (ptr >> 55) & 1); + uint64_t original_ptr = + ((ptr & kTTBRMask) == 0) ? (ptr & ~pac_mask) : (ptr | pac_mask); + + uint64_t pac = ComputePAC(original_ptr, context, key); + + uint64_t error_code = 1 << key.number; + if ((pac & pac_mask) == (ptr & pac_mask)) { + return original_ptr; + } else { + int error_lsb = GetTopPACBit(ptr, type) - 2; + uint64_t error_mask = UINT64_C(0x3) << error_lsb; + return (original_ptr & ~error_mask) | (error_code << error_lsb); + } +} + +uint64_t Simulator::AddPAC(uint64_t ptr, + uint64_t context, + PACKey key, + PointerType type) { + int top_pac_bit = GetTopPACBit(ptr, type); + + // TODO: Properly handle the case where extension bits are bad and TBI is + // turned off, and also test me. 
+ VIXL_ASSERT(HasTBI(ptr, type)); + int ttbr = (ptr >> 55) & 1; + uint64_t pac_mask = CalculatePACMask(ptr, type, ttbr); + uint64_t ext_ptr = (ttbr == 0) ? (ptr & ~pac_mask) : (ptr | pac_mask); + + uint64_t pac = ComputePAC(ext_ptr, context, key); + + // If the pointer isn't all zeroes or all ones in the PAC bitfield, corrupt + // the resulting code. + if (((ptr & (pac_mask | kTTBRMask)) != 0x0) && + ((~ptr & (pac_mask | kTTBRMask)) != 0x0)) { + pac ^= UINT64_C(1) << (top_pac_bit - 1); + } + + uint64_t ttbr_shifted = static_cast(ttbr) << 55; + return (pac & pac_mask) | ttbr_shifted | (ptr & ~pac_mask); +} + +uint64_t Simulator::StripPAC(uint64_t ptr, PointerType type) { + uint64_t pac_mask = CalculatePACMask(ptr, type, (ptr >> 55) & 1); + return ((ptr & kTTBRMask) == 0) ? (ptr & ~pac_mask) : (ptr | pac_mask); +} +} // namespace aarch64 +} // namespace vixl + +#endif // VIXL_INCLUDE_SIMULATOR_AARCH64 diff --git a/3rdparty/vixl/src/aarch64/registers-aarch64.cc b/3rdparty/vixl/src/aarch64/registers-aarch64.cc new file mode 100644 index 0000000000..90201a6031 --- /dev/null +++ b/3rdparty/vixl/src/aarch64/registers-aarch64.cc @@ -0,0 +1,321 @@ +// Copyright 2019, VIXL authors +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// * Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// * Neither the name of ARM Limited nor the names of its contributors may be +// used to endorse or promote products derived from this software without +// specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS CONTRIBUTORS "AS IS" AND +// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE +// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +#include +#include + +#include "registers-aarch64.h" + +namespace vixl { +namespace aarch64 { + +std::string CPURegister::GetArchitecturalName() const { + std::ostringstream name; + if (IsZRegister()) { + name << 'z' << GetCode(); + if (HasLaneSize()) { + name << '.' << GetLaneSizeSymbol(); + } + } else if (IsPRegister()) { + name << 'p' << GetCode(); + if (HasLaneSize()) { + name << '.' 
<< GetLaneSizeSymbol(); + } + switch (qualifiers_) { + case kNoQualifiers: + break; + case kMerging: + name << "/m"; + break; + case kZeroing: + name << "/z"; + break; + } + } else { + VIXL_UNIMPLEMENTED(); + } + return name.str(); +} + +unsigned CPURegister::GetMaxCodeFor(CPURegister::RegisterBank bank) { + switch (bank) { + case kNoRegisterBank: + return 0; + case kRRegisterBank: + return Register::GetMaxCode(); + case kVRegisterBank: +#ifdef VIXL_HAS_CONSTEXPR + VIXL_STATIC_ASSERT(VRegister::GetMaxCode() == ZRegister::GetMaxCode()); +#else + VIXL_ASSERT(VRegister::GetMaxCode() == ZRegister::GetMaxCode()); +#endif + return VRegister::GetMaxCode(); + case kPRegisterBank: + return PRegister::GetMaxCode(); + } + VIXL_UNREACHABLE(); + return 0; +} + +bool CPURegister::IsValidRegister() const { + return ((code_ < kNumberOfRegisters) || (code_ == kSPRegInternalCode)) && + (bank_ == kRRegisterBank) && + ((size_ == kEncodedWRegSize) || (size_ == kEncodedXRegSize)) && + (qualifiers_ == kNoQualifiers) && (lane_size_ == size_); +} + +bool CPURegister::IsValidVRegister() const { + VIXL_STATIC_ASSERT(kEncodedBRegSize < kEncodedQRegSize); + return (code_ < kNumberOfVRegisters) && (bank_ == kVRegisterBank) && + ((size_ >= kEncodedBRegSize) && (size_ <= kEncodedQRegSize)) && + (qualifiers_ == kNoQualifiers) && + (lane_size_ != kEncodedUnknownSize) && (lane_size_ <= size_); +} + +bool CPURegister::IsValidFPRegister() const { + return IsValidVRegister() && IsFPRegister(); +} + +bool CPURegister::IsValidZRegister() const { + VIXL_STATIC_ASSERT(kEncodedBRegSize < kEncodedQRegSize); + // Z registers are valid with or without a lane size, so we don't need to + // check lane_size_. + return (code_ < kNumberOfZRegisters) && (bank_ == kVRegisterBank) && + (size_ == kEncodedUnknownSize) && (qualifiers_ == kNoQualifiers); +} + +bool CPURegister::IsValidPRegister() const { + VIXL_STATIC_ASSERT(kEncodedBRegSize < kEncodedQRegSize); + // P registers are valid with or without a lane size, so we don't need to + // check lane_size_. + return (code_ < kNumberOfPRegisters) && (bank_ == kPRegisterBank) && + (size_ == kEncodedUnknownSize) && + ((qualifiers_ == kNoQualifiers) || (qualifiers_ == kMerging) || + (qualifiers_ == kZeroing)); +} + +bool CPURegister::IsValid() const { + return IsValidRegister() || IsValidVRegister() || IsValidZRegister() || + IsValidPRegister(); +} + +// Most coercions simply invoke the necessary constructor. +#define VIXL_CPUREG_COERCION_LIST(U) \ + U(Register, W, R) \ + U(Register, X, R) \ + U(VRegister, B, V) \ + U(VRegister, H, V) \ + U(VRegister, S, V) \ + U(VRegister, D, V) \ + U(VRegister, Q, V) \ + U(VRegister, V, V) \ + U(ZRegister, Z, V) \ + U(PRegister, P, P) +#define VIXL_DEFINE_CPUREG_COERCION(RET_TYPE, CTOR_TYPE, BANK) \ + RET_TYPE CPURegister::CTOR_TYPE() const { \ + VIXL_ASSERT(GetBank() == k##BANK##RegisterBank); \ + return CTOR_TYPE##Register(GetCode()); \ + } +VIXL_CPUREG_COERCION_LIST(VIXL_DEFINE_CPUREG_COERCION) +#undef VIXL_CPUREG_COERCION_LIST +#undef VIXL_DEFINE_CPUREG_COERCION + +// NEON lane-format coercions always return VRegisters. 
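The register coercions above, and the NEON lane-format coercions defined just below, are generated with an X-macro: a `*_LIST` macro enumerates the cases and a one-off `VIXL_DEFINE_*` macro is applied to each entry, after which both are `#undef`ed. As a reading aid only (this is the mechanical preprocessor expansion, not additional code in the patch), the first entry of `VIXL_CPUREG_COERCION_LIST`, `U(Register, W, R)`, expands to roughly:

```cpp
// Approximate expansion of VIXL_DEFINE_CPUREG_COERCION(Register, W, R).
Register CPURegister::W() const {
  VIXL_ASSERT(GetBank() == kRRegisterBank);
  return WRegister(GetCode());
}
```

The NEON lane-format list that follows works the same way, producing `VRegister::V8B()`, `VRegister::V16B()`, and so on, each asserting `IsVRegister()` before constructing the reformatted view.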
+#define VIXL_CPUREG_NEON_COERCION_LIST(V) \ + V(8, B) \ + V(16, B) \ + V(2, H) \ + V(4, H) \ + V(8, H) \ + V(2, S) \ + V(4, S) \ + V(1, D) \ + V(2, D) +#define VIXL_DEFINE_CPUREG_NEON_COERCION(LANES, LANE_TYPE) \ + VRegister VRegister::V##LANES##LANE_TYPE() const { \ + VIXL_ASSERT(IsVRegister()); \ + return VRegister(GetCode(), LANES * k##LANE_TYPE##RegSize, LANES); \ + } +VIXL_CPUREG_NEON_COERCION_LIST(VIXL_DEFINE_CPUREG_NEON_COERCION) +#undef VIXL_CPUREG_NEON_COERCION_LIST +#undef VIXL_DEFINE_CPUREG_NEON_COERCION + +// Semantic type coercion for sdot and udot. +// TODO: Use the qualifiers_ field to distinguish this from ::S(). +VRegister VRegister::S4B() const { + VIXL_ASSERT(IsVRegister()); + return SRegister(GetCode()); +} + +bool AreAliased(const CPURegister& reg1, + const CPURegister& reg2, + const CPURegister& reg3, + const CPURegister& reg4, + const CPURegister& reg5, + const CPURegister& reg6, + const CPURegister& reg7, + const CPURegister& reg8) { + int number_of_valid_regs = 0; + int number_of_valid_vregs = 0; + int number_of_valid_pregs = 0; + + RegList unique_regs = 0; + RegList unique_vregs = 0; + RegList unique_pregs = 0; + + const CPURegister regs[] = {reg1, reg2, reg3, reg4, reg5, reg6, reg7, reg8}; + + for (size_t i = 0; i < ArrayLength(regs); i++) { + switch (regs[i].GetBank()) { + case CPURegister::kRRegisterBank: + number_of_valid_regs++; + unique_regs |= regs[i].GetBit(); + break; + case CPURegister::kVRegisterBank: + number_of_valid_vregs++; + unique_vregs |= regs[i].GetBit(); + break; + case CPURegister::kPRegisterBank: + number_of_valid_pregs++; + unique_pregs |= regs[i].GetBit(); + break; + case CPURegister::kNoRegisterBank: + VIXL_ASSERT(regs[i].IsNone()); + break; + } + } + + int number_of_unique_regs = CountSetBits(unique_regs); + int number_of_unique_vregs = CountSetBits(unique_vregs); + int number_of_unique_pregs = CountSetBits(unique_pregs); + + VIXL_ASSERT(number_of_valid_regs >= number_of_unique_regs); + VIXL_ASSERT(number_of_valid_vregs >= number_of_unique_vregs); + VIXL_ASSERT(number_of_valid_pregs >= number_of_unique_pregs); + + return (number_of_valid_regs != number_of_unique_regs) || + (number_of_valid_vregs != number_of_unique_vregs) || + (number_of_valid_pregs != number_of_unique_pregs); +} + +bool AreSameSizeAndType(const CPURegister& reg1, + const CPURegister& reg2, + const CPURegister& reg3, + const CPURegister& reg4, + const CPURegister& reg5, + const CPURegister& reg6, + const CPURegister& reg7, + const CPURegister& reg8) { + VIXL_ASSERT(reg1.IsValid()); + bool match = true; + match &= !reg2.IsValid() || reg2.IsSameSizeAndType(reg1); + match &= !reg3.IsValid() || reg3.IsSameSizeAndType(reg1); + match &= !reg4.IsValid() || reg4.IsSameSizeAndType(reg1); + match &= !reg5.IsValid() || reg5.IsSameSizeAndType(reg1); + match &= !reg6.IsValid() || reg6.IsSameSizeAndType(reg1); + match &= !reg7.IsValid() || reg7.IsSameSizeAndType(reg1); + match &= !reg8.IsValid() || reg8.IsSameSizeAndType(reg1); + return match; +} + +bool AreEven(const CPURegister& reg1, + const CPURegister& reg2, + const CPURegister& reg3, + const CPURegister& reg4, + const CPURegister& reg5, + const CPURegister& reg6, + const CPURegister& reg7, + const CPURegister& reg8) { + VIXL_ASSERT(reg1.IsValid()); + bool even = (reg1.GetCode() % 2) == 0; + even &= !reg2.IsValid() || ((reg2.GetCode() % 2) == 0); + even &= !reg3.IsValid() || ((reg3.GetCode() % 2) == 0); + even &= !reg4.IsValid() || ((reg4.GetCode() % 2) == 0); + even &= !reg5.IsValid() || ((reg5.GetCode() % 2) == 0); + even &= 
!reg6.IsValid() || ((reg6.GetCode() % 2) == 0); + even &= !reg7.IsValid() || ((reg7.GetCode() % 2) == 0); + even &= !reg8.IsValid() || ((reg8.GetCode() % 2) == 0); + return even; +} + +bool AreConsecutive(const CPURegister& reg1, + const CPURegister& reg2, + const CPURegister& reg3, + const CPURegister& reg4) { + VIXL_ASSERT(reg1.IsValid()); + + if (!reg2.IsValid()) { + return true; + } else if (reg2.GetCode() != + ((reg1.GetCode() + 1) % (reg1.GetMaxCode() + 1))) { + return false; + } + + if (!reg3.IsValid()) { + return true; + } else if (reg3.GetCode() != + ((reg2.GetCode() + 1) % (reg1.GetMaxCode() + 1))) { + return false; + } + + if (!reg4.IsValid()) { + return true; + } else if (reg4.GetCode() != + ((reg3.GetCode() + 1) % (reg1.GetMaxCode() + 1))) { + return false; + } + + return true; +} + +bool AreSameFormat(const CPURegister& reg1, + const CPURegister& reg2, + const CPURegister& reg3, + const CPURegister& reg4) { + VIXL_ASSERT(reg1.IsValid()); + bool match = true; + match &= !reg2.IsValid() || reg2.IsSameFormat(reg1); + match &= !reg3.IsValid() || reg3.IsSameFormat(reg1); + match &= !reg4.IsValid() || reg4.IsSameFormat(reg1); + return match; +} + +bool AreSameLaneSize(const CPURegister& reg1, + const CPURegister& reg2, + const CPURegister& reg3, + const CPURegister& reg4) { + VIXL_ASSERT(reg1.IsValid()); + bool match = true; + match &= + !reg2.IsValid() || (reg2.GetLaneSizeInBits() == reg1.GetLaneSizeInBits()); + match &= + !reg3.IsValid() || (reg3.GetLaneSizeInBits() == reg1.GetLaneSizeInBits()); + match &= + !reg4.IsValid() || (reg4.GetLaneSizeInBits() == reg1.GetLaneSizeInBits()); + return match; +} +} +} // namespace vixl::aarch64 diff --git a/3rdparty/vixl/src/code-buffer-vixl.cc b/3rdparty/vixl/src/code-buffer-vixl.cc new file mode 100644 index 0000000000..42a3866c8d --- /dev/null +++ b/3rdparty/vixl/src/code-buffer-vixl.cc @@ -0,0 +1,93 @@ +// Copyright 2017, VIXL authors +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// * Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// * Neither the name of ARM Limited nor the names of its contributors may be +// used to endorse or promote products derived from this software without +// specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS CONTRIBUTORS "AS IS" AND +// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE +// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
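One detail worth noting in `AreConsecutive` above: register codes are compared modulo the size of the register file, so a pair such as (z31, z0) is treated as consecutive, and trailing invalid (`NoCPUReg`) arguments short-circuit to "consecutive". The macro assembler relies on this predicate when it selects the SVE2 destructive form of `splice` earlier in this patch. The following standalone sketch shows just that wrap-around rule, using plain integer codes rather than VIXL register types, purely for illustration:

```cpp
#include <cassert>

// Illustrative only: the wrap-around rule AreConsecutive applies to register
// codes. With 32 Z registers (codes 0..31), the pair (z31, z0) still counts
// as consecutive because the comparison is performed modulo the register
// file size.
bool CodesAreConsecutive(int code1, int code2, int max_code) {
  return code2 == ((code1 + 1) % (max_code + 1));
}

int main() {
  assert(CodesAreConsecutive(3, 4, 31));   // z3 followed by z4
  assert(CodesAreConsecutive(31, 0, 31));  // z31 followed by z0: wraps around
  assert(!CodesAreConsecutive(5, 7, 31));  // z5 followed by z7: not adjacent
  return 0;
}
```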
+ +#include "code-buffer-vixl.h" +#include "utils-vixl.h" + +namespace vixl { + + +CodeBuffer::CodeBuffer(byte* buffer, size_t capacity) + : buffer_(reinterpret_cast(buffer)), + cursor_(reinterpret_cast(buffer)), + dirty_(false), + capacity_(capacity) { + VIXL_ASSERT(buffer_ != NULL); +} + + +CodeBuffer::~CodeBuffer() VIXL_NEGATIVE_TESTING_ALLOW_EXCEPTION { + VIXL_ASSERT(!IsDirty()); +} + + +void CodeBuffer::EmitString(const char* string) { + const auto len = strlen(string) + 1; + VIXL_ASSERT(HasSpaceFor(len)); + char* dst = reinterpret_cast(cursor_); + dirty_ = true; + memcpy(dst, string, len); + cursor_ = reinterpret_cast(dst + len); +} + + +void CodeBuffer::EmitData(const void* data, size_t size) { + VIXL_ASSERT(HasSpaceFor(size)); + dirty_ = true; + memcpy(cursor_, data, size); + cursor_ = cursor_ + size; +} + + +void CodeBuffer::UpdateData(size_t offset, const void* data, size_t size) { + dirty_ = true; + byte* dst = buffer_ + offset; + VIXL_ASSERT(dst + size <= cursor_); + memcpy(dst, data, size); +} + + +void CodeBuffer::Align() { + byte* end = AlignUp(cursor_, 4); + const size_t padding_size = end - cursor_; + VIXL_ASSERT(padding_size <= 4); + EmitZeroedBytes(static_cast(padding_size)); +} + +void CodeBuffer::EmitZeroedBytes(int n) { + VIXL_ASSERT(HasSpaceFor(n)); + dirty_ = true; + memset(cursor_, 0, n); + cursor_ += n; +} + +void CodeBuffer::Reset() { + cursor_ = buffer_; + SetClean(); +} + + +} // namespace vixl diff --git a/3rdparty/vixl/src/compiler-intrinsics-vixl.cc b/3rdparty/vixl/src/compiler-intrinsics-vixl.cc new file mode 100644 index 0000000000..f6234fa6bf --- /dev/null +++ b/3rdparty/vixl/src/compiler-intrinsics-vixl.cc @@ -0,0 +1,146 @@ +// Copyright 2015, VIXL authors +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// * Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// * Neither the name of ARM Limited nor the names of its contributors may be +// used to endorse or promote products derived from this software without +// specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS CONTRIBUTORS "AS IS" AND +// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE +// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
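Each emit method in `CodeBuffer` above follows the same pattern: assert that space remains, mark the buffer dirty, copy bytes at the cursor, and advance the cursor; `Align()` then pads with zeroed bytes up to the next 4-byte boundary, the A64 instruction size. The toy class below is a compilable, self-contained analogue of that cursor-and-alignment idea. It is not VIXL's class and deliberately omits dirty tracking, string emission, and `UpdateData`; the names are invented for the sketch:

```cpp
#include <cassert>
#include <cstdint>
#include <cstring>

// Minimal sketch of the emit-at-cursor pattern used by CodeBuffer.
class TinyBuffer {
 public:
  TinyBuffer(uint8_t* storage, size_t capacity)
      : start_(storage), cursor_(storage), capacity_(capacity) {}

  bool HasSpaceFor(size_t n) const {
    return static_cast<size_t>(cursor_ - start_) + n <= capacity_;
  }

  // Copy raw bytes at the cursor and advance it.
  void EmitData(const void* data, size_t size) {
    assert(HasSpaceFor(size));
    memcpy(cursor_, data, size);
    cursor_ += size;
  }

  // Pad with zero bytes up to the next 4-byte boundary, since A64
  // instructions are four bytes each.
  void Align() {
    while ((reinterpret_cast<uintptr_t>(cursor_) % 4) != 0) {
      assert(HasSpaceFor(1));
      *cursor_++ = 0;
    }
  }

  size_t GetSizeInBytes() const {
    return static_cast<size_t>(cursor_ - start_);
  }

 private:
  uint8_t* start_;
  uint8_t* cursor_;
  size_t capacity_;
};

int main() {
  alignas(4) uint8_t storage[64];
  TinyBuffer buf(storage, sizeof(storage));
  const char tag[] = "id";      // Three bytes including the terminator.
  buf.EmitData(tag, sizeof(tag));
  buf.Align();                  // Pads one zero byte to reach a 4-byte boundary.
  assert(buf.GetSizeInBytes() == 4);
  return 0;
}
```

The real `CodeBuffer` additionally asserts in its destructor that the buffer is not left dirty, which is why `Reset()` calls `SetClean()` after rewinding the cursor.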
+ +#include "compiler-intrinsics-vixl.h" +#include "utils-vixl.h" + +namespace vixl { + + +int CountLeadingSignBitsFallBack(int64_t value, int width) { + VIXL_ASSERT(IsPowerOf2(width) && (width <= 64)); + if (width < 64) VIXL_ASSERT(IsIntN(width, value)); + if (value >= 0) { + return CountLeadingZeros(value, width) - 1; + } else { + return CountLeadingZeros(~value, width) - 1; + } +} + + +int CountLeadingZerosFallBack(uint64_t value, int width) { + VIXL_ASSERT(IsPowerOf2(width) && (width <= 64)); + if (value == 0) { + return width; + } + int count = 0; + value = value << (64 - width); + if ((value & UINT64_C(0xffffffff00000000)) == 0) { + count += 32; + value = value << 32; + } + if ((value & UINT64_C(0xffff000000000000)) == 0) { + count += 16; + value = value << 16; + } + if ((value & UINT64_C(0xff00000000000000)) == 0) { + count += 8; + value = value << 8; + } + if ((value & UINT64_C(0xf000000000000000)) == 0) { + count += 4; + value = value << 4; + } + if ((value & UINT64_C(0xc000000000000000)) == 0) { + count += 2; + value = value << 2; + } + if ((value & UINT64_C(0x8000000000000000)) == 0) { + count += 1; + } + count += (value == 0); + return count; +} + + +int CountSetBitsFallBack(uint64_t value, int width) { + VIXL_ASSERT(IsPowerOf2(width) && (width <= 64)); + + // Mask out unused bits to ensure that they are not counted. + value &= (UINT64_C(0xffffffffffffffff) >> (64 - width)); + + // Add up the set bits. + // The algorithm works by adding pairs of bit fields together iteratively, + // where the size of each bit field doubles each time. + // An example for an 8-bit value: + // Bits: h g f e d c b a + // \ | \ | \ | \ | + // value = h+g f+e d+c b+a + // \ | \ | + // value = h+g+f+e d+c+b+a + // \ | + // value = h+g+f+e+d+c+b+a + const uint64_t kMasks[] = { + UINT64_C(0x5555555555555555), + UINT64_C(0x3333333333333333), + UINT64_C(0x0f0f0f0f0f0f0f0f), + UINT64_C(0x00ff00ff00ff00ff), + UINT64_C(0x0000ffff0000ffff), + UINT64_C(0x00000000ffffffff), + }; + + for (unsigned i = 0; i < (sizeof(kMasks) / sizeof(kMasks[0])); i++) { + int shift = 1 << i; + value = ((value >> shift) & kMasks[i]) + (value & kMasks[i]); + } + + return static_cast(value); +} + + +int CountTrailingZerosFallBack(uint64_t value, int width) { + VIXL_ASSERT(IsPowerOf2(width) && (width <= 64)); + int count = 0; + value = value << (64 - width); + if ((value & UINT64_C(0xffffffff)) == 0) { + count += 32; + value = value >> 32; + } + if ((value & 0xffff) == 0) { + count += 16; + value = value >> 16; + } + if ((value & 0xff) == 0) { + count += 8; + value = value >> 8; + } + if ((value & 0xf) == 0) { + count += 4; + value = value >> 4; + } + if ((value & 0x3) == 0) { + count += 2; + value = value >> 2; + } + if ((value & 0x1) == 0) { + count += 1; + } + count += (value == 0); + return count - (64 - width); +} + + +} // namespace vixl diff --git a/3rdparty/vixl/src/cpu-features.cc b/3rdparty/vixl/src/cpu-features.cc new file mode 100644 index 0000000000..08db3f44b9 --- /dev/null +++ b/3rdparty/vixl/src/cpu-features.cc @@ -0,0 +1,159 @@ +// Copyright 2018, VIXL authors +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// * Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. 
+// * Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// * Neither the name of ARM Limited nor the names of its contributors may be +// used to endorse or promote products derived from this software without +// specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS CONTRIBUTORS "AS IS" AND +// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE +// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +#include + +#include "cpu-features.h" +#include "globals-vixl.h" +#include "utils-vixl.h" + +#if defined(__aarch64__) && defined(VIXL_INCLUDE_TARGET_AARCH64) +#include "aarch64/cpu-aarch64.h" +#define VIXL_USE_AARCH64_CPU_HELPERS +#endif + +namespace vixl { + +CPUFeatures CPUFeatures::All() { + CPUFeatures all; + all.features_.set(); + return all; +} + +CPUFeatures CPUFeatures::InferFromIDRegisters() { + // This function assumes that kIDRegisterEmulation is available. + CPUFeatures features(CPUFeatures::kIDRegisterEmulation); +#ifdef VIXL_USE_AARCH64_CPU_HELPERS + // Note that the Linux kernel filters these values during emulation, so the + // results may not exactly match the expected hardware support. 
+  features.Combine(aarch64::CPU::InferCPUFeaturesFromIDRegisters());
+#endif
+  return features;
+}
+
+CPUFeatures CPUFeatures::InferFromOS(QueryIDRegistersOption option) {
+#ifdef VIXL_USE_AARCH64_CPU_HELPERS
+  return aarch64::CPU::InferCPUFeaturesFromOS(option);
+#else
+  USE(option);
+  return CPUFeatures();
+#endif
+}
+
+void CPUFeatures::Combine(const CPUFeatures& other) {
+  features_ |= other.features_;
+}
+
+void CPUFeatures::Combine(Feature feature) {
+  if (feature != CPUFeatures::kNone) features_.set(feature);
+}
+
+void CPUFeatures::Remove(const CPUFeatures& other) {
+  features_ &= ~other.features_;
+}
+
+void CPUFeatures::Remove(Feature feature) {
+  if (feature != CPUFeatures::kNone) features_.reset(feature);
+}
+
+bool CPUFeatures::Has(const CPUFeatures& other) const {
+  return (features_ & other.features_) == other.features_;
+}
+
+bool CPUFeatures::Has(Feature feature) const {
+  return (feature == CPUFeatures::kNone) || features_[feature];
+}
+
+size_t CPUFeatures::Count() const { return features_.count(); }
+
+std::ostream& operator<<(std::ostream& os, CPUFeatures::Feature feature) {
+  // clang-format off
+  switch (feature) {
+#define VIXL_FORMAT_FEATURE(SYMBOL, NAME, CPUINFO) \
+    case CPUFeatures::SYMBOL:                      \
+      return os << NAME;
+VIXL_CPU_FEATURE_LIST(VIXL_FORMAT_FEATURE)
+#undef VIXL_FORMAT_FEATURE
+    case CPUFeatures::kNone:
+      return os << "none";
+    case CPUFeatures::kNumberOfFeatures:
+      VIXL_UNREACHABLE();
+  }
+  // clang-format on
+  VIXL_UNREACHABLE();
+  return os;
+}
+
+CPUFeatures::const_iterator CPUFeatures::begin() const {
+  // For iterators in general, it's undefined to increment `end()`, but here we
+  // control the implementation and it is safe to do this.
+  return ++end();
+}
+
+CPUFeatures::const_iterator CPUFeatures::end() const {
+  return const_iterator(this, kNone);
+}
+
+std::ostream& operator<<(std::ostream& os, const CPUFeatures& features) {
+  bool need_separator = false;
+  for (CPUFeatures::Feature feature : features) {
+    if (need_separator) os << ", ";
+    need_separator = true;
+    os << feature;
+  }
+  return os;
+}
+
+bool CPUFeaturesConstIterator::operator==(
+    const CPUFeaturesConstIterator& other) const {
+  VIXL_ASSERT(IsValid());
+  return (cpu_features_ == other.cpu_features_) && (feature_ == other.feature_);
+}
+
+CPUFeaturesConstIterator& CPUFeaturesConstIterator::operator++() {  // Prefix
+  VIXL_ASSERT(IsValid());
+  do {
+    // Find the next feature. The order is unspecified.
+    feature_ = static_cast<CPUFeatures::Feature>(feature_ + 1);
+    if (feature_ == CPUFeatures::kNumberOfFeatures) {
+      feature_ = CPUFeatures::kNone;
+      VIXL_STATIC_ASSERT(CPUFeatures::kNone == -1);
+    }
+    VIXL_ASSERT(CPUFeatures::kNone <= feature_);
+    VIXL_ASSERT(feature_ < CPUFeatures::kNumberOfFeatures);
+    // cpu_features_->Has(kNone) is always true, so this will terminate even if
+    // the features list is empty.
+  } while (!cpu_features_->Has(feature_));
+  return *this;
+}
+
+CPUFeaturesConstIterator CPUFeaturesConstIterator::operator++(int) {  // Postfix
+  CPUFeaturesConstIterator result = *this;
+  ++(*this);
+  return result;
+}
+
+}  // namespace vixl
diff --git a/3rdparty/vixl/src/utils-vixl.cc b/3rdparty/vixl/src/utils-vixl.cc
new file mode 100644
index 0000000000..639a4b1957
--- /dev/null
+++ b/3rdparty/vixl/src/utils-vixl.cc
@@ -0,0 +1,555 @@
+// Copyright 2015, VIXL authors
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+//   * Redistributions of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//   * Redistributions in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//   * Neither the name of ARM Limited nor the names of its contributors may be
+//     used to endorse or promote products derived from this software without
+//     specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS CONTRIBUTORS "AS IS" AND
+// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
+// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#include "utils-vixl.h"
+
+#include <cstdio>
+
+namespace vixl {
+
+// The default NaN values (for FPCR.DN=1).
+const double kFP64DefaultNaN = RawbitsToDouble(UINT64_C(0x7ff8000000000000));
+const float kFP32DefaultNaN = RawbitsToFloat(0x7fc00000);
+const Float16 kFP16DefaultNaN = RawbitsToFloat16(0x7e00);
+
+// Floating-point zero values.
+const Float16 kFP16PositiveZero = RawbitsToFloat16(0x0);
+const Float16 kFP16NegativeZero = RawbitsToFloat16(0x8000);
+
+// Floating-point infinity values.
+const Float16 kFP16PositiveInfinity = RawbitsToFloat16(0x7c00);
+const Float16 kFP16NegativeInfinity = RawbitsToFloat16(0xfc00);
+const float kFP32PositiveInfinity = RawbitsToFloat(0x7f800000);
+const float kFP32NegativeInfinity = RawbitsToFloat(0xff800000);
+const double kFP64PositiveInfinity =
+    RawbitsToDouble(UINT64_C(0x7ff0000000000000));
+const double kFP64NegativeInfinity =
+    RawbitsToDouble(UINT64_C(0xfff0000000000000));
+
+bool IsZero(Float16 value) {
+  uint16_t bits = Float16ToRawbits(value);
+  return (bits == Float16ToRawbits(kFP16PositiveZero) ||
+          bits == Float16ToRawbits(kFP16NegativeZero));
+}
+
+uint16_t Float16ToRawbits(Float16 value) { return value.rawbits_; }
+
+uint32_t FloatToRawbits(float value) {
+  uint32_t bits = 0;
+  memcpy(&bits, &value, 4);
+  return bits;
+}
+
+
+uint64_t DoubleToRawbits(double value) {
+  uint64_t bits = 0;
+  memcpy(&bits, &value, 8);
+  return bits;
+}
+
+
+Float16 RawbitsToFloat16(uint16_t bits) {
+  Float16 f;
+  f.rawbits_ = bits;
+  return f;
+}
+
+
+float RawbitsToFloat(uint32_t bits) {
+  float value = 0.0;
+  memcpy(&value, &bits, 4);
+  return value;
+}
+
+
+double RawbitsToDouble(uint64_t bits) {
+  double value = 0.0;
+  memcpy(&value, &bits, 8);
+  return value;
+}
+
+
+uint32_t Float16Sign(internal::SimFloat16 val) {
+  uint16_t rawbits = Float16ToRawbits(val);
+  return ExtractUnsignedBitfield32(15, 15, rawbits);
+}
+
+
+uint32_t Float16Exp(internal::SimFloat16 val) {
+  uint16_t rawbits = Float16ToRawbits(val);
+  return ExtractUnsignedBitfield32(14, 10, rawbits);
+}
+
+uint32_t Float16Mantissa(internal::SimFloat16 val) {
+  uint16_t rawbits = Float16ToRawbits(val);
+  return ExtractUnsignedBitfield32(9, 0, rawbits);
+}
+
+
+uint32_t FloatSign(float val) {
+  uint32_t rawbits = FloatToRawbits(val);
+  return ExtractUnsignedBitfield32(31, 31, rawbits);
+}
+
+
+uint32_t FloatExp(float val) {
+  uint32_t rawbits = FloatToRawbits(val);
+  return ExtractUnsignedBitfield32(30, 23, rawbits);
+}
+
+
+uint32_t FloatMantissa(float val) {
+  uint32_t rawbits = FloatToRawbits(val);
+  return ExtractUnsignedBitfield32(22, 0, rawbits);
+}
+
+
+uint32_t DoubleSign(double val) {
+  uint64_t rawbits = DoubleToRawbits(val);
+  return static_cast<uint32_t>(ExtractUnsignedBitfield64(63, 63, rawbits));
+}
+
+
+uint32_t DoubleExp(double val) {
+  uint64_t rawbits = DoubleToRawbits(val);
+  return static_cast<uint32_t>(ExtractUnsignedBitfield64(62, 52, rawbits));
+}
+
+
+uint64_t DoubleMantissa(double val) {
+  uint64_t rawbits = DoubleToRawbits(val);
+  return ExtractUnsignedBitfield64(51, 0, rawbits);
+}
+
+
+internal::SimFloat16 Float16Pack(uint16_t sign,
+                                 uint16_t exp,
+                                 uint16_t mantissa) {
+  uint16_t bits = (sign << 15) | (exp << 10) | mantissa;
+  return RawbitsToFloat16(bits);
+}
+
+
+float FloatPack(uint32_t sign, uint32_t exp, uint32_t mantissa) {
+  uint32_t bits = (sign << 31) | (exp << 23) | mantissa;
+  return RawbitsToFloat(bits);
+}
+
+
+double DoublePack(uint64_t sign, uint64_t exp, uint64_t mantissa) {
+  uint64_t bits = (sign << 63) | (exp << 52) | mantissa;
+  return RawbitsToDouble(bits);
+}
+
+
+int Float16Classify(Float16 value) {
+  uint16_t bits = Float16ToRawbits(value);
+  uint16_t exponent_max = (1 << 5) - 1;
+  uint16_t exponent_mask = exponent_max << 10;
+  uint16_t mantissa_mask = (1 << 10) - 1;
+
+  uint16_t exponent = (bits & exponent_mask) >> 10;
+  uint16_t mantissa = bits & mantissa_mask;
+  if (exponent == 0) {
+    if (mantissa == 0) {
+      return FP_ZERO;
+    }
+    return FP_SUBNORMAL;
+  } else if (exponent == exponent_max) {
+    if (mantissa == 0) {
+      return FP_INFINITE;
+    }
+    return FP_NAN;
+  }
+  return FP_NORMAL;
+}
+
+
+unsigned CountClearHalfWords(uint64_t imm, unsigned reg_size) {
+  VIXL_ASSERT((reg_size % 8) == 0);
+  int count = 0;
+  for (unsigned i = 0; i < (reg_size / 16); i++) {
+    if ((imm & 0xffff) == 0) {
+      count++;
+    }
+    imm >>= 16;
+  }
+  return count;
+}
+
+
+int BitCount(uint64_t value) { return CountSetBits(value); }
+
+// Float16 definitions.
+
+Float16::Float16(double dvalue) {
+  rawbits_ =
+      Float16ToRawbits(FPToFloat16(dvalue, FPTieEven, kIgnoreDefaultNaN));
+}
+
+namespace internal {
+
+SimFloat16 SimFloat16::operator-() const {
+  return RawbitsToFloat16(rawbits_ ^ 0x8000);
+}
+
+// SimFloat16 definitions.
+SimFloat16 SimFloat16::operator+(SimFloat16 rhs) const {
+  return static_cast<double>(*this) + static_cast<double>(rhs);
+}
+
+SimFloat16 SimFloat16::operator-(SimFloat16 rhs) const {
+  return static_cast<double>(*this) - static_cast<double>(rhs);
+}
+
+SimFloat16 SimFloat16::operator*(SimFloat16 rhs) const {
+  return static_cast<double>(*this) * static_cast<double>(rhs);
+}
+
+SimFloat16 SimFloat16::operator/(SimFloat16 rhs) const {
+  return static_cast<double>(*this) / static_cast<double>(rhs);
+}
+
+bool SimFloat16::operator<(SimFloat16 rhs) const {
+  return static_cast<double>(*this) < static_cast<double>(rhs);
+}
+
+bool SimFloat16::operator>(SimFloat16 rhs) const {
+  return static_cast<double>(*this) > static_cast<double>(rhs);
+}
+
+bool SimFloat16::operator==(SimFloat16 rhs) const {
+  if (IsNaN(*this) || IsNaN(rhs)) {
+    return false;
+  } else if (IsZero(rhs) && IsZero(*this)) {
+    // +0 and -0 should be treated as equal.
+    return true;
+  }
+  return this->rawbits_ == rhs.rawbits_;
+}
+
+bool SimFloat16::operator!=(SimFloat16 rhs) const { return !(*this == rhs); }
+
+bool SimFloat16::operator==(double rhs) const {
+  return static_cast<double>(*this) == static_cast<double>(rhs);
+}
+
+SimFloat16::operator double() const {
+  return FPToDouble(*this, kIgnoreDefaultNaN);
+}
+
+Int64 BitCount(Uint32 value) { return CountSetBits(value.Get()); }
+
+}  // namespace internal
+
+float FPToFloat(Float16 value, UseDefaultNaN DN, bool* exception) {
+  uint16_t bits = Float16ToRawbits(value);
+  uint32_t sign = bits >> 15;
+  uint32_t exponent =
+      ExtractUnsignedBitfield32(kFloat16MantissaBits + kFloat16ExponentBits - 1,
+                                kFloat16MantissaBits,
+                                bits);
+  uint32_t mantissa =
+      ExtractUnsignedBitfield32(kFloat16MantissaBits - 1, 0, bits);
+
+  switch (Float16Classify(value)) {
+    case FP_ZERO:
+      return (sign == 0) ? 0.0f : -0.0f;
+
+    case FP_INFINITE:
+      return (sign == 0) ? kFP32PositiveInfinity : kFP32NegativeInfinity;
+
+    case FP_SUBNORMAL: {
+      // Calculate shift required to put mantissa into the most-significant bits
+      // of the destination mantissa.
+      int shift = CountLeadingZeros(mantissa << (32 - 10));
+
+      // Shift mantissa and discard implicit '1'.
+      mantissa <<= (kFloatMantissaBits - kFloat16MantissaBits) + shift + 1;
+      mantissa &= (1 << kFloatMantissaBits) - 1;
+
+      // Adjust the exponent for the shift applied, and rebias.
+      exponent = exponent - shift + (-15 + 127);
+      break;
+    }
+
+    case FP_NAN:
+      if (IsSignallingNaN(value)) {
+        if (exception != NULL) {
+          *exception = true;
+        }
+      }
+      if (DN == kUseDefaultNaN) return kFP32DefaultNaN;
+
+      // Convert NaNs as the processor would:
+      //  - The sign is propagated.
+      //  - The payload (mantissa) is transferred entirely, except that the top
+      //    bit is forced to '1', making the result a quiet NaN. The unused
+      //    (low-order) payload bits are set to 0.
+      exponent = (1 << kFloatExponentBits) - 1;
+
+      // Increase bits in mantissa, making low-order bits 0.
+      mantissa <<= (kFloatMantissaBits - kFloat16MantissaBits);
+      mantissa |= 1 << 22;  // Force a quiet NaN.
+      break;
+
+    case FP_NORMAL:
+      // Increase bits in mantissa, making low-order bits 0.
+      mantissa <<= (kFloatMantissaBits - kFloat16MantissaBits);
+
+      // Change exponent bias.
+      exponent += (-15 + 127);
+      break;
+
+    default:
+      VIXL_UNREACHABLE();
+  }
+  return RawbitsToFloat((sign << 31) | (exponent << kFloatMantissaBits) |
+                        mantissa);
+}
+
+
+float FPToFloat(double value,
+                FPRounding round_mode,
+                UseDefaultNaN DN,
+                bool* exception) {
+  // Only the FPTieEven rounding mode is implemented.
+  VIXL_ASSERT((round_mode == FPTieEven) || (round_mode == FPRoundOdd));
+  USE(round_mode);
+
+  switch (std::fpclassify(value)) {
+    case FP_NAN: {
+      if (IsSignallingNaN(value)) {
+        if (exception != NULL) {
+          *exception = true;
+        }
+      }
+      if (DN == kUseDefaultNaN) return kFP32DefaultNaN;
+
+      // Convert NaNs as the processor would:
+      //  - The sign is propagated.
+      //  - The payload (mantissa) is transferred as much as possible, except
+      //    that the top bit is forced to '1', making the result a quiet NaN.
+      uint64_t raw = DoubleToRawbits(value);
+
+      uint32_t sign = raw >> 63;
+      uint32_t exponent = (1 << 8) - 1;
+      uint32_t payload =
+          static_cast<uint32_t>(ExtractUnsignedBitfield64(50, 52 - 23, raw));
+      payload |= (1 << 22);  // Force a quiet NaN.
+
+      return RawbitsToFloat((sign << 31) | (exponent << 23) | payload);
+    }
+
+    case FP_ZERO:
+    case FP_INFINITE: {
+      // In a C++ cast, any value representable in the target type will be
+      // unchanged. This is always the case for +/-0.0 and infinities.
+      return static_cast<float>(value);
+    }
+
+    case FP_NORMAL:
+    case FP_SUBNORMAL: {
+      // Convert double-to-float as the processor would, assuming that FPCR.FZ
+      // (flush-to-zero) is not set.
+      uint64_t raw = DoubleToRawbits(value);
+      // Extract the IEEE-754 double components.
+      uint32_t sign = raw >> 63;
+      // Extract the exponent and remove the IEEE-754 encoding bias.
+      int32_t exponent =
+          static_cast<int32_t>(ExtractUnsignedBitfield64(62, 52, raw)) - 1023;
+      // Extract the mantissa and add the implicit '1' bit.
+      uint64_t mantissa = ExtractUnsignedBitfield64(51, 0, raw);
+      if (std::fpclassify(value) == FP_NORMAL) {
+        mantissa |= (UINT64_C(1) << 52);
+      }
+      return FPRoundToFloat(sign, exponent, mantissa, round_mode);
+    }
+  }
+
+  VIXL_UNREACHABLE();
+  return static_cast<float>(value);
+}
+
+// TODO: We should consider implementing a full FPToDouble(Float16)
+// conversion function (for performance reasons).
+double FPToDouble(Float16 value, UseDefaultNaN DN, bool* exception) {
+  // We can rely on implicit float to double conversion here.
+  return FPToFloat(value, DN, exception);
+}
+
+
+double FPToDouble(float value, UseDefaultNaN DN, bool* exception) {
+  switch (std::fpclassify(value)) {
+    case FP_NAN: {
+      if (IsSignallingNaN(value)) {
+        if (exception != NULL) {
+          *exception = true;
+        }
+      }
+      if (DN == kUseDefaultNaN) return kFP64DefaultNaN;
+
+      // Convert NaNs as the processor would:
+      //  - The sign is propagated.
+      //  - The payload (mantissa) is transferred entirely, except that the top
+      //    bit is forced to '1', making the result a quiet NaN. The unused
+      //    (low-order) payload bits are set to 0.
+      uint32_t raw = FloatToRawbits(value);
+
+      uint64_t sign = raw >> 31;
+      uint64_t exponent = (1 << 11) - 1;
+      uint64_t payload = ExtractUnsignedBitfield64(21, 0, raw);
+      payload <<= (52 - 23);  // The unused low-order bits should be 0.
+      payload |= (UINT64_C(1) << 51);  // Force a quiet NaN.
+
+      return RawbitsToDouble((sign << 63) | (exponent << 52) | payload);
+    }
+
+    case FP_ZERO:
+    case FP_NORMAL:
+    case FP_SUBNORMAL:
+    case FP_INFINITE: {
+      // All other inputs are preserved in a standard cast, because every value
+      // representable using an IEEE-754 float is also representable using an
+      // IEEE-754 double.
+      return static_cast<double>(value);
+    }
+  }
+
+  VIXL_UNREACHABLE();
+  return static_cast<double>(value);
+}
+
+
+Float16 FPToFloat16(float value,
+                    FPRounding round_mode,
+                    UseDefaultNaN DN,
+                    bool* exception) {
+  // Only the FPTieEven rounding mode is implemented.
+  VIXL_ASSERT(round_mode == FPTieEven);
+  USE(round_mode);
+
+  uint32_t raw = FloatToRawbits(value);
+  int32_t sign = raw >> 31;
+  int32_t exponent = ExtractUnsignedBitfield32(30, 23, raw) - 127;
+  uint32_t mantissa = ExtractUnsignedBitfield32(22, 0, raw);
+
+  switch (std::fpclassify(value)) {
+    case FP_NAN: {
+      if (IsSignallingNaN(value)) {
+        if (exception != NULL) {
+          *exception = true;
+        }
+      }
+      if (DN == kUseDefaultNaN) return kFP16DefaultNaN;
+
+      // Convert NaNs as the processor would:
+      //  - The sign is propagated.
+      //  - The payload (mantissa) is transferred as much as possible, except
+      //    that the top bit is forced to '1', making the result a quiet NaN.
+      uint16_t result = (sign == 0) ? Float16ToRawbits(kFP16PositiveInfinity)
+                                    : Float16ToRawbits(kFP16NegativeInfinity);
+      result |= mantissa >> (kFloatMantissaBits - kFloat16MantissaBits);
+      result |= (1 << 9);  // Force a quiet NaN;
+      return RawbitsToFloat16(result);
+    }
+
+    case FP_ZERO:
+      return (sign == 0) ? kFP16PositiveZero : kFP16NegativeZero;
+
+    case FP_INFINITE:
+      return (sign == 0) ? kFP16PositiveInfinity : kFP16NegativeInfinity;
+
+    case FP_NORMAL:
+    case FP_SUBNORMAL: {
+      // Convert float-to-half as the processor would, assuming that FPCR.FZ
+      // (flush-to-zero) is not set.
+
+      // Add the implicit '1' bit to the mantissa.
+      mantissa += (1 << 23);
+      return FPRoundToFloat16(sign, exponent, mantissa, round_mode);
+    }
+  }
+
+  VIXL_UNREACHABLE();
+  return kFP16PositiveZero;
+}
+
+
+Float16 FPToFloat16(double value,
+                    FPRounding round_mode,
+                    UseDefaultNaN DN,
+                    bool* exception) {
+  // Only the FPTieEven rounding mode is implemented.
+  VIXL_ASSERT(round_mode == FPTieEven);
+  USE(round_mode);
+
+  uint64_t raw = DoubleToRawbits(value);
+  int32_t sign = raw >> 63;
+  int64_t exponent = ExtractUnsignedBitfield64(62, 52, raw) - 1023;
+  uint64_t mantissa = ExtractUnsignedBitfield64(51, 0, raw);
+
+  switch (std::fpclassify(value)) {
+    case FP_NAN: {
+      if (IsSignallingNaN(value)) {
+        if (exception != NULL) {
+          *exception = true;
+        }
+      }
+      if (DN == kUseDefaultNaN) return kFP16DefaultNaN;
+
+      // Convert NaNs as the processor would:
+      //  - The sign is propagated.
+      //  - The payload (mantissa) is transferred as much as possible, except
+      //    that the top bit is forced to '1', making the result a quiet NaN.
+      uint16_t result = (sign == 0) ? Float16ToRawbits(kFP16PositiveInfinity)
+                                    : Float16ToRawbits(kFP16NegativeInfinity);
+      result |= mantissa >> (kDoubleMantissaBits - kFloat16MantissaBits);
+      result |= (1 << 9);  // Force a quiet NaN;
+      return RawbitsToFloat16(result);
+    }
+
+    case FP_ZERO:
+      return (sign == 0) ? kFP16PositiveZero : kFP16NegativeZero;
+
+    case FP_INFINITE:
+      return (sign == 0) ? kFP16PositiveInfinity : kFP16NegativeInfinity;
+
+    case FP_NORMAL:
+    case FP_SUBNORMAL: {
+      // Convert double-to-half as the processor would, assuming that FPCR.FZ
+      // (flush-to-zero) is not set.
+
+      // Add the implicit '1' bit to the mantissa.
+ mantissa += (UINT64_C(1) << 52); + return FPRoundToFloat16(sign, exponent, mantissa, round_mode); + } + } + + VIXL_UNREACHABLE(); + return kFP16PositiveZero; +} + +} // namespace vixl diff --git a/3rdparty/vixl/vixl.vcxproj b/3rdparty/vixl/vixl.vcxproj new file mode 100644 index 0000000000..aaa48905c2 --- /dev/null +++ b/3rdparty/vixl/vixl.vcxproj @@ -0,0 +1,93 @@ + + + + + + {8906836E-F06E-46E8-B11A-74E5E8C7B8FB} + Win32Proj + + + + StaticLibrary + $(DefaultPlatformToolset) + ClangCL + MultiByte + true + true + false + + + + + + + + + + + + + + AllRules.ruleset + + + + %(PreprocessorDefinitions) + TurnOffAllWarnings + VIXL_INCLUDE_TARGET_AARCH64;VIXL_CODE_BUFFER_MALLOC;_CRT_NONSTDC_NO_WARNINGS;_CRT_SECURE_NO_WARNINGS;%(PreprocessorDefinitions) + _SECURE_SCL_=1;%(PreprocessorDefinitions) + NDEBUG;_SECURE_SCL_=1;%(PreprocessorDefinitions) + NDEBUG;_SECURE_SCL_=0;%(PreprocessorDefinitions) + $(ProjectDir)include;$(ProjectDir)include\vixl;$(ProjectDir)include\vixl\aarch64;%(AdditionalIncludeDirectories) + /Zc:__cplusplus /Zo /utf-8 %(AdditionalOptions) + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/3rdparty/vixl/vixl.vcxproj.filters b/3rdparty/vixl/vixl.vcxproj.filters new file mode 100644 index 0000000000..e19bd6cf17 --- /dev/null +++ b/3rdparty/vixl/vixl.vcxproj.filters @@ -0,0 +1,106 @@ + + + + + {bad5c611-84e1-42b6-b20b-828618673b31} + + + + + aarch64 + + + aarch64 + + + aarch64 + + + aarch64 + + + aarch64 + + + aarch64 + + + aarch64 + + + aarch64 + + + aarch64 + + + aarch64 + + + aarch64 + + + aarch64 + + + aarch64 + + + + + + + + + + + + + + + + + aarch64 + + + aarch64 + + + aarch64 + + + aarch64 + + + aarch64 + + + aarch64 + + + aarch64 + + + aarch64 + + + aarch64 + + + aarch64 + + + aarch64 + + + aarch64 + + + aarch64 + + + + + + + \ No newline at end of file