From cbaca5d7882e842f791d2c7ae14c4c54557f8f68 Mon Sep 17 00:00:00 2001 From: Connor McLaughlin Date: Mon, 7 Dec 2020 23:40:05 +1000 Subject: [PATCH] dep: Add spirv-cross --- dep/spirv-cross/GLSL.std.450.h | 131 + dep/spirv-cross/LICENSE | 202 + dep/spirv-cross/README.md | 516 + dep/spirv-cross/main.cpp | 1620 ++ dep/spirv-cross/spirv-cross.vcxproj | 578 + dep/spirv-cross/spirv-cross.vcxproj.filters | 28 + dep/spirv-cross/spirv.h | 2104 +++ dep/spirv-cross/spirv.hpp | 2114 +++ dep/spirv-cross/spirv_cfg.cpp | 404 + dep/spirv-cross/spirv_cfg.hpp | 163 + dep/spirv-cross/spirv_common.hpp | 1805 ++ dep/spirv-cross/spirv_cpp.cpp | 558 + dep/spirv-cross/spirv_cpp.hpp | 93 + dep/spirv-cross/spirv_cross.cpp | 4877 +++++ dep/spirv-cross/spirv_cross.hpp | 1082 ++ dep/spirv-cross/spirv_cross_c.cpp | 2514 +++ dep/spirv-cross/spirv_cross_c.h | 992 + dep/spirv-cross/spirv_cross_containers.hpp | 747 + .../spirv_cross_error_handling.hpp | 94 + dep/spirv-cross/spirv_cross_parsed_ir.cpp | 1059 ++ dep/spirv-cross/spirv_cross_parsed_ir.hpp | 247 + dep/spirv-cross/spirv_cross_util.cpp | 77 + dep/spirv-cross/spirv_cross_util.hpp | 37 + dep/spirv-cross/spirv_glsl.cpp | 15350 ++++++++++++++++ dep/spirv-cross/spirv_glsl.hpp | 903 + dep/spirv-cross/spirv_hlsl.cpp | 5782 ++++++ dep/spirv-cross/spirv_hlsl.hpp | 374 + dep/spirv-cross/spirv_msl.cpp | 14792 +++++++++++++++ dep/spirv-cross/spirv_msl.hpp | 1085 ++ dep/spirv-cross/spirv_parser.cpp | 1183 ++ dep/spirv-cross/spirv_parser.hpp | 101 + dep/spirv-cross/spirv_reflect.cpp | 706 + dep/spirv-cross/spirv_reflect.hpp | 91 + duckstation.sln | 27 + 34 files changed, 62436 insertions(+) create mode 100644 dep/spirv-cross/GLSL.std.450.h create mode 100644 dep/spirv-cross/LICENSE create mode 100644 dep/spirv-cross/README.md create mode 100644 dep/spirv-cross/main.cpp create mode 100644 dep/spirv-cross/spirv-cross.vcxproj create mode 100644 dep/spirv-cross/spirv-cross.vcxproj.filters create mode 100644 dep/spirv-cross/spirv.h create mode 100644 
dep/spirv-cross/spirv.hpp create mode 100644 dep/spirv-cross/spirv_cfg.cpp create mode 100644 dep/spirv-cross/spirv_cfg.hpp create mode 100644 dep/spirv-cross/spirv_common.hpp create mode 100644 dep/spirv-cross/spirv_cpp.cpp create mode 100644 dep/spirv-cross/spirv_cpp.hpp create mode 100644 dep/spirv-cross/spirv_cross.cpp create mode 100644 dep/spirv-cross/spirv_cross.hpp create mode 100644 dep/spirv-cross/spirv_cross_c.cpp create mode 100644 dep/spirv-cross/spirv_cross_c.h create mode 100644 dep/spirv-cross/spirv_cross_containers.hpp create mode 100644 dep/spirv-cross/spirv_cross_error_handling.hpp create mode 100644 dep/spirv-cross/spirv_cross_parsed_ir.cpp create mode 100644 dep/spirv-cross/spirv_cross_parsed_ir.hpp create mode 100644 dep/spirv-cross/spirv_cross_util.cpp create mode 100644 dep/spirv-cross/spirv_cross_util.hpp create mode 100644 dep/spirv-cross/spirv_glsl.cpp create mode 100644 dep/spirv-cross/spirv_glsl.hpp create mode 100644 dep/spirv-cross/spirv_hlsl.cpp create mode 100644 dep/spirv-cross/spirv_hlsl.hpp create mode 100644 dep/spirv-cross/spirv_msl.cpp create mode 100644 dep/spirv-cross/spirv_msl.hpp create mode 100644 dep/spirv-cross/spirv_parser.cpp create mode 100644 dep/spirv-cross/spirv_parser.hpp create mode 100644 dep/spirv-cross/spirv_reflect.cpp create mode 100644 dep/spirv-cross/spirv_reflect.hpp diff --git a/dep/spirv-cross/GLSL.std.450.h b/dep/spirv-cross/GLSL.std.450.h new file mode 100644 index 000000000..54cc00e9a --- /dev/null +++ b/dep/spirv-cross/GLSL.std.450.h @@ -0,0 +1,131 @@ +/* +** Copyright (c) 2014-2016 The Khronos Group Inc. 
+** +** Permission is hereby granted, free of charge, to any person obtaining a copy +** of this software and/or associated documentation files (the "Materials"), +** to deal in the Materials without restriction, including without limitation +** the rights to use, copy, modify, merge, publish, distribute, sublicense, +** and/or sell copies of the Materials, and to permit persons to whom the +** Materials are furnished to do so, subject to the following conditions: +** +** The above copyright notice and this permission notice shall be included in +** all copies or substantial portions of the Materials. +** +** MODIFICATIONS TO THIS FILE MAY MEAN IT NO LONGER ACCURATELY REFLECTS KHRONOS +** STANDARDS. THE UNMODIFIED, NORMATIVE VERSIONS OF KHRONOS SPECIFICATIONS AND +** HEADER INFORMATION ARE LOCATED AT https://www.khronos.org/registry/ +** +** THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS +** OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +** FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +** THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +** LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +** FROM,OUT OF OR IN CONNECTION WITH THE MATERIALS OR THE USE OR OTHER DEALINGS +** IN THE MATERIALS. 
+*/ + +#ifndef GLSLstd450_H +#define GLSLstd450_H + +static const int GLSLstd450Version = 100; +static const int GLSLstd450Revision = 3; + +enum GLSLstd450 { /* Instruction numbers for GLSL.std.450. Every enumerator except the trailing GLSLstd450Count is pinned to an explicit value; do not renumber or reorder them (the license header above warns that a modified file may no longer reflect the Khronos standard). */ + GLSLstd450Bad = 0, // Don't use + + GLSLstd450Round = 1, + GLSLstd450RoundEven = 2, + GLSLstd450Trunc = 3, + GLSLstd450FAbs = 4, + GLSLstd450SAbs = 5, + GLSLstd450FSign = 6, + GLSLstd450SSign = 7, + GLSLstd450Floor = 8, + GLSLstd450Ceil = 9, + GLSLstd450Fract = 10, + + GLSLstd450Radians = 11, + GLSLstd450Degrees = 12, + GLSLstd450Sin = 13, + GLSLstd450Cos = 14, + GLSLstd450Tan = 15, + GLSLstd450Asin = 16, + GLSLstd450Acos = 17, + GLSLstd450Atan = 18, + GLSLstd450Sinh = 19, + GLSLstd450Cosh = 20, + GLSLstd450Tanh = 21, + GLSLstd450Asinh = 22, + GLSLstd450Acosh = 23, + GLSLstd450Atanh = 24, + GLSLstd450Atan2 = 25, + + GLSLstd450Pow = 26, + GLSLstd450Exp = 27, + GLSLstd450Log = 28, + GLSLstd450Exp2 = 29, + GLSLstd450Log2 = 30, + GLSLstd450Sqrt = 31, + GLSLstd450InverseSqrt = 32, + + GLSLstd450Determinant = 33, + GLSLstd450MatrixInverse = 34, + + GLSLstd450Modf = 35, // second operand needs an OpVariable to write to + GLSLstd450ModfStruct = 36, // no OpVariable operand + GLSLstd450FMin = 37, + GLSLstd450UMin = 38, + GLSLstd450SMin = 39, + GLSLstd450FMax = 40, + GLSLstd450UMax = 41, + GLSLstd450SMax = 42, + GLSLstd450FClamp = 43, + GLSLstd450UClamp = 44, + GLSLstd450SClamp = 45, + GLSLstd450FMix = 46, + GLSLstd450IMix = 47, // Reserved + GLSLstd450Step = 48, + GLSLstd450SmoothStep = 49, + + GLSLstd450Fma = 50, + GLSLstd450Frexp = 51, // second operand needs an OpVariable to write to + GLSLstd450FrexpStruct = 52, // no OpVariable operand + GLSLstd450Ldexp = 53, + + GLSLstd450PackSnorm4x8 = 54, + GLSLstd450PackUnorm4x8 = 55, + GLSLstd450PackSnorm2x16 = 56, + GLSLstd450PackUnorm2x16 = 57, + GLSLstd450PackHalf2x16 = 58, + GLSLstd450PackDouble2x32 = 59, + GLSLstd450UnpackSnorm2x16 = 60, + GLSLstd450UnpackUnorm2x16 = 61, + GLSLstd450UnpackHalf2x16 = 62, + GLSLstd450UnpackSnorm4x8 = 63, + GLSLstd450UnpackUnorm4x8 = 64, 
+ GLSLstd450UnpackDouble2x32 = 65, + + GLSLstd450Length = 66, + GLSLstd450Distance = 67, + GLSLstd450Cross = 68, + GLSLstd450Normalize = 69, + GLSLstd450FaceForward = 70, + GLSLstd450Reflect = 71, + GLSLstd450Refract = 72, + + GLSLstd450FindILsb = 73, + GLSLstd450FindSMsb = 74, + GLSLstd450FindUMsb = 75, + + GLSLstd450InterpolateAtCentroid = 76, + GLSLstd450InterpolateAtSample = 77, + GLSLstd450InterpolateAtOffset = 78, + + GLSLstd450NMin = 79, + GLSLstd450NMax = 80, + GLSLstd450NClamp = 81, + + GLSLstd450Count /* no explicit value: evaluates to 82, one past GLSLstd450NClamp */ +}; + +#endif // #ifndef GLSLstd450_H diff --git a/dep/spirv-cross/LICENSE b/dep/spirv-cross/LICENSE new file mode 100644 index 000000000..d64569567 --- /dev/null +++ b/dep/spirv-cross/LICENSE @@ -0,0 +1,202 @@ + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. 
+ + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." 
+ + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. 
You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. 
+ + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. 
In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. 
+ + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/dep/spirv-cross/README.md b/dep/spirv-cross/README.md new file mode 100644 index 000000000..7f2714301 --- /dev/null +++ b/dep/spirv-cross/README.md @@ -0,0 +1,516 @@ +# SPIRV-Cross + +SPIRV-Cross is a tool designed for parsing and converting SPIR-V to other shader languages. + +[![Build Status](https://travis-ci.org/KhronosGroup/SPIRV-Cross.svg?branch=master)](https://travis-ci.org/KhronosGroup/SPIRV-Cross) +[![Build Status](https://ci.appveyor.com/api/projects/status/github/KhronosGroup/SPIRV-Cross?svg=true&branch=master)](https://ci.appveyor.com/project/HansKristian-Work/SPIRV-Cross) + +## Features + + - Convert SPIR-V to readable, usable and efficient GLSL + - Convert SPIR-V to readable, usable and efficient Metal Shading Language (MSL) + - Convert SPIR-V to readable, usable and efficient HLSL + - Convert SPIR-V to debuggable C++ [DEPRECATED] + - Convert SPIR-V to a JSON reflection format [EXPERIMENTAL] + - Reflection API to simplify the creation of Vulkan pipeline layouts + - Reflection API to modify and tweak OpDecorations + - Supports "all" of vertex, fragment, tessellation, geometry and compute shaders. + +SPIRV-Cross tries hard to emit readable and clean output from the SPIR-V. +The goal is to emit GLSL or MSL that looks like it was written by a human and not awkward IR/assembly-like code. 
+ +NOTE: Individual features are expected to be mostly complete, but it is possible that certain obscure GLSL features are not yet supported. +However, most missing features are expected to be "trivial" improvements at this stage. + +## Building + +SPIRV-Cross has been tested on Linux, iOS/OSX, Windows and Android. CMake is the main build system. + +### Linux and macOS + +Building with CMake is recommended, as it is the only build system which is tested in continuous integration. +It is also the only build system which has install commands and other useful build system features. + +However, you can just run `make` on the command line as a fallback if you only care about the CLI tool. + +A non-ancient GCC (4.8+) or Clang (3.x+) compiler is required as SPIRV-Cross uses C++11 extensively. + +### Windows + +Building with CMake is recommended, which is the only way to target MSVC. +MinGW-w64 based compilation works with `make` as a fallback. + +### Android + +SPIRV-Cross is only useful as a library here. Use the CMake build to link SPIRV-Cross to your project. + +### C++ exceptions + +The make and CMake build flavors offer the option to treat exceptions as assertions. To disable exceptions for make just append `SPIRV_CROSS_EXCEPTIONS_TO_ASSERTIONS=1` to the command line. For CMake append `-DSPIRV_CROSS_EXCEPTIONS_TO_ASSERTIONS=ON`. By default exceptions are enabled. + +### Static, shared and CLI + +You can use `-DSPIRV_CROSS_STATIC=ON/OFF` `-DSPIRV_CROSS_SHARED=ON/OFF` `-DSPIRV_CROSS_CLI=ON/OFF` to control which modules are built (and installed). + +## Usage + +### Using the C++ API + +The C++ API is the main API for SPIRV-Cross. For more in-depth documentation than what's provided in this README, +please have a look at the [Wiki](https://github.com/KhronosGroup/SPIRV-Cross/wiki). +**NOTE**: This API is not guaranteed to be ABI-stable, and it is highly recommended to link against this API statically. 
+The API is generally quite stable, but it can change over time; see the C API for more stability. + +To perform reflection and convert to other shader languages you can use the SPIRV-Cross API. +For example: + +```c++ +#include "spirv_glsl.hpp" +#include <vector> +#include <utility> + +extern std::vector<uint32_t> load_spirv_file(); + +int main() +{ + // Read SPIR-V from disk or similar. + std::vector<uint32_t> spirv_binary = load_spirv_file(); + + spirv_cross::CompilerGLSL glsl(std::move(spirv_binary)); + + // The SPIR-V is now parsed, and we can perform reflection on it. + spirv_cross::ShaderResources resources = glsl.get_shader_resources(); + + // Get all sampled images in the shader. + for (auto &resource : resources.sampled_images) + { + unsigned set = glsl.get_decoration(resource.id, spv::DecorationDescriptorSet); + unsigned binding = glsl.get_decoration(resource.id, spv::DecorationBinding); + printf("Image %s at set = %u, binding = %u\n", resource.name.c_str(), set, binding); + + // Modify the decoration to prepare it for GLSL. + glsl.unset_decoration(resource.id, spv::DecorationDescriptorSet); + + // Some arbitrary remapping if we want. + glsl.set_decoration(resource.id, spv::DecorationBinding, set * 16 + binding); + } + + // Set some options. + spirv_cross::CompilerGLSL::Options options; + options.version = 310; + options.es = true; + glsl.set_options(options); + + // Compile to GLSL, ready to give to GL driver. + std::string source = glsl.compile(); +} +``` + +### Using the C API wrapper + +To facilitate C compatibility and compatibility with foreign programming languages, a C89-compatible API wrapper is provided. Unlike the C++ API, +the goal of this wrapper is to be fully stable, both API and ABI-wise. +This is the only interface which is supported when building SPIRV-Cross as a shared library. + +An important point of the wrapper is that all memory allocations are contained in the `spvc_context`. +This simplifies the use of the API greatly. 
However, you should destroy the context as soon as reasonable, +or use `spvc_context_release_allocations()` if you intend to reuse the `spvc_context` object again soon. + +Most functions return a `spvc_result`, where `SPVC_SUCCESS` is the only success code. +For brevity, the code below does not do any error checking. + +```c +#include <spirv_cross_c.h> + +const SpvId *spirv = get_spirv_data(); +size_t word_count = get_spirv_word_count(); + +spvc_context context = NULL; +spvc_parsed_ir ir = NULL; +spvc_compiler compiler_glsl = NULL; +spvc_compiler_options options = NULL; +spvc_resources resources = NULL; +const spvc_reflected_resource *list = NULL; +const char *result = NULL; +size_t count; +size_t i; + +// Create context. +spvc_context_create(&context); + +// Set debug callback. +spvc_context_set_error_callback(context, error_callback, userdata); + +// Parse the SPIR-V. +spvc_context_parse_spirv(context, spirv, word_count, &ir); + +// Hand it off to a compiler instance and give it ownership of the IR. +spvc_context_create_compiler(context, SPVC_BACKEND_GLSL, ir, SPVC_CAPTURE_MODE_TAKE_OWNERSHIP, &compiler_glsl); + +// Do some basic reflection. +spvc_compiler_create_shader_resources(compiler_glsl, &resources); +spvc_resources_get_resource_list_for_type(resources, SPVC_RESOURCE_TYPE_UNIFORM_BUFFER, &list, &count); + +for (i = 0; i < count; i++) +{ + printf("ID: %u, BaseTypeID: %u, TypeID: %u, Name: %s\n", list[i].id, list[i].base_type_id, list[i].type_id, + list[i].name); + printf(" Set: %u, Binding: %u\n", + spvc_compiler_get_decoration(compiler_glsl, list[i].id, SpvDecorationDescriptorSet), + spvc_compiler_get_decoration(compiler_glsl, list[i].id, SpvDecorationBinding)); +} + +// Modify options. 
+spvc_compiler_create_compiler_options(compiler_glsl, &options); +spvc_compiler_options_set_uint(options, SPVC_COMPILER_OPTION_GLSL_VERSION, 330); +spvc_compiler_options_set_bool(options, SPVC_COMPILER_OPTION_GLSL_ES, SPVC_FALSE); +spvc_compiler_install_compiler_options(compiler_glsl, options); + +spvc_compiler_compile(compiler_glsl, &result); +printf("Cross-compiled source: %s\n", result); + +// Frees all memory we allocated so far. +spvc_context_destroy(context); +``` + +### Linking + +#### CMake add_subdirectory() + +This is the recommended way if you are using CMake and want to link against SPIRV-Cross statically. + +#### Integrating SPIRV-Cross in a custom build system + +To add SPIRV-Cross to your own codebase, just copy the source and header files from the root directory +and build the relevant .cpp files you need. Make sure to build with C++11 support, e.g. `-std=c++11` in GCC and Clang. +Alternatively, the Makefile generates a libspirv-cross.a static library during build that can be linked in. + +#### Linking against SPIRV-Cross as a system library + +It is possible to link against SPIRV-Cross when it is installed as a system library, +which would be mostly relevant for Unix-like platforms. + +##### pkg-config + +For Unix-based systems, a pkg-config is installed for the C API, e.g.: + +``` +$ pkg-config spirv-cross-c-shared --libs --cflags +-I/usr/local/include/spirv_cross -L/usr/local/lib -lspirv-cross-c-shared +``` + +##### CMake + +If the project is installed, it can be found with `find_package()`, e.g.: + +``` +cmake_minimum_required(VERSION 3.5) +set(CMAKE_C_STANDARD 99) +project(Test LANGUAGES C) + +find_package(spirv_cross_c_shared) +if (spirv_cross_c_shared_FOUND) + message(STATUS "Found SPIRV-Cross C API! :)") +else() + message(STATUS "Could not find SPIRV-Cross C API! 
:(") +endif() + +add_executable(test test.c) +target_link_libraries(test spirv-cross-c-shared) +``` + +test.c: +```c +#include <spirv_cross_c.h> + +int main(void) +{ + spvc_context context; + spvc_context_create(&context); + spvc_context_destroy(context); +} +``` + +### CLI + +The CLI is suitable for basic cross-compilation tasks, but it cannot support the full flexibility that the API can. +Some examples below. + +#### Creating a SPIR-V file from GLSL with glslang + +``` +glslangValidator -H -V -o test.spv test.frag +``` + +#### Converting a SPIR-V file to GLSL ES + +``` +glslangValidator -H -V -o test.spv shaders/comp/basic.comp +./spirv-cross --version 310 --es test.spv +``` + +#### Converting to desktop GLSL + +``` +glslangValidator -H -V -o test.spv shaders/comp/basic.comp +./spirv-cross --version 330 --no-es test.spv --output test.comp +``` + +#### Disable prettifying optimizations + +``` +glslangValidator -H -V -o test.spv shaders/comp/basic.comp +./spirv-cross --version 310 --es test.spv --output test.comp --force-temporary +``` + +### Using shaders generated from C++ backend + +Please see `samples/cpp` where some GLSL shaders are compiled to SPIR-V, decompiled to C++ and run with test data. +Reading through the samples should explain how to use the C++ interface. +A simple Makefile is included to build all shaders in the directory. + +### Implementation notes + +When using SPIR-V and SPIRV-Cross as an intermediate step for cross-compiling between high level languages there are some considerations to take into account, +as not all features used by one high-level language are necessarily supported natively by the target shader language. +SPIRV-Cross aims to provide the tools needed to handle these scenarios in a clean and robust way, but some manual action is required to maintain compatibility. + +#### HLSL source to GLSL + +##### HLSL entry points + +When using SPIR-V shaders compiled from HLSL, there are some extra things you need to take care of. 
+First make sure that the entry point is used correctly. +If you forget to set the entry point correctly in glslangValidator (-e MyFancyEntryPoint), +you will likely encounter this error message: + +``` +Cannot end a function before ending the current block. +Likely cause: If this SPIR-V was created from glslang HLSL, make sure the entry point is valid. +``` + +##### Vertex/Fragment interface linking + +HLSL relies on semantics in order to effectively link together shader stages. In the SPIR-V generated by glslang, the transformation from HLSL to GLSL ends up looking like + +```c++ +struct VSOutput { + // SV_Position is rerouted to gl_Position + float4 position : SV_Position; + float4 coord : TEXCOORD0; +}; + +VSOutput main(...) {} +``` + +```c++ +struct VSOutput { + float4 coord; +} +layout(location = 0) out VSOutput _magicNameGeneratedByGlslang; +``` + +While this works, be aware of the type of the struct which is used in the vertex stage and the fragment stage. +There may be issues if the structure type name differs in vertex stage and fragment stage. + +You can make use of the reflection interface to force the name of the struct type. + +``` +// Something like this for both vertex outputs and fragment inputs. +compiler.set_name(varying_resource.base_type_id, "VertexFragmentLinkage"); +``` + +Some platforms (for example macOS) may require identical variable names for both vertex outputs and fragment inputs. +To rename variables based on location, please add +``` +--rename-interface-variable +``` + +#### HLSL source to legacy GLSL/ESSL + +HLSL tends to emit varying struct types to pass data between vertex and fragment. +This is not supported in legacy GL/GLES targets, so to support this, varying structs are flattened. +This is done automatically, but the API user might need to be aware that this is happening in order to support all cases. 
+ +Modern GLES code like this: +```c++ +struct Output { + vec4 a; + vec2 b; +}; +out Output vout; +``` + +Is transformed into: +```c++ +struct Output { + vec4 a; + vec2 b; +}; +varying vec4 Output_a; +varying vec2 Output_b; +``` + +Note that now, both the struct name and the member names will participate in the linking interface between vertex and fragment, so +API users might want to ensure that both the struct names and member names match so that vertex outputs and fragment inputs can link properly. + + +#### Separate image samplers (HLSL/Vulkan) for backends which do not support it (GLSL) + +Another thing you need to remember is when using samplers and textures in HLSL these are separable, and not directly compatible with GLSL. If you need to use this with desktop GL/GLES, you need to call `Compiler::build_combined_image_samplers` first before calling `Compiler::compile`, or you will get an exception. + +```c++ +// From main.cpp +// Builds a mapping for all combinations of images and samplers. +compiler->build_combined_image_samplers(); + +// Give the remapped combined samplers new names. +// Here you can also set up decorations if you want (binding = #N). +for (auto &remap : compiler->get_combined_image_samplers()) +{ + compiler->set_name(remap.combined_id, join("SPIRV_Cross_Combined", compiler->get_name(remap.image_id), + compiler->get_name(remap.sampler_id))); +} +``` + +If your target is Vulkan GLSL, `--vulkan-semantics` will emit separate image samplers as you'd expect. +The command line client calls `Compiler::build_combined_image_samplers` automatically, but if you're calling the library, you'll need to do this yourself. + +#### Descriptor sets (Vulkan GLSL) for backends which do not support them (HLSL/GLSL/Metal) + +Descriptor sets are unique to Vulkan, so make sure that descriptor set + binding is remapped to a flat binding scheme (set always 0), so that other APIs can make sense of the bindings. 
+This can be done with `Compiler::set_decoration(id, spv::DecorationDescriptorSet)`. + +#### Linking by name for targets which do not support explicit locations (legacy GLSL/ESSL) + +Modern GLSL and HLSL sources (and SPIR-V) rely on explicit layout(location) qualifiers to guide the linking process between shader stages, +but older GLSL relies on symbol names to perform the linking. When emitting shaders with older versions, these layout statements will be removed, +so it is important that the API user ensures that the names of I/O variables are sanitized so that linking will work properly. +The reflection API can rename variables, struct types and struct members to deal with these scenarios using `Compiler::set_name` and friends. + +#### Clip-space conventions + +SPIRV-Cross can perform some common clip space conversions on gl_Position/SV_Position by enabling `CompilerGLSL::Options.vertex.fixup_clipspace`. +While this can be convenient, it is recommended to modify the projection matrices instead as that can achieve the same result. + +For GLSL targets, enabling this will convert a shader which assumes `[0, w]` depth range (Vulkan / D3D / Metal) into `[-w, w]` range. +For MSL and HLSL targets, enabling this will convert a shader in `[-w, w]` depth range (OpenGL) to `[0, w]` depth range. + +By default, the CLI will not enable `fixup_clipspace`, but in the API you might want to set an explicit value using `CompilerGLSL::set_options()`. + +Y-flipping of gl_Position and similar is also supported. +The use of this is discouraged, because relying on vertex shader Y-flipping tends to get quite messy. +To enable this, set `CompilerGLSL::Options.vertex.flip_vert_y` or `--flip-vert-y` in the CLI. + +#### Reserved identifiers + +When cross-compiling, certain identifiers are considered to be reserved by the implementation. 
+Code generated by SPIRV-Cross cannot emit these identifiers as they are reserved and used for various internal purposes, +and such variables will typically show up as `_RESERVED_IDENTIFIER_FIXUP_` +or some similar name to make it more obvious that an identifier has been renamed. + +Reflection output will follow the exact name specified in the SPIR-V module. It might not be a valid identifier in the C sense, +as it may contain non-alphanumeric/non-underscore characters. + +Reserved identifiers currently assumed by the implementation are (in pseudo-regex): + +- _$digit+, e.g. `_100`, `_2` +- _$digit+_.+, e.g. `_100_tmp`, `_2_foobar`. `_2Bar` is **not** reserved. +- gl_- prefix +- spv- prefix +- SPIRV_Cross prefix. This prefix is generally used for interface variables where app needs to provide data for workaround purposes. + This identifier will not be rewritten, but be aware of potential collisions. +- Double underscores (reserved by all target languages). + +Members of structs also have a reserved identifier: +- _m$digit+$END, e.g. `_m20` and `_m40` are reserved, but not `_m40Foobar`. + +## Contributing + +Contributions to SPIRV-Cross are welcome. See Testing and Licensing sections for details. + +### Testing + +SPIRV-Cross maintains a test suite of shaders with reference output of how the output looks after going through a roundtrip through +glslangValidator/spirv-as then back through SPIRV-Cross again. +The reference files are stored inside the repository in order to be able to track regressions. + +All pull requests should ensure that test output does not change unexpectedly. This can be tested with: + +``` +./checkout_glslang_spirv_tools.sh # Checks out glslang and SPIRV-Tools at a fixed revision which matches the reference output. + # NOTE: Some users have reported problems cloning from git:// paths. To use https:// instead pass in + # $ PROTOCOL=https ./checkout_glslang_spirv_tools.sh + # instead. 
+./build_glslang_spirv_tools.sh # Builds glslang and SPIRV-Tools. +./test_shaders.sh # Runs over all changes and makes sure that there are no deltas compared to reference files. +``` + +`./test_shaders.sh` currently requires a Makefile build with GCC/Clang to be set up. +However, on Windows, this can be rather inconvenient if a MinGW environment is not set up. +To use a spirv-cross binary you built with CMake (or otherwise), you can pass in an environment variable as such: + +``` +SPIRV_CROSS_PATH=path/to/custom/spirv-cross ./test_shaders.sh +``` + +However, when improving SPIRV-Cross there are of course legitimate cases where reference output should change. +In these cases, run: + +``` +./update_test_shaders.sh # SPIRV_CROSS_PATH also works here. +``` + +to update the reference files and include these changes as part of the pull request. +Always make sure you are running the correct version of glslangValidator as well as SPIRV-Tools when updating reference files. +See `checkout_glslang_spirv_tools.sh` for which revisions are currently expected. The revisions change regularly. + +In short, the master branch should always be able to run `./test_shaders.py shaders` and friends without failure. +SPIRV-Cross uses Travis CI to test all pull requests, so it is not strictly needed to perform testing yourself if you have problems running it locally. +A pull request which does not pass testing on Travis will not be accepted, however. + +When adding support for new features to SPIRV-Cross, a new shader and reference file should be added which covers usage of the new shader features in question. +Travis CI runs the test suite with CMake by running `ctest`. This is a more straight-forward alternative to `./test_shaders.sh`. + +### Licensing + +Contributors of new files should add a copyright header at the top of every new source code file with their copyright +along with the Apache 2.0 licensing stub. 
+ +### Formatting + +SPIRV-Cross uses `clang-format` to automatically format code. +Please use `clang-format` with the style sheet found in `.clang-format` to automatically format code before submitting a pull request. + +To make things easy, the `format_all.sh` script can be used to format all +source files in the library. In this directory, run the following from the +command line: + + ./format_all.sh + +## Regression testing + +In shaders/ a collection of shaders is maintained for purposes of regression testing. +The current reference output is contained in reference/. +`./test_shaders.py shaders` can be run to perform regression testing. + +See `./test_shaders.py --help` for more. + +### Metal backend + +To test the roundtrip path GLSL -> SPIR-V -> MSL, `--msl` can be added, e.g. `./test_shaders.py --msl shaders-msl`. + +### HLSL backend + +To test the roundtrip path GLSL -> SPIR-V -> HLSL, `--hlsl` can be added, e.g. `./test_shaders.py --hlsl shaders-hlsl`. + +### Updating regression tests + +When legitimate changes are found, use the `--update` flag to update regression files. +Otherwise, `./test_shaders.py` will fail with an error code. + +### Mali Offline Compiler cycle counts + +To obtain a CSV of static shader cycle counts before and after going through spirv-cross, add the +`--malisc` flag to `./test_shaders.py`. This requires the Mali Offline Compiler to be installed in PATH. + diff --git a/dep/spirv-cross/main.cpp b/dep/spirv-cross/main.cpp new file mode 100644 index 000000000..6f511358b --- /dev/null +++ b/dep/spirv-cross/main.cpp @@ -0,0 +1,1620 @@ +/* + * Copyright 2015-2020 Arm Limited + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * At your option, you may choose to accept this material under either: + * 1. The Apache License, Version 2.0, found at , or + * 2. The MIT License, found at . + * SPDX-License-Identifier: Apache-2.0 OR MIT. + */ + +#include "spirv_cpp.hpp" +#include "spirv_cross_util.hpp" +#include "spirv_glsl.hpp" +#include "spirv_hlsl.hpp" +#include "spirv_msl.hpp" +#include "spirv_parser.hpp" +#include "spirv_reflect.hpp" +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#ifdef HAVE_SPIRV_CROSS_GIT_VERSION +#include "gitversion.h" +#endif + +using namespace spv; +using namespace SPIRV_CROSS_NAMESPACE; +using namespace std; + +#ifdef SPIRV_CROSS_EXCEPTIONS_TO_ASSERTIONS +static inline void THROW(const char *str) +{ + fprintf(stderr, "SPIRV-Cross will abort: %s\n", str); + fflush(stderr); + abort(); +} +#else +#define THROW(x) throw runtime_error(x) +#endif + +struct CLIParser; +struct CLICallbacks +{ + void add(const char *cli, const function &func) + { + callbacks[cli] = func; + } + unordered_map> callbacks; + function error_handler; + function default_handler; +}; + +struct CLIParser +{ + CLIParser(CLICallbacks cbs_, int argc_, char *argv_[]) + : cbs(move(cbs_)) + , argc(argc_) + , argv(argv_) + { + } + + bool parse() + { +#ifndef SPIRV_CROSS_EXCEPTIONS_TO_ASSERTIONS + try +#endif + { + while (argc && !ended_state) + { + const char *next = *argv++; + argc--; + + if (*next != '-' && cbs.default_handler) + { + cbs.default_handler(next); + } + else + { + auto itr = cbs.callbacks.find(next); + if (itr == 
::end(cbs.callbacks)) + { + THROW("Invalid argument"); + } + + itr->second(*this); + } + } + + return true; + } +#ifndef SPIRV_CROSS_EXCEPTIONS_TO_ASSERTIONS + catch (...) + { + if (cbs.error_handler) + { + cbs.error_handler(); + } + return false; + } +#endif + } + + void end() + { + ended_state = true; + } + + uint32_t next_uint() + { + if (!argc) + { + THROW("Tried to parse uint, but nothing left in arguments"); + } + + uint64_t val = stoul(*argv); + if (val > numeric_limits::max()) + { + THROW("next_uint() out of range"); + } + + argc--; + argv++; + + return uint32_t(val); + } + + uint32_t next_hex_uint() + { + if (!argc) + { + THROW("Tried to parse uint, but nothing left in arguments"); + } + + uint64_t val = stoul(*argv, nullptr, 16); + if (val > numeric_limits::max()) + { + THROW("next_uint() out of range"); + } + + argc--; + argv++; + + return uint32_t(val); + } + + double next_double() + { + if (!argc) + { + THROW("Tried to parse double, but nothing left in arguments"); + } + + double val = stod(*argv); + + argc--; + argv++; + + return val; + } + + // Return a string only if it's not prefixed with `--`, otherwise return the default value + const char *next_value_string(const char *default_value) + { + if (!argc) + { + return default_value; + } + + if (0 == strncmp("--", *argv, 2)) + { + return default_value; + } + + return next_string(); + } + + const char *next_string() + { + if (!argc) + { + THROW("Tried to parse string, but nothing left in arguments"); + } + + const char *ret = *argv; + argc--; + argv++; + return ret; + } + + CLICallbacks cbs; + int argc; + char **argv; + bool ended_state = false; +}; + +#if defined(__clang__) || defined(__GNUC__) +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wdeprecated-declarations" +#elif defined(_MSC_VER) +#pragma warning(push) +#pragma warning(disable : 4996) +#endif + +static vector read_spirv_file(const char *path) +{ + FILE *file = fopen(path, "rb"); + if (!file) + { + fprintf(stderr, "Failed to 
open SPIR-V file: %s\n", path); + return {}; + } + + fseek(file, 0, SEEK_END); + long len = ftell(file) / sizeof(uint32_t); + rewind(file); + + vector spirv(len); + if (fread(spirv.data(), sizeof(uint32_t), len, file) != size_t(len)) + spirv.clear(); + + fclose(file); + return spirv; +} + +static bool write_string_to_file(const char *path, const char *string) +{ + FILE *file = fopen(path, "w"); + if (!file) + { + fprintf(stderr, "Failed to write file: %s\n", path); + return false; + } + + fprintf(file, "%s", string); + fclose(file); + return true; +} + +#if defined(__clang__) || defined(__GNUC__) +#pragma GCC diagnostic pop +#elif defined(_MSC_VER) +#pragma warning(pop) +#endif + +static void print_resources(const Compiler &compiler, const char *tag, const SmallVector &resources) +{ + fprintf(stderr, "%s\n", tag); + fprintf(stderr, "=============\n\n"); + bool print_ssbo = !strcmp(tag, "ssbos"); + + for (auto &res : resources) + { + auto &type = compiler.get_type(res.type_id); + + if (print_ssbo && compiler.buffer_is_hlsl_counter_buffer(res.id)) + continue; + + // If we don't have a name, use the fallback for the type instead of the variable + // for SSBOs and UBOs since those are the only meaningful names to use externally. + // Push constant blocks are still accessed by name and not block name, even though they are technically Blocks. + bool is_push_constant = compiler.get_storage_class(res.id) == StorageClassPushConstant; + bool is_block = compiler.get_decoration_bitset(type.self).get(DecorationBlock) || + compiler.get_decoration_bitset(type.self).get(DecorationBufferBlock); + bool is_sized_block = is_block && (compiler.get_storage_class(res.id) == StorageClassUniform || + compiler.get_storage_class(res.id) == StorageClassUniformConstant); + ID fallback_id = !is_push_constant && is_block ? 
ID(res.base_type_id) : ID(res.id); + + uint32_t block_size = 0; + uint32_t runtime_array_stride = 0; + if (is_sized_block) + { + auto &base_type = compiler.get_type(res.base_type_id); + block_size = uint32_t(compiler.get_declared_struct_size(base_type)); + runtime_array_stride = uint32_t(compiler.get_declared_struct_size_runtime_array(base_type, 1) - + compiler.get_declared_struct_size_runtime_array(base_type, 0)); + } + + Bitset mask; + if (print_ssbo) + mask = compiler.get_buffer_block_flags(res.id); + else + mask = compiler.get_decoration_bitset(res.id); + + string array; + for (auto arr : type.array) + array = join("[", arr ? convert_to_string(arr) : "", "]") + array; + + fprintf(stderr, " ID %03u : %s%s", uint32_t(res.id), + !res.name.empty() ? res.name.c_str() : compiler.get_fallback_name(fallback_id).c_str(), array.c_str()); + + if (mask.get(DecorationLocation)) + fprintf(stderr, " (Location : %u)", compiler.get_decoration(res.id, DecorationLocation)); + if (mask.get(DecorationDescriptorSet)) + fprintf(stderr, " (Set : %u)", compiler.get_decoration(res.id, DecorationDescriptorSet)); + if (mask.get(DecorationBinding)) + fprintf(stderr, " (Binding : %u)", compiler.get_decoration(res.id, DecorationBinding)); + if (static_cast(compiler).variable_is_depth_or_compare(res.id)) + fprintf(stderr, " (comparison)"); + if (mask.get(DecorationInputAttachmentIndex)) + fprintf(stderr, " (Attachment : %u)", compiler.get_decoration(res.id, DecorationInputAttachmentIndex)); + if (mask.get(DecorationNonReadable)) + fprintf(stderr, " writeonly"); + if (mask.get(DecorationNonWritable)) + fprintf(stderr, " readonly"); + if (is_sized_block) + { + fprintf(stderr, " (BlockSize : %u bytes)", block_size); + if (runtime_array_stride) + fprintf(stderr, " (Unsized array stride: %u bytes)", runtime_array_stride); + } + + uint32_t counter_id = 0; + if (print_ssbo && compiler.buffer_get_hlsl_counter_buffer(res.id, counter_id)) + fprintf(stderr, " (HLSL counter buffer ID: %u)", counter_id); 
+ fprintf(stderr, "\n"); + } + fprintf(stderr, "=============\n\n"); +} + +static const char *execution_model_to_str(spv::ExecutionModel model) +{ + switch (model) + { + case spv::ExecutionModelVertex: + return "vertex"; + case spv::ExecutionModelTessellationControl: + return "tessellation control"; + case ExecutionModelTessellationEvaluation: + return "tessellation evaluation"; + case ExecutionModelGeometry: + return "geometry"; + case ExecutionModelFragment: + return "fragment"; + case ExecutionModelGLCompute: + return "compute"; + case ExecutionModelRayGenerationNV: + return "raygenNV"; + case ExecutionModelIntersectionNV: + return "intersectionNV"; + case ExecutionModelCallableNV: + return "callableNV"; + case ExecutionModelAnyHitNV: + return "anyhitNV"; + case ExecutionModelClosestHitNV: + return "closesthitNV"; + case ExecutionModelMissNV: + return "missNV"; + default: + return "???"; + } +} + +static void print_resources(const Compiler &compiler, const ShaderResources &res) +{ + auto &modes = compiler.get_execution_mode_bitset(); + + fprintf(stderr, "Entry points:\n"); + auto entry_points = compiler.get_entry_points_and_stages(); + for (auto &e : entry_points) + fprintf(stderr, " %s (%s)\n", e.name.c_str(), execution_model_to_str(e.execution_model)); + fprintf(stderr, "\n"); + + fprintf(stderr, "Execution modes:\n"); + modes.for_each_bit([&](uint32_t i) { + auto mode = static_cast(i); + uint32_t arg0 = compiler.get_execution_mode_argument(mode, 0); + uint32_t arg1 = compiler.get_execution_mode_argument(mode, 1); + uint32_t arg2 = compiler.get_execution_mode_argument(mode, 2); + + switch (static_cast(i)) + { + case ExecutionModeInvocations: + fprintf(stderr, " Invocations: %u\n", arg0); + break; + + case ExecutionModeLocalSize: + fprintf(stderr, " LocalSize: (%u, %u, %u)\n", arg0, arg1, arg2); + break; + + case ExecutionModeOutputVertices: + fprintf(stderr, " OutputVertices: %u\n", arg0); + break; + +#define CHECK_MODE(m) \ + case ExecutionMode##m: \ + 
fprintf(stderr, " %s\n", #m); \ + break + CHECK_MODE(SpacingEqual); + CHECK_MODE(SpacingFractionalEven); + CHECK_MODE(SpacingFractionalOdd); + CHECK_MODE(VertexOrderCw); + CHECK_MODE(VertexOrderCcw); + CHECK_MODE(PixelCenterInteger); + CHECK_MODE(OriginUpperLeft); + CHECK_MODE(OriginLowerLeft); + CHECK_MODE(EarlyFragmentTests); + CHECK_MODE(PointMode); + CHECK_MODE(Xfb); + CHECK_MODE(DepthReplacing); + CHECK_MODE(DepthGreater); + CHECK_MODE(DepthLess); + CHECK_MODE(DepthUnchanged); + CHECK_MODE(LocalSizeHint); + CHECK_MODE(InputPoints); + CHECK_MODE(InputLines); + CHECK_MODE(InputLinesAdjacency); + CHECK_MODE(Triangles); + CHECK_MODE(InputTrianglesAdjacency); + CHECK_MODE(Quads); + CHECK_MODE(Isolines); + CHECK_MODE(OutputPoints); + CHECK_MODE(OutputLineStrip); + CHECK_MODE(OutputTriangleStrip); + CHECK_MODE(VecTypeHint); + CHECK_MODE(ContractionOff); + + default: + break; + } + }); + fprintf(stderr, "\n"); + + print_resources(compiler, "subpass inputs", res.subpass_inputs); + print_resources(compiler, "inputs", res.stage_inputs); + print_resources(compiler, "outputs", res.stage_outputs); + print_resources(compiler, "textures", res.sampled_images); + print_resources(compiler, "separate images", res.separate_images); + print_resources(compiler, "separate samplers", res.separate_samplers); + print_resources(compiler, "images", res.storage_images); + print_resources(compiler, "ssbos", res.storage_buffers); + print_resources(compiler, "ubos", res.uniform_buffers); + print_resources(compiler, "push", res.push_constant_buffers); + print_resources(compiler, "counters", res.atomic_counters); + print_resources(compiler, "acceleration structures", res.acceleration_structures); +} + +static void print_push_constant_resources(const Compiler &compiler, const SmallVector &res) +{ + for (auto &block : res) + { + auto ranges = compiler.get_active_buffer_ranges(block.id); + fprintf(stderr, "Active members in buffer: %s\n", + !block.name.empty() ? 
block.name.c_str() : compiler.get_fallback_name(block.id).c_str()); + + fprintf(stderr, "==================\n\n"); + for (auto &range : ranges) + { + const auto &name = compiler.get_member_name(block.base_type_id, range.index); + + fprintf(stderr, "Member #%3u (%s): Offset: %4u, Range: %4u\n", range.index, + !name.empty() ? name.c_str() : compiler.get_fallback_member_name(range.index).c_str(), + unsigned(range.offset), unsigned(range.range)); + } + fprintf(stderr, "==================\n\n"); + } +} + +static void print_spec_constants(const Compiler &compiler) +{ + auto spec_constants = compiler.get_specialization_constants(); + fprintf(stderr, "Specialization constants\n"); + fprintf(stderr, "==================\n\n"); + for (auto &c : spec_constants) + fprintf(stderr, "ID: %u, Spec ID: %u\n", uint32_t(c.id), c.constant_id); + fprintf(stderr, "==================\n\n"); +} + +static void print_capabilities_and_extensions(const Compiler &compiler) +{ + fprintf(stderr, "Capabilities\n"); + fprintf(stderr, "============\n"); + for (auto &capability : compiler.get_declared_capabilities()) + fprintf(stderr, "Capability: %u\n", static_cast(capability)); + fprintf(stderr, "============\n\n"); + + fprintf(stderr, "Extensions\n"); + fprintf(stderr, "============\n"); + for (auto &ext : compiler.get_declared_extensions()) + fprintf(stderr, "Extension: %s\n", ext.c_str()); + fprintf(stderr, "============\n\n"); +} + +struct PLSArg +{ + PlsFormat format; + string name; +}; + +struct Remap +{ + string src_name; + string dst_name; + unsigned components; +}; + +struct VariableTypeRemap +{ + string variable_name; + string new_variable_type; +}; + +struct InterfaceVariableRename +{ + StorageClass storageClass; + uint32_t location; + string variable_name; +}; + +struct CLIArguments +{ + const char *input = nullptr; + const char *output = nullptr; + const char *cpp_interface_name = nullptr; + uint32_t version = 0; + uint32_t shader_model = 0; + uint32_t msl_version = 0; + bool es = 
false; + bool set_version = false; + bool set_shader_model = false; + bool set_msl_version = false; + bool set_es = false; + bool dump_resources = false; + bool force_temporary = false; + bool flatten_ubo = false; + bool fixup = false; + bool yflip = false; + bool sso = false; + bool support_nonzero_baseinstance = true; + bool msl_capture_output_to_buffer = false; + bool msl_swizzle_texture_samples = false; + bool msl_ios = false; + bool msl_pad_fragment_output = false; + bool msl_domain_lower_left = false; + bool msl_argument_buffers = false; + bool msl_texture_buffer_native = false; + bool msl_framebuffer_fetch = false; + bool msl_invariant_float_math = false; + bool msl_emulate_cube_array = false; + bool msl_multiview = false; + bool msl_multiview_layered_rendering = true; + bool msl_view_index_from_device_index = false; + bool msl_dispatch_base = false; + bool msl_decoration_binding = false; + bool msl_force_active_argument_buffer_resources = false; + bool msl_force_native_arrays = false; + bool msl_enable_frag_depth_builtin = true; + bool msl_enable_frag_stencil_ref_builtin = true; + uint32_t msl_enable_frag_output_mask = 0xffffffff; + bool msl_enable_clip_distance_user_varying = true; + bool msl_multi_patch_workgroup = false; + bool msl_vertex_for_tessellation = false; + uint32_t msl_additional_fixed_sample_mask = 0xffffffff; + bool msl_arrayed_subpass_input = false; + uint32_t msl_r32ui_linear_texture_alignment = 4; + uint32_t msl_r32ui_alignment_constant_id = 65535; + bool msl_texture_1d_as_2d = false; + bool msl_ios_use_simdgroup_functions = false; + bool msl_emulate_subgroups = false; + uint32_t msl_fixed_subgroup_size = 0; + bool msl_force_sample_rate_shading = false; + bool glsl_emit_push_constant_as_ubo = false; + bool glsl_emit_ubo_as_plain_uniforms = false; + bool glsl_force_flattened_io_blocks = false; + SmallVector> glsl_ext_framebuffer_fetch; + bool vulkan_glsl_disable_ext_samplerless_texture_functions = false; + bool emit_line_directives = false; 
+ bool enable_storage_image_qualifier_deduction = true; + bool force_zero_initialized_variables = false; + SmallVector msl_discrete_descriptor_sets; + SmallVector msl_device_argument_buffers; + SmallVector> msl_dynamic_buffers; + SmallVector> msl_inline_uniform_blocks; + SmallVector msl_shader_inputs; + SmallVector pls_in; + SmallVector pls_out; + SmallVector remaps; + SmallVector extensions; + SmallVector variable_type_remaps; + SmallVector interface_variable_renames; + SmallVector hlsl_attr_remap; + string entry; + string entry_stage; + + struct Rename + { + string old_name; + string new_name; + ExecutionModel execution_model; + }; + SmallVector entry_point_rename; + + uint32_t iterations = 1; + bool cpp = false; + string reflect; + bool msl = false; + bool hlsl = false; + bool hlsl_compat = false; + bool hlsl_support_nonzero_base = false; + bool hlsl_force_storage_buffer_as_uav = false; + bool hlsl_nonwritable_uav_texture_as_srv = false; + bool hlsl_enable_16bit_types = false; + bool hlsl_flatten_matrix_vertex_input_semantics = false; + HLSLBindingFlags hlsl_binding_flags = 0; + bool vulkan_semantics = false; + bool flatten_multidimensional_arrays = false; + bool use_420pack_extension = true; + bool remove_unused = false; + bool combined_samplers_inherit_bindings = false; +}; + +static void print_version() +{ +#ifdef HAVE_SPIRV_CROSS_GIT_VERSION + fprintf(stderr, "%s\n", SPIRV_CROSS_GIT_REVISION); +#else + fprintf(stderr, "Git revision unknown. Build with CMake to create timestamp and revision info.\n"); +#endif +} + +static void print_help_backend() +{ + // clang-format off + fprintf(stderr, "\nSelect backend:\n" + "\tBy default, OpenGL-style GLSL is the target, with #version and GLSL/ESSL information inherited from the SPIR-V module if present.\n" + "\t[--vulkan-semantics] or [-V]:\n\t\tEmit Vulkan GLSL instead of plain GLSL. 
Makes use of Vulkan-only features to match SPIR-V.\n" + "\t[--msl]:\n\t\tEmit Metal Shading Language (MSL).\n" + "\t[--hlsl]:\n\t\tEmit HLSL.\n" + "\t[--reflect]:\n\t\tEmit JSON reflection.\n" + "\t[--cpp]:\n\t\tDEPRECATED. Emits C++ code.\n" + ); + // clang-format on +} + +static void print_help_glsl() +{ + // clang-format off + fprintf(stderr, "\nGLSL options:\n" + "\t[--es]:\n\t\tForce ESSL.\n" + "\t[--no-es]:\n\t\tForce desktop GLSL.\n" + "\t[--version ]:\n\t\tE.g. --version 450 will emit '#version 450' in shader.\n" + "\t\tCode generation will depend on the version used.\n" + "\t[--flatten-ubo]:\n\t\tEmit UBOs as plain uniform arrays which are suitable for use with glUniform4*v().\n" + "\t\tThis can be an optimization on GL implementations where this is faster or works around buggy driver implementations.\n" + "\t\tE.g.: uniform MyUBO { vec4 a; float b, c, d, e; }; will be emitted as uniform vec4 MyUBO[2];\n" + "\t\tCaveat: You cannot mix and match floating-point and integer in the same UBO with this option.\n" + "\t\tLegacy GLSL/ESSL (where this flattening makes sense) does not support bit-casting, which would have been the obvious workaround.\n" + "\t[--extension ext]:\n\t\tAdd #extension string of your choosing to GLSL output.\n" + "\t\tUseful if you use variable name remapping to something that requires an extension unknown to SPIRV-Cross.\n" + "\t[--remove-unused-variables]:\n\t\tDo not emit interface variables which are not statically accessed by the shader.\n" + "\t[--separate-shader-objects]:\n\t\tRedeclare gl_PerVertex blocks to be suitable for desktop GL separate shader objects.\n" + "\t[--glsl-emit-push-constant-as-ubo]:\n\t\tInstead of a plain uniform of struct for push constants, emit a UBO block instead.\n" + "\t[--glsl-emit-ubo-as-plain-uniforms]:\n\t\tInstead of emitting UBOs, emit them as plain uniform structs.\n" + "\t[--glsl-remap-ext-framebuffer-fetch input-attachment color-location]:\n\t\tRemaps an input attachment to use 
GL_EXT_shader_framebuffer_fetch.\n" + "\t\tgl_LastFragData[location] is read from. The attachment to read from must be declared as an output in the shader.\n" + "\t[--vulkan-glsl-disable-ext-samplerless-texture-functions]:\n\t\tDo not allow use of GL_EXT_samperless_texture_functions, even in Vulkan GLSL.\n" + "\t\tUse of texelFetch and similar might have to create dummy samplers to work around it.\n" + "\t[--combined-samplers-inherit-bindings]:\n\t\tInherit binding information from the textures when building combined image samplers from separate textures and samplers.\n" + "\t[--no-support-nonzero-baseinstance]:\n\t\tWhen using gl_InstanceIndex with desktop GL,\n" + "\t\tassume that base instance is always 0, and do not attempt to fix up gl_InstanceID to match Vulkan semantics.\n" + "\t[--pls-in format input-name]:\n\t\tRemaps a subpass input with name into a GL_EXT_pixel_local_storage input.\n" + "\t\tEntry in PLS block is ordered where first --pls-in marks the first entry. Can be called multiple times.\n" + "\t\tFormats allowed: r11f_g11f_b10f, r32f, rg16f, rg16, rgb10_a2, rgba8, rgba8i, rgba8ui, rg16i, rgb10_a2ui, rg16ui, r32ui.\n" + "\t\tRequires ESSL.\n" + "\t[--pls-out format output-name]:\n\t\tRemaps a color output with name into a GL_EXT_pixel_local_storage output.\n" + "\t\tEntry in PLS block is ordered where first --pls-output marks the first entry. 
Can be called multiple times.\n" + "\t\tFormats allowed: r11f_g11f_b10f, r32f, rg16f, rg16, rgb10_a2, rgba8, rgba8i, rgba8ui, rg16i, rgb10_a2ui, rg16ui, r32ui.\n" + "\t\tRequires ESSL.\n" + "\t[--remap source_name target_name components]:\n\t\tRemaps a variable to a different name with N components.\n" + "\t\tMain use case is to remap a subpass input to gl_LastFragDepthARM.\n" + "\t\tE.g.:\n" + "\t\tuniform subpassInput uDepth;\n" + "\t\t--remap uDepth gl_LastFragDepthARM 1 --extension GL_ARM_shader_framebuffer_fetch_depth_stencil\n" + "\t[--no-420pack-extension]:\n\t\tDo not make use of GL_ARB_shading_language_420pack in older GL targets to support layout(binding).\n" + "\t[--remap-variable-type ]:\n\t\tRemaps a variable type based on name.\n" + "\t\tPrimary use case is supporting external samplers in ESSL for video rendering on Android where you could remap a texture to a YUV one.\n" + "\t[--glsl-force-flattened-io-blocks]:\n\t\tAlways flatten I/O blocks and structs.\n" + ); + // clang-format on +} + +static void print_help_hlsl() +{ + // clang-format off + fprintf(stderr, "\nHLSL options:\n" + "\t[--shader-model]:\n\t\tEnables a specific shader model, e.g. 
--shader-model 50 for SM 5.0.\n" + "\t[--hlsl-enable-compat]:\n\t\tAllow point size and point coord to be used, even if they won't work as expected.\n" + "\t\tPointSize is ignored, and PointCoord returns (0.5, 0.5).\n" + "\t[--hlsl-support-nonzero-basevertex-baseinstance]:\n\t\tSupport base vertex and base instance by emitting a special cbuffer declared as:\n" + "\t\tcbuffer SPIRV_Cross_VertexInfo { int SPIRV_Cross_BaseVertex; int SPIRV_Cross_BaseInstance; };\n" + "\t[--hlsl-auto-binding (push, cbv, srv, uav, sampler, all)]\n" + "\t\tDo not emit any : register(#) bindings for specific resource types, and rely on HLSL compiler to assign something.\n" + "\t[--hlsl-force-storage-buffer-as-uav]:\n\t\tAlways emit SSBOs as UAVs, even when marked as read-only.\n" + "\t\tNormally, SSBOs marked with NonWritable will be emitted as SRVs.\n" + "\t[--hlsl-nonwritable-uav-texture-as-srv]:\n\t\tEmit NonWritable storage images as SRV textures instead of UAV.\n" + "\t\tUsing this option messes with the type system. SPIRV-Cross cannot guarantee that this will work.\n" + "\t\tOne major problem area with this feature is function arguments, where we won't know if we're seeing a UAV or SRV.\n" + "\t\tShader must ensure that read/write state is consistent at all call sites.\n" + "\t[--set-hlsl-vertex-input-semantic ]:\n\t\tEmits a specific vertex input semantic for a given location.\n" + "\t\tOtherwise, TEXCOORD# is used as semantics, where # is location.\n" + "\t[--hlsl-enable-16bit-types]:\n\t\tEnables native use of half/int16_t/uint16_t and ByteAddressBuffer interaction with these types. Requires SM 6.2.\n" + "\t[--hlsl-flatten-matrix-vertex-input-semantics]:\n\t\tEmits matrix vertex inputs with input semantics as if they were independent vectors, e.g. 
TEXCOORD{2,3,4} rather than matrix form TEXCOORD2_{0,1,2}.\n" + ); + // clang-format on +} + +static void print_help_msl() +{ + // clang-format off + fprintf(stderr, "\nMSL options:\n" + "\t[--msl-version ]:\n\t\tUses a specific MSL version, e.g. --msl-version 20100 for MSL 2.1.\n" + "\t[--msl-capture-output]:\n\t\tWrites geometry varyings to a buffer instead of as stage-outputs.\n" + "\t[--msl-swizzle-texture-samples]:\n\t\tWorks around lack of support for VkImageView component swizzles.\n" + "\t\tThis has a massive impact on performance and bloat. Do not use this unless you are absolutely forced to.\n" + "\t\tTo use this feature, the API side must pass down swizzle buffers.\n" + "\t\tShould only be used by translation layers as a last resort.\n" + "\t\tRecent Metal versions do not require this workaround.\n" + "\t[--msl-ios]:\n\t\tTarget iOS Metal instead of macOS Metal.\n" + "\t[--msl-pad-fragment-output]:\n\t\tAlways emit color outputs as 4-component variables.\n" + "\t\tIn Metal, the fragment shader must emit at least as many components as the render target format.\n" + "\t[--msl-domain-lower-left]:\n\t\tUse a lower-left tessellation domain.\n" + "\t[--msl-argument-buffers]:\n\t\tEmit Indirect Argument buffers instead of plain bindings.\n" + "\t\tRequires MSL 2.0 to be enabled.\n" + "\t[--msl-texture-buffer-native]:\n\t\tEnable native support for texel buffers. 
Otherwise, it is emulated as a normal texture.\n" + "\t[--msl-framebuffer-fetch]:\n\t\tImplement subpass inputs with frame buffer fetch.\n" + "\t\tEmits [[color(N)]] inputs in fragment stage.\n" + "\t\tRequires an Apple GPU.\n" + "\t[--msl-emulate-cube-array]:\n\t\tEmulate cube arrays with 2D array and manual math.\n" + "\t[--msl-discrete-descriptor-set ]:\n\t\tWhen using argument buffers, forces a specific descriptor set to be implemented without argument buffers.\n" + "\t\tUseful for implementing push descriptors in emulation layers.\n" + "\t\tCan be used multiple times for each descriptor set in question.\n" + "\t[--msl-device-argument-buffer ]:\n\t\tUse device address space to hold indirect argument buffers instead of constant.\n" + "\t\tComes up when trying to support argument buffers which are larger than 64 KiB.\n" + "\t[--msl-multiview]:\n\t\tEnable SPV_KHR_multiview emulation.\n" + "\t[--msl-multiview-no-layered-rendering]:\n\t\tDon't set [[render_target_array_index]] in multiview shaders.\n" + "\t\tUseful for devices which don't support layered rendering. 
Only effective when --msl-multiview is enabled.\n" + "\t[--msl-view-index-from-device-index]:\n\t\tTreat the view index as the device index instead.\n" + "\t\tFor multi-GPU rendering.\n" + "\t[--msl-dispatch-base]:\n\t\tAdd support for vkCmdDispatchBase() or similar APIs.\n" + "\t\tOffsets the workgroup ID based on a buffer.\n" + "\t[--msl-dynamic-buffer ]:\n\t\tMarks a buffer as having dynamic offset.\n" + "\t\tThe offset is applied in the shader with pointer arithmetic.\n" + "\t\tUseful for argument buffers where it is non-trivial to apply dynamic offset otherwise.\n" + "\t[--msl-inline-uniform-block ]:\n\t\tIn argument buffers, mark an UBO as being an inline uniform block which is embedded into the argument buffer itself.\n" + "\t[--msl-decoration-binding]:\n\t\tUse SPIR-V bindings directly as MSL bindings.\n" + "\t\tThis does not work in the general case as there is no descriptor set support, and combined image samplers are split up.\n" + "\t\tHowever, if the shader author knows of binding limitations, this option will avoid the need for reflection on Metal side.\n" + "\t[--msl-force-active-argument-buffer-resources]:\n\t\tAlways emit resources which are part of argument buffers.\n" + "\t\tThis makes sure that similar shaders with same resource declarations can share the argument buffer as declaring an argument buffer implies an ABI.\n" + "\t[--msl-force-native-arrays]:\n\t\tRather than implementing array types as a templated value type ala std::array, use plain, native arrays.\n" + "\t\tThis will lead to worse code-gen, but can work around driver bugs on certain driver revisions of certain Intel-based Macbooks where template arrays break.\n" + "\t[--msl-disable-frag-depth-builtin]:\n\t\tDisables FragDepth output. Useful if pipeline does not enable depth, as pipeline creation might otherwise fail.\n" + "\t[--msl-disable-frag-stencil-ref-builtin]:\n\t\tDisable FragStencilRef output. 
Useful if pipeline does not enable stencil output, as pipeline creation might otherwise fail.\n" + "\t[--msl-enable-frag-output-mask ]:\n\t\tOnly selectively enable fragment outputs. Useful if pipeline does not enable fragment output for certain locations, as pipeline creation might otherwise fail.\n" + "\t[--msl-no-clip-distance-user-varying]:\n\t\tDo not emit user varyings to emulate gl_ClipDistance in fragment shaders.\n" + "\t[--msl-shader-input ]:\n\t\tSpecify the format of the shader input at .\n" + "\t\t can be 'any32', 'any16', 'u16', 'u8', or 'other', to indicate a 32-bit opaque value, 16-bit opaque value, 16-bit unsigned integer, 8-bit unsigned integer, " + "or other-typed variable. is the vector length of the variable, which must be greater than or equal to that declared in the shader.\n" + "\t\tUseful if shader stage interfaces don't match up, as pipeline creation might otherwise fail.\n" + "\t[--msl-multi-patch-workgroup]:\n\t\tUse the new style of tessellation control processing, where multiple patches are processed per workgroup.\n" + "\t\tThis should increase throughput by ensuring all the GPU's SIMD lanes are occupied, but it is not compatible with the old style.\n" + "\t\tIn addition, this style also passes input variables in buffers directly instead of using vertex attribute processing.\n" + "\t\tIn a future version of SPIRV-Cross, this will become the default.\n" + "\t[--msl-vertex-for-tessellation]:\n\t\tWhen handling a vertex shader, marks it as one that will be used with a new-style tessellation control shader.\n" + "\t\tThe vertex shader is output to MSL as a compute kernel which outputs vertices to the buffer in the order they are received, rather than in index order as with --msl-capture-output normally.\n" + "\t[--msl-additional-fixed-sample-mask ]:\n" + "\t\tSet an additional fixed sample mask. 
If the shader outputs a sample mask, then the final sample mask will be a bitwise AND of the two.\n" + "\t[--msl-arrayed-subpass-input]:\n\t\tAssume that images of dimension SubpassData have multiple layers. Layered input attachments are accessed relative to BuiltInLayer.\n" + "\t\tThis option has no effect if multiview is also enabled.\n" + "\t[--msl-r32ui-linear-texture-align ]:\n\t\tThe required alignment of linear textures of format MTLPixelFormatR32Uint.\n" + "\t\tThis is used to align the row stride for atomic accesses to such images.\n" + "\t[--msl-r32ui-linear-texture-align-constant-id ]:\n\t\tThe function constant ID to use for the linear texture alignment.\n" + "\t\tOn MSL 1.2 or later, you can override the alignment by setting this function constant.\n" + "\t[--msl-texture-1d-as-2d]:\n\t\tEmit Image variables of dimension Dim1D as texture2d.\n" + "\t\tIn Metal, 1D textures do not support all features that 2D textures do. Use this option if your code relies on these features.\n" + "\t[--msl-ios-use-simdgroup-functions]:\n\t\tUse simd_*() functions for subgroup ops instead of quad_*().\n" + "\t\tRecent Apple GPUs support SIMD-groups larger than a quad. 
Use this option to take advantage of this support.\n" + "\t[--msl-emulate-subgroups]:\n\t\tAssume subgroups of size 1.\n" + "\t\tIntended for Vulkan Portability implementations where Metal support for SIMD-groups is insufficient for true subgroups.\n" + "\t[--msl-fixed-subgroup-size ]:\n\t\tAssign a constant to the SubgroupSize builtin.\n" + "\t\tIntended for Vulkan Portability implementations where VK_EXT_subgroup_size_control is not supported or disabled.\n" + "\t\tIf 0, assume variable subgroup size as actually exposed by Metal.\n" + "\t[--msl-force-sample-rate-shading]:\n\t\tForce fragment shaders to run per sample.\n" + "\t\tThis adds a [[sample_id]] parameter if none is already present.\n"); + // clang-format on +} + +static void print_help_common() +{ + // clang-format off + fprintf(stderr, "\nCommon options:\n" + "\t[--entry name]:\n\t\tUse a specific entry point. By default, the first entry point in the module is used.\n" + "\t[--stage ]:\n\t\tForces use of a certain shader stage.\n" + "\t\tCan disambiguate the entry point if more than one entry point exists with same name, but different stage.\n" + "\t[--emit-line-directives]:\n\t\tIf SPIR-V has OpLine directives, aim to emit those accurately in output code as well.\n" + "\t[--rename-entry-point ]:\n\t\tRenames an entry point from what is declared in SPIR-V to code output.\n" + "\t\tMostly relevant for HLSL or MSL.\n" + "\t[--rename-interface-variable ]:\n\t\tRename an interface variable based on location decoration.\n" + "\t[--force-zero-initialized-variables]:\n\t\tForces temporary variables to be initialized to zero.\n" + "\t\tCan be useful in environments where compilers do not allow potentially uninitialized variables.\n" + "\t\tThis usually comes up with Phi temporaries.\n" + "\t[--fixup-clipspace]:\n\t\tFixup Z clip-space at the end of a vertex shader. 
The behavior is backend-dependent.\n" + "\t\tGLSL: Rewrites [0, w] Z range (D3D/Metal/Vulkan) to GL-style [-w, w].\n" + "\t\tHLSL/MSL: Rewrites [-w, w] Z range (GL) to D3D/Metal/Vulkan-style [0, w].\n" + "\t[--flip-vert-y]:\n\t\tInverts gl_Position.y (or equivalent) at the end of a vertex shader. This is equivalent to using negative viewport height.\n" + ); + // clang-format on +} + +static void print_help_obscure() +{ + // clang-format off + fprintf(stderr, "\nObscure options:\n" + "\tThese options are not meant to be used on a regular basis. They have some occasional uses in the test suite.\n" + + "\t[--force-temporary]:\n\t\tAggressively emit temporary expressions instead of forwarding expressions. Very rarely used and under-tested.\n" + "\t[--revision]:\n\t\tPrints build timestamp and Git commit information (updated when cmake is configured).\n" + "\t[--iterations iter]:\n\t\tRecompiles the same shader over and over, benchmarking related.\n" + "\t[--disable-storage-image-qualifier-deduction]:\n\t\tIf storage images are received without any nonwritable or nonreadable information,\n""" + "\t\tdo not attempt to analyze usage, and always emit read/write state.\n" + "\t[--flatten-multidimensional-arrays]:\n\t\tDo not support multi-dimensional arrays and flatten them to one dimension.\n" + "\t[--cpp-interface-name ]:\n\t\tEmit a specific class name in C++ codegen.\n" + ); + // clang-format on +} + +static void print_help() +{ + print_version(); + + // clang-format off + fprintf(stderr, "Usage: spirv-cross <...>\n" + "\nBasic:\n" + "\t[SPIR-V file]\n" + "\t[--output ]: If not provided, prints output to stdout.\n" + "\t[--dump-resources]:\n\t\tPrints a basic reflection of the SPIR-V module along with other output.\n" + "\t[--help]:\n\t\tPrints this help message.\n" + ); + // clang-format on + + print_help_backend(); + print_help_common(); + print_help_glsl(); + print_help_msl(); + print_help_hlsl(); + print_help_obscure(); +} + +static bool remap_generic(Compiler 
&compiler, const SmallVector &resources, const Remap &remap) +{ + auto itr = + find_if(begin(resources), end(resources), [&remap](const Resource &res) { return res.name == remap.src_name; }); + + if (itr != end(resources)) + { + compiler.set_remapped_variable_state(itr->id, true); + compiler.set_name(itr->id, remap.dst_name); + compiler.set_subpass_input_remapped_components(itr->id, remap.components); + return true; + } + else + return false; +} + +static vector remap_pls(const SmallVector &pls_variables, const SmallVector &resources, + const SmallVector *secondary_resources) +{ + vector ret; + + for (auto &pls : pls_variables) + { + bool found = false; + for (auto &res : resources) + { + if (res.name == pls.name) + { + ret.push_back({ res.id, pls.format }); + found = true; + break; + } + } + + if (!found && secondary_resources) + { + for (auto &res : *secondary_resources) + { + if (res.name == pls.name) + { + ret.push_back({ res.id, pls.format }); + found = true; + break; + } + } + } + + if (!found) + fprintf(stderr, "Did not find stage input/output/target with name \"%s\".\n", pls.name.c_str()); + } + + return ret; +} + +static PlsFormat pls_format(const char *str) +{ + if (!strcmp(str, "r11f_g11f_b10f")) + return PlsR11FG11FB10F; + else if (!strcmp(str, "r32f")) + return PlsR32F; + else if (!strcmp(str, "rg16f")) + return PlsRG16F; + else if (!strcmp(str, "rg16")) + return PlsRG16; + else if (!strcmp(str, "rgb10_a2")) + return PlsRGB10A2; + else if (!strcmp(str, "rgba8")) + return PlsRGBA8; + else if (!strcmp(str, "rgba8i")) + return PlsRGBA8I; + else if (!strcmp(str, "rgba8ui")) + return PlsRGBA8UI; + else if (!strcmp(str, "rg16i")) + return PlsRG16I; + else if (!strcmp(str, "rgb10_a2ui")) + return PlsRGB10A2UI; + else if (!strcmp(str, "rg16ui")) + return PlsRG16UI; + else if (!strcmp(str, "r32ui")) + return PlsR32UI; + else + return PlsNone; +} + +static ExecutionModel stage_to_execution_model(const std::string &stage) +{ + if (stage == "vert") + return 
ExecutionModelVertex; + else if (stage == "frag") + return ExecutionModelFragment; + else if (stage == "comp") + return ExecutionModelGLCompute; + else if (stage == "tesc") + return ExecutionModelTessellationControl; + else if (stage == "tese") + return ExecutionModelTessellationEvaluation; + else if (stage == "geom") + return ExecutionModelGeometry; + else + SPIRV_CROSS_THROW("Invalid stage."); +} + +static HLSLBindingFlags hlsl_resource_type_to_flag(const std::string &arg) +{ + if (arg == "push") + return HLSL_BINDING_AUTO_PUSH_CONSTANT_BIT; + else if (arg == "cbv") + return HLSL_BINDING_AUTO_CBV_BIT; + else if (arg == "srv") + return HLSL_BINDING_AUTO_SRV_BIT; + else if (arg == "uav") + return HLSL_BINDING_AUTO_UAV_BIT; + else if (arg == "sampler") + return HLSL_BINDING_AUTO_SAMPLER_BIT; + else if (arg == "all") + return HLSL_BINDING_AUTO_ALL; + else + { + fprintf(stderr, "Invalid resource type for --hlsl-auto-binding: %s\n", arg.c_str()); + return 0; + } +} + +static string compile_iteration(const CLIArguments &args, std::vector spirv_file) +{ + Parser spirv_parser(move(spirv_file)); + spirv_parser.parse(); + + unique_ptr compiler; + bool combined_image_samplers = false; + bool build_dummy_sampler = false; + + if (args.cpp) + { + compiler.reset(new CompilerCPP(move(spirv_parser.get_parsed_ir()))); + if (args.cpp_interface_name) + static_cast(compiler.get())->set_interface_name(args.cpp_interface_name); + } + else if (args.msl) + { + compiler.reset(new CompilerMSL(move(spirv_parser.get_parsed_ir()))); + + auto *msl_comp = static_cast(compiler.get()); + auto msl_opts = msl_comp->get_msl_options(); + if (args.set_msl_version) + msl_opts.msl_version = args.msl_version; + msl_opts.capture_output_to_buffer = args.msl_capture_output_to_buffer; + msl_opts.swizzle_texture_samples = args.msl_swizzle_texture_samples; + msl_opts.invariant_float_math = args.msl_invariant_float_math; + if (args.msl_ios) + { + msl_opts.platform = CompilerMSL::Options::iOS; + 
msl_opts.emulate_cube_array = args.msl_emulate_cube_array; + } + msl_opts.use_framebuffer_fetch_subpasses = args.msl_framebuffer_fetch; + msl_opts.pad_fragment_output_components = args.msl_pad_fragment_output; + msl_opts.tess_domain_origin_lower_left = args.msl_domain_lower_left; + msl_opts.argument_buffers = args.msl_argument_buffers; + msl_opts.texture_buffer_native = args.msl_texture_buffer_native; + msl_opts.multiview = args.msl_multiview; + msl_opts.multiview_layered_rendering = args.msl_multiview_layered_rendering; + msl_opts.view_index_from_device_index = args.msl_view_index_from_device_index; + msl_opts.dispatch_base = args.msl_dispatch_base; + msl_opts.enable_decoration_binding = args.msl_decoration_binding; + msl_opts.force_active_argument_buffer_resources = args.msl_force_active_argument_buffer_resources; + msl_opts.force_native_arrays = args.msl_force_native_arrays; + msl_opts.enable_frag_depth_builtin = args.msl_enable_frag_depth_builtin; + msl_opts.enable_frag_stencil_ref_builtin = args.msl_enable_frag_stencil_ref_builtin; + msl_opts.enable_frag_output_mask = args.msl_enable_frag_output_mask; + msl_opts.enable_clip_distance_user_varying = args.msl_enable_clip_distance_user_varying; + msl_opts.multi_patch_workgroup = args.msl_multi_patch_workgroup; + msl_opts.vertex_for_tessellation = args.msl_vertex_for_tessellation; + msl_opts.additional_fixed_sample_mask = args.msl_additional_fixed_sample_mask; + msl_opts.arrayed_subpass_input = args.msl_arrayed_subpass_input; + msl_opts.r32ui_linear_texture_alignment = args.msl_r32ui_linear_texture_alignment; + msl_opts.r32ui_alignment_constant_id = args.msl_r32ui_alignment_constant_id; + msl_opts.texture_1D_as_2D = args.msl_texture_1d_as_2d; + msl_opts.ios_use_simdgroup_functions = args.msl_ios_use_simdgroup_functions; + msl_opts.emulate_subgroups = args.msl_emulate_subgroups; + msl_opts.fixed_subgroup_size = args.msl_fixed_subgroup_size; + msl_opts.force_sample_rate_shading = args.msl_force_sample_rate_shading; + 
msl_comp->set_msl_options(msl_opts); + for (auto &v : args.msl_discrete_descriptor_sets) + msl_comp->add_discrete_descriptor_set(v); + for (auto &v : args.msl_device_argument_buffers) + msl_comp->set_argument_buffer_device_address_space(v, true); + uint32_t i = 0; + for (auto &v : args.msl_dynamic_buffers) + msl_comp->add_dynamic_buffer(v.first, v.second, i++); + for (auto &v : args.msl_inline_uniform_blocks) + msl_comp->add_inline_uniform_block(v.first, v.second); + for (auto &v : args.msl_shader_inputs) + msl_comp->add_msl_shader_input(v); + } + else if (args.hlsl) + compiler.reset(new CompilerHLSL(move(spirv_parser.get_parsed_ir()))); + else + { + combined_image_samplers = !args.vulkan_semantics; + if (!args.vulkan_semantics || args.vulkan_glsl_disable_ext_samplerless_texture_functions) + build_dummy_sampler = true; + compiler.reset(new CompilerGLSL(move(spirv_parser.get_parsed_ir()))); + } + + if (!args.variable_type_remaps.empty()) + { + auto remap_cb = [&](const SPIRType &, const string &name, string &out) -> void { + for (const VariableTypeRemap &remap : args.variable_type_remaps) + if (name == remap.variable_name) + out = remap.new_variable_type; + }; + + compiler->set_variable_type_remap_callback(move(remap_cb)); + } + + for (auto &rename : args.entry_point_rename) + compiler->rename_entry_point(rename.old_name, rename.new_name, rename.execution_model); + + auto entry_points = compiler->get_entry_points_and_stages(); + auto entry_point = args.entry; + ExecutionModel model = ExecutionModelMax; + + if (!args.entry_stage.empty()) + { + model = stage_to_execution_model(args.entry_stage); + if (entry_point.empty()) + { + // Just use the first entry point with this stage. 
+ for (auto &e : entry_points) + { + if (e.execution_model == model) + { + entry_point = e.name; + break; + } + } + + if (entry_point.empty()) + { + fprintf(stderr, "Could not find an entry point with stage: %s\n", args.entry_stage.c_str()); + exit(EXIT_FAILURE); + } + } + else + { + // Make sure both stage and name exists. + bool exists = false; + for (auto &e : entry_points) + { + if (e.execution_model == model && e.name == entry_point) + { + exists = true; + break; + } + } + + if (!exists) + { + fprintf(stderr, "Could not find an entry point %s with stage: %s\n", entry_point.c_str(), + args.entry_stage.c_str()); + exit(EXIT_FAILURE); + } + } + } + else if (!entry_point.empty()) + { + // Make sure there is just one entry point with this name, or the stage + // is ambiguous. + uint32_t stage_count = 0; + for (auto &e : entry_points) + { + if (e.name == entry_point) + { + stage_count++; + model = e.execution_model; + } + } + + if (stage_count == 0) + { + fprintf(stderr, "There is no entry point with name: %s\n", entry_point.c_str()); + exit(EXIT_FAILURE); + } + else if (stage_count > 1) + { + fprintf(stderr, "There is more than one entry point with name: %s. 
Use --stage.\n", entry_point.c_str()); + exit(EXIT_FAILURE); + } + } + + if (!entry_point.empty()) + compiler->set_entry_point(entry_point, model); + + if (!args.set_version && !compiler->get_common_options().version) + { + fprintf(stderr, "Didn't specify GLSL version and SPIR-V did not specify language.\n"); + print_help(); + exit(EXIT_FAILURE); + } + + CompilerGLSL::Options opts = compiler->get_common_options(); + if (args.set_version) + opts.version = args.version; + if (args.set_es) + opts.es = args.es; + opts.force_temporary = args.force_temporary; + opts.separate_shader_objects = args.sso; + opts.flatten_multidimensional_arrays = args.flatten_multidimensional_arrays; + opts.enable_420pack_extension = args.use_420pack_extension; + opts.vulkan_semantics = args.vulkan_semantics; + opts.vertex.fixup_clipspace = args.fixup; + opts.vertex.flip_vert_y = args.yflip; + opts.vertex.support_nonzero_base_instance = args.support_nonzero_baseinstance; + opts.emit_push_constant_as_uniform_buffer = args.glsl_emit_push_constant_as_ubo; + opts.emit_uniform_buffer_as_plain_uniforms = args.glsl_emit_ubo_as_plain_uniforms; + opts.force_flattened_io_blocks = args.glsl_force_flattened_io_blocks; + opts.emit_line_directives = args.emit_line_directives; + opts.enable_storage_image_qualifier_deduction = args.enable_storage_image_qualifier_deduction; + opts.force_zero_initialized_variables = args.force_zero_initialized_variables; + compiler->set_common_options(opts); + + for (auto &fetch : args.glsl_ext_framebuffer_fetch) + compiler->remap_ext_framebuffer_fetch(fetch.first, fetch.second); + + // Set HLSL specific options. 
+ if (args.hlsl) + { + auto *hlsl = static_cast(compiler.get()); + auto hlsl_opts = hlsl->get_hlsl_options(); + if (args.set_shader_model) + { + if (args.shader_model < 30) + { + fprintf(stderr, "Shader model earlier than 30 (3.0) not supported.\n"); + exit(EXIT_FAILURE); + } + + hlsl_opts.shader_model = args.shader_model; + } + + if (args.hlsl_compat) + { + // Enable all compat options. + hlsl_opts.point_size_compat = true; + hlsl_opts.point_coord_compat = true; + } + + if (hlsl_opts.shader_model <= 30) + { + combined_image_samplers = true; + build_dummy_sampler = true; + } + + hlsl_opts.support_nonzero_base_vertex_base_instance = args.hlsl_support_nonzero_base; + hlsl_opts.force_storage_buffer_as_uav = args.hlsl_force_storage_buffer_as_uav; + hlsl_opts.nonwritable_uav_texture_as_srv = args.hlsl_nonwritable_uav_texture_as_srv; + hlsl_opts.enable_16bit_types = args.hlsl_enable_16bit_types; + hlsl_opts.flatten_matrix_vertex_input_semantics = args.hlsl_flatten_matrix_vertex_input_semantics; + hlsl->set_hlsl_options(hlsl_opts); + hlsl->set_resource_binding_flags(args.hlsl_binding_flags); + } + + if (build_dummy_sampler) + { + uint32_t sampler = compiler->build_dummy_sampler_for_combined_images(); + if (sampler != 0) + { + // Set some defaults to make validation happy. 
+ compiler->set_decoration(sampler, DecorationDescriptorSet, 0); + compiler->set_decoration(sampler, DecorationBinding, 0); + } + } + + ShaderResources res; + if (args.remove_unused) + { + auto active = compiler->get_active_interface_variables(); + res = compiler->get_shader_resources(active); + compiler->set_enabled_interface_variables(move(active)); + } + else + res = compiler->get_shader_resources(); + + if (args.flatten_ubo) + { + for (auto &ubo : res.uniform_buffers) + compiler->flatten_buffer_block(ubo.id); + for (auto &ubo : res.push_constant_buffers) + compiler->flatten_buffer_block(ubo.id); + } + + auto pls_inputs = remap_pls(args.pls_in, res.stage_inputs, &res.subpass_inputs); + auto pls_outputs = remap_pls(args.pls_out, res.stage_outputs, nullptr); + compiler->remap_pixel_local_storage(move(pls_inputs), move(pls_outputs)); + + for (auto &ext : args.extensions) + compiler->require_extension(ext); + + for (auto &remap : args.remaps) + { + if (remap_generic(*compiler, res.stage_inputs, remap)) + continue; + if (remap_generic(*compiler, res.stage_outputs, remap)) + continue; + if (remap_generic(*compiler, res.subpass_inputs, remap)) + continue; + } + + for (auto &rename : args.interface_variable_renames) + { + if (rename.storageClass == StorageClassInput) + spirv_cross_util::rename_interface_variable(*compiler, res.stage_inputs, rename.location, + rename.variable_name); + else if (rename.storageClass == StorageClassOutput) + spirv_cross_util::rename_interface_variable(*compiler, res.stage_outputs, rename.location, + rename.variable_name); + else + { + fprintf(stderr, "error at --rename-interface-variable ...\n"); + exit(EXIT_FAILURE); + } + } + + if (combined_image_samplers) + { + compiler->build_combined_image_samplers(); + if (args.combined_samplers_inherit_bindings) + spirv_cross_util::inherit_combined_sampler_bindings(*compiler); + + // Give the remapped combined samplers new names. 
+ for (auto &remap : compiler->get_combined_image_samplers()) + { + compiler->set_name(remap.combined_id, join("SPIRV_Cross_Combined", compiler->get_name(remap.image_id), + compiler->get_name(remap.sampler_id))); + } + } + + if (args.hlsl) + { + auto *hlsl_compiler = static_cast(compiler.get()); + uint32_t new_builtin = hlsl_compiler->remap_num_workgroups_builtin(); + if (new_builtin) + { + hlsl_compiler->set_decoration(new_builtin, DecorationDescriptorSet, 0); + hlsl_compiler->set_decoration(new_builtin, DecorationBinding, 0); + } + } + + if (args.hlsl) + { + for (auto &remap : args.hlsl_attr_remap) + static_cast(compiler.get())->add_vertex_attribute_remap(remap); + } + + auto ret = compiler->compile(); + + if (args.dump_resources) + { + print_resources(*compiler, res); + print_push_constant_resources(*compiler, res.push_constant_buffers); + print_spec_constants(*compiler); + print_capabilities_and_extensions(*compiler); + } + + return ret; +} + +static int main_inner(int argc, char *argv[]) +{ + CLIArguments args; + CLICallbacks cbs; + + cbs.add("--help", [](CLIParser &parser) { + print_help(); + parser.end(); + }); + cbs.add("--revision", [](CLIParser &parser) { + print_version(); + parser.end(); + }); + cbs.add("--output", [&args](CLIParser &parser) { args.output = parser.next_string(); }); + cbs.add("--es", [&args](CLIParser &) { + args.es = true; + args.set_es = true; + }); + cbs.add("--no-es", [&args](CLIParser &) { + args.es = false; + args.set_es = true; + }); + cbs.add("--version", [&args](CLIParser &parser) { + args.version = parser.next_uint(); + args.set_version = true; + }); + cbs.add("--dump-resources", [&args](CLIParser &) { args.dump_resources = true; }); + cbs.add("--force-temporary", [&args](CLIParser &) { args.force_temporary = true; }); + cbs.add("--flatten-ubo", [&args](CLIParser &) { args.flatten_ubo = true; }); + cbs.add("--fixup-clipspace", [&args](CLIParser &) { args.fixup = true; }); + cbs.add("--flip-vert-y", [&args](CLIParser &) { 
args.yflip = true; }); + cbs.add("--iterations", [&args](CLIParser &parser) { args.iterations = parser.next_uint(); }); + cbs.add("--cpp", [&args](CLIParser &) { args.cpp = true; }); + cbs.add("--reflect", [&args](CLIParser &parser) { args.reflect = parser.next_value_string("json"); }); + cbs.add("--cpp-interface-name", [&args](CLIParser &parser) { args.cpp_interface_name = parser.next_string(); }); + cbs.add("--metal", [&args](CLIParser &) { args.msl = true; }); // Legacy compatibility + cbs.add("--glsl-emit-push-constant-as-ubo", [&args](CLIParser &) { args.glsl_emit_push_constant_as_ubo = true; }); + cbs.add("--glsl-emit-ubo-as-plain-uniforms", [&args](CLIParser &) { args.glsl_emit_ubo_as_plain_uniforms = true; }); + cbs.add("--glsl-force-flattened-io-blocks", [&args](CLIParser &) { args.glsl_force_flattened_io_blocks = true; }); + cbs.add("--glsl-remap-ext-framebuffer-fetch", [&args](CLIParser &parser) { + uint32_t input_index = parser.next_uint(); + uint32_t color_attachment = parser.next_uint(); + args.glsl_ext_framebuffer_fetch.push_back({ input_index, color_attachment }); + }); + cbs.add("--vulkan-glsl-disable-ext-samplerless-texture-functions", + [&args](CLIParser &) { args.vulkan_glsl_disable_ext_samplerless_texture_functions = true; }); + cbs.add("--disable-storage-image-qualifier-deduction", + [&args](CLIParser &) { args.enable_storage_image_qualifier_deduction = false; }); + cbs.add("--force-zero-initialized-variables", + [&args](CLIParser &) { args.force_zero_initialized_variables = true; }); + cbs.add("--msl", [&args](CLIParser &) { args.msl = true; }); + cbs.add("--hlsl", [&args](CLIParser &) { args.hlsl = true; }); + cbs.add("--hlsl-enable-compat", [&args](CLIParser &) { args.hlsl_compat = true; }); + cbs.add("--hlsl-support-nonzero-basevertex-baseinstance", + [&args](CLIParser &) { args.hlsl_support_nonzero_base = true; }); + cbs.add("--hlsl-auto-binding", [&args](CLIParser &parser) { + args.hlsl_binding_flags |= 
hlsl_resource_type_to_flag(parser.next_string()); + }); + cbs.add("--hlsl-force-storage-buffer-as-uav", + [&args](CLIParser &) { args.hlsl_force_storage_buffer_as_uav = true; }); + cbs.add("--hlsl-nonwritable-uav-texture-as-srv", + [&args](CLIParser &) { args.hlsl_nonwritable_uav_texture_as_srv = true; }); + cbs.add("--hlsl-enable-16bit-types", [&args](CLIParser &) { args.hlsl_enable_16bit_types = true; }); + cbs.add("--hlsl-flatten-matrix-vertex-input-semantics", + [&args](CLIParser &) { args.hlsl_flatten_matrix_vertex_input_semantics = true; }); + cbs.add("--vulkan-semantics", [&args](CLIParser &) { args.vulkan_semantics = true; }); + cbs.add("-V", [&args](CLIParser &) { args.vulkan_semantics = true; }); + cbs.add("--flatten-multidimensional-arrays", [&args](CLIParser &) { args.flatten_multidimensional_arrays = true; }); + cbs.add("--no-420pack-extension", [&args](CLIParser &) { args.use_420pack_extension = false; }); + cbs.add("--msl-capture-output", [&args](CLIParser &) { args.msl_capture_output_to_buffer = true; }); + cbs.add("--msl-swizzle-texture-samples", [&args](CLIParser &) { args.msl_swizzle_texture_samples = true; }); + cbs.add("--msl-ios", [&args](CLIParser &) { args.msl_ios = true; }); + cbs.add("--msl-pad-fragment-output", [&args](CLIParser &) { args.msl_pad_fragment_output = true; }); + cbs.add("--msl-domain-lower-left", [&args](CLIParser &) { args.msl_domain_lower_left = true; }); + cbs.add("--msl-argument-buffers", [&args](CLIParser &) { args.msl_argument_buffers = true; }); + cbs.add("--msl-discrete-descriptor-set", + [&args](CLIParser &parser) { args.msl_discrete_descriptor_sets.push_back(parser.next_uint()); }); + cbs.add("--msl-device-argument-buffer", + [&args](CLIParser &parser) { args.msl_device_argument_buffers.push_back(parser.next_uint()); }); + cbs.add("--msl-texture-buffer-native", [&args](CLIParser &) { args.msl_texture_buffer_native = true; }); + cbs.add("--msl-framebuffer-fetch", [&args](CLIParser &) { args.msl_framebuffer_fetch = 
true; }); + cbs.add("--msl-invariant-float-math", [&args](CLIParser &) { args.msl_invariant_float_math = true; }); + cbs.add("--msl-emulate-cube-array", [&args](CLIParser &) { args.msl_emulate_cube_array = true; }); + cbs.add("--msl-multiview", [&args](CLIParser &) { args.msl_multiview = true; }); + cbs.add("--msl-multiview-no-layered-rendering", + [&args](CLIParser &) { args.msl_multiview_layered_rendering = false; }); + cbs.add("--msl-view-index-from-device-index", + [&args](CLIParser &) { args.msl_view_index_from_device_index = true; }); + cbs.add("--msl-dispatch-base", [&args](CLIParser &) { args.msl_dispatch_base = true; }); + cbs.add("--msl-dynamic-buffer", [&args](CLIParser &parser) { + args.msl_argument_buffers = true; + // Make sure next_uint() is called in-order. + uint32_t desc_set = parser.next_uint(); + uint32_t binding = parser.next_uint(); + args.msl_dynamic_buffers.push_back(make_pair(desc_set, binding)); + }); + cbs.add("--msl-decoration-binding", [&args](CLIParser &) { args.msl_decoration_binding = true; }); + cbs.add("--msl-force-active-argument-buffer-resources", + [&args](CLIParser &) { args.msl_force_active_argument_buffer_resources = true; }); + cbs.add("--msl-inline-uniform-block", [&args](CLIParser &parser) { + args.msl_argument_buffers = true; + // Make sure next_uint() is called in-order. 
+ uint32_t desc_set = parser.next_uint(); + uint32_t binding = parser.next_uint(); + args.msl_inline_uniform_blocks.push_back(make_pair(desc_set, binding)); + }); + cbs.add("--msl-force-native-arrays", [&args](CLIParser &) { args.msl_force_native_arrays = true; }); + cbs.add("--msl-disable-frag-depth-builtin", [&args](CLIParser &) { args.msl_enable_frag_depth_builtin = false; }); + cbs.add("--msl-disable-frag-stencil-ref-builtin", + [&args](CLIParser &) { args.msl_enable_frag_stencil_ref_builtin = false; }); + cbs.add("--msl-enable-frag-output-mask", + [&args](CLIParser &parser) { args.msl_enable_frag_output_mask = parser.next_hex_uint(); }); + cbs.add("--msl-no-clip-distance-user-varying", + [&args](CLIParser &) { args.msl_enable_clip_distance_user_varying = false; }); + cbs.add("--msl-shader-input", [&args](CLIParser &parser) { + MSLShaderInput input; + // Make sure next_uint() is called in-order. + input.location = parser.next_uint(); + const char *format = parser.next_value_string("other"); + if (strcmp(format, "any32") == 0) + input.format = MSL_SHADER_INPUT_FORMAT_ANY32; + else if (strcmp(format, "any16") == 0) + input.format = MSL_SHADER_INPUT_FORMAT_ANY16; + else if (strcmp(format, "u16") == 0) + input.format = MSL_SHADER_INPUT_FORMAT_UINT16; + else if (strcmp(format, "u8") == 0) + input.format = MSL_SHADER_INPUT_FORMAT_UINT8; + else + input.format = MSL_SHADER_INPUT_FORMAT_OTHER; + input.vecsize = parser.next_uint(); + args.msl_shader_inputs.push_back(input); + }); + cbs.add("--msl-multi-patch-workgroup", [&args](CLIParser &) { args.msl_multi_patch_workgroup = true; }); + cbs.add("--msl-vertex-for-tessellation", [&args](CLIParser &) { args.msl_vertex_for_tessellation = true; }); + cbs.add("--msl-additional-fixed-sample-mask", + [&args](CLIParser &parser) { args.msl_additional_fixed_sample_mask = parser.next_hex_uint(); }); + cbs.add("--msl-arrayed-subpass-input", [&args](CLIParser &) { args.msl_arrayed_subpass_input = true; }); + 
cbs.add("--msl-r32ui-linear-texture-align", + [&args](CLIParser &parser) { args.msl_r32ui_linear_texture_alignment = parser.next_uint(); }); + cbs.add("--msl-r32ui-linear-texture-align-constant-id", + [&args](CLIParser &parser) { args.msl_r32ui_alignment_constant_id = parser.next_uint(); }); + cbs.add("--msl-texture-1d-as-2d", [&args](CLIParser &) { args.msl_texture_1d_as_2d = true; }); + cbs.add("--msl-ios-use-simdgroup-functions", [&args](CLIParser &) { args.msl_ios_use_simdgroup_functions = true; }); + cbs.add("--msl-emulate-subgroups", [&args](CLIParser &) { args.msl_emulate_subgroups = true; }); + cbs.add("--msl-fixed-subgroup-size", + [&args](CLIParser &parser) { args.msl_fixed_subgroup_size = parser.next_uint(); }); + cbs.add("--msl-force-sample-rate-shading", [&args](CLIParser &) { args.msl_force_sample_rate_shading = true; }); + cbs.add("--extension", [&args](CLIParser &parser) { args.extensions.push_back(parser.next_string()); }); + cbs.add("--rename-entry-point", [&args](CLIParser &parser) { + auto old_name = parser.next_string(); + auto new_name = parser.next_string(); + auto model = stage_to_execution_model(parser.next_string()); + args.entry_point_rename.push_back({ old_name, new_name, move(model) }); + }); + cbs.add("--entry", [&args](CLIParser &parser) { args.entry = parser.next_string(); }); + cbs.add("--stage", [&args](CLIParser &parser) { args.entry_stage = parser.next_string(); }); + cbs.add("--separate-shader-objects", [&args](CLIParser &) { args.sso = true; }); + cbs.add("--set-hlsl-vertex-input-semantic", [&args](CLIParser &parser) { + HLSLVertexAttributeRemap remap; + remap.location = parser.next_uint(); + remap.semantic = parser.next_string(); + args.hlsl_attr_remap.push_back(move(remap)); + }); + + cbs.add("--remap", [&args](CLIParser &parser) { + string src = parser.next_string(); + string dst = parser.next_string(); + uint32_t components = parser.next_uint(); + args.remaps.push_back({ move(src), move(dst), components }); + }); + + 
cbs.add("--remap-variable-type", [&args](CLIParser &parser) { + string var_name = parser.next_string(); + string new_type = parser.next_string(); + args.variable_type_remaps.push_back({ move(var_name), move(new_type) }); + }); + + cbs.add("--rename-interface-variable", [&args](CLIParser &parser) { + StorageClass cls = StorageClassMax; + string clsStr = parser.next_string(); + if (clsStr == "in") + cls = StorageClassInput; + else if (clsStr == "out") + cls = StorageClassOutput; + + uint32_t loc = parser.next_uint(); + string var_name = parser.next_string(); + args.interface_variable_renames.push_back({ cls, loc, move(var_name) }); + }); + + cbs.add("--pls-in", [&args](CLIParser &parser) { + auto fmt = pls_format(parser.next_string()); + auto name = parser.next_string(); + args.pls_in.push_back({ move(fmt), move(name) }); + }); + cbs.add("--pls-out", [&args](CLIParser &parser) { + auto fmt = pls_format(parser.next_string()); + auto name = parser.next_string(); + args.pls_out.push_back({ move(fmt), move(name) }); + }); + cbs.add("--shader-model", [&args](CLIParser &parser) { + args.shader_model = parser.next_uint(); + args.set_shader_model = true; + }); + cbs.add("--msl-version", [&args](CLIParser &parser) { + args.msl_version = parser.next_uint(); + args.set_msl_version = true; + }); + + cbs.add("--remove-unused-variables", [&args](CLIParser &) { args.remove_unused = true; }); + cbs.add("--combined-samplers-inherit-bindings", + [&args](CLIParser &) { args.combined_samplers_inherit_bindings = true; }); + + cbs.add("--no-support-nonzero-baseinstance", [&](CLIParser &) { args.support_nonzero_baseinstance = false; }); + cbs.add("--emit-line-directives", [&args](CLIParser &) { args.emit_line_directives = true; }); + + cbs.default_handler = [&args](const char *value) { args.input = value; }; + cbs.error_handler = [] { print_help(); }; + + CLIParser parser{ move(cbs), argc - 1, argv + 1 }; + if (!parser.parse()) + return EXIT_FAILURE; + else if (parser.ended_state) + return 
EXIT_SUCCESS; + + if (!args.input) + { + fprintf(stderr, "Didn't specify input file.\n"); + print_help(); + return EXIT_FAILURE; + } + + auto spirv_file = read_spirv_file(args.input); + if (spirv_file.empty()) + return EXIT_FAILURE; + + // Special case reflection because it has little to do with the path followed by code-outputting compilers + if (!args.reflect.empty()) + { + Parser spirv_parser(move(spirv_file)); + spirv_parser.parse(); + + CompilerReflection compiler(move(spirv_parser.get_parsed_ir())); + compiler.set_format(args.reflect); + auto json = compiler.compile(); + if (args.output) + write_string_to_file(args.output, json.c_str()); + else + printf("%s", json.c_str()); + return EXIT_SUCCESS; + } + + string compiled_output; + + if (args.iterations == 1) + compiled_output = compile_iteration(args, move(spirv_file)); + else + { + for (unsigned i = 0; i < args.iterations; i++) + compiled_output = compile_iteration(args, spirv_file); + } + + if (args.output) + write_string_to_file(args.output, compiled_output.c_str()); + else + printf("%s", compiled_output.c_str()); + + return EXIT_SUCCESS; +} + +// Process entry point; it only forwards argc/argv to main_inner() and returns its result. +// With SPIRV_CROSS_EXCEPTIONS_TO_ASSERTIONS defined, main_inner() is called directly with no handler. +// Otherwise a std::exception escaping main_inner() is printed to stderr and mapped to EXIT_FAILURE. +int main(int argc, char *argv[]) +{ +#ifdef SPIRV_CROSS_EXCEPTIONS_TO_ASSERTIONS + return main_inner(argc, argv); +#else + // Make sure we catch the exception or it just disappears into the aether on Windows. 
+ try + { + return main_inner(argc, argv); + } + catch (const std::exception &e) + { + fprintf(stderr, "SPIRV-Cross threw an exception: %s\n", e.what()); + return EXIT_FAILURE; + } +#endif +} diff --git a/dep/spirv-cross/spirv-cross.vcxproj b/dep/spirv-cross/spirv-cross.vcxproj new file mode 100644 index 000000000..193289784 --- /dev/null +++ b/dep/spirv-cross/spirv-cross.vcxproj @@ -0,0 +1,578 @@ + + + + + DebugFast + ARM64 + + + DebugFast + Win32 + + + DebugFast + x64 + + + Debug + ARM64 + + + Debug + Win32 + + + Debug + x64 + + + ReleaseLTCG + ARM64 + + + ReleaseLTCG + Win32 + + + ReleaseLTCG + x64 + + + Release + ARM64 + + + Release + Win32 + + + Release + x64 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + {9D2998E4-8CDB-47F4-B43C-1537ACECF9F2} + Win32Proj + spirv-cross + 10.0 + + + + StaticLibrary + true + v142 + NotSet + + + StaticLibrary + true + v142 + NotSet + + + StaticLibrary + true + v142 + NotSet + + + StaticLibrary + true + v142 + NotSet + + + StaticLibrary + true + v142 + NotSet + + + StaticLibrary + true + v142 + NotSet + + + StaticLibrary + false + v142 + true + NotSet + false + + + StaticLibrary + false + v142 + true + NotSet + false + + + StaticLibrary + false + v142 + true + NotSet + false + + + StaticLibrary + false + v142 + true + NotSet + false + + + StaticLibrary + false + v142 + true + NotSet + false + + + StaticLibrary + false + v142 + true + NotSet + false + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + true + $(SolutionDir)build\$(ProjectName)-$(Platform)-$(Configuration)\ + $(SolutionDir)build\$(ProjectName)-$(Platform)-$(Configuration)\ + $(ProjectName)-$(Platform)-$(Configuration) + + + $(SolutionDir)build\$(ProjectName)-$(Platform)-$(Configuration)\ + $(ProjectName)-$(Platform)-$(Configuration) + true + $(SolutionDir)build\$(ProjectName)-$(Platform)-$(Configuration)\ + + + $(SolutionDir)build\$(ProjectName)-$(Platform)-$(Configuration)\ + $(ProjectName)-$(Platform)-$(Configuration) + 
true + $(SolutionDir)build\$(ProjectName)-$(Platform)-$(Configuration)\ + + + true + $(SolutionDir)build\$(ProjectName)-$(Platform)-$(Configuration)\ + $(SolutionDir)build\$(ProjectName)-$(Platform)-$(Configuration)\ + $(ProjectName)-$(Platform)-$(Configuration) + + + $(SolutionDir)build\$(ProjectName)-$(Platform)-$(Configuration)\ + $(ProjectName)-$(Platform)-$(Configuration) + true + $(SolutionDir)build\$(ProjectName)-$(Platform)-$(Configuration)\ + + + $(SolutionDir)build\$(ProjectName)-$(Platform)-$(Configuration)\ + $(ProjectName)-$(Platform)-$(Configuration) + true + $(SolutionDir)build\$(ProjectName)-$(Platform)-$(Configuration)\ + + + false + $(SolutionDir)build\$(ProjectName)-$(Platform)-$(Configuration)\ + $(SolutionDir)build\$(ProjectName)-$(Platform)-$(Configuration)\ + $(ProjectName)-$(Platform)-$(Configuration) + + + false + $(SolutionDir)build\$(ProjectName)-$(Platform)-$(Configuration)\ + $(SolutionDir)build\$(ProjectName)-$(Platform)-$(Configuration)\ + $(ProjectName)-$(Platform)-$(Configuration) + + + $(SolutionDir)build\$(ProjectName)-$(Platform)-$(Configuration)\ + $(ProjectName)-$(Platform)-$(Configuration) + false + $(SolutionDir)build\$(ProjectName)-$(Platform)-$(Configuration)\ + + + $(SolutionDir)build\$(ProjectName)-$(Platform)-$(Configuration)\ + $(ProjectName)-$(Platform)-$(Configuration) + false + $(SolutionDir)build\$(ProjectName)-$(Platform)-$(Configuration)\ + + + $(SolutionDir)build\$(ProjectName)-$(Platform)-$(Configuration)\ + $(ProjectName)-$(Platform)-$(Configuration) + false + $(SolutionDir)build\$(ProjectName)-$(Platform)-$(Configuration)\ + + + $(SolutionDir)build\$(ProjectName)-$(Platform)-$(Configuration)\ + $(ProjectName)-$(Platform)-$(Configuration) + false + $(SolutionDir)build\$(ProjectName)-$(Platform)-$(Configuration)\ + + + + + + TurnOffAllWarnings + Disabled + _CRT_NONSTDC_NO_DEPRECATE;_CRT_SECURE_NO_WARNINGS;WIN32;_DEBUG;_CONSOLE;_LIB;%(PreprocessorDefinitions) + true + ProgramDatabase + 
$(ProjectDir);%(AdditionalIncludeDirectories) + false + stdcpp14 + true + /Zo /utf-8 %(AdditionalOptions) + + + Windows + true + SDL2.lib;SDL2main.lib;%(AdditionalDependencies) + $(SolutionDir)dep\lib32-debug;%(AdditionalLibraryDirectories) + + + + + + + + TurnOffAllWarnings + Disabled + _CRT_NONSTDC_NO_DEPRECATE;_CRT_SECURE_NO_WARNINGS;WIN32;_DEBUG;_CONSOLE;_LIB;%(PreprocessorDefinitions) + true + ProgramDatabase + $(ProjectDir);%(AdditionalIncludeDirectories) + false + stdcpp14 + true + /Zo /utf-8 %(AdditionalOptions) + + + Windows + true + SDL2.lib;SDL2main.lib;%(AdditionalDependencies) + $(SolutionDir)dep\lib64-debug;%(AdditionalLibraryDirectories) + + + + + + + + TurnOffAllWarnings + Disabled + _CRT_NONSTDC_NO_DEPRECATE;_CRT_SECURE_NO_WARNINGS;WIN32;_DEBUG;_CONSOLE;_LIB;%(PreprocessorDefinitions) + true + ProgramDatabase + $(ProjectDir);%(AdditionalIncludeDirectories) + false + stdcpp14 + true + /Zo /utf-8 %(AdditionalOptions) + + + Windows + true + SDL2.lib;SDL2main.lib;%(AdditionalDependencies) + $(SolutionDir)dep\lib64-debug;%(AdditionalLibraryDirectories) + + + + + + + + TurnOffAllWarnings + Disabled + _CRT_NONSTDC_NO_DEPRECATE;_ITERATOR_DEBUG_LEVEL=1;_CRT_SECURE_NO_WARNINGS;WIN32;_DEBUGFAST;_DEBUG;_CONSOLE;_LIB;%(PreprocessorDefinitions) + true + ProgramDatabase + $(ProjectDir);%(AdditionalIncludeDirectories) + Default + false + stdcpp14 + false + true + OnlyExplicitInline + /Zo /utf-8 %(AdditionalOptions) + + + Windows + true + SDL2.lib;SDL2main.lib;%(AdditionalDependencies) + $(SolutionDir)dep\lib32-debug;%(AdditionalLibraryDirectories) + + + + + + + + TurnOffAllWarnings + Disabled + _CRT_NONSTDC_NO_DEPRECATE;_ITERATOR_DEBUG_LEVEL=1;_CRT_SECURE_NO_WARNINGS;WIN32;_DEBUGFAST;_DEBUG;_CONSOLE;_LIB;%(PreprocessorDefinitions) + true + ProgramDatabase + $(ProjectDir);%(AdditionalIncludeDirectories) + Default + false + stdcpp14 + false + true + OnlyExplicitInline + /Zo /utf-8 %(AdditionalOptions) + + + Windows + true + 
SDL2.lib;SDL2main.lib;%(AdditionalDependencies) + $(SolutionDir)dep\lib64-debug;%(AdditionalLibraryDirectories) + + + + + + + + TurnOffAllWarnings + Disabled + _CRT_NONSTDC_NO_DEPRECATE;_ITERATOR_DEBUG_LEVEL=1;_CRT_SECURE_NO_WARNINGS;WIN32;_DEBUGFAST;_DEBUG;_CONSOLE;_LIB;%(PreprocessorDefinitions) + true + ProgramDatabase + $(ProjectDir);%(AdditionalIncludeDirectories) + Default + false + stdcpp14 + false + true + OnlyExplicitInline + /Zo /utf-8 %(AdditionalOptions) + + + Windows + true + SDL2.lib;SDL2main.lib;%(AdditionalDependencies) + $(SolutionDir)dep\lib64-debug;%(AdditionalLibraryDirectories) + + + + + + TurnOffAllWarnings + + + MaxSpeed + true + _CRT_NONSTDC_NO_DEPRECATE;_CRT_SECURE_NO_WARNINGS;WIN32;NDEBUG;_CONSOLE;_LIB;%(PreprocessorDefinitions) + $(ProjectDir);%(AdditionalIncludeDirectories) + false + stdcpp14 + true + /Zo /utf-8 %(AdditionalOptions) + + + Windows + true + true + true + SDL2.lib;SDL2main.lib;%(AdditionalDependencies) + $(SolutionDir)dep\lib32;%(AdditionalLibraryDirectories) + + + + + + TurnOffAllWarnings + + + MaxSpeed + true + _CRT_NONSTDC_NO_DEPRECATE;_CRT_SECURE_NO_WARNINGS;WIN32;NDEBUG;_CONSOLE;_LIB;%(PreprocessorDefinitions) + $(ProjectDir);%(AdditionalIncludeDirectories) + true + stdcpp14 + true + true + /Zo /utf-8 %(AdditionalOptions) + + + Windows + true + true + true + SDL2.lib;SDL2main.lib;%(AdditionalDependencies) + $(SolutionDir)dep\lib32;%(AdditionalLibraryDirectories) + + + + + + TurnOffAllWarnings + + + MaxSpeed + true + _CRT_NONSTDC_NO_DEPRECATE;_CRT_SECURE_NO_WARNINGS;WIN32;NDEBUG;_CONSOLE;_LIB;%(PreprocessorDefinitions) + $(ProjectDir);%(AdditionalIncludeDirectories) + false + stdcpp14 + true + /Zo /utf-8 %(AdditionalOptions) + + + Windows + true + true + true + SDL2.lib;SDL2main.lib;%(AdditionalDependencies) + $(SolutionDir)dep\lib64;%(AdditionalLibraryDirectories) + + + + + + TurnOffAllWarnings + + + MaxSpeed + true + 
_CRT_NONSTDC_NO_DEPRECATE;_CRT_SECURE_NO_WARNINGS;WIN32;NDEBUG;_CONSOLE;_LIB;%(PreprocessorDefinitions) + $(ProjectDir);%(AdditionalIncludeDirectories) + false + stdcpp14 + true + /Zo /utf-8 %(AdditionalOptions) + + + Windows + true + true + true + SDL2.lib;SDL2main.lib;%(AdditionalDependencies) + $(SolutionDir)dep\lib64;%(AdditionalLibraryDirectories) + + + + + + TurnOffAllWarnings + + + MaxSpeed + true + _CRT_NONSTDC_NO_DEPRECATE;_CRT_SECURE_NO_WARNINGS;WIN32;NDEBUG;_CONSOLE;_LIB;%(PreprocessorDefinitions) + $(ProjectDir);%(AdditionalIncludeDirectories) + true + stdcpp14 + true + true + /Zo /utf-8 %(AdditionalOptions) + + + Windows + true + true + true + SDL2.lib;SDL2main.lib;%(AdditionalDependencies) + $(SolutionDir)dep\lib64;%(AdditionalLibraryDirectories) + + + + + + TurnOffAllWarnings + + + MaxSpeed + true + _CRT_NONSTDC_NO_DEPRECATE;_CRT_SECURE_NO_WARNINGS;WIN32;NDEBUG;_CONSOLE;_LIB;%(PreprocessorDefinitions) + $(ProjectDir);%(AdditionalIncludeDirectories) + true + stdcpp14 + true + true + /Zo /utf-8 %(AdditionalOptions) + + + Windows + true + true + true + SDL2.lib;SDL2main.lib;%(AdditionalDependencies) + $(SolutionDir)dep\lib64;%(AdditionalLibraryDirectories) + + + + + + + \ No newline at end of file diff --git a/dep/spirv-cross/spirv-cross.vcxproj.filters b/dep/spirv-cross/spirv-cross.vcxproj.filters new file mode 100644 index 000000000..ba5a866dd --- /dev/null +++ b/dep/spirv-cross/spirv-cross.vcxproj.filters @@ -0,0 +1,28 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/dep/spirv-cross/spirv.h b/dep/spirv-cross/spirv.h new file mode 100644 index 000000000..dd9850dbb --- /dev/null +++ b/dep/spirv-cross/spirv.h @@ -0,0 +1,2104 @@ +/* +** Copyright (c) 2014-2020 The Khronos Group Inc. 
+** +** Permission is hereby granted, free of charge, to any person obtaining a copy +** of this software and/or associated documentation files (the "Materials"), +** to deal in the Materials without restriction, including without limitation +** the rights to use, copy, modify, merge, publish, distribute, sublicense, +** and/or sell copies of the Materials, and to permit persons to whom the +** Materials are furnished to do so, subject to the following conditions: +** +** The above copyright notice and this permission notice shall be included in +** all copies or substantial portions of the Materials. +** +** MODIFICATIONS TO THIS FILE MAY MEAN IT NO LONGER ACCURATELY REFLECTS KHRONOS +** STANDARDS. THE UNMODIFIED, NORMATIVE VERSIONS OF KHRONOS SPECIFICATIONS AND +** HEADER INFORMATION ARE LOCATED AT https://www.khronos.org/registry/ +** +** THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS +** OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +** FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +** THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +** LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +** FROM,OUT OF OR IN CONNECTION WITH THE MATERIALS OR THE USE OR OTHER DEALINGS +** IN THE MATERIALS. +*/ + +/* +** This header is automatically generated by the same tool that creates +** the Binary Section of the SPIR-V specification. 
+*/ + +/* +** Enumeration tokens for SPIR-V, in various styles: +** C, C++, C++11, JSON, Lua, Python, C#, D +** +** - C will have tokens with a "Spv" prefix, e.g.: SpvSourceLanguageGLSL +** - C++ will have tokens in the "spv" name space, e.g.: spv::SourceLanguageGLSL +** - C++11 will use enum classes in the spv namespace, e.g.: spv::SourceLanguage::GLSL +** - Lua will use tables, e.g.: spv.SourceLanguage.GLSL +** - Python will use dictionaries, e.g.: spv['SourceLanguage']['GLSL'] +** - C# will use enum classes in the Specification class located in the "Spv" namespace, +** e.g.: Spv.Specification.SourceLanguage.GLSL +** - D will have tokens under the "spv" module, e.g: spv.SourceLanguage.GLSL +** +** Some tokens act like mask values, which can be OR'd together, +** while others are mutually exclusive. The mask-like ones have +** "Mask" in their name, and a parallel enum that has the shift +** amount (1 << x) for each corresponding enumerant. +*/ + +#ifndef spirv_H +#define spirv_H + +typedef unsigned int SpvId; + +#define SPV_VERSION 0x10500 +#define SPV_REVISION 3 + +static const unsigned int SpvMagicNumber = 0x07230203; +static const unsigned int SpvVersion = 0x00010500; +static const unsigned int SpvRevision = 3; +static const unsigned int SpvOpCodeMask = 0xffff; +static const unsigned int SpvWordCountShift = 16; + +typedef enum SpvSourceLanguage_ { + SpvSourceLanguageUnknown = 0, + SpvSourceLanguageESSL = 1, + SpvSourceLanguageGLSL = 2, + SpvSourceLanguageOpenCL_C = 3, + SpvSourceLanguageOpenCL_CPP = 4, + SpvSourceLanguageHLSL = 5, + SpvSourceLanguageMax = 0x7fffffff, +} SpvSourceLanguage; + +typedef enum SpvExecutionModel_ { + SpvExecutionModelVertex = 0, + SpvExecutionModelTessellationControl = 1, + SpvExecutionModelTessellationEvaluation = 2, + SpvExecutionModelGeometry = 3, + SpvExecutionModelFragment = 4, + SpvExecutionModelGLCompute = 5, + SpvExecutionModelKernel = 6, + SpvExecutionModelTaskNV = 5267, + SpvExecutionModelMeshNV = 5268, + 
SpvExecutionModelRayGenerationKHR = 5313, + SpvExecutionModelRayGenerationNV = 5313, + SpvExecutionModelIntersectionKHR = 5314, + SpvExecutionModelIntersectionNV = 5314, + SpvExecutionModelAnyHitKHR = 5315, + SpvExecutionModelAnyHitNV = 5315, + SpvExecutionModelClosestHitKHR = 5316, + SpvExecutionModelClosestHitNV = 5316, + SpvExecutionModelMissKHR = 5317, + SpvExecutionModelMissNV = 5317, + SpvExecutionModelCallableKHR = 5318, + SpvExecutionModelCallableNV = 5318, + SpvExecutionModelMax = 0x7fffffff, +} SpvExecutionModel; + +typedef enum SpvAddressingModel_ { + SpvAddressingModelLogical = 0, + SpvAddressingModelPhysical32 = 1, + SpvAddressingModelPhysical64 = 2, + SpvAddressingModelPhysicalStorageBuffer64 = 5348, + SpvAddressingModelPhysicalStorageBuffer64EXT = 5348, + SpvAddressingModelMax = 0x7fffffff, +} SpvAddressingModel; + +typedef enum SpvMemoryModel_ { + SpvMemoryModelSimple = 0, + SpvMemoryModelGLSL450 = 1, + SpvMemoryModelOpenCL = 2, + SpvMemoryModelVulkan = 3, + SpvMemoryModelVulkanKHR = 3, + SpvMemoryModelMax = 0x7fffffff, +} SpvMemoryModel; + +typedef enum SpvExecutionMode_ { + SpvExecutionModeInvocations = 0, + SpvExecutionModeSpacingEqual = 1, + SpvExecutionModeSpacingFractionalEven = 2, + SpvExecutionModeSpacingFractionalOdd = 3, + SpvExecutionModeVertexOrderCw = 4, + SpvExecutionModeVertexOrderCcw = 5, + SpvExecutionModePixelCenterInteger = 6, + SpvExecutionModeOriginUpperLeft = 7, + SpvExecutionModeOriginLowerLeft = 8, + SpvExecutionModeEarlyFragmentTests = 9, + SpvExecutionModePointMode = 10, + SpvExecutionModeXfb = 11, + SpvExecutionModeDepthReplacing = 12, + SpvExecutionModeDepthGreater = 14, + SpvExecutionModeDepthLess = 15, + SpvExecutionModeDepthUnchanged = 16, + SpvExecutionModeLocalSize = 17, + SpvExecutionModeLocalSizeHint = 18, + SpvExecutionModeInputPoints = 19, + SpvExecutionModeInputLines = 20, + SpvExecutionModeInputLinesAdjacency = 21, + SpvExecutionModeTriangles = 22, + SpvExecutionModeInputTrianglesAdjacency = 23, + 
SpvExecutionModeQuads = 24, + SpvExecutionModeIsolines = 25, + SpvExecutionModeOutputVertices = 26, + SpvExecutionModeOutputPoints = 27, + SpvExecutionModeOutputLineStrip = 28, + SpvExecutionModeOutputTriangleStrip = 29, + SpvExecutionModeVecTypeHint = 30, + SpvExecutionModeContractionOff = 31, + SpvExecutionModeInitializer = 33, + SpvExecutionModeFinalizer = 34, + SpvExecutionModeSubgroupSize = 35, + SpvExecutionModeSubgroupsPerWorkgroup = 36, + SpvExecutionModeSubgroupsPerWorkgroupId = 37, + SpvExecutionModeLocalSizeId = 38, + SpvExecutionModeLocalSizeHintId = 39, + SpvExecutionModePostDepthCoverage = 4446, + SpvExecutionModeDenormPreserve = 4459, + SpvExecutionModeDenormFlushToZero = 4460, + SpvExecutionModeSignedZeroInfNanPreserve = 4461, + SpvExecutionModeRoundingModeRTE = 4462, + SpvExecutionModeRoundingModeRTZ = 4463, + SpvExecutionModeStencilRefReplacingEXT = 5027, + SpvExecutionModeOutputLinesNV = 5269, + SpvExecutionModeOutputPrimitivesNV = 5270, + SpvExecutionModeDerivativeGroupQuadsNV = 5289, + SpvExecutionModeDerivativeGroupLinearNV = 5290, + SpvExecutionModeOutputTrianglesNV = 5298, + SpvExecutionModePixelInterlockOrderedEXT = 5366, + SpvExecutionModePixelInterlockUnorderedEXT = 5367, + SpvExecutionModeSampleInterlockOrderedEXT = 5368, + SpvExecutionModeSampleInterlockUnorderedEXT = 5369, + SpvExecutionModeShadingRateInterlockOrderedEXT = 5370, + SpvExecutionModeShadingRateInterlockUnorderedEXT = 5371, + SpvExecutionModeMax = 0x7fffffff, +} SpvExecutionMode; + +typedef enum SpvStorageClass_ { + SpvStorageClassUniformConstant = 0, + SpvStorageClassInput = 1, + SpvStorageClassUniform = 2, + SpvStorageClassOutput = 3, + SpvStorageClassWorkgroup = 4, + SpvStorageClassCrossWorkgroup = 5, + SpvStorageClassPrivate = 6, + SpvStorageClassFunction = 7, + SpvStorageClassGeneric = 8, + SpvStorageClassPushConstant = 9, + SpvStorageClassAtomicCounter = 10, + SpvStorageClassImage = 11, + SpvStorageClassStorageBuffer = 12, + SpvStorageClassCallableDataKHR = 5328, + 
SpvStorageClassCallableDataNV = 5328, + SpvStorageClassIncomingCallableDataKHR = 5329, + SpvStorageClassIncomingCallableDataNV = 5329, + SpvStorageClassRayPayloadKHR = 5338, + SpvStorageClassRayPayloadNV = 5338, + SpvStorageClassHitAttributeKHR = 5339, + SpvStorageClassHitAttributeNV = 5339, + SpvStorageClassIncomingRayPayloadKHR = 5342, + SpvStorageClassIncomingRayPayloadNV = 5342, + SpvStorageClassShaderRecordBufferKHR = 5343, + SpvStorageClassShaderRecordBufferNV = 5343, + SpvStorageClassPhysicalStorageBuffer = 5349, + SpvStorageClassPhysicalStorageBufferEXT = 5349, + SpvStorageClassMax = 0x7fffffff, +} SpvStorageClass; + +typedef enum SpvDim_ { + SpvDim1D = 0, + SpvDim2D = 1, + SpvDim3D = 2, + SpvDimCube = 3, + SpvDimRect = 4, + SpvDimBuffer = 5, + SpvDimSubpassData = 6, + SpvDimMax = 0x7fffffff, +} SpvDim; + +typedef enum SpvSamplerAddressingMode_ { + SpvSamplerAddressingModeNone = 0, + SpvSamplerAddressingModeClampToEdge = 1, + SpvSamplerAddressingModeClamp = 2, + SpvSamplerAddressingModeRepeat = 3, + SpvSamplerAddressingModeRepeatMirrored = 4, + SpvSamplerAddressingModeMax = 0x7fffffff, +} SpvSamplerAddressingMode; + +typedef enum SpvSamplerFilterMode_ { + SpvSamplerFilterModeNearest = 0, + SpvSamplerFilterModeLinear = 1, + SpvSamplerFilterModeMax = 0x7fffffff, +} SpvSamplerFilterMode; + +typedef enum SpvImageFormat_ { + SpvImageFormatUnknown = 0, + SpvImageFormatRgba32f = 1, + SpvImageFormatRgba16f = 2, + SpvImageFormatR32f = 3, + SpvImageFormatRgba8 = 4, + SpvImageFormatRgba8Snorm = 5, + SpvImageFormatRg32f = 6, + SpvImageFormatRg16f = 7, + SpvImageFormatR11fG11fB10f = 8, + SpvImageFormatR16f = 9, + SpvImageFormatRgba16 = 10, + SpvImageFormatRgb10A2 = 11, + SpvImageFormatRg16 = 12, + SpvImageFormatRg8 = 13, + SpvImageFormatR16 = 14, + SpvImageFormatR8 = 15, + SpvImageFormatRgba16Snorm = 16, + SpvImageFormatRg16Snorm = 17, + SpvImageFormatRg8Snorm = 18, + SpvImageFormatR16Snorm = 19, + SpvImageFormatR8Snorm = 20, + SpvImageFormatRgba32i = 21, + 
SpvImageFormatRgba16i = 22, + SpvImageFormatRgba8i = 23, + SpvImageFormatR32i = 24, + SpvImageFormatRg32i = 25, + SpvImageFormatRg16i = 26, + SpvImageFormatRg8i = 27, + SpvImageFormatR16i = 28, + SpvImageFormatR8i = 29, + SpvImageFormatRgba32ui = 30, + SpvImageFormatRgba16ui = 31, + SpvImageFormatRgba8ui = 32, + SpvImageFormatR32ui = 33, + SpvImageFormatRgb10a2ui = 34, + SpvImageFormatRg32ui = 35, + SpvImageFormatRg16ui = 36, + SpvImageFormatRg8ui = 37, + SpvImageFormatR16ui = 38, + SpvImageFormatR8ui = 39, + SpvImageFormatMax = 0x7fffffff, +} SpvImageFormat; + +typedef enum SpvImageChannelOrder_ { + SpvImageChannelOrderR = 0, + SpvImageChannelOrderA = 1, + SpvImageChannelOrderRG = 2, + SpvImageChannelOrderRA = 3, + SpvImageChannelOrderRGB = 4, + SpvImageChannelOrderRGBA = 5, + SpvImageChannelOrderBGRA = 6, + SpvImageChannelOrderARGB = 7, + SpvImageChannelOrderIntensity = 8, + SpvImageChannelOrderLuminance = 9, + SpvImageChannelOrderRx = 10, + SpvImageChannelOrderRGx = 11, + SpvImageChannelOrderRGBx = 12, + SpvImageChannelOrderDepth = 13, + SpvImageChannelOrderDepthStencil = 14, + SpvImageChannelOrdersRGB = 15, + SpvImageChannelOrdersRGBx = 16, + SpvImageChannelOrdersRGBA = 17, + SpvImageChannelOrdersBGRA = 18, + SpvImageChannelOrderABGR = 19, + SpvImageChannelOrderMax = 0x7fffffff, +} SpvImageChannelOrder; + +typedef enum SpvImageChannelDataType_ { + SpvImageChannelDataTypeSnormInt8 = 0, + SpvImageChannelDataTypeSnormInt16 = 1, + SpvImageChannelDataTypeUnormInt8 = 2, + SpvImageChannelDataTypeUnormInt16 = 3, + SpvImageChannelDataTypeUnormShort565 = 4, + SpvImageChannelDataTypeUnormShort555 = 5, + SpvImageChannelDataTypeUnormInt101010 = 6, + SpvImageChannelDataTypeSignedInt8 = 7, + SpvImageChannelDataTypeSignedInt16 = 8, + SpvImageChannelDataTypeSignedInt32 = 9, + SpvImageChannelDataTypeUnsignedInt8 = 10, + SpvImageChannelDataTypeUnsignedInt16 = 11, + SpvImageChannelDataTypeUnsignedInt32 = 12, + SpvImageChannelDataTypeHalfFloat = 13, + SpvImageChannelDataTypeFloat = 
14, + SpvImageChannelDataTypeUnormInt24 = 15, + SpvImageChannelDataTypeUnormInt101010_2 = 16, + SpvImageChannelDataTypeMax = 0x7fffffff, +} SpvImageChannelDataType; + +typedef enum SpvImageOperandsShift_ { + SpvImageOperandsBiasShift = 0, + SpvImageOperandsLodShift = 1, + SpvImageOperandsGradShift = 2, + SpvImageOperandsConstOffsetShift = 3, + SpvImageOperandsOffsetShift = 4, + SpvImageOperandsConstOffsetsShift = 5, + SpvImageOperandsSampleShift = 6, + SpvImageOperandsMinLodShift = 7, + SpvImageOperandsMakeTexelAvailableShift = 8, + SpvImageOperandsMakeTexelAvailableKHRShift = 8, + SpvImageOperandsMakeTexelVisibleShift = 9, + SpvImageOperandsMakeTexelVisibleKHRShift = 9, + SpvImageOperandsNonPrivateTexelShift = 10, + SpvImageOperandsNonPrivateTexelKHRShift = 10, + SpvImageOperandsVolatileTexelShift = 11, + SpvImageOperandsVolatileTexelKHRShift = 11, + SpvImageOperandsSignExtendShift = 12, + SpvImageOperandsZeroExtendShift = 13, + SpvImageOperandsMax = 0x7fffffff, +} SpvImageOperandsShift; + +typedef enum SpvImageOperandsMask_ { + SpvImageOperandsMaskNone = 0, + SpvImageOperandsBiasMask = 0x00000001, + SpvImageOperandsLodMask = 0x00000002, + SpvImageOperandsGradMask = 0x00000004, + SpvImageOperandsConstOffsetMask = 0x00000008, + SpvImageOperandsOffsetMask = 0x00000010, + SpvImageOperandsConstOffsetsMask = 0x00000020, + SpvImageOperandsSampleMask = 0x00000040, + SpvImageOperandsMinLodMask = 0x00000080, + SpvImageOperandsMakeTexelAvailableMask = 0x00000100, + SpvImageOperandsMakeTexelAvailableKHRMask = 0x00000100, + SpvImageOperandsMakeTexelVisibleMask = 0x00000200, + SpvImageOperandsMakeTexelVisibleKHRMask = 0x00000200, + SpvImageOperandsNonPrivateTexelMask = 0x00000400, + SpvImageOperandsNonPrivateTexelKHRMask = 0x00000400, + SpvImageOperandsVolatileTexelMask = 0x00000800, + SpvImageOperandsVolatileTexelKHRMask = 0x00000800, + SpvImageOperandsSignExtendMask = 0x00001000, + SpvImageOperandsZeroExtendMask = 0x00002000, +} SpvImageOperandsMask; + +typedef enum 
SpvFPFastMathModeShift_ { + SpvFPFastMathModeNotNaNShift = 0, + SpvFPFastMathModeNotInfShift = 1, + SpvFPFastMathModeNSZShift = 2, + SpvFPFastMathModeAllowRecipShift = 3, + SpvFPFastMathModeFastShift = 4, + SpvFPFastMathModeMax = 0x7fffffff, +} SpvFPFastMathModeShift; + +typedef enum SpvFPFastMathModeMask_ { + SpvFPFastMathModeMaskNone = 0, + SpvFPFastMathModeNotNaNMask = 0x00000001, + SpvFPFastMathModeNotInfMask = 0x00000002, + SpvFPFastMathModeNSZMask = 0x00000004, + SpvFPFastMathModeAllowRecipMask = 0x00000008, + SpvFPFastMathModeFastMask = 0x00000010, +} SpvFPFastMathModeMask; + +typedef enum SpvFPRoundingMode_ { + SpvFPRoundingModeRTE = 0, + SpvFPRoundingModeRTZ = 1, + SpvFPRoundingModeRTP = 2, + SpvFPRoundingModeRTN = 3, + SpvFPRoundingModeMax = 0x7fffffff, +} SpvFPRoundingMode; + +typedef enum SpvLinkageType_ { + SpvLinkageTypeExport = 0, + SpvLinkageTypeImport = 1, + SpvLinkageTypeMax = 0x7fffffff, +} SpvLinkageType; + +typedef enum SpvAccessQualifier_ { + SpvAccessQualifierReadOnly = 0, + SpvAccessQualifierWriteOnly = 1, + SpvAccessQualifierReadWrite = 2, + SpvAccessQualifierMax = 0x7fffffff, +} SpvAccessQualifier; + +typedef enum SpvFunctionParameterAttribute_ { + SpvFunctionParameterAttributeZext = 0, + SpvFunctionParameterAttributeSext = 1, + SpvFunctionParameterAttributeByVal = 2, + SpvFunctionParameterAttributeSret = 3, + SpvFunctionParameterAttributeNoAlias = 4, + SpvFunctionParameterAttributeNoCapture = 5, + SpvFunctionParameterAttributeNoWrite = 6, + SpvFunctionParameterAttributeNoReadWrite = 7, + SpvFunctionParameterAttributeMax = 0x7fffffff, +} SpvFunctionParameterAttribute; + +typedef enum SpvDecoration_ { + SpvDecorationRelaxedPrecision = 0, + SpvDecorationSpecId = 1, + SpvDecorationBlock = 2, + SpvDecorationBufferBlock = 3, + SpvDecorationRowMajor = 4, + SpvDecorationColMajor = 5, + SpvDecorationArrayStride = 6, + SpvDecorationMatrixStride = 7, + SpvDecorationGLSLShared = 8, + SpvDecorationGLSLPacked = 9, + SpvDecorationCPacked = 10, + 
SpvDecorationBuiltIn = 11, + SpvDecorationNoPerspective = 13, + SpvDecorationFlat = 14, + SpvDecorationPatch = 15, + SpvDecorationCentroid = 16, + SpvDecorationSample = 17, + SpvDecorationInvariant = 18, + SpvDecorationRestrict = 19, + SpvDecorationAliased = 20, + SpvDecorationVolatile = 21, + SpvDecorationConstant = 22, + SpvDecorationCoherent = 23, + SpvDecorationNonWritable = 24, + SpvDecorationNonReadable = 25, + SpvDecorationUniform = 26, + SpvDecorationUniformId = 27, + SpvDecorationSaturatedConversion = 28, + SpvDecorationStream = 29, + SpvDecorationLocation = 30, + SpvDecorationComponent = 31, + SpvDecorationIndex = 32, + SpvDecorationBinding = 33, + SpvDecorationDescriptorSet = 34, + SpvDecorationOffset = 35, + SpvDecorationXfbBuffer = 36, + SpvDecorationXfbStride = 37, + SpvDecorationFuncParamAttr = 38, + SpvDecorationFPRoundingMode = 39, + SpvDecorationFPFastMathMode = 40, + SpvDecorationLinkageAttributes = 41, + SpvDecorationNoContraction = 42, + SpvDecorationInputAttachmentIndex = 43, + SpvDecorationAlignment = 44, + SpvDecorationMaxByteOffset = 45, + SpvDecorationAlignmentId = 46, + SpvDecorationMaxByteOffsetId = 47, + SpvDecorationNoSignedWrap = 4469, + SpvDecorationNoUnsignedWrap = 4470, + SpvDecorationExplicitInterpAMD = 4999, + SpvDecorationOverrideCoverageNV = 5248, + SpvDecorationPassthroughNV = 5250, + SpvDecorationViewportRelativeNV = 5252, + SpvDecorationSecondaryViewportRelativeNV = 5256, + SpvDecorationPerPrimitiveNV = 5271, + SpvDecorationPerViewNV = 5272, + SpvDecorationPerTaskNV = 5273, + SpvDecorationPerVertexNV = 5285, + SpvDecorationNonUniform = 5300, + SpvDecorationNonUniformEXT = 5300, + SpvDecorationRestrictPointer = 5355, + SpvDecorationRestrictPointerEXT = 5355, + SpvDecorationAliasedPointer = 5356, + SpvDecorationAliasedPointerEXT = 5356, + SpvDecorationCounterBuffer = 5634, + SpvDecorationHlslCounterBufferGOOGLE = 5634, + SpvDecorationHlslSemanticGOOGLE = 5635, + SpvDecorationUserSemantic = 5635, + SpvDecorationUserTypeGOOGLE = 
5636, + SpvDecorationMax = 0x7fffffff, +} SpvDecoration; + +typedef enum SpvBuiltIn_ { + SpvBuiltInPosition = 0, + SpvBuiltInPointSize = 1, + SpvBuiltInClipDistance = 3, + SpvBuiltInCullDistance = 4, + SpvBuiltInVertexId = 5, + SpvBuiltInInstanceId = 6, + SpvBuiltInPrimitiveId = 7, + SpvBuiltInInvocationId = 8, + SpvBuiltInLayer = 9, + SpvBuiltInViewportIndex = 10, + SpvBuiltInTessLevelOuter = 11, + SpvBuiltInTessLevelInner = 12, + SpvBuiltInTessCoord = 13, + SpvBuiltInPatchVertices = 14, + SpvBuiltInFragCoord = 15, + SpvBuiltInPointCoord = 16, + SpvBuiltInFrontFacing = 17, + SpvBuiltInSampleId = 18, + SpvBuiltInSamplePosition = 19, + SpvBuiltInSampleMask = 20, + SpvBuiltInFragDepth = 22, + SpvBuiltInHelperInvocation = 23, + SpvBuiltInNumWorkgroups = 24, + SpvBuiltInWorkgroupSize = 25, + SpvBuiltInWorkgroupId = 26, + SpvBuiltInLocalInvocationId = 27, + SpvBuiltInGlobalInvocationId = 28, + SpvBuiltInLocalInvocationIndex = 29, + SpvBuiltInWorkDim = 30, + SpvBuiltInGlobalSize = 31, + SpvBuiltInEnqueuedWorkgroupSize = 32, + SpvBuiltInGlobalOffset = 33, + SpvBuiltInGlobalLinearId = 34, + SpvBuiltInSubgroupSize = 36, + SpvBuiltInSubgroupMaxSize = 37, + SpvBuiltInNumSubgroups = 38, + SpvBuiltInNumEnqueuedSubgroups = 39, + SpvBuiltInSubgroupId = 40, + SpvBuiltInSubgroupLocalInvocationId = 41, + SpvBuiltInVertexIndex = 42, + SpvBuiltInInstanceIndex = 43, + SpvBuiltInSubgroupEqMask = 4416, + SpvBuiltInSubgroupEqMaskKHR = 4416, + SpvBuiltInSubgroupGeMask = 4417, + SpvBuiltInSubgroupGeMaskKHR = 4417, + SpvBuiltInSubgroupGtMask = 4418, + SpvBuiltInSubgroupGtMaskKHR = 4418, + SpvBuiltInSubgroupLeMask = 4419, + SpvBuiltInSubgroupLeMaskKHR = 4419, + SpvBuiltInSubgroupLtMask = 4420, + SpvBuiltInSubgroupLtMaskKHR = 4420, + SpvBuiltInBaseVertex = 4424, + SpvBuiltInBaseInstance = 4425, + SpvBuiltInDrawIndex = 4426, + SpvBuiltInDeviceIndex = 4438, + SpvBuiltInViewIndex = 4440, + SpvBuiltInBaryCoordNoPerspAMD = 4992, + SpvBuiltInBaryCoordNoPerspCentroidAMD = 4993, + 
SpvBuiltInBaryCoordNoPerspSampleAMD = 4994, + SpvBuiltInBaryCoordSmoothAMD = 4995, + SpvBuiltInBaryCoordSmoothCentroidAMD = 4996, + SpvBuiltInBaryCoordSmoothSampleAMD = 4997, + SpvBuiltInBaryCoordPullModelAMD = 4998, + SpvBuiltInFragStencilRefEXT = 5014, + SpvBuiltInViewportMaskNV = 5253, + SpvBuiltInSecondaryPositionNV = 5257, + SpvBuiltInSecondaryViewportMaskNV = 5258, + SpvBuiltInPositionPerViewNV = 5261, + SpvBuiltInViewportMaskPerViewNV = 5262, + SpvBuiltInFullyCoveredEXT = 5264, + SpvBuiltInTaskCountNV = 5274, + SpvBuiltInPrimitiveCountNV = 5275, + SpvBuiltInPrimitiveIndicesNV = 5276, + SpvBuiltInClipDistancePerViewNV = 5277, + SpvBuiltInCullDistancePerViewNV = 5278, + SpvBuiltInLayerPerViewNV = 5279, + SpvBuiltInMeshViewCountNV = 5280, + SpvBuiltInMeshViewIndicesNV = 5281, + SpvBuiltInBaryCoordNV = 5286, + SpvBuiltInBaryCoordNoPerspNV = 5287, + SpvBuiltInFragSizeEXT = 5292, + SpvBuiltInFragmentSizeNV = 5292, + SpvBuiltInFragInvocationCountEXT = 5293, + SpvBuiltInInvocationsPerPixelNV = 5293, + SpvBuiltInLaunchIdKHR = 5319, + SpvBuiltInLaunchIdNV = 5319, + SpvBuiltInLaunchSizeKHR = 5320, + SpvBuiltInLaunchSizeNV = 5320, + SpvBuiltInWorldRayOriginKHR = 5321, + SpvBuiltInWorldRayOriginNV = 5321, + SpvBuiltInWorldRayDirectionKHR = 5322, + SpvBuiltInWorldRayDirectionNV = 5322, + SpvBuiltInObjectRayOriginKHR = 5323, + SpvBuiltInObjectRayOriginNV = 5323, + SpvBuiltInObjectRayDirectionKHR = 5324, + SpvBuiltInObjectRayDirectionNV = 5324, + SpvBuiltInRayTminKHR = 5325, + SpvBuiltInRayTminNV = 5325, + SpvBuiltInRayTmaxKHR = 5326, + SpvBuiltInRayTmaxNV = 5326, + SpvBuiltInInstanceCustomIndexKHR = 5327, + SpvBuiltInInstanceCustomIndexNV = 5327, + SpvBuiltInObjectToWorldKHR = 5330, + SpvBuiltInObjectToWorldNV = 5330, + SpvBuiltInWorldToObjectKHR = 5331, + SpvBuiltInWorldToObjectNV = 5331, + SpvBuiltInHitTKHR = 5332, + SpvBuiltInHitTNV = 5332, + SpvBuiltInHitKindKHR = 5333, + SpvBuiltInHitKindNV = 5333, + SpvBuiltInIncomingRayFlagsKHR = 5351, + SpvBuiltInIncomingRayFlagsNV 
= 5351, + SpvBuiltInRayGeometryIndexKHR = 5352, + SpvBuiltInWarpsPerSMNV = 5374, + SpvBuiltInSMCountNV = 5375, + SpvBuiltInWarpIDNV = 5376, + SpvBuiltInSMIDNV = 5377, + SpvBuiltInMax = 0x7fffffff, +} SpvBuiltIn; + +typedef enum SpvSelectionControlShift_ { + SpvSelectionControlFlattenShift = 0, + SpvSelectionControlDontFlattenShift = 1, + SpvSelectionControlMax = 0x7fffffff, +} SpvSelectionControlShift; + +typedef enum SpvSelectionControlMask_ { + SpvSelectionControlMaskNone = 0, + SpvSelectionControlFlattenMask = 0x00000001, + SpvSelectionControlDontFlattenMask = 0x00000002, +} SpvSelectionControlMask; + +typedef enum SpvLoopControlShift_ { + SpvLoopControlUnrollShift = 0, + SpvLoopControlDontUnrollShift = 1, + SpvLoopControlDependencyInfiniteShift = 2, + SpvLoopControlDependencyLengthShift = 3, + SpvLoopControlMinIterationsShift = 4, + SpvLoopControlMaxIterationsShift = 5, + SpvLoopControlIterationMultipleShift = 6, + SpvLoopControlPeelCountShift = 7, + SpvLoopControlPartialCountShift = 8, + SpvLoopControlMax = 0x7fffffff, +} SpvLoopControlShift; + +typedef enum SpvLoopControlMask_ { + SpvLoopControlMaskNone = 0, + SpvLoopControlUnrollMask = 0x00000001, + SpvLoopControlDontUnrollMask = 0x00000002, + SpvLoopControlDependencyInfiniteMask = 0x00000004, + SpvLoopControlDependencyLengthMask = 0x00000008, + SpvLoopControlMinIterationsMask = 0x00000010, + SpvLoopControlMaxIterationsMask = 0x00000020, + SpvLoopControlIterationMultipleMask = 0x00000040, + SpvLoopControlPeelCountMask = 0x00000080, + SpvLoopControlPartialCountMask = 0x00000100, +} SpvLoopControlMask; + +typedef enum SpvFunctionControlShift_ { + SpvFunctionControlInlineShift = 0, + SpvFunctionControlDontInlineShift = 1, + SpvFunctionControlPureShift = 2, + SpvFunctionControlConstShift = 3, + SpvFunctionControlMax = 0x7fffffff, +} SpvFunctionControlShift; + +typedef enum SpvFunctionControlMask_ { + SpvFunctionControlMaskNone = 0, + SpvFunctionControlInlineMask = 0x00000001, + SpvFunctionControlDontInlineMask = 
0x00000002, + SpvFunctionControlPureMask = 0x00000004, + SpvFunctionControlConstMask = 0x00000008, +} SpvFunctionControlMask; + +typedef enum SpvMemorySemanticsShift_ { + SpvMemorySemanticsAcquireShift = 1, + SpvMemorySemanticsReleaseShift = 2, + SpvMemorySemanticsAcquireReleaseShift = 3, + SpvMemorySemanticsSequentiallyConsistentShift = 4, + SpvMemorySemanticsUniformMemoryShift = 6, + SpvMemorySemanticsSubgroupMemoryShift = 7, + SpvMemorySemanticsWorkgroupMemoryShift = 8, + SpvMemorySemanticsCrossWorkgroupMemoryShift = 9, + SpvMemorySemanticsAtomicCounterMemoryShift = 10, + SpvMemorySemanticsImageMemoryShift = 11, + SpvMemorySemanticsOutputMemoryShift = 12, + SpvMemorySemanticsOutputMemoryKHRShift = 12, + SpvMemorySemanticsMakeAvailableShift = 13, + SpvMemorySemanticsMakeAvailableKHRShift = 13, + SpvMemorySemanticsMakeVisibleShift = 14, + SpvMemorySemanticsMakeVisibleKHRShift = 14, + SpvMemorySemanticsVolatileShift = 15, + SpvMemorySemanticsMax = 0x7fffffff, +} SpvMemorySemanticsShift; + +typedef enum SpvMemorySemanticsMask_ { + SpvMemorySemanticsMaskNone = 0, + SpvMemorySemanticsAcquireMask = 0x00000002, + SpvMemorySemanticsReleaseMask = 0x00000004, + SpvMemorySemanticsAcquireReleaseMask = 0x00000008, + SpvMemorySemanticsSequentiallyConsistentMask = 0x00000010, + SpvMemorySemanticsUniformMemoryMask = 0x00000040, + SpvMemorySemanticsSubgroupMemoryMask = 0x00000080, + SpvMemorySemanticsWorkgroupMemoryMask = 0x00000100, + SpvMemorySemanticsCrossWorkgroupMemoryMask = 0x00000200, + SpvMemorySemanticsAtomicCounterMemoryMask = 0x00000400, + SpvMemorySemanticsImageMemoryMask = 0x00000800, + SpvMemorySemanticsOutputMemoryMask = 0x00001000, + SpvMemorySemanticsOutputMemoryKHRMask = 0x00001000, + SpvMemorySemanticsMakeAvailableMask = 0x00002000, + SpvMemorySemanticsMakeAvailableKHRMask = 0x00002000, + SpvMemorySemanticsMakeVisibleMask = 0x00004000, + SpvMemorySemanticsMakeVisibleKHRMask = 0x00004000, + SpvMemorySemanticsVolatileMask = 0x00008000, +} SpvMemorySemanticsMask; 
+ +typedef enum SpvMemoryAccessShift_ { + SpvMemoryAccessVolatileShift = 0, + SpvMemoryAccessAlignedShift = 1, + SpvMemoryAccessNontemporalShift = 2, + SpvMemoryAccessMakePointerAvailableShift = 3, + SpvMemoryAccessMakePointerAvailableKHRShift = 3, + SpvMemoryAccessMakePointerVisibleShift = 4, + SpvMemoryAccessMakePointerVisibleKHRShift = 4, + SpvMemoryAccessNonPrivatePointerShift = 5, + SpvMemoryAccessNonPrivatePointerKHRShift = 5, + SpvMemoryAccessMax = 0x7fffffff, +} SpvMemoryAccessShift; + +typedef enum SpvMemoryAccessMask_ { + SpvMemoryAccessMaskNone = 0, + SpvMemoryAccessVolatileMask = 0x00000001, + SpvMemoryAccessAlignedMask = 0x00000002, + SpvMemoryAccessNontemporalMask = 0x00000004, + SpvMemoryAccessMakePointerAvailableMask = 0x00000008, + SpvMemoryAccessMakePointerAvailableKHRMask = 0x00000008, + SpvMemoryAccessMakePointerVisibleMask = 0x00000010, + SpvMemoryAccessMakePointerVisibleKHRMask = 0x00000010, + SpvMemoryAccessNonPrivatePointerMask = 0x00000020, + SpvMemoryAccessNonPrivatePointerKHRMask = 0x00000020, +} SpvMemoryAccessMask; + +typedef enum SpvScope_ { + SpvScopeCrossDevice = 0, + SpvScopeDevice = 1, + SpvScopeWorkgroup = 2, + SpvScopeSubgroup = 3, + SpvScopeInvocation = 4, + SpvScopeQueueFamily = 5, + SpvScopeQueueFamilyKHR = 5, + SpvScopeShaderCallKHR = 6, + SpvScopeMax = 0x7fffffff, +} SpvScope; + +typedef enum SpvGroupOperation_ { + SpvGroupOperationReduce = 0, + SpvGroupOperationInclusiveScan = 1, + SpvGroupOperationExclusiveScan = 2, + SpvGroupOperationClusteredReduce = 3, + SpvGroupOperationPartitionedReduceNV = 6, + SpvGroupOperationPartitionedInclusiveScanNV = 7, + SpvGroupOperationPartitionedExclusiveScanNV = 8, + SpvGroupOperationMax = 0x7fffffff, +} SpvGroupOperation; + +typedef enum SpvKernelEnqueueFlags_ { + SpvKernelEnqueueFlagsNoWait = 0, + SpvKernelEnqueueFlagsWaitKernel = 1, + SpvKernelEnqueueFlagsWaitWorkGroup = 2, + SpvKernelEnqueueFlagsMax = 0x7fffffff, +} SpvKernelEnqueueFlags; + +typedef enum SpvKernelProfilingInfoShift_ { 
+ SpvKernelProfilingInfoCmdExecTimeShift = 0, + SpvKernelProfilingInfoMax = 0x7fffffff, +} SpvKernelProfilingInfoShift; + +typedef enum SpvKernelProfilingInfoMask_ { + SpvKernelProfilingInfoMaskNone = 0, + SpvKernelProfilingInfoCmdExecTimeMask = 0x00000001, +} SpvKernelProfilingInfoMask; + +typedef enum SpvCapability_ { + SpvCapabilityMatrix = 0, + SpvCapabilityShader = 1, + SpvCapabilityGeometry = 2, + SpvCapabilityTessellation = 3, + SpvCapabilityAddresses = 4, + SpvCapabilityLinkage = 5, + SpvCapabilityKernel = 6, + SpvCapabilityVector16 = 7, + SpvCapabilityFloat16Buffer = 8, + SpvCapabilityFloat16 = 9, + SpvCapabilityFloat64 = 10, + SpvCapabilityInt64 = 11, + SpvCapabilityInt64Atomics = 12, + SpvCapabilityImageBasic = 13, + SpvCapabilityImageReadWrite = 14, + SpvCapabilityImageMipmap = 15, + SpvCapabilityPipes = 17, + SpvCapabilityGroups = 18, + SpvCapabilityDeviceEnqueue = 19, + SpvCapabilityLiteralSampler = 20, + SpvCapabilityAtomicStorage = 21, + SpvCapabilityInt16 = 22, + SpvCapabilityTessellationPointSize = 23, + SpvCapabilityGeometryPointSize = 24, + SpvCapabilityImageGatherExtended = 25, + SpvCapabilityStorageImageMultisample = 27, + SpvCapabilityUniformBufferArrayDynamicIndexing = 28, + SpvCapabilitySampledImageArrayDynamicIndexing = 29, + SpvCapabilityStorageBufferArrayDynamicIndexing = 30, + SpvCapabilityStorageImageArrayDynamicIndexing = 31, + SpvCapabilityClipDistance = 32, + SpvCapabilityCullDistance = 33, + SpvCapabilityImageCubeArray = 34, + SpvCapabilitySampleRateShading = 35, + SpvCapabilityImageRect = 36, + SpvCapabilitySampledRect = 37, + SpvCapabilityGenericPointer = 38, + SpvCapabilityInt8 = 39, + SpvCapabilityInputAttachment = 40, + SpvCapabilitySparseResidency = 41, + SpvCapabilityMinLod = 42, + SpvCapabilitySampled1D = 43, + SpvCapabilityImage1D = 44, + SpvCapabilitySampledCubeArray = 45, + SpvCapabilitySampledBuffer = 46, + SpvCapabilityImageBuffer = 47, + SpvCapabilityImageMSArray = 48, + SpvCapabilityStorageImageExtendedFormats = 49, 
+ SpvCapabilityImageQuery = 50, + SpvCapabilityDerivativeControl = 51, + SpvCapabilityInterpolationFunction = 52, + SpvCapabilityTransformFeedback = 53, + SpvCapabilityGeometryStreams = 54, + SpvCapabilityStorageImageReadWithoutFormat = 55, + SpvCapabilityStorageImageWriteWithoutFormat = 56, + SpvCapabilityMultiViewport = 57, + SpvCapabilitySubgroupDispatch = 58, + SpvCapabilityNamedBarrier = 59, + SpvCapabilityPipeStorage = 60, + SpvCapabilityGroupNonUniform = 61, + SpvCapabilityGroupNonUniformVote = 62, + SpvCapabilityGroupNonUniformArithmetic = 63, + SpvCapabilityGroupNonUniformBallot = 64, + SpvCapabilityGroupNonUniformShuffle = 65, + SpvCapabilityGroupNonUniformShuffleRelative = 66, + SpvCapabilityGroupNonUniformClustered = 67, + SpvCapabilityGroupNonUniformQuad = 68, + SpvCapabilityShaderLayer = 69, + SpvCapabilityShaderViewportIndex = 70, + SpvCapabilitySubgroupBallotKHR = 4423, + SpvCapabilityDrawParameters = 4427, + SpvCapabilitySubgroupVoteKHR = 4431, + SpvCapabilityStorageBuffer16BitAccess = 4433, + SpvCapabilityStorageUniformBufferBlock16 = 4433, + SpvCapabilityStorageUniform16 = 4434, + SpvCapabilityUniformAndStorageBuffer16BitAccess = 4434, + SpvCapabilityStoragePushConstant16 = 4435, + SpvCapabilityStorageInputOutput16 = 4436, + SpvCapabilityDeviceGroup = 4437, + SpvCapabilityMultiView = 4439, + SpvCapabilityVariablePointersStorageBuffer = 4441, + SpvCapabilityVariablePointers = 4442, + SpvCapabilityAtomicStorageOps = 4445, + SpvCapabilitySampleMaskPostDepthCoverage = 4447, + SpvCapabilityStorageBuffer8BitAccess = 4448, + SpvCapabilityUniformAndStorageBuffer8BitAccess = 4449, + SpvCapabilityStoragePushConstant8 = 4450, + SpvCapabilityDenormPreserve = 4464, + SpvCapabilityDenormFlushToZero = 4465, + SpvCapabilitySignedZeroInfNanPreserve = 4466, + SpvCapabilityRoundingModeRTE = 4467, + SpvCapabilityRoundingModeRTZ = 4468, + SpvCapabilityRayQueryProvisionalKHR = 4471, + SpvCapabilityRayTraversalPrimitiveCullingProvisionalKHR = 4478, + 
SpvCapabilityFloat16ImageAMD = 5008, + SpvCapabilityImageGatherBiasLodAMD = 5009, + SpvCapabilityFragmentMaskAMD = 5010, + SpvCapabilityStencilExportEXT = 5013, + SpvCapabilityImageReadWriteLodAMD = 5015, + SpvCapabilityShaderClockKHR = 5055, + SpvCapabilitySampleMaskOverrideCoverageNV = 5249, + SpvCapabilityGeometryShaderPassthroughNV = 5251, + SpvCapabilityShaderViewportIndexLayerEXT = 5254, + SpvCapabilityShaderViewportIndexLayerNV = 5254, + SpvCapabilityShaderViewportMaskNV = 5255, + SpvCapabilityShaderStereoViewNV = 5259, + SpvCapabilityPerViewAttributesNV = 5260, + SpvCapabilityFragmentFullyCoveredEXT = 5265, + SpvCapabilityMeshShadingNV = 5266, + SpvCapabilityImageFootprintNV = 5282, + SpvCapabilityFragmentBarycentricNV = 5284, + SpvCapabilityComputeDerivativeGroupQuadsNV = 5288, + SpvCapabilityFragmentDensityEXT = 5291, + SpvCapabilityShadingRateNV = 5291, + SpvCapabilityGroupNonUniformPartitionedNV = 5297, + SpvCapabilityShaderNonUniform = 5301, + SpvCapabilityShaderNonUniformEXT = 5301, + SpvCapabilityRuntimeDescriptorArray = 5302, + SpvCapabilityRuntimeDescriptorArrayEXT = 5302, + SpvCapabilityInputAttachmentArrayDynamicIndexing = 5303, + SpvCapabilityInputAttachmentArrayDynamicIndexingEXT = 5303, + SpvCapabilityUniformTexelBufferArrayDynamicIndexing = 5304, + SpvCapabilityUniformTexelBufferArrayDynamicIndexingEXT = 5304, + SpvCapabilityStorageTexelBufferArrayDynamicIndexing = 5305, + SpvCapabilityStorageTexelBufferArrayDynamicIndexingEXT = 5305, + SpvCapabilityUniformBufferArrayNonUniformIndexing = 5306, + SpvCapabilityUniformBufferArrayNonUniformIndexingEXT = 5306, + SpvCapabilitySampledImageArrayNonUniformIndexing = 5307, + SpvCapabilitySampledImageArrayNonUniformIndexingEXT = 5307, + SpvCapabilityStorageBufferArrayNonUniformIndexing = 5308, + SpvCapabilityStorageBufferArrayNonUniformIndexingEXT = 5308, + SpvCapabilityStorageImageArrayNonUniformIndexing = 5309, + SpvCapabilityStorageImageArrayNonUniformIndexingEXT = 5309, + 
SpvCapabilityInputAttachmentArrayNonUniformIndexing = 5310, + SpvCapabilityInputAttachmentArrayNonUniformIndexingEXT = 5310, + SpvCapabilityUniformTexelBufferArrayNonUniformIndexing = 5311, + SpvCapabilityUniformTexelBufferArrayNonUniformIndexingEXT = 5311, + SpvCapabilityStorageTexelBufferArrayNonUniformIndexing = 5312, + SpvCapabilityStorageTexelBufferArrayNonUniformIndexingEXT = 5312, + SpvCapabilityRayTracingNV = 5340, + SpvCapabilityVulkanMemoryModel = 5345, + SpvCapabilityVulkanMemoryModelKHR = 5345, + SpvCapabilityVulkanMemoryModelDeviceScope = 5346, + SpvCapabilityVulkanMemoryModelDeviceScopeKHR = 5346, + SpvCapabilityPhysicalStorageBufferAddresses = 5347, + SpvCapabilityPhysicalStorageBufferAddressesEXT = 5347, + SpvCapabilityComputeDerivativeGroupLinearNV = 5350, + SpvCapabilityRayTracingProvisionalKHR = 5353, + SpvCapabilityCooperativeMatrixNV = 5357, + SpvCapabilityFragmentShaderSampleInterlockEXT = 5363, + SpvCapabilityFragmentShaderShadingRateInterlockEXT = 5372, + SpvCapabilityShaderSMBuiltinsNV = 5373, + SpvCapabilityFragmentShaderPixelInterlockEXT = 5378, + SpvCapabilityDemoteToHelperInvocationEXT = 5379, + SpvCapabilitySubgroupShuffleINTEL = 5568, + SpvCapabilitySubgroupBufferBlockIOINTEL = 5569, + SpvCapabilitySubgroupImageBlockIOINTEL = 5570, + SpvCapabilitySubgroupImageMediaBlockIOINTEL = 5579, + SpvCapabilityIntegerFunctions2INTEL = 5584, + SpvCapabilitySubgroupAvcMotionEstimationINTEL = 5696, + SpvCapabilitySubgroupAvcMotionEstimationIntraINTEL = 5697, + SpvCapabilitySubgroupAvcMotionEstimationChromaINTEL = 5698, + SpvCapabilityMax = 0x7fffffff, +} SpvCapability; + +typedef enum SpvRayFlagsShift_ { + SpvRayFlagsOpaqueKHRShift = 0, + SpvRayFlagsNoOpaqueKHRShift = 1, + SpvRayFlagsTerminateOnFirstHitKHRShift = 2, + SpvRayFlagsSkipClosestHitShaderKHRShift = 3, + SpvRayFlagsCullBackFacingTrianglesKHRShift = 4, + SpvRayFlagsCullFrontFacingTrianglesKHRShift = 5, + SpvRayFlagsCullOpaqueKHRShift = 6, + SpvRayFlagsCullNoOpaqueKHRShift = 7, + 
SpvRayFlagsSkipTrianglesKHRShift = 8, + SpvRayFlagsSkipAABBsKHRShift = 9, + SpvRayFlagsMax = 0x7fffffff, +} SpvRayFlagsShift; + +typedef enum SpvRayFlagsMask_ { + SpvRayFlagsMaskNone = 0, + SpvRayFlagsOpaqueKHRMask = 0x00000001, + SpvRayFlagsNoOpaqueKHRMask = 0x00000002, + SpvRayFlagsTerminateOnFirstHitKHRMask = 0x00000004, + SpvRayFlagsSkipClosestHitShaderKHRMask = 0x00000008, + SpvRayFlagsCullBackFacingTrianglesKHRMask = 0x00000010, + SpvRayFlagsCullFrontFacingTrianglesKHRMask = 0x00000020, + SpvRayFlagsCullOpaqueKHRMask = 0x00000040, + SpvRayFlagsCullNoOpaqueKHRMask = 0x00000080, + SpvRayFlagsSkipTrianglesKHRMask = 0x00000100, + SpvRayFlagsSkipAABBsKHRMask = 0x00000200, +} SpvRayFlagsMask; + +typedef enum SpvRayQueryIntersection_ { + SpvRayQueryIntersectionRayQueryCandidateIntersectionKHR = 0, + SpvRayQueryIntersectionRayQueryCommittedIntersectionKHR = 1, + SpvRayQueryIntersectionMax = 0x7fffffff, +} SpvRayQueryIntersection; + +typedef enum SpvRayQueryCommittedIntersectionType_ { + SpvRayQueryCommittedIntersectionTypeRayQueryCommittedIntersectionNoneKHR = 0, + SpvRayQueryCommittedIntersectionTypeRayQueryCommittedIntersectionTriangleKHR = 1, + SpvRayQueryCommittedIntersectionTypeRayQueryCommittedIntersectionGeneratedKHR = 2, + SpvRayQueryCommittedIntersectionTypeMax = 0x7fffffff, +} SpvRayQueryCommittedIntersectionType; + +typedef enum SpvRayQueryCandidateIntersectionType_ { + SpvRayQueryCandidateIntersectionTypeRayQueryCandidateIntersectionTriangleKHR = 0, + SpvRayQueryCandidateIntersectionTypeRayQueryCandidateIntersectionAABBKHR = 1, + SpvRayQueryCandidateIntersectionTypeMax = 0x7fffffff, +} SpvRayQueryCandidateIntersectionType; + +typedef enum SpvOp_ { + SpvOpNop = 0, + SpvOpUndef = 1, + SpvOpSourceContinued = 2, + SpvOpSource = 3, + SpvOpSourceExtension = 4, + SpvOpName = 5, + SpvOpMemberName = 6, + SpvOpString = 7, + SpvOpLine = 8, + SpvOpExtension = 10, + SpvOpExtInstImport = 11, + SpvOpExtInst = 12, + SpvOpMemoryModel = 14, + SpvOpEntryPoint = 15, + 
SpvOpExecutionMode = 16, + SpvOpCapability = 17, + SpvOpTypeVoid = 19, + SpvOpTypeBool = 20, + SpvOpTypeInt = 21, + SpvOpTypeFloat = 22, + SpvOpTypeVector = 23, + SpvOpTypeMatrix = 24, + SpvOpTypeImage = 25, + SpvOpTypeSampler = 26, + SpvOpTypeSampledImage = 27, + SpvOpTypeArray = 28, + SpvOpTypeRuntimeArray = 29, + SpvOpTypeStruct = 30, + SpvOpTypeOpaque = 31, + SpvOpTypePointer = 32, + SpvOpTypeFunction = 33, + SpvOpTypeEvent = 34, + SpvOpTypeDeviceEvent = 35, + SpvOpTypeReserveId = 36, + SpvOpTypeQueue = 37, + SpvOpTypePipe = 38, + SpvOpTypeForwardPointer = 39, + SpvOpConstantTrue = 41, + SpvOpConstantFalse = 42, + SpvOpConstant = 43, + SpvOpConstantComposite = 44, + SpvOpConstantSampler = 45, + SpvOpConstantNull = 46, + SpvOpSpecConstantTrue = 48, + SpvOpSpecConstantFalse = 49, + SpvOpSpecConstant = 50, + SpvOpSpecConstantComposite = 51, + SpvOpSpecConstantOp = 52, + SpvOpFunction = 54, + SpvOpFunctionParameter = 55, + SpvOpFunctionEnd = 56, + SpvOpFunctionCall = 57, + SpvOpVariable = 59, + SpvOpImageTexelPointer = 60, + SpvOpLoad = 61, + SpvOpStore = 62, + SpvOpCopyMemory = 63, + SpvOpCopyMemorySized = 64, + SpvOpAccessChain = 65, + SpvOpInBoundsAccessChain = 66, + SpvOpPtrAccessChain = 67, + SpvOpArrayLength = 68, + SpvOpGenericPtrMemSemantics = 69, + SpvOpInBoundsPtrAccessChain = 70, + SpvOpDecorate = 71, + SpvOpMemberDecorate = 72, + SpvOpDecorationGroup = 73, + SpvOpGroupDecorate = 74, + SpvOpGroupMemberDecorate = 75, + SpvOpVectorExtractDynamic = 77, + SpvOpVectorInsertDynamic = 78, + SpvOpVectorShuffle = 79, + SpvOpCompositeConstruct = 80, + SpvOpCompositeExtract = 81, + SpvOpCompositeInsert = 82, + SpvOpCopyObject = 83, + SpvOpTranspose = 84, + SpvOpSampledImage = 86, + SpvOpImageSampleImplicitLod = 87, + SpvOpImageSampleExplicitLod = 88, + SpvOpImageSampleDrefImplicitLod = 89, + SpvOpImageSampleDrefExplicitLod = 90, + SpvOpImageSampleProjImplicitLod = 91, + SpvOpImageSampleProjExplicitLod = 92, + SpvOpImageSampleProjDrefImplicitLod = 93, + 
SpvOpImageSampleProjDrefExplicitLod = 94, + SpvOpImageFetch = 95, + SpvOpImageGather = 96, + SpvOpImageDrefGather = 97, + SpvOpImageRead = 98, + SpvOpImageWrite = 99, + SpvOpImage = 100, + SpvOpImageQueryFormat = 101, + SpvOpImageQueryOrder = 102, + SpvOpImageQuerySizeLod = 103, + SpvOpImageQuerySize = 104, + SpvOpImageQueryLod = 105, + SpvOpImageQueryLevels = 106, + SpvOpImageQuerySamples = 107, + SpvOpConvertFToU = 109, + SpvOpConvertFToS = 110, + SpvOpConvertSToF = 111, + SpvOpConvertUToF = 112, + SpvOpUConvert = 113, + SpvOpSConvert = 114, + SpvOpFConvert = 115, + SpvOpQuantizeToF16 = 116, + SpvOpConvertPtrToU = 117, + SpvOpSatConvertSToU = 118, + SpvOpSatConvertUToS = 119, + SpvOpConvertUToPtr = 120, + SpvOpPtrCastToGeneric = 121, + SpvOpGenericCastToPtr = 122, + SpvOpGenericCastToPtrExplicit = 123, + SpvOpBitcast = 124, + SpvOpSNegate = 126, + SpvOpFNegate = 127, + SpvOpIAdd = 128, + SpvOpFAdd = 129, + SpvOpISub = 130, + SpvOpFSub = 131, + SpvOpIMul = 132, + SpvOpFMul = 133, + SpvOpUDiv = 134, + SpvOpSDiv = 135, + SpvOpFDiv = 136, + SpvOpUMod = 137, + SpvOpSRem = 138, + SpvOpSMod = 139, + SpvOpFRem = 140, + SpvOpFMod = 141, + SpvOpVectorTimesScalar = 142, + SpvOpMatrixTimesScalar = 143, + SpvOpVectorTimesMatrix = 144, + SpvOpMatrixTimesVector = 145, + SpvOpMatrixTimesMatrix = 146, + SpvOpOuterProduct = 147, + SpvOpDot = 148, + SpvOpIAddCarry = 149, + SpvOpISubBorrow = 150, + SpvOpUMulExtended = 151, + SpvOpSMulExtended = 152, + SpvOpAny = 154, + SpvOpAll = 155, + SpvOpIsNan = 156, + SpvOpIsInf = 157, + SpvOpIsFinite = 158, + SpvOpIsNormal = 159, + SpvOpSignBitSet = 160, + SpvOpLessOrGreater = 161, + SpvOpOrdered = 162, + SpvOpUnordered = 163, + SpvOpLogicalEqual = 164, + SpvOpLogicalNotEqual = 165, + SpvOpLogicalOr = 166, + SpvOpLogicalAnd = 167, + SpvOpLogicalNot = 168, + SpvOpSelect = 169, + SpvOpIEqual = 170, + SpvOpINotEqual = 171, + SpvOpUGreaterThan = 172, + SpvOpSGreaterThan = 173, + SpvOpUGreaterThanEqual = 174, + SpvOpSGreaterThanEqual = 175, + 
SpvOpULessThan = 176, + SpvOpSLessThan = 177, + SpvOpULessThanEqual = 178, + SpvOpSLessThanEqual = 179, + SpvOpFOrdEqual = 180, + SpvOpFUnordEqual = 181, + SpvOpFOrdNotEqual = 182, + SpvOpFUnordNotEqual = 183, + SpvOpFOrdLessThan = 184, + SpvOpFUnordLessThan = 185, + SpvOpFOrdGreaterThan = 186, + SpvOpFUnordGreaterThan = 187, + SpvOpFOrdLessThanEqual = 188, + SpvOpFUnordLessThanEqual = 189, + SpvOpFOrdGreaterThanEqual = 190, + SpvOpFUnordGreaterThanEqual = 191, + SpvOpShiftRightLogical = 194, + SpvOpShiftRightArithmetic = 195, + SpvOpShiftLeftLogical = 196, + SpvOpBitwiseOr = 197, + SpvOpBitwiseXor = 198, + SpvOpBitwiseAnd = 199, + SpvOpNot = 200, + SpvOpBitFieldInsert = 201, + SpvOpBitFieldSExtract = 202, + SpvOpBitFieldUExtract = 203, + SpvOpBitReverse = 204, + SpvOpBitCount = 205, + SpvOpDPdx = 207, + SpvOpDPdy = 208, + SpvOpFwidth = 209, + SpvOpDPdxFine = 210, + SpvOpDPdyFine = 211, + SpvOpFwidthFine = 212, + SpvOpDPdxCoarse = 213, + SpvOpDPdyCoarse = 214, + SpvOpFwidthCoarse = 215, + SpvOpEmitVertex = 218, + SpvOpEndPrimitive = 219, + SpvOpEmitStreamVertex = 220, + SpvOpEndStreamPrimitive = 221, + SpvOpControlBarrier = 224, + SpvOpMemoryBarrier = 225, + SpvOpAtomicLoad = 227, + SpvOpAtomicStore = 228, + SpvOpAtomicExchange = 229, + SpvOpAtomicCompareExchange = 230, + SpvOpAtomicCompareExchangeWeak = 231, + SpvOpAtomicIIncrement = 232, + SpvOpAtomicIDecrement = 233, + SpvOpAtomicIAdd = 234, + SpvOpAtomicISub = 235, + SpvOpAtomicSMin = 236, + SpvOpAtomicUMin = 237, + SpvOpAtomicSMax = 238, + SpvOpAtomicUMax = 239, + SpvOpAtomicAnd = 240, + SpvOpAtomicOr = 241, + SpvOpAtomicXor = 242, + SpvOpPhi = 245, + SpvOpLoopMerge = 246, + SpvOpSelectionMerge = 247, + SpvOpLabel = 248, + SpvOpBranch = 249, + SpvOpBranchConditional = 250, + SpvOpSwitch = 251, + SpvOpKill = 252, + SpvOpReturn = 253, + SpvOpReturnValue = 254, + SpvOpUnreachable = 255, + SpvOpLifetimeStart = 256, + SpvOpLifetimeStop = 257, + SpvOpGroupAsyncCopy = 259, + SpvOpGroupWaitEvents = 260, + 
SpvOpGroupAll = 261, + SpvOpGroupAny = 262, + SpvOpGroupBroadcast = 263, + SpvOpGroupIAdd = 264, + SpvOpGroupFAdd = 265, + SpvOpGroupFMin = 266, + SpvOpGroupUMin = 267, + SpvOpGroupSMin = 268, + SpvOpGroupFMax = 269, + SpvOpGroupUMax = 270, + SpvOpGroupSMax = 271, + SpvOpReadPipe = 274, + SpvOpWritePipe = 275, + SpvOpReservedReadPipe = 276, + SpvOpReservedWritePipe = 277, + SpvOpReserveReadPipePackets = 278, + SpvOpReserveWritePipePackets = 279, + SpvOpCommitReadPipe = 280, + SpvOpCommitWritePipe = 281, + SpvOpIsValidReserveId = 282, + SpvOpGetNumPipePackets = 283, + SpvOpGetMaxPipePackets = 284, + SpvOpGroupReserveReadPipePackets = 285, + SpvOpGroupReserveWritePipePackets = 286, + SpvOpGroupCommitReadPipe = 287, + SpvOpGroupCommitWritePipe = 288, + SpvOpEnqueueMarker = 291, + SpvOpEnqueueKernel = 292, + SpvOpGetKernelNDrangeSubGroupCount = 293, + SpvOpGetKernelNDrangeMaxSubGroupSize = 294, + SpvOpGetKernelWorkGroupSize = 295, + SpvOpGetKernelPreferredWorkGroupSizeMultiple = 296, + SpvOpRetainEvent = 297, + SpvOpReleaseEvent = 298, + SpvOpCreateUserEvent = 299, + SpvOpIsValidEvent = 300, + SpvOpSetUserEventStatus = 301, + SpvOpCaptureEventProfilingInfo = 302, + SpvOpGetDefaultQueue = 303, + SpvOpBuildNDRange = 304, + SpvOpImageSparseSampleImplicitLod = 305, + SpvOpImageSparseSampleExplicitLod = 306, + SpvOpImageSparseSampleDrefImplicitLod = 307, + SpvOpImageSparseSampleDrefExplicitLod = 308, + SpvOpImageSparseSampleProjImplicitLod = 309, + SpvOpImageSparseSampleProjExplicitLod = 310, + SpvOpImageSparseSampleProjDrefImplicitLod = 311, + SpvOpImageSparseSampleProjDrefExplicitLod = 312, + SpvOpImageSparseFetch = 313, + SpvOpImageSparseGather = 314, + SpvOpImageSparseDrefGather = 315, + SpvOpImageSparseTexelsResident = 316, + SpvOpNoLine = 317, + SpvOpAtomicFlagTestAndSet = 318, + SpvOpAtomicFlagClear = 319, + SpvOpImageSparseRead = 320, + SpvOpSizeOf = 321, + SpvOpTypePipeStorage = 322, + SpvOpConstantPipeStorage = 323, + SpvOpCreatePipeFromPipeStorage = 324, + 
SpvOpGetKernelLocalSizeForSubgroupCount = 325, + SpvOpGetKernelMaxNumSubgroups = 326, + SpvOpTypeNamedBarrier = 327, + SpvOpNamedBarrierInitialize = 328, + SpvOpMemoryNamedBarrier = 329, + SpvOpModuleProcessed = 330, + SpvOpExecutionModeId = 331, + SpvOpDecorateId = 332, + SpvOpGroupNonUniformElect = 333, + SpvOpGroupNonUniformAll = 334, + SpvOpGroupNonUniformAny = 335, + SpvOpGroupNonUniformAllEqual = 336, + SpvOpGroupNonUniformBroadcast = 337, + SpvOpGroupNonUniformBroadcastFirst = 338, + SpvOpGroupNonUniformBallot = 339, + SpvOpGroupNonUniformInverseBallot = 340, + SpvOpGroupNonUniformBallotBitExtract = 341, + SpvOpGroupNonUniformBallotBitCount = 342, + SpvOpGroupNonUniformBallotFindLSB = 343, + SpvOpGroupNonUniformBallotFindMSB = 344, + SpvOpGroupNonUniformShuffle = 345, + SpvOpGroupNonUniformShuffleXor = 346, + SpvOpGroupNonUniformShuffleUp = 347, + SpvOpGroupNonUniformShuffleDown = 348, + SpvOpGroupNonUniformIAdd = 349, + SpvOpGroupNonUniformFAdd = 350, + SpvOpGroupNonUniformIMul = 351, + SpvOpGroupNonUniformFMul = 352, + SpvOpGroupNonUniformSMin = 353, + SpvOpGroupNonUniformUMin = 354, + SpvOpGroupNonUniformFMin = 355, + SpvOpGroupNonUniformSMax = 356, + SpvOpGroupNonUniformUMax = 357, + SpvOpGroupNonUniformFMax = 358, + SpvOpGroupNonUniformBitwiseAnd = 359, + SpvOpGroupNonUniformBitwiseOr = 360, + SpvOpGroupNonUniformBitwiseXor = 361, + SpvOpGroupNonUniformLogicalAnd = 362, + SpvOpGroupNonUniformLogicalOr = 363, + SpvOpGroupNonUniformLogicalXor = 364, + SpvOpGroupNonUniformQuadBroadcast = 365, + SpvOpGroupNonUniformQuadSwap = 366, + SpvOpCopyLogical = 400, + SpvOpPtrEqual = 401, + SpvOpPtrNotEqual = 402, + SpvOpPtrDiff = 403, + SpvOpSubgroupBallotKHR = 4421, + SpvOpSubgroupFirstInvocationKHR = 4422, + SpvOpSubgroupAllKHR = 4428, + SpvOpSubgroupAnyKHR = 4429, + SpvOpSubgroupAllEqualKHR = 4430, + SpvOpSubgroupReadInvocationKHR = 4432, + SpvOpTypeRayQueryProvisionalKHR = 4472, + SpvOpRayQueryInitializeKHR = 4473, + SpvOpRayQueryTerminateKHR = 4474, + 
SpvOpRayQueryGenerateIntersectionKHR = 4475, + SpvOpRayQueryConfirmIntersectionKHR = 4476, + SpvOpRayQueryProceedKHR = 4477, + SpvOpRayQueryGetIntersectionTypeKHR = 4479, + SpvOpGroupIAddNonUniformAMD = 5000, + SpvOpGroupFAddNonUniformAMD = 5001, + SpvOpGroupFMinNonUniformAMD = 5002, + SpvOpGroupUMinNonUniformAMD = 5003, + SpvOpGroupSMinNonUniformAMD = 5004, + SpvOpGroupFMaxNonUniformAMD = 5005, + SpvOpGroupUMaxNonUniformAMD = 5006, + SpvOpGroupSMaxNonUniformAMD = 5007, + SpvOpFragmentMaskFetchAMD = 5011, + SpvOpFragmentFetchAMD = 5012, + SpvOpReadClockKHR = 5056, + SpvOpImageSampleFootprintNV = 5283, + SpvOpGroupNonUniformPartitionNV = 5296, + SpvOpWritePackedPrimitiveIndices4x8NV = 5299, + SpvOpReportIntersectionKHR = 5334, + SpvOpReportIntersectionNV = 5334, + SpvOpIgnoreIntersectionKHR = 5335, + SpvOpIgnoreIntersectionNV = 5335, + SpvOpTerminateRayKHR = 5336, + SpvOpTerminateRayNV = 5336, + SpvOpTraceNV = 5337, + SpvOpTraceRayKHR = 5337, + SpvOpTypeAccelerationStructureKHR = 5341, + SpvOpTypeAccelerationStructureNV = 5341, + SpvOpExecuteCallableKHR = 5344, + SpvOpExecuteCallableNV = 5344, + SpvOpTypeCooperativeMatrixNV = 5358, + SpvOpCooperativeMatrixLoadNV = 5359, + SpvOpCooperativeMatrixStoreNV = 5360, + SpvOpCooperativeMatrixMulAddNV = 5361, + SpvOpCooperativeMatrixLengthNV = 5362, + SpvOpBeginInvocationInterlockEXT = 5364, + SpvOpEndInvocationInterlockEXT = 5365, + SpvOpDemoteToHelperInvocationEXT = 5380, + SpvOpIsHelperInvocationEXT = 5381, + SpvOpSubgroupShuffleINTEL = 5571, + SpvOpSubgroupShuffleDownINTEL = 5572, + SpvOpSubgroupShuffleUpINTEL = 5573, + SpvOpSubgroupShuffleXorINTEL = 5574, + SpvOpSubgroupBlockReadINTEL = 5575, + SpvOpSubgroupBlockWriteINTEL = 5576, + SpvOpSubgroupImageBlockReadINTEL = 5577, + SpvOpSubgroupImageBlockWriteINTEL = 5578, + SpvOpSubgroupImageMediaBlockReadINTEL = 5580, + SpvOpSubgroupImageMediaBlockWriteINTEL = 5581, + SpvOpUCountLeadingZerosINTEL = 5585, + SpvOpUCountTrailingZerosINTEL = 5586, + SpvOpAbsISubINTEL = 5587, + 
SpvOpAbsUSubINTEL = 5588, + SpvOpIAddSatINTEL = 5589, + SpvOpUAddSatINTEL = 5590, + SpvOpIAverageINTEL = 5591, + SpvOpUAverageINTEL = 5592, + SpvOpIAverageRoundedINTEL = 5593, + SpvOpUAverageRoundedINTEL = 5594, + SpvOpISubSatINTEL = 5595, + SpvOpUSubSatINTEL = 5596, + SpvOpIMul32x16INTEL = 5597, + SpvOpUMul32x16INTEL = 5598, + SpvOpDecorateString = 5632, + SpvOpDecorateStringGOOGLE = 5632, + SpvOpMemberDecorateString = 5633, + SpvOpMemberDecorateStringGOOGLE = 5633, + SpvOpVmeImageINTEL = 5699, + SpvOpTypeVmeImageINTEL = 5700, + SpvOpTypeAvcImePayloadINTEL = 5701, + SpvOpTypeAvcRefPayloadINTEL = 5702, + SpvOpTypeAvcSicPayloadINTEL = 5703, + SpvOpTypeAvcMcePayloadINTEL = 5704, + SpvOpTypeAvcMceResultINTEL = 5705, + SpvOpTypeAvcImeResultINTEL = 5706, + SpvOpTypeAvcImeResultSingleReferenceStreamoutINTEL = 5707, + SpvOpTypeAvcImeResultDualReferenceStreamoutINTEL = 5708, + SpvOpTypeAvcImeSingleReferenceStreaminINTEL = 5709, + SpvOpTypeAvcImeDualReferenceStreaminINTEL = 5710, + SpvOpTypeAvcRefResultINTEL = 5711, + SpvOpTypeAvcSicResultINTEL = 5712, + SpvOpSubgroupAvcMceGetDefaultInterBaseMultiReferencePenaltyINTEL = 5713, + SpvOpSubgroupAvcMceSetInterBaseMultiReferencePenaltyINTEL = 5714, + SpvOpSubgroupAvcMceGetDefaultInterShapePenaltyINTEL = 5715, + SpvOpSubgroupAvcMceSetInterShapePenaltyINTEL = 5716, + SpvOpSubgroupAvcMceGetDefaultInterDirectionPenaltyINTEL = 5717, + SpvOpSubgroupAvcMceSetInterDirectionPenaltyINTEL = 5718, + SpvOpSubgroupAvcMceGetDefaultIntraLumaShapePenaltyINTEL = 5719, + SpvOpSubgroupAvcMceGetDefaultInterMotionVectorCostTableINTEL = 5720, + SpvOpSubgroupAvcMceGetDefaultHighPenaltyCostTableINTEL = 5721, + SpvOpSubgroupAvcMceGetDefaultMediumPenaltyCostTableINTEL = 5722, + SpvOpSubgroupAvcMceGetDefaultLowPenaltyCostTableINTEL = 5723, + SpvOpSubgroupAvcMceSetMotionVectorCostFunctionINTEL = 5724, + SpvOpSubgroupAvcMceGetDefaultIntraLumaModePenaltyINTEL = 5725, + SpvOpSubgroupAvcMceGetDefaultNonDcLumaIntraPenaltyINTEL = 5726, + 
SpvOpSubgroupAvcMceGetDefaultIntraChromaModeBasePenaltyINTEL = 5727, + SpvOpSubgroupAvcMceSetAcOnlyHaarINTEL = 5728, + SpvOpSubgroupAvcMceSetSourceInterlacedFieldPolarityINTEL = 5729, + SpvOpSubgroupAvcMceSetSingleReferenceInterlacedFieldPolarityINTEL = 5730, + SpvOpSubgroupAvcMceSetDualReferenceInterlacedFieldPolaritiesINTEL = 5731, + SpvOpSubgroupAvcMceConvertToImePayloadINTEL = 5732, + SpvOpSubgroupAvcMceConvertToImeResultINTEL = 5733, + SpvOpSubgroupAvcMceConvertToRefPayloadINTEL = 5734, + SpvOpSubgroupAvcMceConvertToRefResultINTEL = 5735, + SpvOpSubgroupAvcMceConvertToSicPayloadINTEL = 5736, + SpvOpSubgroupAvcMceConvertToSicResultINTEL = 5737, + SpvOpSubgroupAvcMceGetMotionVectorsINTEL = 5738, + SpvOpSubgroupAvcMceGetInterDistortionsINTEL = 5739, + SpvOpSubgroupAvcMceGetBestInterDistortionsINTEL = 5740, + SpvOpSubgroupAvcMceGetInterMajorShapeINTEL = 5741, + SpvOpSubgroupAvcMceGetInterMinorShapeINTEL = 5742, + SpvOpSubgroupAvcMceGetInterDirectionsINTEL = 5743, + SpvOpSubgroupAvcMceGetInterMotionVectorCountINTEL = 5744, + SpvOpSubgroupAvcMceGetInterReferenceIdsINTEL = 5745, + SpvOpSubgroupAvcMceGetInterReferenceInterlacedFieldPolaritiesINTEL = 5746, + SpvOpSubgroupAvcImeInitializeINTEL = 5747, + SpvOpSubgroupAvcImeSetSingleReferenceINTEL = 5748, + SpvOpSubgroupAvcImeSetDualReferenceINTEL = 5749, + SpvOpSubgroupAvcImeRefWindowSizeINTEL = 5750, + SpvOpSubgroupAvcImeAdjustRefOffsetINTEL = 5751, + SpvOpSubgroupAvcImeConvertToMcePayloadINTEL = 5752, + SpvOpSubgroupAvcImeSetMaxMotionVectorCountINTEL = 5753, + SpvOpSubgroupAvcImeSetUnidirectionalMixDisableINTEL = 5754, + SpvOpSubgroupAvcImeSetEarlySearchTerminationThresholdINTEL = 5755, + SpvOpSubgroupAvcImeSetWeightedSadINTEL = 5756, + SpvOpSubgroupAvcImeEvaluateWithSingleReferenceINTEL = 5757, + SpvOpSubgroupAvcImeEvaluateWithDualReferenceINTEL = 5758, + SpvOpSubgroupAvcImeEvaluateWithSingleReferenceStreaminINTEL = 5759, + SpvOpSubgroupAvcImeEvaluateWithDualReferenceStreaminINTEL = 5760, + 
SpvOpSubgroupAvcImeEvaluateWithSingleReferenceStreamoutINTEL = 5761, + SpvOpSubgroupAvcImeEvaluateWithDualReferenceStreamoutINTEL = 5762, + SpvOpSubgroupAvcImeEvaluateWithSingleReferenceStreaminoutINTEL = 5763, + SpvOpSubgroupAvcImeEvaluateWithDualReferenceStreaminoutINTEL = 5764, + SpvOpSubgroupAvcImeConvertToMceResultINTEL = 5765, + SpvOpSubgroupAvcImeGetSingleReferenceStreaminINTEL = 5766, + SpvOpSubgroupAvcImeGetDualReferenceStreaminINTEL = 5767, + SpvOpSubgroupAvcImeStripSingleReferenceStreamoutINTEL = 5768, + SpvOpSubgroupAvcImeStripDualReferenceStreamoutINTEL = 5769, + SpvOpSubgroupAvcImeGetStreamoutSingleReferenceMajorShapeMotionVectorsINTEL = 5770, + SpvOpSubgroupAvcImeGetStreamoutSingleReferenceMajorShapeDistortionsINTEL = 5771, + SpvOpSubgroupAvcImeGetStreamoutSingleReferenceMajorShapeReferenceIdsINTEL = 5772, + SpvOpSubgroupAvcImeGetStreamoutDualReferenceMajorShapeMotionVectorsINTEL = 5773, + SpvOpSubgroupAvcImeGetStreamoutDualReferenceMajorShapeDistortionsINTEL = 5774, + SpvOpSubgroupAvcImeGetStreamoutDualReferenceMajorShapeReferenceIdsINTEL = 5775, + SpvOpSubgroupAvcImeGetBorderReachedINTEL = 5776, + SpvOpSubgroupAvcImeGetTruncatedSearchIndicationINTEL = 5777, + SpvOpSubgroupAvcImeGetUnidirectionalEarlySearchTerminationINTEL = 5778, + SpvOpSubgroupAvcImeGetWeightingPatternMinimumMotionVectorINTEL = 5779, + SpvOpSubgroupAvcImeGetWeightingPatternMinimumDistortionINTEL = 5780, + SpvOpSubgroupAvcFmeInitializeINTEL = 5781, + SpvOpSubgroupAvcBmeInitializeINTEL = 5782, + SpvOpSubgroupAvcRefConvertToMcePayloadINTEL = 5783, + SpvOpSubgroupAvcRefSetBidirectionalMixDisableINTEL = 5784, + SpvOpSubgroupAvcRefSetBilinearFilterEnableINTEL = 5785, + SpvOpSubgroupAvcRefEvaluateWithSingleReferenceINTEL = 5786, + SpvOpSubgroupAvcRefEvaluateWithDualReferenceINTEL = 5787, + SpvOpSubgroupAvcRefEvaluateWithMultiReferenceINTEL = 5788, + SpvOpSubgroupAvcRefEvaluateWithMultiReferenceInterlacedINTEL = 5789, + SpvOpSubgroupAvcRefConvertToMceResultINTEL = 5790, + 
SpvOpSubgroupAvcSicInitializeINTEL = 5791, + SpvOpSubgroupAvcSicConfigureSkcINTEL = 5792, + SpvOpSubgroupAvcSicConfigureIpeLumaINTEL = 5793, + SpvOpSubgroupAvcSicConfigureIpeLumaChromaINTEL = 5794, + SpvOpSubgroupAvcSicGetMotionVectorMaskINTEL = 5795, + SpvOpSubgroupAvcSicConvertToMcePayloadINTEL = 5796, + SpvOpSubgroupAvcSicSetIntraLumaShapePenaltyINTEL = 5797, + SpvOpSubgroupAvcSicSetIntraLumaModeCostFunctionINTEL = 5798, + SpvOpSubgroupAvcSicSetIntraChromaModeCostFunctionINTEL = 5799, + SpvOpSubgroupAvcSicSetBilinearFilterEnableINTEL = 5800, + SpvOpSubgroupAvcSicSetSkcForwardTransformEnableINTEL = 5801, + SpvOpSubgroupAvcSicSetBlockBasedRawSkipSadINTEL = 5802, + SpvOpSubgroupAvcSicEvaluateIpeINTEL = 5803, + SpvOpSubgroupAvcSicEvaluateWithSingleReferenceINTEL = 5804, + SpvOpSubgroupAvcSicEvaluateWithDualReferenceINTEL = 5805, + SpvOpSubgroupAvcSicEvaluateWithMultiReferenceINTEL = 5806, + SpvOpSubgroupAvcSicEvaluateWithMultiReferenceInterlacedINTEL = 5807, + SpvOpSubgroupAvcSicConvertToMceResultINTEL = 5808, + SpvOpSubgroupAvcSicGetIpeLumaShapeINTEL = 5809, + SpvOpSubgroupAvcSicGetBestIpeLumaDistortionINTEL = 5810, + SpvOpSubgroupAvcSicGetBestIpeChromaDistortionINTEL = 5811, + SpvOpSubgroupAvcSicGetPackedIpeLumaModesINTEL = 5812, + SpvOpSubgroupAvcSicGetIpeChromaModeINTEL = 5813, + SpvOpSubgroupAvcSicGetPackedSkcLumaCountThresholdINTEL = 5814, + SpvOpSubgroupAvcSicGetPackedSkcLumaSumThresholdINTEL = 5815, + SpvOpSubgroupAvcSicGetInterRawSadsINTEL = 5816, + SpvOpRayQueryGetRayTMinKHR = 6016, + SpvOpRayQueryGetRayFlagsKHR = 6017, + SpvOpRayQueryGetIntersectionTKHR = 6018, + SpvOpRayQueryGetIntersectionInstanceCustomIndexKHR = 6019, + SpvOpRayQueryGetIntersectionInstanceIdKHR = 6020, + SpvOpRayQueryGetIntersectionInstanceShaderBindingTableRecordOffsetKHR = 6021, + SpvOpRayQueryGetIntersectionGeometryIndexKHR = 6022, + SpvOpRayQueryGetIntersectionPrimitiveIndexKHR = 6023, + SpvOpRayQueryGetIntersectionBarycentricsKHR = 6024, + SpvOpRayQueryGetIntersectionFrontFaceKHR 
= 6025, + SpvOpRayQueryGetIntersectionCandidateAABBOpaqueKHR = 6026, + SpvOpRayQueryGetIntersectionObjectRayDirectionKHR = 6027, + SpvOpRayQueryGetIntersectionObjectRayOriginKHR = 6028, + SpvOpRayQueryGetWorldRayDirectionKHR = 6029, + SpvOpRayQueryGetWorldRayOriginKHR = 6030, + SpvOpRayQueryGetIntersectionObjectToWorldKHR = 6031, + SpvOpRayQueryGetIntersectionWorldToObjectKHR = 6032, + SpvOpMax = 0x7fffffff, +} SpvOp; + +#ifdef SPV_ENABLE_UTILITY_CODE +inline void SpvHasResultAndType(SpvOp opcode, bool *hasResult, bool *hasResultType) { + *hasResult = *hasResultType = false; + switch (opcode) { + default: /* unknown opcode */ break; + case SpvOpNop: *hasResult = false; *hasResultType = false; break; + case SpvOpUndef: *hasResult = true; *hasResultType = true; break; + case SpvOpSourceContinued: *hasResult = false; *hasResultType = false; break; + case SpvOpSource: *hasResult = false; *hasResultType = false; break; + case SpvOpSourceExtension: *hasResult = false; *hasResultType = false; break; + case SpvOpName: *hasResult = false; *hasResultType = false; break; + case SpvOpMemberName: *hasResult = false; *hasResultType = false; break; + case SpvOpString: *hasResult = true; *hasResultType = false; break; + case SpvOpLine: *hasResult = false; *hasResultType = false; break; + case SpvOpExtension: *hasResult = false; *hasResultType = false; break; + case SpvOpExtInstImport: *hasResult = true; *hasResultType = false; break; + case SpvOpExtInst: *hasResult = true; *hasResultType = true; break; + case SpvOpMemoryModel: *hasResult = false; *hasResultType = false; break; + case SpvOpEntryPoint: *hasResult = false; *hasResultType = false; break; + case SpvOpExecutionMode: *hasResult = false; *hasResultType = false; break; + case SpvOpCapability: *hasResult = false; *hasResultType = false; break; + case SpvOpTypeVoid: *hasResult = true; *hasResultType = false; break; + case SpvOpTypeBool: *hasResult = true; *hasResultType = false; break; + case SpvOpTypeInt: *hasResult = true; 
*hasResultType = false; break; + case SpvOpTypeFloat: *hasResult = true; *hasResultType = false; break; + case SpvOpTypeVector: *hasResult = true; *hasResultType = false; break; + case SpvOpTypeMatrix: *hasResult = true; *hasResultType = false; break; + case SpvOpTypeImage: *hasResult = true; *hasResultType = false; break; + case SpvOpTypeSampler: *hasResult = true; *hasResultType = false; break; + case SpvOpTypeSampledImage: *hasResult = true; *hasResultType = false; break; + case SpvOpTypeArray: *hasResult = true; *hasResultType = false; break; + case SpvOpTypeRuntimeArray: *hasResult = true; *hasResultType = false; break; + case SpvOpTypeStruct: *hasResult = true; *hasResultType = false; break; + case SpvOpTypeOpaque: *hasResult = true; *hasResultType = false; break; + case SpvOpTypePointer: *hasResult = true; *hasResultType = false; break; + case SpvOpTypeFunction: *hasResult = true; *hasResultType = false; break; + case SpvOpTypeEvent: *hasResult = true; *hasResultType = false; break; + case SpvOpTypeDeviceEvent: *hasResult = true; *hasResultType = false; break; + case SpvOpTypeReserveId: *hasResult = true; *hasResultType = false; break; + case SpvOpTypeQueue: *hasResult = true; *hasResultType = false; break; + case SpvOpTypePipe: *hasResult = true; *hasResultType = false; break; + case SpvOpTypeForwardPointer: *hasResult = false; *hasResultType = false; break; + case SpvOpConstantTrue: *hasResult = true; *hasResultType = true; break; + case SpvOpConstantFalse: *hasResult = true; *hasResultType = true; break; + case SpvOpConstant: *hasResult = true; *hasResultType = true; break; + case SpvOpConstantComposite: *hasResult = true; *hasResultType = true; break; + case SpvOpConstantSampler: *hasResult = true; *hasResultType = true; break; + case SpvOpConstantNull: *hasResult = true; *hasResultType = true; break; + case SpvOpSpecConstantTrue: *hasResult = true; *hasResultType = true; break; + case SpvOpSpecConstantFalse: *hasResult = true; *hasResultType = true; 
break; + case SpvOpSpecConstant: *hasResult = true; *hasResultType = true; break; + case SpvOpSpecConstantComposite: *hasResult = true; *hasResultType = true; break; + case SpvOpSpecConstantOp: *hasResult = true; *hasResultType = true; break; + case SpvOpFunction: *hasResult = true; *hasResultType = true; break; + case SpvOpFunctionParameter: *hasResult = true; *hasResultType = true; break; + case SpvOpFunctionEnd: *hasResult = false; *hasResultType = false; break; + case SpvOpFunctionCall: *hasResult = true; *hasResultType = true; break; + case SpvOpVariable: *hasResult = true; *hasResultType = true; break; + case SpvOpImageTexelPointer: *hasResult = true; *hasResultType = true; break; + case SpvOpLoad: *hasResult = true; *hasResultType = true; break; + case SpvOpStore: *hasResult = false; *hasResultType = false; break; + case SpvOpCopyMemory: *hasResult = false; *hasResultType = false; break; + case SpvOpCopyMemorySized: *hasResult = false; *hasResultType = false; break; + case SpvOpAccessChain: *hasResult = true; *hasResultType = true; break; + case SpvOpInBoundsAccessChain: *hasResult = true; *hasResultType = true; break; + case SpvOpPtrAccessChain: *hasResult = true; *hasResultType = true; break; + case SpvOpArrayLength: *hasResult = true; *hasResultType = true; break; + case SpvOpGenericPtrMemSemantics: *hasResult = true; *hasResultType = true; break; + case SpvOpInBoundsPtrAccessChain: *hasResult = true; *hasResultType = true; break; + case SpvOpDecorate: *hasResult = false; *hasResultType = false; break; + case SpvOpMemberDecorate: *hasResult = false; *hasResultType = false; break; + case SpvOpDecorationGroup: *hasResult = true; *hasResultType = false; break; + case SpvOpGroupDecorate: *hasResult = false; *hasResultType = false; break; + case SpvOpGroupMemberDecorate: *hasResult = false; *hasResultType = false; break; + case SpvOpVectorExtractDynamic: *hasResult = true; *hasResultType = true; break; + case SpvOpVectorInsertDynamic: *hasResult = true; 
*hasResultType = true; break; + case SpvOpVectorShuffle: *hasResult = true; *hasResultType = true; break; + case SpvOpCompositeConstruct: *hasResult = true; *hasResultType = true; break; + case SpvOpCompositeExtract: *hasResult = true; *hasResultType = true; break; + case SpvOpCompositeInsert: *hasResult = true; *hasResultType = true; break; + case SpvOpCopyObject: *hasResult = true; *hasResultType = true; break; + case SpvOpTranspose: *hasResult = true; *hasResultType = true; break; + case SpvOpSampledImage: *hasResult = true; *hasResultType = true; break; + case SpvOpImageSampleImplicitLod: *hasResult = true; *hasResultType = true; break; + case SpvOpImageSampleExplicitLod: *hasResult = true; *hasResultType = true; break; + case SpvOpImageSampleDrefImplicitLod: *hasResult = true; *hasResultType = true; break; + case SpvOpImageSampleDrefExplicitLod: *hasResult = true; *hasResultType = true; break; + case SpvOpImageSampleProjImplicitLod: *hasResult = true; *hasResultType = true; break; + case SpvOpImageSampleProjExplicitLod: *hasResult = true; *hasResultType = true; break; + case SpvOpImageSampleProjDrefImplicitLod: *hasResult = true; *hasResultType = true; break; + case SpvOpImageSampleProjDrefExplicitLod: *hasResult = true; *hasResultType = true; break; + case SpvOpImageFetch: *hasResult = true; *hasResultType = true; break; + case SpvOpImageGather: *hasResult = true; *hasResultType = true; break; + case SpvOpImageDrefGather: *hasResult = true; *hasResultType = true; break; + case SpvOpImageRead: *hasResult = true; *hasResultType = true; break; + case SpvOpImageWrite: *hasResult = false; *hasResultType = false; break; + case SpvOpImage: *hasResult = true; *hasResultType = true; break; + case SpvOpImageQueryFormat: *hasResult = true; *hasResultType = true; break; + case SpvOpImageQueryOrder: *hasResult = true; *hasResultType = true; break; + case SpvOpImageQuerySizeLod: *hasResult = true; *hasResultType = true; break; + case SpvOpImageQuerySize: *hasResult = true; 
*hasResultType = true; break; + case SpvOpImageQueryLod: *hasResult = true; *hasResultType = true; break; + case SpvOpImageQueryLevels: *hasResult = true; *hasResultType = true; break; + case SpvOpImageQuerySamples: *hasResult = true; *hasResultType = true; break; + case SpvOpConvertFToU: *hasResult = true; *hasResultType = true; break; + case SpvOpConvertFToS: *hasResult = true; *hasResultType = true; break; + case SpvOpConvertSToF: *hasResult = true; *hasResultType = true; break; + case SpvOpConvertUToF: *hasResult = true; *hasResultType = true; break; + case SpvOpUConvert: *hasResult = true; *hasResultType = true; break; + case SpvOpSConvert: *hasResult = true; *hasResultType = true; break; + case SpvOpFConvert: *hasResult = true; *hasResultType = true; break; + case SpvOpQuantizeToF16: *hasResult = true; *hasResultType = true; break; + case SpvOpConvertPtrToU: *hasResult = true; *hasResultType = true; break; + case SpvOpSatConvertSToU: *hasResult = true; *hasResultType = true; break; + case SpvOpSatConvertUToS: *hasResult = true; *hasResultType = true; break; + case SpvOpConvertUToPtr: *hasResult = true; *hasResultType = true; break; + case SpvOpPtrCastToGeneric: *hasResult = true; *hasResultType = true; break; + case SpvOpGenericCastToPtr: *hasResult = true; *hasResultType = true; break; + case SpvOpGenericCastToPtrExplicit: *hasResult = true; *hasResultType = true; break; + case SpvOpBitcast: *hasResult = true; *hasResultType = true; break; + case SpvOpSNegate: *hasResult = true; *hasResultType = true; break; + case SpvOpFNegate: *hasResult = true; *hasResultType = true; break; + case SpvOpIAdd: *hasResult = true; *hasResultType = true; break; + case SpvOpFAdd: *hasResult = true; *hasResultType = true; break; + case SpvOpISub: *hasResult = true; *hasResultType = true; break; + case SpvOpFSub: *hasResult = true; *hasResultType = true; break; + case SpvOpIMul: *hasResult = true; *hasResultType = true; break; + case SpvOpFMul: *hasResult = true; *hasResultType = 
true; break; + case SpvOpUDiv: *hasResult = true; *hasResultType = true; break; + case SpvOpSDiv: *hasResult = true; *hasResultType = true; break; + case SpvOpFDiv: *hasResult = true; *hasResultType = true; break; + case SpvOpUMod: *hasResult = true; *hasResultType = true; break; + case SpvOpSRem: *hasResult = true; *hasResultType = true; break; + case SpvOpSMod: *hasResult = true; *hasResultType = true; break; + case SpvOpFRem: *hasResult = true; *hasResultType = true; break; + case SpvOpFMod: *hasResult = true; *hasResultType = true; break; + case SpvOpVectorTimesScalar: *hasResult = true; *hasResultType = true; break; + case SpvOpMatrixTimesScalar: *hasResult = true; *hasResultType = true; break; + case SpvOpVectorTimesMatrix: *hasResult = true; *hasResultType = true; break; + case SpvOpMatrixTimesVector: *hasResult = true; *hasResultType = true; break; + case SpvOpMatrixTimesMatrix: *hasResult = true; *hasResultType = true; break; + case SpvOpOuterProduct: *hasResult = true; *hasResultType = true; break; + case SpvOpDot: *hasResult = true; *hasResultType = true; break; + case SpvOpIAddCarry: *hasResult = true; *hasResultType = true; break; + case SpvOpISubBorrow: *hasResult = true; *hasResultType = true; break; + case SpvOpUMulExtended: *hasResult = true; *hasResultType = true; break; + case SpvOpSMulExtended: *hasResult = true; *hasResultType = true; break; + case SpvOpAny: *hasResult = true; *hasResultType = true; break; + case SpvOpAll: *hasResult = true; *hasResultType = true; break; + case SpvOpIsNan: *hasResult = true; *hasResultType = true; break; + case SpvOpIsInf: *hasResult = true; *hasResultType = true; break; + case SpvOpIsFinite: *hasResult = true; *hasResultType = true; break; + case SpvOpIsNormal: *hasResult = true; *hasResultType = true; break; + case SpvOpSignBitSet: *hasResult = true; *hasResultType = true; break; + case SpvOpLessOrGreater: *hasResult = true; *hasResultType = true; break; + case SpvOpOrdered: *hasResult = true; *hasResultType 
= true; break; + case SpvOpUnordered: *hasResult = true; *hasResultType = true; break; + case SpvOpLogicalEqual: *hasResult = true; *hasResultType = true; break; + case SpvOpLogicalNotEqual: *hasResult = true; *hasResultType = true; break; + case SpvOpLogicalOr: *hasResult = true; *hasResultType = true; break; + case SpvOpLogicalAnd: *hasResult = true; *hasResultType = true; break; + case SpvOpLogicalNot: *hasResult = true; *hasResultType = true; break; + case SpvOpSelect: *hasResult = true; *hasResultType = true; break; + case SpvOpIEqual: *hasResult = true; *hasResultType = true; break; + case SpvOpINotEqual: *hasResult = true; *hasResultType = true; break; + case SpvOpUGreaterThan: *hasResult = true; *hasResultType = true; break; + case SpvOpSGreaterThan: *hasResult = true; *hasResultType = true; break; + case SpvOpUGreaterThanEqual: *hasResult = true; *hasResultType = true; break; + case SpvOpSGreaterThanEqual: *hasResult = true; *hasResultType = true; break; + case SpvOpULessThan: *hasResult = true; *hasResultType = true; break; + case SpvOpSLessThan: *hasResult = true; *hasResultType = true; break; + case SpvOpULessThanEqual: *hasResult = true; *hasResultType = true; break; + case SpvOpSLessThanEqual: *hasResult = true; *hasResultType = true; break; + case SpvOpFOrdEqual: *hasResult = true; *hasResultType = true; break; + case SpvOpFUnordEqual: *hasResult = true; *hasResultType = true; break; + case SpvOpFOrdNotEqual: *hasResult = true; *hasResultType = true; break; + case SpvOpFUnordNotEqual: *hasResult = true; *hasResultType = true; break; + case SpvOpFOrdLessThan: *hasResult = true; *hasResultType = true; break; + case SpvOpFUnordLessThan: *hasResult = true; *hasResultType = true; break; + case SpvOpFOrdGreaterThan: *hasResult = true; *hasResultType = true; break; + case SpvOpFUnordGreaterThan: *hasResult = true; *hasResultType = true; break; + case SpvOpFOrdLessThanEqual: *hasResult = true; *hasResultType = true; break; + case SpvOpFUnordLessThanEqual: 
*hasResult = true; *hasResultType = true; break; + case SpvOpFOrdGreaterThanEqual: *hasResult = true; *hasResultType = true; break; + case SpvOpFUnordGreaterThanEqual: *hasResult = true; *hasResultType = true; break; + case SpvOpShiftRightLogical: *hasResult = true; *hasResultType = true; break; + case SpvOpShiftRightArithmetic: *hasResult = true; *hasResultType = true; break; + case SpvOpShiftLeftLogical: *hasResult = true; *hasResultType = true; break; + case SpvOpBitwiseOr: *hasResult = true; *hasResultType = true; break; + case SpvOpBitwiseXor: *hasResult = true; *hasResultType = true; break; + case SpvOpBitwiseAnd: *hasResult = true; *hasResultType = true; break; + case SpvOpNot: *hasResult = true; *hasResultType = true; break; + case SpvOpBitFieldInsert: *hasResult = true; *hasResultType = true; break; + case SpvOpBitFieldSExtract: *hasResult = true; *hasResultType = true; break; + case SpvOpBitFieldUExtract: *hasResult = true; *hasResultType = true; break; + case SpvOpBitReverse: *hasResult = true; *hasResultType = true; break; + case SpvOpBitCount: *hasResult = true; *hasResultType = true; break; + case SpvOpDPdx: *hasResult = true; *hasResultType = true; break; + case SpvOpDPdy: *hasResult = true; *hasResultType = true; break; + case SpvOpFwidth: *hasResult = true; *hasResultType = true; break; + case SpvOpDPdxFine: *hasResult = true; *hasResultType = true; break; + case SpvOpDPdyFine: *hasResult = true; *hasResultType = true; break; + case SpvOpFwidthFine: *hasResult = true; *hasResultType = true; break; + case SpvOpDPdxCoarse: *hasResult = true; *hasResultType = true; break; + case SpvOpDPdyCoarse: *hasResult = true; *hasResultType = true; break; + case SpvOpFwidthCoarse: *hasResult = true; *hasResultType = true; break; + case SpvOpEmitVertex: *hasResult = false; *hasResultType = false; break; + case SpvOpEndPrimitive: *hasResult = false; *hasResultType = false; break; + case SpvOpEmitStreamVertex: *hasResult = false; *hasResultType = false; break; + 
case SpvOpEndStreamPrimitive: *hasResult = false; *hasResultType = false; break; + case SpvOpControlBarrier: *hasResult = false; *hasResultType = false; break; + case SpvOpMemoryBarrier: *hasResult = false; *hasResultType = false; break; + case SpvOpAtomicLoad: *hasResult = true; *hasResultType = true; break; + case SpvOpAtomicStore: *hasResult = false; *hasResultType = false; break; + case SpvOpAtomicExchange: *hasResult = true; *hasResultType = true; break; + case SpvOpAtomicCompareExchange: *hasResult = true; *hasResultType = true; break; + case SpvOpAtomicCompareExchangeWeak: *hasResult = true; *hasResultType = true; break; + case SpvOpAtomicIIncrement: *hasResult = true; *hasResultType = true; break; + case SpvOpAtomicIDecrement: *hasResult = true; *hasResultType = true; break; + case SpvOpAtomicIAdd: *hasResult = true; *hasResultType = true; break; + case SpvOpAtomicISub: *hasResult = true; *hasResultType = true; break; + case SpvOpAtomicSMin: *hasResult = true; *hasResultType = true; break; + case SpvOpAtomicUMin: *hasResult = true; *hasResultType = true; break; + case SpvOpAtomicSMax: *hasResult = true; *hasResultType = true; break; + case SpvOpAtomicUMax: *hasResult = true; *hasResultType = true; break; + case SpvOpAtomicAnd: *hasResult = true; *hasResultType = true; break; + case SpvOpAtomicOr: *hasResult = true; *hasResultType = true; break; + case SpvOpAtomicXor: *hasResult = true; *hasResultType = true; break; + case SpvOpPhi: *hasResult = true; *hasResultType = true; break; + case SpvOpLoopMerge: *hasResult = false; *hasResultType = false; break; + case SpvOpSelectionMerge: *hasResult = false; *hasResultType = false; break; + case SpvOpLabel: *hasResult = true; *hasResultType = false; break; + case SpvOpBranch: *hasResult = false; *hasResultType = false; break; + case SpvOpBranchConditional: *hasResult = false; *hasResultType = false; break; + case SpvOpSwitch: *hasResult = false; *hasResultType = false; break; + case SpvOpKill: *hasResult = false; 
*hasResultType = false; break; + case SpvOpReturn: *hasResult = false; *hasResultType = false; break; + case SpvOpReturnValue: *hasResult = false; *hasResultType = false; break; + case SpvOpUnreachable: *hasResult = false; *hasResultType = false; break; + case SpvOpLifetimeStart: *hasResult = false; *hasResultType = false; break; + case SpvOpLifetimeStop: *hasResult = false; *hasResultType = false; break; + case SpvOpGroupAsyncCopy: *hasResult = true; *hasResultType = true; break; + case SpvOpGroupWaitEvents: *hasResult = false; *hasResultType = false; break; + case SpvOpGroupAll: *hasResult = true; *hasResultType = true; break; + case SpvOpGroupAny: *hasResult = true; *hasResultType = true; break; + case SpvOpGroupBroadcast: *hasResult = true; *hasResultType = true; break; + case SpvOpGroupIAdd: *hasResult = true; *hasResultType = true; break; + case SpvOpGroupFAdd: *hasResult = true; *hasResultType = true; break; + case SpvOpGroupFMin: *hasResult = true; *hasResultType = true; break; + case SpvOpGroupUMin: *hasResult = true; *hasResultType = true; break; + case SpvOpGroupSMin: *hasResult = true; *hasResultType = true; break; + case SpvOpGroupFMax: *hasResult = true; *hasResultType = true; break; + case SpvOpGroupUMax: *hasResult = true; *hasResultType = true; break; + case SpvOpGroupSMax: *hasResult = true; *hasResultType = true; break; + case SpvOpReadPipe: *hasResult = true; *hasResultType = true; break; + case SpvOpWritePipe: *hasResult = true; *hasResultType = true; break; + case SpvOpReservedReadPipe: *hasResult = true; *hasResultType = true; break; + case SpvOpReservedWritePipe: *hasResult = true; *hasResultType = true; break; + case SpvOpReserveReadPipePackets: *hasResult = true; *hasResultType = true; break; + case SpvOpReserveWritePipePackets: *hasResult = true; *hasResultType = true; break; + case SpvOpCommitReadPipe: *hasResult = false; *hasResultType = false; break; + case SpvOpCommitWritePipe: *hasResult = false; *hasResultType = false; break; + case 
SpvOpIsValidReserveId: *hasResult = true; *hasResultType = true; break; + case SpvOpGetNumPipePackets: *hasResult = true; *hasResultType = true; break; + case SpvOpGetMaxPipePackets: *hasResult = true; *hasResultType = true; break; + case SpvOpGroupReserveReadPipePackets: *hasResult = true; *hasResultType = true; break; + case SpvOpGroupReserveWritePipePackets: *hasResult = true; *hasResultType = true; break; + case SpvOpGroupCommitReadPipe: *hasResult = false; *hasResultType = false; break; + case SpvOpGroupCommitWritePipe: *hasResult = false; *hasResultType = false; break; + case SpvOpEnqueueMarker: *hasResult = true; *hasResultType = true; break; + case SpvOpEnqueueKernel: *hasResult = true; *hasResultType = true; break; + case SpvOpGetKernelNDrangeSubGroupCount: *hasResult = true; *hasResultType = true; break; + case SpvOpGetKernelNDrangeMaxSubGroupSize: *hasResult = true; *hasResultType = true; break; + case SpvOpGetKernelWorkGroupSize: *hasResult = true; *hasResultType = true; break; + case SpvOpGetKernelPreferredWorkGroupSizeMultiple: *hasResult = true; *hasResultType = true; break; + case SpvOpRetainEvent: *hasResult = false; *hasResultType = false; break; + case SpvOpReleaseEvent: *hasResult = false; *hasResultType = false; break; + case SpvOpCreateUserEvent: *hasResult = true; *hasResultType = true; break; + case SpvOpIsValidEvent: *hasResult = true; *hasResultType = true; break; + case SpvOpSetUserEventStatus: *hasResult = false; *hasResultType = false; break; + case SpvOpCaptureEventProfilingInfo: *hasResult = false; *hasResultType = false; break; + case SpvOpGetDefaultQueue: *hasResult = true; *hasResultType = true; break; + case SpvOpBuildNDRange: *hasResult = true; *hasResultType = true; break; + case SpvOpImageSparseSampleImplicitLod: *hasResult = true; *hasResultType = true; break; + case SpvOpImageSparseSampleExplicitLod: *hasResult = true; *hasResultType = true; break; + case SpvOpImageSparseSampleDrefImplicitLod: *hasResult = true; 
*hasResultType = true; break; + case SpvOpImageSparseSampleDrefExplicitLod: *hasResult = true; *hasResultType = true; break; + case SpvOpImageSparseSampleProjImplicitLod: *hasResult = true; *hasResultType = true; break; + case SpvOpImageSparseSampleProjExplicitLod: *hasResult = true; *hasResultType = true; break; + case SpvOpImageSparseSampleProjDrefImplicitLod: *hasResult = true; *hasResultType = true; break; + case SpvOpImageSparseSampleProjDrefExplicitLod: *hasResult = true; *hasResultType = true; break; + case SpvOpImageSparseFetch: *hasResult = true; *hasResultType = true; break; + case SpvOpImageSparseGather: *hasResult = true; *hasResultType = true; break; + case SpvOpImageSparseDrefGather: *hasResult = true; *hasResultType = true; break; + case SpvOpImageSparseTexelsResident: *hasResult = true; *hasResultType = true; break; + case SpvOpNoLine: *hasResult = false; *hasResultType = false; break; + case SpvOpAtomicFlagTestAndSet: *hasResult = true; *hasResultType = true; break; + case SpvOpAtomicFlagClear: *hasResult = false; *hasResultType = false; break; + case SpvOpImageSparseRead: *hasResult = true; *hasResultType = true; break; + case SpvOpSizeOf: *hasResult = true; *hasResultType = true; break; + case SpvOpTypePipeStorage: *hasResult = true; *hasResultType = false; break; + case SpvOpConstantPipeStorage: *hasResult = true; *hasResultType = true; break; + case SpvOpCreatePipeFromPipeStorage: *hasResult = true; *hasResultType = true; break; + case SpvOpGetKernelLocalSizeForSubgroupCount: *hasResult = true; *hasResultType = true; break; + case SpvOpGetKernelMaxNumSubgroups: *hasResult = true; *hasResultType = true; break; + case SpvOpTypeNamedBarrier: *hasResult = true; *hasResultType = false; break; + case SpvOpNamedBarrierInitialize: *hasResult = true; *hasResultType = true; break; + case SpvOpMemoryNamedBarrier: *hasResult = false; *hasResultType = false; break; + case SpvOpModuleProcessed: *hasResult = false; *hasResultType = false; break; + case 
SpvOpExecutionModeId: *hasResult = false; *hasResultType = false; break; + case SpvOpDecorateId: *hasResult = false; *hasResultType = false; break; + case SpvOpGroupNonUniformElect: *hasResult = true; *hasResultType = true; break; + case SpvOpGroupNonUniformAll: *hasResult = true; *hasResultType = true; break; + case SpvOpGroupNonUniformAny: *hasResult = true; *hasResultType = true; break; + case SpvOpGroupNonUniformAllEqual: *hasResult = true; *hasResultType = true; break; + case SpvOpGroupNonUniformBroadcast: *hasResult = true; *hasResultType = true; break; + case SpvOpGroupNonUniformBroadcastFirst: *hasResult = true; *hasResultType = true; break; + case SpvOpGroupNonUniformBallot: *hasResult = true; *hasResultType = true; break; + case SpvOpGroupNonUniformInverseBallot: *hasResult = true; *hasResultType = true; break; + case SpvOpGroupNonUniformBallotBitExtract: *hasResult = true; *hasResultType = true; break; + case SpvOpGroupNonUniformBallotBitCount: *hasResult = true; *hasResultType = true; break; + case SpvOpGroupNonUniformBallotFindLSB: *hasResult = true; *hasResultType = true; break; + case SpvOpGroupNonUniformBallotFindMSB: *hasResult = true; *hasResultType = true; break; + case SpvOpGroupNonUniformShuffle: *hasResult = true; *hasResultType = true; break; + case SpvOpGroupNonUniformShuffleXor: *hasResult = true; *hasResultType = true; break; + case SpvOpGroupNonUniformShuffleUp: *hasResult = true; *hasResultType = true; break; + case SpvOpGroupNonUniformShuffleDown: *hasResult = true; *hasResultType = true; break; + case SpvOpGroupNonUniformIAdd: *hasResult = true; *hasResultType = true; break; + case SpvOpGroupNonUniformFAdd: *hasResult = true; *hasResultType = true; break; + case SpvOpGroupNonUniformIMul: *hasResult = true; *hasResultType = true; break; + case SpvOpGroupNonUniformFMul: *hasResult = true; *hasResultType = true; break; + case SpvOpGroupNonUniformSMin: *hasResult = true; *hasResultType = true; break; + case SpvOpGroupNonUniformUMin: 
*hasResult = true; *hasResultType = true; break; + case SpvOpGroupNonUniformFMin: *hasResult = true; *hasResultType = true; break; + case SpvOpGroupNonUniformSMax: *hasResult = true; *hasResultType = true; break; + case SpvOpGroupNonUniformUMax: *hasResult = true; *hasResultType = true; break; + case SpvOpGroupNonUniformFMax: *hasResult = true; *hasResultType = true; break; + case SpvOpGroupNonUniformBitwiseAnd: *hasResult = true; *hasResultType = true; break; + case SpvOpGroupNonUniformBitwiseOr: *hasResult = true; *hasResultType = true; break; + case SpvOpGroupNonUniformBitwiseXor: *hasResult = true; *hasResultType = true; break; + case SpvOpGroupNonUniformLogicalAnd: *hasResult = true; *hasResultType = true; break; + case SpvOpGroupNonUniformLogicalOr: *hasResult = true; *hasResultType = true; break; + case SpvOpGroupNonUniformLogicalXor: *hasResult = true; *hasResultType = true; break; + case SpvOpGroupNonUniformQuadBroadcast: *hasResult = true; *hasResultType = true; break; + case SpvOpGroupNonUniformQuadSwap: *hasResult = true; *hasResultType = true; break; + case SpvOpCopyLogical: *hasResult = true; *hasResultType = true; break; + case SpvOpPtrEqual: *hasResult = true; *hasResultType = true; break; + case SpvOpPtrNotEqual: *hasResult = true; *hasResultType = true; break; + case SpvOpPtrDiff: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupBallotKHR: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupFirstInvocationKHR: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAllKHR: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAnyKHR: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAllEqualKHR: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupReadInvocationKHR: *hasResult = true; *hasResultType = true; break; + case SpvOpTypeRayQueryProvisionalKHR: *hasResult = true; *hasResultType = false; break; + case SpvOpRayQueryInitializeKHR: *hasResult = 
false; *hasResultType = false; break; + case SpvOpRayQueryTerminateKHR: *hasResult = false; *hasResultType = false; break; + case SpvOpRayQueryGenerateIntersectionKHR: *hasResult = false; *hasResultType = false; break; + case SpvOpRayQueryConfirmIntersectionKHR: *hasResult = false; *hasResultType = false; break; + case SpvOpRayQueryProceedKHR: *hasResult = true; *hasResultType = true; break; + case SpvOpRayQueryGetIntersectionTypeKHR: *hasResult = true; *hasResultType = true; break; + case SpvOpGroupIAddNonUniformAMD: *hasResult = true; *hasResultType = true; break; + case SpvOpGroupFAddNonUniformAMD: *hasResult = true; *hasResultType = true; break; + case SpvOpGroupFMinNonUniformAMD: *hasResult = true; *hasResultType = true; break; + case SpvOpGroupUMinNonUniformAMD: *hasResult = true; *hasResultType = true; break; + case SpvOpGroupSMinNonUniformAMD: *hasResult = true; *hasResultType = true; break; + case SpvOpGroupFMaxNonUniformAMD: *hasResult = true; *hasResultType = true; break; + case SpvOpGroupUMaxNonUniformAMD: *hasResult = true; *hasResultType = true; break; + case SpvOpGroupSMaxNonUniformAMD: *hasResult = true; *hasResultType = true; break; + case SpvOpFragmentMaskFetchAMD: *hasResult = true; *hasResultType = true; break; + case SpvOpFragmentFetchAMD: *hasResult = true; *hasResultType = true; break; + case SpvOpReadClockKHR: *hasResult = true; *hasResultType = true; break; + case SpvOpImageSampleFootprintNV: *hasResult = true; *hasResultType = true; break; + case SpvOpGroupNonUniformPartitionNV: *hasResult = true; *hasResultType = true; break; + case SpvOpWritePackedPrimitiveIndices4x8NV: *hasResult = false; *hasResultType = false; break; + case SpvOpReportIntersectionNV: *hasResult = true; *hasResultType = true; break; + case SpvOpIgnoreIntersectionNV: *hasResult = false; *hasResultType = false; break; + case SpvOpTerminateRayNV: *hasResult = false; *hasResultType = false; break; + case SpvOpTraceNV: *hasResult = false; *hasResultType = false; break; + 
case SpvOpTypeAccelerationStructureNV: *hasResult = true; *hasResultType = false; break; + case SpvOpExecuteCallableNV: *hasResult = false; *hasResultType = false; break; + case SpvOpTypeCooperativeMatrixNV: *hasResult = true; *hasResultType = false; break; + case SpvOpCooperativeMatrixLoadNV: *hasResult = true; *hasResultType = true; break; + case SpvOpCooperativeMatrixStoreNV: *hasResult = false; *hasResultType = false; break; + case SpvOpCooperativeMatrixMulAddNV: *hasResult = true; *hasResultType = true; break; + case SpvOpCooperativeMatrixLengthNV: *hasResult = true; *hasResultType = true; break; + case SpvOpBeginInvocationInterlockEXT: *hasResult = false; *hasResultType = false; break; + case SpvOpEndInvocationInterlockEXT: *hasResult = false; *hasResultType = false; break; + case SpvOpDemoteToHelperInvocationEXT: *hasResult = false; *hasResultType = false; break; + case SpvOpIsHelperInvocationEXT: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupShuffleINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupShuffleDownINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupShuffleUpINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupShuffleXorINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupBlockReadINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupBlockWriteINTEL: *hasResult = false; *hasResultType = false; break; + case SpvOpSubgroupImageBlockReadINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupImageBlockWriteINTEL: *hasResult = false; *hasResultType = false; break; + case SpvOpSubgroupImageMediaBlockReadINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupImageMediaBlockWriteINTEL: *hasResult = false; *hasResultType = false; break; + case SpvOpUCountLeadingZerosINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpUCountTrailingZerosINTEL: *hasResult = true; 
*hasResultType = true; break; + case SpvOpAbsISubINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpAbsUSubINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpIAddSatINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpUAddSatINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpIAverageINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpUAverageINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpIAverageRoundedINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpUAverageRoundedINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpISubSatINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpUSubSatINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpIMul32x16INTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpUMul32x16INTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpDecorateString: *hasResult = false; *hasResultType = false; break; + case SpvOpMemberDecorateString: *hasResult = false; *hasResultType = false; break; + case SpvOpVmeImageINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpTypeVmeImageINTEL: *hasResult = true; *hasResultType = false; break; + case SpvOpTypeAvcImePayloadINTEL: *hasResult = true; *hasResultType = false; break; + case SpvOpTypeAvcRefPayloadINTEL: *hasResult = true; *hasResultType = false; break; + case SpvOpTypeAvcSicPayloadINTEL: *hasResult = true; *hasResultType = false; break; + case SpvOpTypeAvcMcePayloadINTEL: *hasResult = true; *hasResultType = false; break; + case SpvOpTypeAvcMceResultINTEL: *hasResult = true; *hasResultType = false; break; + case SpvOpTypeAvcImeResultINTEL: *hasResult = true; *hasResultType = false; break; + case SpvOpTypeAvcImeResultSingleReferenceStreamoutINTEL: *hasResult = true; *hasResultType = false; break; + case SpvOpTypeAvcImeResultDualReferenceStreamoutINTEL: *hasResult = true; *hasResultType = false; 
break; + case SpvOpTypeAvcImeSingleReferenceStreaminINTEL: *hasResult = true; *hasResultType = false; break; + case SpvOpTypeAvcImeDualReferenceStreaminINTEL: *hasResult = true; *hasResultType = false; break; + case SpvOpTypeAvcRefResultINTEL: *hasResult = true; *hasResultType = false; break; + case SpvOpTypeAvcSicResultINTEL: *hasResult = true; *hasResultType = false; break; + case SpvOpSubgroupAvcMceGetDefaultInterBaseMultiReferencePenaltyINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcMceSetInterBaseMultiReferencePenaltyINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcMceGetDefaultInterShapePenaltyINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcMceSetInterShapePenaltyINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcMceGetDefaultInterDirectionPenaltyINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcMceSetInterDirectionPenaltyINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcMceGetDefaultIntraLumaShapePenaltyINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcMceGetDefaultInterMotionVectorCostTableINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcMceGetDefaultHighPenaltyCostTableINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcMceGetDefaultMediumPenaltyCostTableINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcMceGetDefaultLowPenaltyCostTableINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcMceSetMotionVectorCostFunctionINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcMceGetDefaultIntraLumaModePenaltyINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcMceGetDefaultNonDcLumaIntraPenaltyINTEL: *hasResult = true; *hasResultType = true; break; + case 
SpvOpSubgroupAvcMceGetDefaultIntraChromaModeBasePenaltyINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcMceSetAcOnlyHaarINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcMceSetSourceInterlacedFieldPolarityINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcMceSetSingleReferenceInterlacedFieldPolarityINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcMceSetDualReferenceInterlacedFieldPolaritiesINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcMceConvertToImePayloadINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcMceConvertToImeResultINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcMceConvertToRefPayloadINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcMceConvertToRefResultINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcMceConvertToSicPayloadINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcMceConvertToSicResultINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcMceGetMotionVectorsINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcMceGetInterDistortionsINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcMceGetBestInterDistortionsINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcMceGetInterMajorShapeINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcMceGetInterMinorShapeINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcMceGetInterDirectionsINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcMceGetInterMotionVectorCountINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcMceGetInterReferenceIdsINTEL: *hasResult = true; *hasResultType = true; break; + case 
SpvOpSubgroupAvcMceGetInterReferenceInterlacedFieldPolaritiesINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcImeInitializeINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcImeSetSingleReferenceINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcImeSetDualReferenceINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcImeRefWindowSizeINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcImeAdjustRefOffsetINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcImeConvertToMcePayloadINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcImeSetMaxMotionVectorCountINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcImeSetUnidirectionalMixDisableINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcImeSetEarlySearchTerminationThresholdINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcImeSetWeightedSadINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcImeEvaluateWithSingleReferenceINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcImeEvaluateWithDualReferenceINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcImeEvaluateWithSingleReferenceStreaminINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcImeEvaluateWithDualReferenceStreaminINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcImeEvaluateWithSingleReferenceStreamoutINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcImeEvaluateWithDualReferenceStreamoutINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcImeEvaluateWithSingleReferenceStreaminoutINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcImeEvaluateWithDualReferenceStreaminoutINTEL: 
*hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcImeConvertToMceResultINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcImeGetSingleReferenceStreaminINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcImeGetDualReferenceStreaminINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcImeStripSingleReferenceStreamoutINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcImeStripDualReferenceStreamoutINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcImeGetStreamoutSingleReferenceMajorShapeMotionVectorsINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcImeGetStreamoutSingleReferenceMajorShapeDistortionsINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcImeGetStreamoutSingleReferenceMajorShapeReferenceIdsINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcImeGetStreamoutDualReferenceMajorShapeMotionVectorsINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcImeGetStreamoutDualReferenceMajorShapeDistortionsINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcImeGetStreamoutDualReferenceMajorShapeReferenceIdsINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcImeGetBorderReachedINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcImeGetTruncatedSearchIndicationINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcImeGetUnidirectionalEarlySearchTerminationINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcImeGetWeightingPatternMinimumMotionVectorINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcImeGetWeightingPatternMinimumDistortionINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcFmeInitializeINTEL: *hasResult = true; 
*hasResultType = true; break; + case SpvOpSubgroupAvcBmeInitializeINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcRefConvertToMcePayloadINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcRefSetBidirectionalMixDisableINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcRefSetBilinearFilterEnableINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcRefEvaluateWithSingleReferenceINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcRefEvaluateWithDualReferenceINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcRefEvaluateWithMultiReferenceINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcRefEvaluateWithMultiReferenceInterlacedINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcRefConvertToMceResultINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcSicInitializeINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcSicConfigureSkcINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcSicConfigureIpeLumaINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcSicConfigureIpeLumaChromaINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcSicGetMotionVectorMaskINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcSicConvertToMcePayloadINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcSicSetIntraLumaShapePenaltyINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcSicSetIntraLumaModeCostFunctionINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcSicSetIntraChromaModeCostFunctionINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcSicSetBilinearFilterEnableINTEL: *hasResult = true; *hasResultType = true; 
break; + case SpvOpSubgroupAvcSicSetSkcForwardTransformEnableINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcSicSetBlockBasedRawSkipSadINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcSicEvaluateIpeINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcSicEvaluateWithSingleReferenceINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcSicEvaluateWithDualReferenceINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcSicEvaluateWithMultiReferenceINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcSicEvaluateWithMultiReferenceInterlacedINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcSicConvertToMceResultINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcSicGetIpeLumaShapeINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcSicGetBestIpeLumaDistortionINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcSicGetBestIpeChromaDistortionINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcSicGetPackedIpeLumaModesINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcSicGetIpeChromaModeINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcSicGetPackedSkcLumaCountThresholdINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcSicGetPackedSkcLumaSumThresholdINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcSicGetInterRawSadsINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpRayQueryGetRayTMinKHR: *hasResult = true; *hasResultType = true; break; + case SpvOpRayQueryGetRayFlagsKHR: *hasResult = true; *hasResultType = true; break; + case SpvOpRayQueryGetIntersectionTKHR: *hasResult = true; *hasResultType = true; break; + case 
SpvOpRayQueryGetIntersectionInstanceCustomIndexKHR: *hasResult = true; *hasResultType = true; break; + case SpvOpRayQueryGetIntersectionInstanceIdKHR: *hasResult = true; *hasResultType = true; break; + case SpvOpRayQueryGetIntersectionInstanceShaderBindingTableRecordOffsetKHR: *hasResult = true; *hasResultType = true; break; + case SpvOpRayQueryGetIntersectionGeometryIndexKHR: *hasResult = true; *hasResultType = true; break; + case SpvOpRayQueryGetIntersectionPrimitiveIndexKHR: *hasResult = true; *hasResultType = true; break; + case SpvOpRayQueryGetIntersectionBarycentricsKHR: *hasResult = true; *hasResultType = true; break; + case SpvOpRayQueryGetIntersectionFrontFaceKHR: *hasResult = true; *hasResultType = true; break; + case SpvOpRayQueryGetIntersectionCandidateAABBOpaqueKHR: *hasResult = true; *hasResultType = true; break; + case SpvOpRayQueryGetIntersectionObjectRayDirectionKHR: *hasResult = true; *hasResultType = true; break; + case SpvOpRayQueryGetIntersectionObjectRayOriginKHR: *hasResult = true; *hasResultType = true; break; + case SpvOpRayQueryGetWorldRayDirectionKHR: *hasResult = true; *hasResultType = true; break; + case SpvOpRayQueryGetWorldRayOriginKHR: *hasResult = true; *hasResultType = true; break; + case SpvOpRayQueryGetIntersectionObjectToWorldKHR: *hasResult = true; *hasResultType = true; break; + case SpvOpRayQueryGetIntersectionWorldToObjectKHR: *hasResult = true; *hasResultType = true; break; + } +} +#endif /* SPV_ENABLE_UTILITY_CODE */ + +#endif + diff --git a/dep/spirv-cross/spirv.hpp b/dep/spirv-cross/spirv.hpp new file mode 100644 index 000000000..dae36cf20 --- /dev/null +++ b/dep/spirv-cross/spirv.hpp @@ -0,0 +1,2114 @@ +// Copyright (c) 2014-2020 The Khronos Group Inc. 
+// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and/or associated documentation files (the "Materials"), +// to deal in the Materials without restriction, including without limitation +// the rights to use, copy, modify, merge, publish, distribute, sublicense, +// and/or sell copies of the Materials, and to permit persons to whom the +// Materials are furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Materials. +// +// MODIFICATIONS TO THIS FILE MAY MEAN IT NO LONGER ACCURATELY REFLECTS KHRONOS +// STANDARDS. THE UNMODIFIED, NORMATIVE VERSIONS OF KHRONOS SPECIFICATIONS AND +// HEADER INFORMATION ARE LOCATED AT https://www.khronos.org/registry/ +// +// THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS +// OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +// THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +// FROM,OUT OF OR IN CONNECTION WITH THE MATERIALS OR THE USE OR OTHER DEALINGS +// IN THE MATERIALS. + +// This header is automatically generated by the same tool that creates +// the Binary Section of the SPIR-V specification. 
+ +// Enumeration tokens for SPIR-V, in various styles: +// C, C++, C++11, JSON, Lua, Python, C#, D +// +// - C will have tokens with a "Spv" prefix, e.g.: SpvSourceLanguageGLSL +// - C++ will have tokens in the "spv" name space, e.g.: spv::SourceLanguageGLSL +// - C++11 will use enum classes in the spv namespace, e.g.: spv::SourceLanguage::GLSL +// - Lua will use tables, e.g.: spv.SourceLanguage.GLSL +// - Python will use dictionaries, e.g.: spv['SourceLanguage']['GLSL'] +// - C# will use enum classes in the Specification class located in the "Spv" namespace, +// e.g.: Spv.Specification.SourceLanguage.GLSL +// - D will have tokens under the "spv" module, e.g: spv.SourceLanguage.GLSL +// +// Some tokens act like mask values, which can be OR'd together, +// while others are mutually exclusive. The mask-like ones have +// "Mask" in their name, and a parallel enum that has the shift +// amount (1 << x) for each corresponding enumerant. + +#ifndef spirv_HPP +#define spirv_HPP + +namespace spv { + +typedef unsigned int Id; + +#define SPV_VERSION 0x10500 +#define SPV_REVISION 3 + +static const unsigned int MagicNumber = 0x07230203; +static const unsigned int Version = 0x00010500; +static const unsigned int Revision = 3; +static const unsigned int OpCodeMask = 0xffff; +static const unsigned int WordCountShift = 16; + +enum SourceLanguage { + SourceLanguageUnknown = 0, + SourceLanguageESSL = 1, + SourceLanguageGLSL = 2, + SourceLanguageOpenCL_C = 3, + SourceLanguageOpenCL_CPP = 4, + SourceLanguageHLSL = 5, + SourceLanguageMax = 0x7fffffff, +}; + +enum ExecutionModel { + ExecutionModelVertex = 0, + ExecutionModelTessellationControl = 1, + ExecutionModelTessellationEvaluation = 2, + ExecutionModelGeometry = 3, + ExecutionModelFragment = 4, + ExecutionModelGLCompute = 5, + ExecutionModelKernel = 6, + ExecutionModelTaskNV = 5267, + ExecutionModelMeshNV = 5268, + ExecutionModelRayGenerationKHR = 5313, + ExecutionModelRayGenerationNV = 5313, + ExecutionModelIntersectionKHR = 
5314, + ExecutionModelIntersectionNV = 5314, + ExecutionModelAnyHitKHR = 5315, + ExecutionModelAnyHitNV = 5315, + ExecutionModelClosestHitKHR = 5316, + ExecutionModelClosestHitNV = 5316, + ExecutionModelMissKHR = 5317, + ExecutionModelMissNV = 5317, + ExecutionModelCallableKHR = 5318, + ExecutionModelCallableNV = 5318, + ExecutionModelMax = 0x7fffffff, +}; + +enum AddressingModel { + AddressingModelLogical = 0, + AddressingModelPhysical32 = 1, + AddressingModelPhysical64 = 2, + AddressingModelPhysicalStorageBuffer64 = 5348, + AddressingModelPhysicalStorageBuffer64EXT = 5348, + AddressingModelMax = 0x7fffffff, +}; + +enum MemoryModel { + MemoryModelSimple = 0, + MemoryModelGLSL450 = 1, + MemoryModelOpenCL = 2, + MemoryModelVulkan = 3, + MemoryModelVulkanKHR = 3, + MemoryModelMax = 0x7fffffff, +}; + +enum ExecutionMode { + ExecutionModeInvocations = 0, + ExecutionModeSpacingEqual = 1, + ExecutionModeSpacingFractionalEven = 2, + ExecutionModeSpacingFractionalOdd = 3, + ExecutionModeVertexOrderCw = 4, + ExecutionModeVertexOrderCcw = 5, + ExecutionModePixelCenterInteger = 6, + ExecutionModeOriginUpperLeft = 7, + ExecutionModeOriginLowerLeft = 8, + ExecutionModeEarlyFragmentTests = 9, + ExecutionModePointMode = 10, + ExecutionModeXfb = 11, + ExecutionModeDepthReplacing = 12, + ExecutionModeDepthGreater = 14, + ExecutionModeDepthLess = 15, + ExecutionModeDepthUnchanged = 16, + ExecutionModeLocalSize = 17, + ExecutionModeLocalSizeHint = 18, + ExecutionModeInputPoints = 19, + ExecutionModeInputLines = 20, + ExecutionModeInputLinesAdjacency = 21, + ExecutionModeTriangles = 22, + ExecutionModeInputTrianglesAdjacency = 23, + ExecutionModeQuads = 24, + ExecutionModeIsolines = 25, + ExecutionModeOutputVertices = 26, + ExecutionModeOutputPoints = 27, + ExecutionModeOutputLineStrip = 28, + ExecutionModeOutputTriangleStrip = 29, + ExecutionModeVecTypeHint = 30, + ExecutionModeContractionOff = 31, + ExecutionModeInitializer = 33, + ExecutionModeFinalizer = 34, + 
ExecutionModeSubgroupSize = 35, + ExecutionModeSubgroupsPerWorkgroup = 36, + ExecutionModeSubgroupsPerWorkgroupId = 37, + ExecutionModeLocalSizeId = 38, + ExecutionModeLocalSizeHintId = 39, + ExecutionModePostDepthCoverage = 4446, + ExecutionModeDenormPreserve = 4459, + ExecutionModeDenormFlushToZero = 4460, + ExecutionModeSignedZeroInfNanPreserve = 4461, + ExecutionModeRoundingModeRTE = 4462, + ExecutionModeRoundingModeRTZ = 4463, + ExecutionModeStencilRefReplacingEXT = 5027, + ExecutionModeOutputLinesNV = 5269, + ExecutionModeOutputPrimitivesNV = 5270, + ExecutionModeDerivativeGroupQuadsNV = 5289, + ExecutionModeDerivativeGroupLinearNV = 5290, + ExecutionModeOutputTrianglesNV = 5298, + ExecutionModePixelInterlockOrderedEXT = 5366, + ExecutionModePixelInterlockUnorderedEXT = 5367, + ExecutionModeSampleInterlockOrderedEXT = 5368, + ExecutionModeSampleInterlockUnorderedEXT = 5369, + ExecutionModeShadingRateInterlockOrderedEXT = 5370, + ExecutionModeShadingRateInterlockUnorderedEXT = 5371, + ExecutionModeMax = 0x7fffffff, +}; + +enum StorageClass { + StorageClassUniformConstant = 0, + StorageClassInput = 1, + StorageClassUniform = 2, + StorageClassOutput = 3, + StorageClassWorkgroup = 4, + StorageClassCrossWorkgroup = 5, + StorageClassPrivate = 6, + StorageClassFunction = 7, + StorageClassGeneric = 8, + StorageClassPushConstant = 9, + StorageClassAtomicCounter = 10, + StorageClassImage = 11, + StorageClassStorageBuffer = 12, + StorageClassCallableDataKHR = 5328, + StorageClassCallableDataNV = 5328, + StorageClassIncomingCallableDataKHR = 5329, + StorageClassIncomingCallableDataNV = 5329, + StorageClassRayPayloadKHR = 5338, + StorageClassRayPayloadNV = 5338, + StorageClassHitAttributeKHR = 5339, + StorageClassHitAttributeNV = 5339, + StorageClassIncomingRayPayloadKHR = 5342, + StorageClassIncomingRayPayloadNV = 5342, + StorageClassShaderRecordBufferKHR = 5343, + StorageClassShaderRecordBufferNV = 5343, + StorageClassPhysicalStorageBuffer = 5349, + 
StorageClassPhysicalStorageBufferEXT = 5349, + StorageClassMax = 0x7fffffff, +}; + +enum Dim { + Dim1D = 0, + Dim2D = 1, + Dim3D = 2, + DimCube = 3, + DimRect = 4, + DimBuffer = 5, + DimSubpassData = 6, + DimMax = 0x7fffffff, +}; + +enum SamplerAddressingMode { + SamplerAddressingModeNone = 0, + SamplerAddressingModeClampToEdge = 1, + SamplerAddressingModeClamp = 2, + SamplerAddressingModeRepeat = 3, + SamplerAddressingModeRepeatMirrored = 4, + SamplerAddressingModeMax = 0x7fffffff, +}; + +enum SamplerFilterMode { + SamplerFilterModeNearest = 0, + SamplerFilterModeLinear = 1, + SamplerFilterModeMax = 0x7fffffff, +}; + +enum ImageFormat { + ImageFormatUnknown = 0, + ImageFormatRgba32f = 1, + ImageFormatRgba16f = 2, + ImageFormatR32f = 3, + ImageFormatRgba8 = 4, + ImageFormatRgba8Snorm = 5, + ImageFormatRg32f = 6, + ImageFormatRg16f = 7, + ImageFormatR11fG11fB10f = 8, + ImageFormatR16f = 9, + ImageFormatRgba16 = 10, + ImageFormatRgb10A2 = 11, + ImageFormatRg16 = 12, + ImageFormatRg8 = 13, + ImageFormatR16 = 14, + ImageFormatR8 = 15, + ImageFormatRgba16Snorm = 16, + ImageFormatRg16Snorm = 17, + ImageFormatRg8Snorm = 18, + ImageFormatR16Snorm = 19, + ImageFormatR8Snorm = 20, + ImageFormatRgba32i = 21, + ImageFormatRgba16i = 22, + ImageFormatRgba8i = 23, + ImageFormatR32i = 24, + ImageFormatRg32i = 25, + ImageFormatRg16i = 26, + ImageFormatRg8i = 27, + ImageFormatR16i = 28, + ImageFormatR8i = 29, + ImageFormatRgba32ui = 30, + ImageFormatRgba16ui = 31, + ImageFormatRgba8ui = 32, + ImageFormatR32ui = 33, + ImageFormatRgb10a2ui = 34, + ImageFormatRg32ui = 35, + ImageFormatRg16ui = 36, + ImageFormatRg8ui = 37, + ImageFormatR16ui = 38, + ImageFormatR8ui = 39, + ImageFormatMax = 0x7fffffff, +}; + +enum ImageChannelOrder { + ImageChannelOrderR = 0, + ImageChannelOrderA = 1, + ImageChannelOrderRG = 2, + ImageChannelOrderRA = 3, + ImageChannelOrderRGB = 4, + ImageChannelOrderRGBA = 5, + ImageChannelOrderBGRA = 6, + ImageChannelOrderARGB = 7, + ImageChannelOrderIntensity = 8, + 
ImageChannelOrderLuminance = 9, + ImageChannelOrderRx = 10, + ImageChannelOrderRGx = 11, + ImageChannelOrderRGBx = 12, + ImageChannelOrderDepth = 13, + ImageChannelOrderDepthStencil = 14, + ImageChannelOrdersRGB = 15, + ImageChannelOrdersRGBx = 16, + ImageChannelOrdersRGBA = 17, + ImageChannelOrdersBGRA = 18, + ImageChannelOrderABGR = 19, + ImageChannelOrderMax = 0x7fffffff, +}; + +enum ImageChannelDataType { + ImageChannelDataTypeSnormInt8 = 0, + ImageChannelDataTypeSnormInt16 = 1, + ImageChannelDataTypeUnormInt8 = 2, + ImageChannelDataTypeUnormInt16 = 3, + ImageChannelDataTypeUnormShort565 = 4, + ImageChannelDataTypeUnormShort555 = 5, + ImageChannelDataTypeUnormInt101010 = 6, + ImageChannelDataTypeSignedInt8 = 7, + ImageChannelDataTypeSignedInt16 = 8, + ImageChannelDataTypeSignedInt32 = 9, + ImageChannelDataTypeUnsignedInt8 = 10, + ImageChannelDataTypeUnsignedInt16 = 11, + ImageChannelDataTypeUnsignedInt32 = 12, + ImageChannelDataTypeHalfFloat = 13, + ImageChannelDataTypeFloat = 14, + ImageChannelDataTypeUnormInt24 = 15, + ImageChannelDataTypeUnormInt101010_2 = 16, + ImageChannelDataTypeMax = 0x7fffffff, +}; + +enum ImageOperandsShift { + ImageOperandsBiasShift = 0, + ImageOperandsLodShift = 1, + ImageOperandsGradShift = 2, + ImageOperandsConstOffsetShift = 3, + ImageOperandsOffsetShift = 4, + ImageOperandsConstOffsetsShift = 5, + ImageOperandsSampleShift = 6, + ImageOperandsMinLodShift = 7, + ImageOperandsMakeTexelAvailableShift = 8, + ImageOperandsMakeTexelAvailableKHRShift = 8, + ImageOperandsMakeTexelVisibleShift = 9, + ImageOperandsMakeTexelVisibleKHRShift = 9, + ImageOperandsNonPrivateTexelShift = 10, + ImageOperandsNonPrivateTexelKHRShift = 10, + ImageOperandsVolatileTexelShift = 11, + ImageOperandsVolatileTexelKHRShift = 11, + ImageOperandsSignExtendShift = 12, + ImageOperandsZeroExtendShift = 13, + ImageOperandsMax = 0x7fffffff, +}; + +enum ImageOperandsMask { + ImageOperandsMaskNone = 0, + ImageOperandsBiasMask = 0x00000001, + ImageOperandsLodMask = 
0x00000002, + ImageOperandsGradMask = 0x00000004, + ImageOperandsConstOffsetMask = 0x00000008, + ImageOperandsOffsetMask = 0x00000010, + ImageOperandsConstOffsetsMask = 0x00000020, + ImageOperandsSampleMask = 0x00000040, + ImageOperandsMinLodMask = 0x00000080, + ImageOperandsMakeTexelAvailableMask = 0x00000100, + ImageOperandsMakeTexelAvailableKHRMask = 0x00000100, + ImageOperandsMakeTexelVisibleMask = 0x00000200, + ImageOperandsMakeTexelVisibleKHRMask = 0x00000200, + ImageOperandsNonPrivateTexelMask = 0x00000400, + ImageOperandsNonPrivateTexelKHRMask = 0x00000400, + ImageOperandsVolatileTexelMask = 0x00000800, + ImageOperandsVolatileTexelKHRMask = 0x00000800, + ImageOperandsSignExtendMask = 0x00001000, + ImageOperandsZeroExtendMask = 0x00002000, +}; + +enum FPFastMathModeShift { + FPFastMathModeNotNaNShift = 0, + FPFastMathModeNotInfShift = 1, + FPFastMathModeNSZShift = 2, + FPFastMathModeAllowRecipShift = 3, + FPFastMathModeFastShift = 4, + FPFastMathModeMax = 0x7fffffff, +}; + +enum FPFastMathModeMask { + FPFastMathModeMaskNone = 0, + FPFastMathModeNotNaNMask = 0x00000001, + FPFastMathModeNotInfMask = 0x00000002, + FPFastMathModeNSZMask = 0x00000004, + FPFastMathModeAllowRecipMask = 0x00000008, + FPFastMathModeFastMask = 0x00000010, +}; + +enum FPRoundingMode { + FPRoundingModeRTE = 0, + FPRoundingModeRTZ = 1, + FPRoundingModeRTP = 2, + FPRoundingModeRTN = 3, + FPRoundingModeMax = 0x7fffffff, +}; + +enum LinkageType { + LinkageTypeExport = 0, + LinkageTypeImport = 1, + LinkageTypeMax = 0x7fffffff, +}; + +enum AccessQualifier { + AccessQualifierReadOnly = 0, + AccessQualifierWriteOnly = 1, + AccessQualifierReadWrite = 2, + AccessQualifierMax = 0x7fffffff, +}; + +enum FunctionParameterAttribute { + FunctionParameterAttributeZext = 0, + FunctionParameterAttributeSext = 1, + FunctionParameterAttributeByVal = 2, + FunctionParameterAttributeSret = 3, + FunctionParameterAttributeNoAlias = 4, + FunctionParameterAttributeNoCapture = 5, + FunctionParameterAttributeNoWrite 
= 6, + FunctionParameterAttributeNoReadWrite = 7, + FunctionParameterAttributeMax = 0x7fffffff, +}; + +enum Decoration { + DecorationRelaxedPrecision = 0, + DecorationSpecId = 1, + DecorationBlock = 2, + DecorationBufferBlock = 3, + DecorationRowMajor = 4, + DecorationColMajor = 5, + DecorationArrayStride = 6, + DecorationMatrixStride = 7, + DecorationGLSLShared = 8, + DecorationGLSLPacked = 9, + DecorationCPacked = 10, + DecorationBuiltIn = 11, + DecorationNoPerspective = 13, + DecorationFlat = 14, + DecorationPatch = 15, + DecorationCentroid = 16, + DecorationSample = 17, + DecorationInvariant = 18, + DecorationRestrict = 19, + DecorationAliased = 20, + DecorationVolatile = 21, + DecorationConstant = 22, + DecorationCoherent = 23, + DecorationNonWritable = 24, + DecorationNonReadable = 25, + DecorationUniform = 26, + DecorationUniformId = 27, + DecorationSaturatedConversion = 28, + DecorationStream = 29, + DecorationLocation = 30, + DecorationComponent = 31, + DecorationIndex = 32, + DecorationBinding = 33, + DecorationDescriptorSet = 34, + DecorationOffset = 35, + DecorationXfbBuffer = 36, + DecorationXfbStride = 37, + DecorationFuncParamAttr = 38, + DecorationFPRoundingMode = 39, + DecorationFPFastMathMode = 40, + DecorationLinkageAttributes = 41, + DecorationNoContraction = 42, + DecorationInputAttachmentIndex = 43, + DecorationAlignment = 44, + DecorationMaxByteOffset = 45, + DecorationAlignmentId = 46, + DecorationMaxByteOffsetId = 47, + DecorationNoSignedWrap = 4469, + DecorationNoUnsignedWrap = 4470, + DecorationExplicitInterpAMD = 4999, + DecorationOverrideCoverageNV = 5248, + DecorationPassthroughNV = 5250, + DecorationViewportRelativeNV = 5252, + DecorationSecondaryViewportRelativeNV = 5256, + DecorationPerPrimitiveNV = 5271, + DecorationPerViewNV = 5272, + DecorationPerTaskNV = 5273, + DecorationPerVertexNV = 5285, + DecorationNonUniform = 5300, + DecorationNonUniformEXT = 5300, + DecorationRestrictPointer = 5355, + DecorationRestrictPointerEXT = 5355, 
+ DecorationAliasedPointer = 5356, + DecorationAliasedPointerEXT = 5356, + DecorationCounterBuffer = 5634, + DecorationHlslCounterBufferGOOGLE = 5634, + DecorationHlslSemanticGOOGLE = 5635, + DecorationUserSemantic = 5635, + DecorationUserTypeGOOGLE = 5636, + DecorationMax = 0x7fffffff, +}; + +enum BuiltIn { + BuiltInPosition = 0, + BuiltInPointSize = 1, + BuiltInClipDistance = 3, + BuiltInCullDistance = 4, + BuiltInVertexId = 5, + BuiltInInstanceId = 6, + BuiltInPrimitiveId = 7, + BuiltInInvocationId = 8, + BuiltInLayer = 9, + BuiltInViewportIndex = 10, + BuiltInTessLevelOuter = 11, + BuiltInTessLevelInner = 12, + BuiltInTessCoord = 13, + BuiltInPatchVertices = 14, + BuiltInFragCoord = 15, + BuiltInPointCoord = 16, + BuiltInFrontFacing = 17, + BuiltInSampleId = 18, + BuiltInSamplePosition = 19, + BuiltInSampleMask = 20, + BuiltInFragDepth = 22, + BuiltInHelperInvocation = 23, + BuiltInNumWorkgroups = 24, + BuiltInWorkgroupSize = 25, + BuiltInWorkgroupId = 26, + BuiltInLocalInvocationId = 27, + BuiltInGlobalInvocationId = 28, + BuiltInLocalInvocationIndex = 29, + BuiltInWorkDim = 30, + BuiltInGlobalSize = 31, + BuiltInEnqueuedWorkgroupSize = 32, + BuiltInGlobalOffset = 33, + BuiltInGlobalLinearId = 34, + BuiltInSubgroupSize = 36, + BuiltInSubgroupMaxSize = 37, + BuiltInNumSubgroups = 38, + BuiltInNumEnqueuedSubgroups = 39, + BuiltInSubgroupId = 40, + BuiltInSubgroupLocalInvocationId = 41, + BuiltInVertexIndex = 42, + BuiltInInstanceIndex = 43, + BuiltInSubgroupEqMask = 4416, + BuiltInSubgroupEqMaskKHR = 4416, + BuiltInSubgroupGeMask = 4417, + BuiltInSubgroupGeMaskKHR = 4417, + BuiltInSubgroupGtMask = 4418, + BuiltInSubgroupGtMaskKHR = 4418, + BuiltInSubgroupLeMask = 4419, + BuiltInSubgroupLeMaskKHR = 4419, + BuiltInSubgroupLtMask = 4420, + BuiltInSubgroupLtMaskKHR = 4420, + BuiltInBaseVertex = 4424, + BuiltInBaseInstance = 4425, + BuiltInDrawIndex = 4426, + BuiltInDeviceIndex = 4438, + BuiltInViewIndex = 4440, + BuiltInBaryCoordNoPerspAMD = 4992, + 
BuiltInBaryCoordNoPerspCentroidAMD = 4993, + BuiltInBaryCoordNoPerspSampleAMD = 4994, + BuiltInBaryCoordSmoothAMD = 4995, + BuiltInBaryCoordSmoothCentroidAMD = 4996, + BuiltInBaryCoordSmoothSampleAMD = 4997, + BuiltInBaryCoordPullModelAMD = 4998, + BuiltInFragStencilRefEXT = 5014, + BuiltInViewportMaskNV = 5253, + BuiltInSecondaryPositionNV = 5257, + BuiltInSecondaryViewportMaskNV = 5258, + BuiltInPositionPerViewNV = 5261, + BuiltInViewportMaskPerViewNV = 5262, + BuiltInFullyCoveredEXT = 5264, + BuiltInTaskCountNV = 5274, + BuiltInPrimitiveCountNV = 5275, + BuiltInPrimitiveIndicesNV = 5276, + BuiltInClipDistancePerViewNV = 5277, + BuiltInCullDistancePerViewNV = 5278, + BuiltInLayerPerViewNV = 5279, + BuiltInMeshViewCountNV = 5280, + BuiltInMeshViewIndicesNV = 5281, + BuiltInBaryCoordNV = 5286, + BuiltInBaryCoordNoPerspNV = 5287, + BuiltInFragSizeEXT = 5292, + BuiltInFragmentSizeNV = 5292, + BuiltInFragInvocationCountEXT = 5293, + BuiltInInvocationsPerPixelNV = 5293, + BuiltInLaunchIdKHR = 5319, + BuiltInLaunchIdNV = 5319, + BuiltInLaunchSizeKHR = 5320, + BuiltInLaunchSizeNV = 5320, + BuiltInWorldRayOriginKHR = 5321, + BuiltInWorldRayOriginNV = 5321, + BuiltInWorldRayDirectionKHR = 5322, + BuiltInWorldRayDirectionNV = 5322, + BuiltInObjectRayOriginKHR = 5323, + BuiltInObjectRayOriginNV = 5323, + BuiltInObjectRayDirectionKHR = 5324, + BuiltInObjectRayDirectionNV = 5324, + BuiltInRayTminKHR = 5325, + BuiltInRayTminNV = 5325, + BuiltInRayTmaxKHR = 5326, + BuiltInRayTmaxNV = 5326, + BuiltInInstanceCustomIndexKHR = 5327, + BuiltInInstanceCustomIndexNV = 5327, + BuiltInObjectToWorldKHR = 5330, + BuiltInObjectToWorldNV = 5330, + BuiltInWorldToObjectKHR = 5331, + BuiltInWorldToObjectNV = 5331, + BuiltInHitTKHR = 5332, + BuiltInHitTNV = 5332, + BuiltInHitKindKHR = 5333, + BuiltInHitKindNV = 5333, + BuiltInIncomingRayFlagsKHR = 5351, + BuiltInIncomingRayFlagsNV = 5351, + BuiltInRayGeometryIndexKHR = 5352, + BuiltInWarpsPerSMNV = 5374, + BuiltInSMCountNV = 5375, + 
BuiltInWarpIDNV = 5376, + BuiltInSMIDNV = 5377, + BuiltInMax = 0x7fffffff, +}; + +enum SelectionControlShift { + SelectionControlFlattenShift = 0, + SelectionControlDontFlattenShift = 1, + SelectionControlMax = 0x7fffffff, +}; + +enum SelectionControlMask { + SelectionControlMaskNone = 0, + SelectionControlFlattenMask = 0x00000001, + SelectionControlDontFlattenMask = 0x00000002, +}; + +enum LoopControlShift { + LoopControlUnrollShift = 0, + LoopControlDontUnrollShift = 1, + LoopControlDependencyInfiniteShift = 2, + LoopControlDependencyLengthShift = 3, + LoopControlMinIterationsShift = 4, + LoopControlMaxIterationsShift = 5, + LoopControlIterationMultipleShift = 6, + LoopControlPeelCountShift = 7, + LoopControlPartialCountShift = 8, + LoopControlMax = 0x7fffffff, +}; + +enum LoopControlMask { + LoopControlMaskNone = 0, + LoopControlUnrollMask = 0x00000001, + LoopControlDontUnrollMask = 0x00000002, + LoopControlDependencyInfiniteMask = 0x00000004, + LoopControlDependencyLengthMask = 0x00000008, + LoopControlMinIterationsMask = 0x00000010, + LoopControlMaxIterationsMask = 0x00000020, + LoopControlIterationMultipleMask = 0x00000040, + LoopControlPeelCountMask = 0x00000080, + LoopControlPartialCountMask = 0x00000100, +}; + +enum FunctionControlShift { + FunctionControlInlineShift = 0, + FunctionControlDontInlineShift = 1, + FunctionControlPureShift = 2, + FunctionControlConstShift = 3, + FunctionControlMax = 0x7fffffff, +}; + +enum FunctionControlMask { + FunctionControlMaskNone = 0, + FunctionControlInlineMask = 0x00000001, + FunctionControlDontInlineMask = 0x00000002, + FunctionControlPureMask = 0x00000004, + FunctionControlConstMask = 0x00000008, +}; + +enum MemorySemanticsShift { + MemorySemanticsAcquireShift = 1, + MemorySemanticsReleaseShift = 2, + MemorySemanticsAcquireReleaseShift = 3, + MemorySemanticsSequentiallyConsistentShift = 4, + MemorySemanticsUniformMemoryShift = 6, + MemorySemanticsSubgroupMemoryShift = 7, + MemorySemanticsWorkgroupMemoryShift = 8, + 
MemorySemanticsCrossWorkgroupMemoryShift = 9, + MemorySemanticsAtomicCounterMemoryShift = 10, + MemorySemanticsImageMemoryShift = 11, + MemorySemanticsOutputMemoryShift = 12, + MemorySemanticsOutputMemoryKHRShift = 12, + MemorySemanticsMakeAvailableShift = 13, + MemorySemanticsMakeAvailableKHRShift = 13, + MemorySemanticsMakeVisibleShift = 14, + MemorySemanticsMakeVisibleKHRShift = 14, + MemorySemanticsVolatileShift = 15, + MemorySemanticsMax = 0x7fffffff, +}; + +enum MemorySemanticsMask { + MemorySemanticsMaskNone = 0, + MemorySemanticsAcquireMask = 0x00000002, + MemorySemanticsReleaseMask = 0x00000004, + MemorySemanticsAcquireReleaseMask = 0x00000008, + MemorySemanticsSequentiallyConsistentMask = 0x00000010, + MemorySemanticsUniformMemoryMask = 0x00000040, + MemorySemanticsSubgroupMemoryMask = 0x00000080, + MemorySemanticsWorkgroupMemoryMask = 0x00000100, + MemorySemanticsCrossWorkgroupMemoryMask = 0x00000200, + MemorySemanticsAtomicCounterMemoryMask = 0x00000400, + MemorySemanticsImageMemoryMask = 0x00000800, + MemorySemanticsOutputMemoryMask = 0x00001000, + MemorySemanticsOutputMemoryKHRMask = 0x00001000, + MemorySemanticsMakeAvailableMask = 0x00002000, + MemorySemanticsMakeAvailableKHRMask = 0x00002000, + MemorySemanticsMakeVisibleMask = 0x00004000, + MemorySemanticsMakeVisibleKHRMask = 0x00004000, + MemorySemanticsVolatileMask = 0x00008000, +}; + +enum MemoryAccessShift { + MemoryAccessVolatileShift = 0, + MemoryAccessAlignedShift = 1, + MemoryAccessNontemporalShift = 2, + MemoryAccessMakePointerAvailableShift = 3, + MemoryAccessMakePointerAvailableKHRShift = 3, + MemoryAccessMakePointerVisibleShift = 4, + MemoryAccessMakePointerVisibleKHRShift = 4, + MemoryAccessNonPrivatePointerShift = 5, + MemoryAccessNonPrivatePointerKHRShift = 5, + MemoryAccessMax = 0x7fffffff, +}; + +enum MemoryAccessMask { + MemoryAccessMaskNone = 0, + MemoryAccessVolatileMask = 0x00000001, + MemoryAccessAlignedMask = 0x00000002, + MemoryAccessNontemporalMask = 0x00000004, + 
MemoryAccessMakePointerAvailableMask = 0x00000008, + MemoryAccessMakePointerAvailableKHRMask = 0x00000008, + MemoryAccessMakePointerVisibleMask = 0x00000010, + MemoryAccessMakePointerVisibleKHRMask = 0x00000010, + MemoryAccessNonPrivatePointerMask = 0x00000020, + MemoryAccessNonPrivatePointerKHRMask = 0x00000020, +}; + +enum Scope { + ScopeCrossDevice = 0, + ScopeDevice = 1, + ScopeWorkgroup = 2, + ScopeSubgroup = 3, + ScopeInvocation = 4, + ScopeQueueFamily = 5, + ScopeQueueFamilyKHR = 5, + ScopeShaderCallKHR = 6, + ScopeMax = 0x7fffffff, +}; + +enum GroupOperation { + GroupOperationReduce = 0, + GroupOperationInclusiveScan = 1, + GroupOperationExclusiveScan = 2, + GroupOperationClusteredReduce = 3, + GroupOperationPartitionedReduceNV = 6, + GroupOperationPartitionedInclusiveScanNV = 7, + GroupOperationPartitionedExclusiveScanNV = 8, + GroupOperationMax = 0x7fffffff, +}; + +enum KernelEnqueueFlags { + KernelEnqueueFlagsNoWait = 0, + KernelEnqueueFlagsWaitKernel = 1, + KernelEnqueueFlagsWaitWorkGroup = 2, + KernelEnqueueFlagsMax = 0x7fffffff, +}; + +enum KernelProfilingInfoShift { + KernelProfilingInfoCmdExecTimeShift = 0, + KernelProfilingInfoMax = 0x7fffffff, +}; + +enum KernelProfilingInfoMask { + KernelProfilingInfoMaskNone = 0, + KernelProfilingInfoCmdExecTimeMask = 0x00000001, +}; + +enum Capability { + CapabilityMatrix = 0, + CapabilityShader = 1, + CapabilityGeometry = 2, + CapabilityTessellation = 3, + CapabilityAddresses = 4, + CapabilityLinkage = 5, + CapabilityKernel = 6, + CapabilityVector16 = 7, + CapabilityFloat16Buffer = 8, + CapabilityFloat16 = 9, + CapabilityFloat64 = 10, + CapabilityInt64 = 11, + CapabilityInt64Atomics = 12, + CapabilityImageBasic = 13, + CapabilityImageReadWrite = 14, + CapabilityImageMipmap = 15, + CapabilityPipes = 17, + CapabilityGroups = 18, + CapabilityDeviceEnqueue = 19, + CapabilityLiteralSampler = 20, + CapabilityAtomicStorage = 21, + CapabilityInt16 = 22, + CapabilityTessellationPointSize = 23, + 
CapabilityGeometryPointSize = 24, + CapabilityImageGatherExtended = 25, + CapabilityStorageImageMultisample = 27, + CapabilityUniformBufferArrayDynamicIndexing = 28, + CapabilitySampledImageArrayDynamicIndexing = 29, + CapabilityStorageBufferArrayDynamicIndexing = 30, + CapabilityStorageImageArrayDynamicIndexing = 31, + CapabilityClipDistance = 32, + CapabilityCullDistance = 33, + CapabilityImageCubeArray = 34, + CapabilitySampleRateShading = 35, + CapabilityImageRect = 36, + CapabilitySampledRect = 37, + CapabilityGenericPointer = 38, + CapabilityInt8 = 39, + CapabilityInputAttachment = 40, + CapabilitySparseResidency = 41, + CapabilityMinLod = 42, + CapabilitySampled1D = 43, + CapabilityImage1D = 44, + CapabilitySampledCubeArray = 45, + CapabilitySampledBuffer = 46, + CapabilityImageBuffer = 47, + CapabilityImageMSArray = 48, + CapabilityStorageImageExtendedFormats = 49, + CapabilityImageQuery = 50, + CapabilityDerivativeControl = 51, + CapabilityInterpolationFunction = 52, + CapabilityTransformFeedback = 53, + CapabilityGeometryStreams = 54, + CapabilityStorageImageReadWithoutFormat = 55, + CapabilityStorageImageWriteWithoutFormat = 56, + CapabilityMultiViewport = 57, + CapabilitySubgroupDispatch = 58, + CapabilityNamedBarrier = 59, + CapabilityPipeStorage = 60, + CapabilityGroupNonUniform = 61, + CapabilityGroupNonUniformVote = 62, + CapabilityGroupNonUniformArithmetic = 63, + CapabilityGroupNonUniformBallot = 64, + CapabilityGroupNonUniformShuffle = 65, + CapabilityGroupNonUniformShuffleRelative = 66, + CapabilityGroupNonUniformClustered = 67, + CapabilityGroupNonUniformQuad = 68, + CapabilityShaderLayer = 69, + CapabilityShaderViewportIndex = 70, + CapabilitySubgroupBallotKHR = 4423, + CapabilityDrawParameters = 4427, + CapabilitySubgroupVoteKHR = 4431, + CapabilityStorageBuffer16BitAccess = 4433, + CapabilityStorageUniformBufferBlock16 = 4433, + CapabilityStorageUniform16 = 4434, + CapabilityUniformAndStorageBuffer16BitAccess = 4434, + 
CapabilityStoragePushConstant16 = 4435, + CapabilityStorageInputOutput16 = 4436, + CapabilityDeviceGroup = 4437, + CapabilityMultiView = 4439, + CapabilityVariablePointersStorageBuffer = 4441, + CapabilityVariablePointers = 4442, + CapabilityAtomicStorageOps = 4445, + CapabilitySampleMaskPostDepthCoverage = 4447, + CapabilityStorageBuffer8BitAccess = 4448, + CapabilityUniformAndStorageBuffer8BitAccess = 4449, + CapabilityStoragePushConstant8 = 4450, + CapabilityDenormPreserve = 4464, + CapabilityDenormFlushToZero = 4465, + CapabilitySignedZeroInfNanPreserve = 4466, + CapabilityRoundingModeRTE = 4467, + CapabilityRoundingModeRTZ = 4468, + CapabilityRayQueryProvisionalKHR = 4471, + CapabilityRayTraversalPrimitiveCullingProvisionalKHR = 4478, + CapabilityFloat16ImageAMD = 5008, + CapabilityImageGatherBiasLodAMD = 5009, + CapabilityFragmentMaskAMD = 5010, + CapabilityStencilExportEXT = 5013, + CapabilityImageReadWriteLodAMD = 5015, + CapabilityShaderClockKHR = 5055, + CapabilitySampleMaskOverrideCoverageNV = 5249, + CapabilityGeometryShaderPassthroughNV = 5251, + CapabilityShaderViewportIndexLayerEXT = 5254, + CapabilityShaderViewportIndexLayerNV = 5254, + CapabilityShaderViewportMaskNV = 5255, + CapabilityShaderStereoViewNV = 5259, + CapabilityPerViewAttributesNV = 5260, + CapabilityFragmentFullyCoveredEXT = 5265, + CapabilityMeshShadingNV = 5266, + CapabilityImageFootprintNV = 5282, + CapabilityFragmentBarycentricNV = 5284, + CapabilityComputeDerivativeGroupQuadsNV = 5288, + CapabilityFragmentDensityEXT = 5291, + CapabilityShadingRateNV = 5291, + CapabilityGroupNonUniformPartitionedNV = 5297, + CapabilityShaderNonUniform = 5301, + CapabilityShaderNonUniformEXT = 5301, + CapabilityRuntimeDescriptorArray = 5302, + CapabilityRuntimeDescriptorArrayEXT = 5302, + CapabilityInputAttachmentArrayDynamicIndexing = 5303, + CapabilityInputAttachmentArrayDynamicIndexingEXT = 5303, + CapabilityUniformTexelBufferArrayDynamicIndexing = 5304, + 
CapabilityUniformTexelBufferArrayDynamicIndexingEXT = 5304, + CapabilityStorageTexelBufferArrayDynamicIndexing = 5305, + CapabilityStorageTexelBufferArrayDynamicIndexingEXT = 5305, + CapabilityUniformBufferArrayNonUniformIndexing = 5306, + CapabilityUniformBufferArrayNonUniformIndexingEXT = 5306, + CapabilitySampledImageArrayNonUniformIndexing = 5307, + CapabilitySampledImageArrayNonUniformIndexingEXT = 5307, + CapabilityStorageBufferArrayNonUniformIndexing = 5308, + CapabilityStorageBufferArrayNonUniformIndexingEXT = 5308, + CapabilityStorageImageArrayNonUniformIndexing = 5309, + CapabilityStorageImageArrayNonUniformIndexingEXT = 5309, + CapabilityInputAttachmentArrayNonUniformIndexing = 5310, + CapabilityInputAttachmentArrayNonUniformIndexingEXT = 5310, + CapabilityUniformTexelBufferArrayNonUniformIndexing = 5311, + CapabilityUniformTexelBufferArrayNonUniformIndexingEXT = 5311, + CapabilityStorageTexelBufferArrayNonUniformIndexing = 5312, + CapabilityStorageTexelBufferArrayNonUniformIndexingEXT = 5312, + CapabilityRayTracingNV = 5340, + CapabilityVulkanMemoryModel = 5345, + CapabilityVulkanMemoryModelKHR = 5345, + CapabilityVulkanMemoryModelDeviceScope = 5346, + CapabilityVulkanMemoryModelDeviceScopeKHR = 5346, + CapabilityPhysicalStorageBufferAddresses = 5347, + CapabilityPhysicalStorageBufferAddressesEXT = 5347, + CapabilityComputeDerivativeGroupLinearNV = 5350, + CapabilityRayTracingProvisionalKHR = 5353, + CapabilityCooperativeMatrixNV = 5357, + CapabilityFragmentShaderSampleInterlockEXT = 5363, + CapabilityFragmentShaderShadingRateInterlockEXT = 5372, + CapabilityShaderSMBuiltinsNV = 5373, + CapabilityFragmentShaderPixelInterlockEXT = 5378, + CapabilityDemoteToHelperInvocationEXT = 5379, + CapabilitySubgroupShuffleINTEL = 5568, + CapabilitySubgroupBufferBlockIOINTEL = 5569, + CapabilitySubgroupImageBlockIOINTEL = 5570, + CapabilitySubgroupImageMediaBlockIOINTEL = 5579, + CapabilityIntegerFunctions2INTEL = 5584, + CapabilitySubgroupAvcMotionEstimationINTEL = 
5696, + CapabilitySubgroupAvcMotionEstimationIntraINTEL = 5697, + CapabilitySubgroupAvcMotionEstimationChromaINTEL = 5698, + CapabilityMax = 0x7fffffff, +}; + +enum RayFlagsShift { + RayFlagsOpaqueKHRShift = 0, + RayFlagsNoOpaqueKHRShift = 1, + RayFlagsTerminateOnFirstHitKHRShift = 2, + RayFlagsSkipClosestHitShaderKHRShift = 3, + RayFlagsCullBackFacingTrianglesKHRShift = 4, + RayFlagsCullFrontFacingTrianglesKHRShift = 5, + RayFlagsCullOpaqueKHRShift = 6, + RayFlagsCullNoOpaqueKHRShift = 7, + RayFlagsSkipTrianglesKHRShift = 8, + RayFlagsSkipAABBsKHRShift = 9, + RayFlagsMax = 0x7fffffff, +}; + +enum RayFlagsMask { + RayFlagsMaskNone = 0, + RayFlagsOpaqueKHRMask = 0x00000001, + RayFlagsNoOpaqueKHRMask = 0x00000002, + RayFlagsTerminateOnFirstHitKHRMask = 0x00000004, + RayFlagsSkipClosestHitShaderKHRMask = 0x00000008, + RayFlagsCullBackFacingTrianglesKHRMask = 0x00000010, + RayFlagsCullFrontFacingTrianglesKHRMask = 0x00000020, + RayFlagsCullOpaqueKHRMask = 0x00000040, + RayFlagsCullNoOpaqueKHRMask = 0x00000080, + RayFlagsSkipTrianglesKHRMask = 0x00000100, + RayFlagsSkipAABBsKHRMask = 0x00000200, +}; + +enum RayQueryIntersection { + RayQueryIntersectionRayQueryCandidateIntersectionKHR = 0, + RayQueryIntersectionRayQueryCommittedIntersectionKHR = 1, + RayQueryIntersectionMax = 0x7fffffff, +}; + +enum RayQueryCommittedIntersectionType { + RayQueryCommittedIntersectionTypeRayQueryCommittedIntersectionNoneKHR = 0, + RayQueryCommittedIntersectionTypeRayQueryCommittedIntersectionTriangleKHR = 1, + RayQueryCommittedIntersectionTypeRayQueryCommittedIntersectionGeneratedKHR = 2, + RayQueryCommittedIntersectionTypeMax = 0x7fffffff, +}; + +enum RayQueryCandidateIntersectionType { + RayQueryCandidateIntersectionTypeRayQueryCandidateIntersectionTriangleKHR = 0, + RayQueryCandidateIntersectionTypeRayQueryCandidateIntersectionAABBKHR = 1, + RayQueryCandidateIntersectionTypeMax = 0x7fffffff, +}; + +enum Op { + OpNop = 0, + OpUndef = 1, + OpSourceContinued = 2, + OpSource = 3, + 
OpSourceExtension = 4, + OpName = 5, + OpMemberName = 6, + OpString = 7, + OpLine = 8, + OpExtension = 10, + OpExtInstImport = 11, + OpExtInst = 12, + OpMemoryModel = 14, + OpEntryPoint = 15, + OpExecutionMode = 16, + OpCapability = 17, + OpTypeVoid = 19, + OpTypeBool = 20, + OpTypeInt = 21, + OpTypeFloat = 22, + OpTypeVector = 23, + OpTypeMatrix = 24, + OpTypeImage = 25, + OpTypeSampler = 26, + OpTypeSampledImage = 27, + OpTypeArray = 28, + OpTypeRuntimeArray = 29, + OpTypeStruct = 30, + OpTypeOpaque = 31, + OpTypePointer = 32, + OpTypeFunction = 33, + OpTypeEvent = 34, + OpTypeDeviceEvent = 35, + OpTypeReserveId = 36, + OpTypeQueue = 37, + OpTypePipe = 38, + OpTypeForwardPointer = 39, + OpConstantTrue = 41, + OpConstantFalse = 42, + OpConstant = 43, + OpConstantComposite = 44, + OpConstantSampler = 45, + OpConstantNull = 46, + OpSpecConstantTrue = 48, + OpSpecConstantFalse = 49, + OpSpecConstant = 50, + OpSpecConstantComposite = 51, + OpSpecConstantOp = 52, + OpFunction = 54, + OpFunctionParameter = 55, + OpFunctionEnd = 56, + OpFunctionCall = 57, + OpVariable = 59, + OpImageTexelPointer = 60, + OpLoad = 61, + OpStore = 62, + OpCopyMemory = 63, + OpCopyMemorySized = 64, + OpAccessChain = 65, + OpInBoundsAccessChain = 66, + OpPtrAccessChain = 67, + OpArrayLength = 68, + OpGenericPtrMemSemantics = 69, + OpInBoundsPtrAccessChain = 70, + OpDecorate = 71, + OpMemberDecorate = 72, + OpDecorationGroup = 73, + OpGroupDecorate = 74, + OpGroupMemberDecorate = 75, + OpVectorExtractDynamic = 77, + OpVectorInsertDynamic = 78, + OpVectorShuffle = 79, + OpCompositeConstruct = 80, + OpCompositeExtract = 81, + OpCompositeInsert = 82, + OpCopyObject = 83, + OpTranspose = 84, + OpSampledImage = 86, + OpImageSampleImplicitLod = 87, + OpImageSampleExplicitLod = 88, + OpImageSampleDrefImplicitLod = 89, + OpImageSampleDrefExplicitLod = 90, + OpImageSampleProjImplicitLod = 91, + OpImageSampleProjExplicitLod = 92, + OpImageSampleProjDrefImplicitLod = 93, + 
OpImageSampleProjDrefExplicitLod = 94, + OpImageFetch = 95, + OpImageGather = 96, + OpImageDrefGather = 97, + OpImageRead = 98, + OpImageWrite = 99, + OpImage = 100, + OpImageQueryFormat = 101, + OpImageQueryOrder = 102, + OpImageQuerySizeLod = 103, + OpImageQuerySize = 104, + OpImageQueryLod = 105, + OpImageQueryLevels = 106, + OpImageQuerySamples = 107, + OpConvertFToU = 109, + OpConvertFToS = 110, + OpConvertSToF = 111, + OpConvertUToF = 112, + OpUConvert = 113, + OpSConvert = 114, + OpFConvert = 115, + OpQuantizeToF16 = 116, + OpConvertPtrToU = 117, + OpSatConvertSToU = 118, + OpSatConvertUToS = 119, + OpConvertUToPtr = 120, + OpPtrCastToGeneric = 121, + OpGenericCastToPtr = 122, + OpGenericCastToPtrExplicit = 123, + OpBitcast = 124, + OpSNegate = 126, + OpFNegate = 127, + OpIAdd = 128, + OpFAdd = 129, + OpISub = 130, + OpFSub = 131, + OpIMul = 132, + OpFMul = 133, + OpUDiv = 134, + OpSDiv = 135, + OpFDiv = 136, + OpUMod = 137, + OpSRem = 138, + OpSMod = 139, + OpFRem = 140, + OpFMod = 141, + OpVectorTimesScalar = 142, + OpMatrixTimesScalar = 143, + OpVectorTimesMatrix = 144, + OpMatrixTimesVector = 145, + OpMatrixTimesMatrix = 146, + OpOuterProduct = 147, + OpDot = 148, + OpIAddCarry = 149, + OpISubBorrow = 150, + OpUMulExtended = 151, + OpSMulExtended = 152, + OpAny = 154, + OpAll = 155, + OpIsNan = 156, + OpIsInf = 157, + OpIsFinite = 158, + OpIsNormal = 159, + OpSignBitSet = 160, + OpLessOrGreater = 161, + OpOrdered = 162, + OpUnordered = 163, + OpLogicalEqual = 164, + OpLogicalNotEqual = 165, + OpLogicalOr = 166, + OpLogicalAnd = 167, + OpLogicalNot = 168, + OpSelect = 169, + OpIEqual = 170, + OpINotEqual = 171, + OpUGreaterThan = 172, + OpSGreaterThan = 173, + OpUGreaterThanEqual = 174, + OpSGreaterThanEqual = 175, + OpULessThan = 176, + OpSLessThan = 177, + OpULessThanEqual = 178, + OpSLessThanEqual = 179, + OpFOrdEqual = 180, + OpFUnordEqual = 181, + OpFOrdNotEqual = 182, + OpFUnordNotEqual = 183, + OpFOrdLessThan = 184, + OpFUnordLessThan = 185, + 
OpFOrdGreaterThan = 186, + OpFUnordGreaterThan = 187, + OpFOrdLessThanEqual = 188, + OpFUnordLessThanEqual = 189, + OpFOrdGreaterThanEqual = 190, + OpFUnordGreaterThanEqual = 191, + OpShiftRightLogical = 194, + OpShiftRightArithmetic = 195, + OpShiftLeftLogical = 196, + OpBitwiseOr = 197, + OpBitwiseXor = 198, + OpBitwiseAnd = 199, + OpNot = 200, + OpBitFieldInsert = 201, + OpBitFieldSExtract = 202, + OpBitFieldUExtract = 203, + OpBitReverse = 204, + OpBitCount = 205, + OpDPdx = 207, + OpDPdy = 208, + OpFwidth = 209, + OpDPdxFine = 210, + OpDPdyFine = 211, + OpFwidthFine = 212, + OpDPdxCoarse = 213, + OpDPdyCoarse = 214, + OpFwidthCoarse = 215, + OpEmitVertex = 218, + OpEndPrimitive = 219, + OpEmitStreamVertex = 220, + OpEndStreamPrimitive = 221, + OpControlBarrier = 224, + OpMemoryBarrier = 225, + OpAtomicLoad = 227, + OpAtomicStore = 228, + OpAtomicExchange = 229, + OpAtomicCompareExchange = 230, + OpAtomicCompareExchangeWeak = 231, + OpAtomicIIncrement = 232, + OpAtomicIDecrement = 233, + OpAtomicIAdd = 234, + OpAtomicISub = 235, + OpAtomicSMin = 236, + OpAtomicUMin = 237, + OpAtomicSMax = 238, + OpAtomicUMax = 239, + OpAtomicAnd = 240, + OpAtomicOr = 241, + OpAtomicXor = 242, + OpPhi = 245, + OpLoopMerge = 246, + OpSelectionMerge = 247, + OpLabel = 248, + OpBranch = 249, + OpBranchConditional = 250, + OpSwitch = 251, + OpKill = 252, + OpReturn = 253, + OpReturnValue = 254, + OpUnreachable = 255, + OpLifetimeStart = 256, + OpLifetimeStop = 257, + OpGroupAsyncCopy = 259, + OpGroupWaitEvents = 260, + OpGroupAll = 261, + OpGroupAny = 262, + OpGroupBroadcast = 263, + OpGroupIAdd = 264, + OpGroupFAdd = 265, + OpGroupFMin = 266, + OpGroupUMin = 267, + OpGroupSMin = 268, + OpGroupFMax = 269, + OpGroupUMax = 270, + OpGroupSMax = 271, + OpReadPipe = 274, + OpWritePipe = 275, + OpReservedReadPipe = 276, + OpReservedWritePipe = 277, + OpReserveReadPipePackets = 278, + OpReserveWritePipePackets = 279, + OpCommitReadPipe = 280, + OpCommitWritePipe = 281, + OpIsValidReserveId 
= 282, + OpGetNumPipePackets = 283, + OpGetMaxPipePackets = 284, + OpGroupReserveReadPipePackets = 285, + OpGroupReserveWritePipePackets = 286, + OpGroupCommitReadPipe = 287, + OpGroupCommitWritePipe = 288, + OpEnqueueMarker = 291, + OpEnqueueKernel = 292, + OpGetKernelNDrangeSubGroupCount = 293, + OpGetKernelNDrangeMaxSubGroupSize = 294, + OpGetKernelWorkGroupSize = 295, + OpGetKernelPreferredWorkGroupSizeMultiple = 296, + OpRetainEvent = 297, + OpReleaseEvent = 298, + OpCreateUserEvent = 299, + OpIsValidEvent = 300, + OpSetUserEventStatus = 301, + OpCaptureEventProfilingInfo = 302, + OpGetDefaultQueue = 303, + OpBuildNDRange = 304, + OpImageSparseSampleImplicitLod = 305, + OpImageSparseSampleExplicitLod = 306, + OpImageSparseSampleDrefImplicitLod = 307, + OpImageSparseSampleDrefExplicitLod = 308, + OpImageSparseSampleProjImplicitLod = 309, + OpImageSparseSampleProjExplicitLod = 310, + OpImageSparseSampleProjDrefImplicitLod = 311, + OpImageSparseSampleProjDrefExplicitLod = 312, + OpImageSparseFetch = 313, + OpImageSparseGather = 314, + OpImageSparseDrefGather = 315, + OpImageSparseTexelsResident = 316, + OpNoLine = 317, + OpAtomicFlagTestAndSet = 318, + OpAtomicFlagClear = 319, + OpImageSparseRead = 320, + OpSizeOf = 321, + OpTypePipeStorage = 322, + OpConstantPipeStorage = 323, + OpCreatePipeFromPipeStorage = 324, + OpGetKernelLocalSizeForSubgroupCount = 325, + OpGetKernelMaxNumSubgroups = 326, + OpTypeNamedBarrier = 327, + OpNamedBarrierInitialize = 328, + OpMemoryNamedBarrier = 329, + OpModuleProcessed = 330, + OpExecutionModeId = 331, + OpDecorateId = 332, + OpGroupNonUniformElect = 333, + OpGroupNonUniformAll = 334, + OpGroupNonUniformAny = 335, + OpGroupNonUniformAllEqual = 336, + OpGroupNonUniformBroadcast = 337, + OpGroupNonUniformBroadcastFirst = 338, + OpGroupNonUniformBallot = 339, + OpGroupNonUniformInverseBallot = 340, + OpGroupNonUniformBallotBitExtract = 341, + OpGroupNonUniformBallotBitCount = 342, + OpGroupNonUniformBallotFindLSB = 343, + 
OpGroupNonUniformBallotFindMSB = 344, + OpGroupNonUniformShuffle = 345, + OpGroupNonUniformShuffleXor = 346, + OpGroupNonUniformShuffleUp = 347, + OpGroupNonUniformShuffleDown = 348, + OpGroupNonUniformIAdd = 349, + OpGroupNonUniformFAdd = 350, + OpGroupNonUniformIMul = 351, + OpGroupNonUniformFMul = 352, + OpGroupNonUniformSMin = 353, + OpGroupNonUniformUMin = 354, + OpGroupNonUniformFMin = 355, + OpGroupNonUniformSMax = 356, + OpGroupNonUniformUMax = 357, + OpGroupNonUniformFMax = 358, + OpGroupNonUniformBitwiseAnd = 359, + OpGroupNonUniformBitwiseOr = 360, + OpGroupNonUniformBitwiseXor = 361, + OpGroupNonUniformLogicalAnd = 362, + OpGroupNonUniformLogicalOr = 363, + OpGroupNonUniformLogicalXor = 364, + OpGroupNonUniformQuadBroadcast = 365, + OpGroupNonUniformQuadSwap = 366, + OpCopyLogical = 400, + OpPtrEqual = 401, + OpPtrNotEqual = 402, + OpPtrDiff = 403, + OpSubgroupBallotKHR = 4421, + OpSubgroupFirstInvocationKHR = 4422, + OpSubgroupAllKHR = 4428, + OpSubgroupAnyKHR = 4429, + OpSubgroupAllEqualKHR = 4430, + OpSubgroupReadInvocationKHR = 4432, + OpTypeRayQueryProvisionalKHR = 4472, + OpRayQueryInitializeKHR = 4473, + OpRayQueryTerminateKHR = 4474, + OpRayQueryGenerateIntersectionKHR = 4475, + OpRayQueryConfirmIntersectionKHR = 4476, + OpRayQueryProceedKHR = 4477, + OpRayQueryGetIntersectionTypeKHR = 4479, + OpGroupIAddNonUniformAMD = 5000, + OpGroupFAddNonUniformAMD = 5001, + OpGroupFMinNonUniformAMD = 5002, + OpGroupUMinNonUniformAMD = 5003, + OpGroupSMinNonUniformAMD = 5004, + OpGroupFMaxNonUniformAMD = 5005, + OpGroupUMaxNonUniformAMD = 5006, + OpGroupSMaxNonUniformAMD = 5007, + OpFragmentMaskFetchAMD = 5011, + OpFragmentFetchAMD = 5012, + OpReadClockKHR = 5056, + OpImageSampleFootprintNV = 5283, + OpGroupNonUniformPartitionNV = 5296, + OpWritePackedPrimitiveIndices4x8NV = 5299, + OpReportIntersectionKHR = 5334, + OpReportIntersectionNV = 5334, + OpIgnoreIntersectionKHR = 5335, + OpIgnoreIntersectionNV = 5335, + OpTerminateRayKHR = 5336, + OpTerminateRayNV 
= 5336, + OpTraceNV = 5337, + OpTraceRayKHR = 5337, + OpTypeAccelerationStructureKHR = 5341, + OpTypeAccelerationStructureNV = 5341, + OpExecuteCallableKHR = 5344, + OpExecuteCallableNV = 5344, + OpTypeCooperativeMatrixNV = 5358, + OpCooperativeMatrixLoadNV = 5359, + OpCooperativeMatrixStoreNV = 5360, + OpCooperativeMatrixMulAddNV = 5361, + OpCooperativeMatrixLengthNV = 5362, + OpBeginInvocationInterlockEXT = 5364, + OpEndInvocationInterlockEXT = 5365, + OpDemoteToHelperInvocationEXT = 5380, + OpIsHelperInvocationEXT = 5381, + OpSubgroupShuffleINTEL = 5571, + OpSubgroupShuffleDownINTEL = 5572, + OpSubgroupShuffleUpINTEL = 5573, + OpSubgroupShuffleXorINTEL = 5574, + OpSubgroupBlockReadINTEL = 5575, + OpSubgroupBlockWriteINTEL = 5576, + OpSubgroupImageBlockReadINTEL = 5577, + OpSubgroupImageBlockWriteINTEL = 5578, + OpSubgroupImageMediaBlockReadINTEL = 5580, + OpSubgroupImageMediaBlockWriteINTEL = 5581, + OpUCountLeadingZerosINTEL = 5585, + OpUCountTrailingZerosINTEL = 5586, + OpAbsISubINTEL = 5587, + OpAbsUSubINTEL = 5588, + OpIAddSatINTEL = 5589, + OpUAddSatINTEL = 5590, + OpIAverageINTEL = 5591, + OpUAverageINTEL = 5592, + OpIAverageRoundedINTEL = 5593, + OpUAverageRoundedINTEL = 5594, + OpISubSatINTEL = 5595, + OpUSubSatINTEL = 5596, + OpIMul32x16INTEL = 5597, + OpUMul32x16INTEL = 5598, + OpDecorateString = 5632, + OpDecorateStringGOOGLE = 5632, + OpMemberDecorateString = 5633, + OpMemberDecorateStringGOOGLE = 5633, + OpVmeImageINTEL = 5699, + OpTypeVmeImageINTEL = 5700, + OpTypeAvcImePayloadINTEL = 5701, + OpTypeAvcRefPayloadINTEL = 5702, + OpTypeAvcSicPayloadINTEL = 5703, + OpTypeAvcMcePayloadINTEL = 5704, + OpTypeAvcMceResultINTEL = 5705, + OpTypeAvcImeResultINTEL = 5706, + OpTypeAvcImeResultSingleReferenceStreamoutINTEL = 5707, + OpTypeAvcImeResultDualReferenceStreamoutINTEL = 5708, + OpTypeAvcImeSingleReferenceStreaminINTEL = 5709, + OpTypeAvcImeDualReferenceStreaminINTEL = 5710, + OpTypeAvcRefResultINTEL = 5711, + OpTypeAvcSicResultINTEL = 5712, + 
OpSubgroupAvcMceGetDefaultInterBaseMultiReferencePenaltyINTEL = 5713, + OpSubgroupAvcMceSetInterBaseMultiReferencePenaltyINTEL = 5714, + OpSubgroupAvcMceGetDefaultInterShapePenaltyINTEL = 5715, + OpSubgroupAvcMceSetInterShapePenaltyINTEL = 5716, + OpSubgroupAvcMceGetDefaultInterDirectionPenaltyINTEL = 5717, + OpSubgroupAvcMceSetInterDirectionPenaltyINTEL = 5718, + OpSubgroupAvcMceGetDefaultIntraLumaShapePenaltyINTEL = 5719, + OpSubgroupAvcMceGetDefaultInterMotionVectorCostTableINTEL = 5720, + OpSubgroupAvcMceGetDefaultHighPenaltyCostTableINTEL = 5721, + OpSubgroupAvcMceGetDefaultMediumPenaltyCostTableINTEL = 5722, + OpSubgroupAvcMceGetDefaultLowPenaltyCostTableINTEL = 5723, + OpSubgroupAvcMceSetMotionVectorCostFunctionINTEL = 5724, + OpSubgroupAvcMceGetDefaultIntraLumaModePenaltyINTEL = 5725, + OpSubgroupAvcMceGetDefaultNonDcLumaIntraPenaltyINTEL = 5726, + OpSubgroupAvcMceGetDefaultIntraChromaModeBasePenaltyINTEL = 5727, + OpSubgroupAvcMceSetAcOnlyHaarINTEL = 5728, + OpSubgroupAvcMceSetSourceInterlacedFieldPolarityINTEL = 5729, + OpSubgroupAvcMceSetSingleReferenceInterlacedFieldPolarityINTEL = 5730, + OpSubgroupAvcMceSetDualReferenceInterlacedFieldPolaritiesINTEL = 5731, + OpSubgroupAvcMceConvertToImePayloadINTEL = 5732, + OpSubgroupAvcMceConvertToImeResultINTEL = 5733, + OpSubgroupAvcMceConvertToRefPayloadINTEL = 5734, + OpSubgroupAvcMceConvertToRefResultINTEL = 5735, + OpSubgroupAvcMceConvertToSicPayloadINTEL = 5736, + OpSubgroupAvcMceConvertToSicResultINTEL = 5737, + OpSubgroupAvcMceGetMotionVectorsINTEL = 5738, + OpSubgroupAvcMceGetInterDistortionsINTEL = 5739, + OpSubgroupAvcMceGetBestInterDistortionsINTEL = 5740, + OpSubgroupAvcMceGetInterMajorShapeINTEL = 5741, + OpSubgroupAvcMceGetInterMinorShapeINTEL = 5742, + OpSubgroupAvcMceGetInterDirectionsINTEL = 5743, + OpSubgroupAvcMceGetInterMotionVectorCountINTEL = 5744, + OpSubgroupAvcMceGetInterReferenceIdsINTEL = 5745, + OpSubgroupAvcMceGetInterReferenceInterlacedFieldPolaritiesINTEL = 5746, + 
OpSubgroupAvcImeInitializeINTEL = 5747, + OpSubgroupAvcImeSetSingleReferenceINTEL = 5748, + OpSubgroupAvcImeSetDualReferenceINTEL = 5749, + OpSubgroupAvcImeRefWindowSizeINTEL = 5750, + OpSubgroupAvcImeAdjustRefOffsetINTEL = 5751, + OpSubgroupAvcImeConvertToMcePayloadINTEL = 5752, + OpSubgroupAvcImeSetMaxMotionVectorCountINTEL = 5753, + OpSubgroupAvcImeSetUnidirectionalMixDisableINTEL = 5754, + OpSubgroupAvcImeSetEarlySearchTerminationThresholdINTEL = 5755, + OpSubgroupAvcImeSetWeightedSadINTEL = 5756, + OpSubgroupAvcImeEvaluateWithSingleReferenceINTEL = 5757, + OpSubgroupAvcImeEvaluateWithDualReferenceINTEL = 5758, + OpSubgroupAvcImeEvaluateWithSingleReferenceStreaminINTEL = 5759, + OpSubgroupAvcImeEvaluateWithDualReferenceStreaminINTEL = 5760, + OpSubgroupAvcImeEvaluateWithSingleReferenceStreamoutINTEL = 5761, + OpSubgroupAvcImeEvaluateWithDualReferenceStreamoutINTEL = 5762, + OpSubgroupAvcImeEvaluateWithSingleReferenceStreaminoutINTEL = 5763, + OpSubgroupAvcImeEvaluateWithDualReferenceStreaminoutINTEL = 5764, + OpSubgroupAvcImeConvertToMceResultINTEL = 5765, + OpSubgroupAvcImeGetSingleReferenceStreaminINTEL = 5766, + OpSubgroupAvcImeGetDualReferenceStreaminINTEL = 5767, + OpSubgroupAvcImeStripSingleReferenceStreamoutINTEL = 5768, + OpSubgroupAvcImeStripDualReferenceStreamoutINTEL = 5769, + OpSubgroupAvcImeGetStreamoutSingleReferenceMajorShapeMotionVectorsINTEL = 5770, + OpSubgroupAvcImeGetStreamoutSingleReferenceMajorShapeDistortionsINTEL = 5771, + OpSubgroupAvcImeGetStreamoutSingleReferenceMajorShapeReferenceIdsINTEL = 5772, + OpSubgroupAvcImeGetStreamoutDualReferenceMajorShapeMotionVectorsINTEL = 5773, + OpSubgroupAvcImeGetStreamoutDualReferenceMajorShapeDistortionsINTEL = 5774, + OpSubgroupAvcImeGetStreamoutDualReferenceMajorShapeReferenceIdsINTEL = 5775, + OpSubgroupAvcImeGetBorderReachedINTEL = 5776, + OpSubgroupAvcImeGetTruncatedSearchIndicationINTEL = 5777, + OpSubgroupAvcImeGetUnidirectionalEarlySearchTerminationINTEL = 5778, + 
OpSubgroupAvcImeGetWeightingPatternMinimumMotionVectorINTEL = 5779, + OpSubgroupAvcImeGetWeightingPatternMinimumDistortionINTEL = 5780, + OpSubgroupAvcFmeInitializeINTEL = 5781, + OpSubgroupAvcBmeInitializeINTEL = 5782, + OpSubgroupAvcRefConvertToMcePayloadINTEL = 5783, + OpSubgroupAvcRefSetBidirectionalMixDisableINTEL = 5784, + OpSubgroupAvcRefSetBilinearFilterEnableINTEL = 5785, + OpSubgroupAvcRefEvaluateWithSingleReferenceINTEL = 5786, + OpSubgroupAvcRefEvaluateWithDualReferenceINTEL = 5787, + OpSubgroupAvcRefEvaluateWithMultiReferenceINTEL = 5788, + OpSubgroupAvcRefEvaluateWithMultiReferenceInterlacedINTEL = 5789, + OpSubgroupAvcRefConvertToMceResultINTEL = 5790, + OpSubgroupAvcSicInitializeINTEL = 5791, + OpSubgroupAvcSicConfigureSkcINTEL = 5792, + OpSubgroupAvcSicConfigureIpeLumaINTEL = 5793, + OpSubgroupAvcSicConfigureIpeLumaChromaINTEL = 5794, + OpSubgroupAvcSicGetMotionVectorMaskINTEL = 5795, + OpSubgroupAvcSicConvertToMcePayloadINTEL = 5796, + OpSubgroupAvcSicSetIntraLumaShapePenaltyINTEL = 5797, + OpSubgroupAvcSicSetIntraLumaModeCostFunctionINTEL = 5798, + OpSubgroupAvcSicSetIntraChromaModeCostFunctionINTEL = 5799, + OpSubgroupAvcSicSetBilinearFilterEnableINTEL = 5800, + OpSubgroupAvcSicSetSkcForwardTransformEnableINTEL = 5801, + OpSubgroupAvcSicSetBlockBasedRawSkipSadINTEL = 5802, + OpSubgroupAvcSicEvaluateIpeINTEL = 5803, + OpSubgroupAvcSicEvaluateWithSingleReferenceINTEL = 5804, + OpSubgroupAvcSicEvaluateWithDualReferenceINTEL = 5805, + OpSubgroupAvcSicEvaluateWithMultiReferenceINTEL = 5806, + OpSubgroupAvcSicEvaluateWithMultiReferenceInterlacedINTEL = 5807, + OpSubgroupAvcSicConvertToMceResultINTEL = 5808, + OpSubgroupAvcSicGetIpeLumaShapeINTEL = 5809, + OpSubgroupAvcSicGetBestIpeLumaDistortionINTEL = 5810, + OpSubgroupAvcSicGetBestIpeChromaDistortionINTEL = 5811, + OpSubgroupAvcSicGetPackedIpeLumaModesINTEL = 5812, + OpSubgroupAvcSicGetIpeChromaModeINTEL = 5813, + OpSubgroupAvcSicGetPackedSkcLumaCountThresholdINTEL = 5814, + 
OpSubgroupAvcSicGetPackedSkcLumaSumThresholdINTEL = 5815, + OpSubgroupAvcSicGetInterRawSadsINTEL = 5816, + OpRayQueryGetRayTMinKHR = 6016, + OpRayQueryGetRayFlagsKHR = 6017, + OpRayQueryGetIntersectionTKHR = 6018, + OpRayQueryGetIntersectionInstanceCustomIndexKHR = 6019, + OpRayQueryGetIntersectionInstanceIdKHR = 6020, + OpRayQueryGetIntersectionInstanceShaderBindingTableRecordOffsetKHR = 6021, + OpRayQueryGetIntersectionGeometryIndexKHR = 6022, + OpRayQueryGetIntersectionPrimitiveIndexKHR = 6023, + OpRayQueryGetIntersectionBarycentricsKHR = 6024, + OpRayQueryGetIntersectionFrontFaceKHR = 6025, + OpRayQueryGetIntersectionCandidateAABBOpaqueKHR = 6026, + OpRayQueryGetIntersectionObjectRayDirectionKHR = 6027, + OpRayQueryGetIntersectionObjectRayOriginKHR = 6028, + OpRayQueryGetWorldRayDirectionKHR = 6029, + OpRayQueryGetWorldRayOriginKHR = 6030, + OpRayQueryGetIntersectionObjectToWorldKHR = 6031, + OpRayQueryGetIntersectionWorldToObjectKHR = 6032, + OpMax = 0x7fffffff, +}; + +#ifdef SPV_ENABLE_UTILITY_CODE +inline void HasResultAndType(Op opcode, bool *hasResult, bool *hasResultType) { + *hasResult = *hasResultType = false; + switch (opcode) { + default: /* unknown opcode */ break; + case OpNop: *hasResult = false; *hasResultType = false; break; + case OpUndef: *hasResult = true; *hasResultType = true; break; + case OpSourceContinued: *hasResult = false; *hasResultType = false; break; + case OpSource: *hasResult = false; *hasResultType = false; break; + case OpSourceExtension: *hasResult = false; *hasResultType = false; break; + case OpName: *hasResult = false; *hasResultType = false; break; + case OpMemberName: *hasResult = false; *hasResultType = false; break; + case OpString: *hasResult = true; *hasResultType = false; break; + case OpLine: *hasResult = false; *hasResultType = false; break; + case OpExtension: *hasResult = false; *hasResultType = false; break; + case OpExtInstImport: *hasResult = true; *hasResultType = false; break; + case OpExtInst: *hasResult = 
true; *hasResultType = true; break; + case OpMemoryModel: *hasResult = false; *hasResultType = false; break; + case OpEntryPoint: *hasResult = false; *hasResultType = false; break; + case OpExecutionMode: *hasResult = false; *hasResultType = false; break; + case OpCapability: *hasResult = false; *hasResultType = false; break; + case OpTypeVoid: *hasResult = true; *hasResultType = false; break; + case OpTypeBool: *hasResult = true; *hasResultType = false; break; + case OpTypeInt: *hasResult = true; *hasResultType = false; break; + case OpTypeFloat: *hasResult = true; *hasResultType = false; break; + case OpTypeVector: *hasResult = true; *hasResultType = false; break; + case OpTypeMatrix: *hasResult = true; *hasResultType = false; break; + case OpTypeImage: *hasResult = true; *hasResultType = false; break; + case OpTypeSampler: *hasResult = true; *hasResultType = false; break; + case OpTypeSampledImage: *hasResult = true; *hasResultType = false; break; + case OpTypeArray: *hasResult = true; *hasResultType = false; break; + case OpTypeRuntimeArray: *hasResult = true; *hasResultType = false; break; + case OpTypeStruct: *hasResult = true; *hasResultType = false; break; + case OpTypeOpaque: *hasResult = true; *hasResultType = false; break; + case OpTypePointer: *hasResult = true; *hasResultType = false; break; + case OpTypeFunction: *hasResult = true; *hasResultType = false; break; + case OpTypeEvent: *hasResult = true; *hasResultType = false; break; + case OpTypeDeviceEvent: *hasResult = true; *hasResultType = false; break; + case OpTypeReserveId: *hasResult = true; *hasResultType = false; break; + case OpTypeQueue: *hasResult = true; *hasResultType = false; break; + case OpTypePipe: *hasResult = true; *hasResultType = false; break; + case OpTypeForwardPointer: *hasResult = false; *hasResultType = false; break; + case OpConstantTrue: *hasResult = true; *hasResultType = true; break; + case OpConstantFalse: *hasResult = true; *hasResultType = true; break; + case 
OpConstant: *hasResult = true; *hasResultType = true; break; + case OpConstantComposite: *hasResult = true; *hasResultType = true; break; + case OpConstantSampler: *hasResult = true; *hasResultType = true; break; + case OpConstantNull: *hasResult = true; *hasResultType = true; break; + case OpSpecConstantTrue: *hasResult = true; *hasResultType = true; break; + case OpSpecConstantFalse: *hasResult = true; *hasResultType = true; break; + case OpSpecConstant: *hasResult = true; *hasResultType = true; break; + case OpSpecConstantComposite: *hasResult = true; *hasResultType = true; break; + case OpSpecConstantOp: *hasResult = true; *hasResultType = true; break; + case OpFunction: *hasResult = true; *hasResultType = true; break; + case OpFunctionParameter: *hasResult = true; *hasResultType = true; break; + case OpFunctionEnd: *hasResult = false; *hasResultType = false; break; + case OpFunctionCall: *hasResult = true; *hasResultType = true; break; + case OpVariable: *hasResult = true; *hasResultType = true; break; + case OpImageTexelPointer: *hasResult = true; *hasResultType = true; break; + case OpLoad: *hasResult = true; *hasResultType = true; break; + case OpStore: *hasResult = false; *hasResultType = false; break; + case OpCopyMemory: *hasResult = false; *hasResultType = false; break; + case OpCopyMemorySized: *hasResult = false; *hasResultType = false; break; + case OpAccessChain: *hasResult = true; *hasResultType = true; break; + case OpInBoundsAccessChain: *hasResult = true; *hasResultType = true; break; + case OpPtrAccessChain: *hasResult = true; *hasResultType = true; break; + case OpArrayLength: *hasResult = true; *hasResultType = true; break; + case OpGenericPtrMemSemantics: *hasResult = true; *hasResultType = true; break; + case OpInBoundsPtrAccessChain: *hasResult = true; *hasResultType = true; break; + case OpDecorate: *hasResult = false; *hasResultType = false; break; + case OpMemberDecorate: *hasResult = false; *hasResultType = false; break; + case 
OpDecorationGroup: *hasResult = true; *hasResultType = false; break; + case OpGroupDecorate: *hasResult = false; *hasResultType = false; break; + case OpGroupMemberDecorate: *hasResult = false; *hasResultType = false; break; + case OpVectorExtractDynamic: *hasResult = true; *hasResultType = true; break; + case OpVectorInsertDynamic: *hasResult = true; *hasResultType = true; break; + case OpVectorShuffle: *hasResult = true; *hasResultType = true; break; + case OpCompositeConstruct: *hasResult = true; *hasResultType = true; break; + case OpCompositeExtract: *hasResult = true; *hasResultType = true; break; + case OpCompositeInsert: *hasResult = true; *hasResultType = true; break; + case OpCopyObject: *hasResult = true; *hasResultType = true; break; + case OpTranspose: *hasResult = true; *hasResultType = true; break; + case OpSampledImage: *hasResult = true; *hasResultType = true; break; + case OpImageSampleImplicitLod: *hasResult = true; *hasResultType = true; break; + case OpImageSampleExplicitLod: *hasResult = true; *hasResultType = true; break; + case OpImageSampleDrefImplicitLod: *hasResult = true; *hasResultType = true; break; + case OpImageSampleDrefExplicitLod: *hasResult = true; *hasResultType = true; break; + case OpImageSampleProjImplicitLod: *hasResult = true; *hasResultType = true; break; + case OpImageSampleProjExplicitLod: *hasResult = true; *hasResultType = true; break; + case OpImageSampleProjDrefImplicitLod: *hasResult = true; *hasResultType = true; break; + case OpImageSampleProjDrefExplicitLod: *hasResult = true; *hasResultType = true; break; + case OpImageFetch: *hasResult = true; *hasResultType = true; break; + case OpImageGather: *hasResult = true; *hasResultType = true; break; + case OpImageDrefGather: *hasResult = true; *hasResultType = true; break; + case OpImageRead: *hasResult = true; *hasResultType = true; break; + case OpImageWrite: *hasResult = false; *hasResultType = false; break; + case OpImage: *hasResult = true; *hasResultType = true; 
break; + case OpImageQueryFormat: *hasResult = true; *hasResultType = true; break; + case OpImageQueryOrder: *hasResult = true; *hasResultType = true; break; + case OpImageQuerySizeLod: *hasResult = true; *hasResultType = true; break; + case OpImageQuerySize: *hasResult = true; *hasResultType = true; break; + case OpImageQueryLod: *hasResult = true; *hasResultType = true; break; + case OpImageQueryLevels: *hasResult = true; *hasResultType = true; break; + case OpImageQuerySamples: *hasResult = true; *hasResultType = true; break; + case OpConvertFToU: *hasResult = true; *hasResultType = true; break; + case OpConvertFToS: *hasResult = true; *hasResultType = true; break; + case OpConvertSToF: *hasResult = true; *hasResultType = true; break; + case OpConvertUToF: *hasResult = true; *hasResultType = true; break; + case OpUConvert: *hasResult = true; *hasResultType = true; break; + case OpSConvert: *hasResult = true; *hasResultType = true; break; + case OpFConvert: *hasResult = true; *hasResultType = true; break; + case OpQuantizeToF16: *hasResult = true; *hasResultType = true; break; + case OpConvertPtrToU: *hasResult = true; *hasResultType = true; break; + case OpSatConvertSToU: *hasResult = true; *hasResultType = true; break; + case OpSatConvertUToS: *hasResult = true; *hasResultType = true; break; + case OpConvertUToPtr: *hasResult = true; *hasResultType = true; break; + case OpPtrCastToGeneric: *hasResult = true; *hasResultType = true; break; + case OpGenericCastToPtr: *hasResult = true; *hasResultType = true; break; + case OpGenericCastToPtrExplicit: *hasResult = true; *hasResultType = true; break; + case OpBitcast: *hasResult = true; *hasResultType = true; break; + case OpSNegate: *hasResult = true; *hasResultType = true; break; + case OpFNegate: *hasResult = true; *hasResultType = true; break; + case OpIAdd: *hasResult = true; *hasResultType = true; break; + case OpFAdd: *hasResult = true; *hasResultType = true; break; + case OpISub: *hasResult = true; 
*hasResultType = true; break; + case OpFSub: *hasResult = true; *hasResultType = true; break; + case OpIMul: *hasResult = true; *hasResultType = true; break; + case OpFMul: *hasResult = true; *hasResultType = true; break; + case OpUDiv: *hasResult = true; *hasResultType = true; break; + case OpSDiv: *hasResult = true; *hasResultType = true; break; + case OpFDiv: *hasResult = true; *hasResultType = true; break; + case OpUMod: *hasResult = true; *hasResultType = true; break; + case OpSRem: *hasResult = true; *hasResultType = true; break; + case OpSMod: *hasResult = true; *hasResultType = true; break; + case OpFRem: *hasResult = true; *hasResultType = true; break; + case OpFMod: *hasResult = true; *hasResultType = true; break; + case OpVectorTimesScalar: *hasResult = true; *hasResultType = true; break; + case OpMatrixTimesScalar: *hasResult = true; *hasResultType = true; break; + case OpVectorTimesMatrix: *hasResult = true; *hasResultType = true; break; + case OpMatrixTimesVector: *hasResult = true; *hasResultType = true; break; + case OpMatrixTimesMatrix: *hasResult = true; *hasResultType = true; break; + case OpOuterProduct: *hasResult = true; *hasResultType = true; break; + case OpDot: *hasResult = true; *hasResultType = true; break; + case OpIAddCarry: *hasResult = true; *hasResultType = true; break; + case OpISubBorrow: *hasResult = true; *hasResultType = true; break; + case OpUMulExtended: *hasResult = true; *hasResultType = true; break; + case OpSMulExtended: *hasResult = true; *hasResultType = true; break; + case OpAny: *hasResult = true; *hasResultType = true; break; + case OpAll: *hasResult = true; *hasResultType = true; break; + case OpIsNan: *hasResult = true; *hasResultType = true; break; + case OpIsInf: *hasResult = true; *hasResultType = true; break; + case OpIsFinite: *hasResult = true; *hasResultType = true; break; + case OpIsNormal: *hasResult = true; *hasResultType = true; break; + case OpSignBitSet: *hasResult = true; *hasResultType = true; break; 
+ case OpLessOrGreater: *hasResult = true; *hasResultType = true; break; + case OpOrdered: *hasResult = true; *hasResultType = true; break; + case OpUnordered: *hasResult = true; *hasResultType = true; break; + case OpLogicalEqual: *hasResult = true; *hasResultType = true; break; + case OpLogicalNotEqual: *hasResult = true; *hasResultType = true; break; + case OpLogicalOr: *hasResult = true; *hasResultType = true; break; + case OpLogicalAnd: *hasResult = true; *hasResultType = true; break; + case OpLogicalNot: *hasResult = true; *hasResultType = true; break; + case OpSelect: *hasResult = true; *hasResultType = true; break; + case OpIEqual: *hasResult = true; *hasResultType = true; break; + case OpINotEqual: *hasResult = true; *hasResultType = true; break; + case OpUGreaterThan: *hasResult = true; *hasResultType = true; break; + case OpSGreaterThan: *hasResult = true; *hasResultType = true; break; + case OpUGreaterThanEqual: *hasResult = true; *hasResultType = true; break; + case OpSGreaterThanEqual: *hasResult = true; *hasResultType = true; break; + case OpULessThan: *hasResult = true; *hasResultType = true; break; + case OpSLessThan: *hasResult = true; *hasResultType = true; break; + case OpULessThanEqual: *hasResult = true; *hasResultType = true; break; + case OpSLessThanEqual: *hasResult = true; *hasResultType = true; break; + case OpFOrdEqual: *hasResult = true; *hasResultType = true; break; + case OpFUnordEqual: *hasResult = true; *hasResultType = true; break; + case OpFOrdNotEqual: *hasResult = true; *hasResultType = true; break; + case OpFUnordNotEqual: *hasResult = true; *hasResultType = true; break; + case OpFOrdLessThan: *hasResult = true; *hasResultType = true; break; + case OpFUnordLessThan: *hasResult = true; *hasResultType = true; break; + case OpFOrdGreaterThan: *hasResult = true; *hasResultType = true; break; + case OpFUnordGreaterThan: *hasResult = true; *hasResultType = true; break; + case OpFOrdLessThanEqual: *hasResult = true; *hasResultType = 
true; break; + case OpFUnordLessThanEqual: *hasResult = true; *hasResultType = true; break; + case OpFOrdGreaterThanEqual: *hasResult = true; *hasResultType = true; break; + case OpFUnordGreaterThanEqual: *hasResult = true; *hasResultType = true; break; + case OpShiftRightLogical: *hasResult = true; *hasResultType = true; break; + case OpShiftRightArithmetic: *hasResult = true; *hasResultType = true; break; + case OpShiftLeftLogical: *hasResult = true; *hasResultType = true; break; + case OpBitwiseOr: *hasResult = true; *hasResultType = true; break; + case OpBitwiseXor: *hasResult = true; *hasResultType = true; break; + case OpBitwiseAnd: *hasResult = true; *hasResultType = true; break; + case OpNot: *hasResult = true; *hasResultType = true; break; + case OpBitFieldInsert: *hasResult = true; *hasResultType = true; break; + case OpBitFieldSExtract: *hasResult = true; *hasResultType = true; break; + case OpBitFieldUExtract: *hasResult = true; *hasResultType = true; break; + case OpBitReverse: *hasResult = true; *hasResultType = true; break; + case OpBitCount: *hasResult = true; *hasResultType = true; break; + case OpDPdx: *hasResult = true; *hasResultType = true; break; + case OpDPdy: *hasResult = true; *hasResultType = true; break; + case OpFwidth: *hasResult = true; *hasResultType = true; break; + case OpDPdxFine: *hasResult = true; *hasResultType = true; break; + case OpDPdyFine: *hasResult = true; *hasResultType = true; break; + case OpFwidthFine: *hasResult = true; *hasResultType = true; break; + case OpDPdxCoarse: *hasResult = true; *hasResultType = true; break; + case OpDPdyCoarse: *hasResult = true; *hasResultType = true; break; + case OpFwidthCoarse: *hasResult = true; *hasResultType = true; break; + case OpEmitVertex: *hasResult = false; *hasResultType = false; break; + case OpEndPrimitive: *hasResult = false; *hasResultType = false; break; + case OpEmitStreamVertex: *hasResult = false; *hasResultType = false; break; + case OpEndStreamPrimitive: *hasResult 
= false; *hasResultType = false; break; + case OpControlBarrier: *hasResult = false; *hasResultType = false; break; + case OpMemoryBarrier: *hasResult = false; *hasResultType = false; break; + case OpAtomicLoad: *hasResult = true; *hasResultType = true; break; + case OpAtomicStore: *hasResult = false; *hasResultType = false; break; + case OpAtomicExchange: *hasResult = true; *hasResultType = true; break; + case OpAtomicCompareExchange: *hasResult = true; *hasResultType = true; break; + case OpAtomicCompareExchangeWeak: *hasResult = true; *hasResultType = true; break; + case OpAtomicIIncrement: *hasResult = true; *hasResultType = true; break; + case OpAtomicIDecrement: *hasResult = true; *hasResultType = true; break; + case OpAtomicIAdd: *hasResult = true; *hasResultType = true; break; + case OpAtomicISub: *hasResult = true; *hasResultType = true; break; + case OpAtomicSMin: *hasResult = true; *hasResultType = true; break; + case OpAtomicUMin: *hasResult = true; *hasResultType = true; break; + case OpAtomicSMax: *hasResult = true; *hasResultType = true; break; + case OpAtomicUMax: *hasResult = true; *hasResultType = true; break; + case OpAtomicAnd: *hasResult = true; *hasResultType = true; break; + case OpAtomicOr: *hasResult = true; *hasResultType = true; break; + case OpAtomicXor: *hasResult = true; *hasResultType = true; break; + case OpPhi: *hasResult = true; *hasResultType = true; break; + case OpLoopMerge: *hasResult = false; *hasResultType = false; break; + case OpSelectionMerge: *hasResult = false; *hasResultType = false; break; + case OpLabel: *hasResult = true; *hasResultType = false; break; + case OpBranch: *hasResult = false; *hasResultType = false; break; + case OpBranchConditional: *hasResult = false; *hasResultType = false; break; + case OpSwitch: *hasResult = false; *hasResultType = false; break; + case OpKill: *hasResult = false; *hasResultType = false; break; + case OpReturn: *hasResult = false; *hasResultType = false; break; + case OpReturnValue: 
*hasResult = false; *hasResultType = false; break; + case OpUnreachable: *hasResult = false; *hasResultType = false; break; + case OpLifetimeStart: *hasResult = false; *hasResultType = false; break; + case OpLifetimeStop: *hasResult = false; *hasResultType = false; break; + case OpGroupAsyncCopy: *hasResult = true; *hasResultType = true; break; + case OpGroupWaitEvents: *hasResult = false; *hasResultType = false; break; + case OpGroupAll: *hasResult = true; *hasResultType = true; break; + case OpGroupAny: *hasResult = true; *hasResultType = true; break; + case OpGroupBroadcast: *hasResult = true; *hasResultType = true; break; + case OpGroupIAdd: *hasResult = true; *hasResultType = true; break; + case OpGroupFAdd: *hasResult = true; *hasResultType = true; break; + case OpGroupFMin: *hasResult = true; *hasResultType = true; break; + case OpGroupUMin: *hasResult = true; *hasResultType = true; break; + case OpGroupSMin: *hasResult = true; *hasResultType = true; break; + case OpGroupFMax: *hasResult = true; *hasResultType = true; break; + case OpGroupUMax: *hasResult = true; *hasResultType = true; break; + case OpGroupSMax: *hasResult = true; *hasResultType = true; break; + case OpReadPipe: *hasResult = true; *hasResultType = true; break; + case OpWritePipe: *hasResult = true; *hasResultType = true; break; + case OpReservedReadPipe: *hasResult = true; *hasResultType = true; break; + case OpReservedWritePipe: *hasResult = true; *hasResultType = true; break; + case OpReserveReadPipePackets: *hasResult = true; *hasResultType = true; break; + case OpReserveWritePipePackets: *hasResult = true; *hasResultType = true; break; + case OpCommitReadPipe: *hasResult = false; *hasResultType = false; break; + case OpCommitWritePipe: *hasResult = false; *hasResultType = false; break; + case OpIsValidReserveId: *hasResult = true; *hasResultType = true; break; + case OpGetNumPipePackets: *hasResult = true; *hasResultType = true; break; + case OpGetMaxPipePackets: *hasResult = true; 
*hasResultType = true; break; + case OpGroupReserveReadPipePackets: *hasResult = true; *hasResultType = true; break; + case OpGroupReserveWritePipePackets: *hasResult = true; *hasResultType = true; break; + case OpGroupCommitReadPipe: *hasResult = false; *hasResultType = false; break; + case OpGroupCommitWritePipe: *hasResult = false; *hasResultType = false; break; + case OpEnqueueMarker: *hasResult = true; *hasResultType = true; break; + case OpEnqueueKernel: *hasResult = true; *hasResultType = true; break; + case OpGetKernelNDrangeSubGroupCount: *hasResult = true; *hasResultType = true; break; + case OpGetKernelNDrangeMaxSubGroupSize: *hasResult = true; *hasResultType = true; break; + case OpGetKernelWorkGroupSize: *hasResult = true; *hasResultType = true; break; + case OpGetKernelPreferredWorkGroupSizeMultiple: *hasResult = true; *hasResultType = true; break; + case OpRetainEvent: *hasResult = false; *hasResultType = false; break; + case OpReleaseEvent: *hasResult = false; *hasResultType = false; break; + case OpCreateUserEvent: *hasResult = true; *hasResultType = true; break; + case OpIsValidEvent: *hasResult = true; *hasResultType = true; break; + case OpSetUserEventStatus: *hasResult = false; *hasResultType = false; break; + case OpCaptureEventProfilingInfo: *hasResult = false; *hasResultType = false; break; + case OpGetDefaultQueue: *hasResult = true; *hasResultType = true; break; + case OpBuildNDRange: *hasResult = true; *hasResultType = true; break; + case OpImageSparseSampleImplicitLod: *hasResult = true; *hasResultType = true; break; + case OpImageSparseSampleExplicitLod: *hasResult = true; *hasResultType = true; break; + case OpImageSparseSampleDrefImplicitLod: *hasResult = true; *hasResultType = true; break; + case OpImageSparseSampleDrefExplicitLod: *hasResult = true; *hasResultType = true; break; + case OpImageSparseSampleProjImplicitLod: *hasResult = true; *hasResultType = true; break; + case OpImageSparseSampleProjExplicitLod: *hasResult = true; 
*hasResultType = true; break; + case OpImageSparseSampleProjDrefImplicitLod: *hasResult = true; *hasResultType = true; break; + case OpImageSparseSampleProjDrefExplicitLod: *hasResult = true; *hasResultType = true; break; + case OpImageSparseFetch: *hasResult = true; *hasResultType = true; break; + case OpImageSparseGather: *hasResult = true; *hasResultType = true; break; + case OpImageSparseDrefGather: *hasResult = true; *hasResultType = true; break; + case OpImageSparseTexelsResident: *hasResult = true; *hasResultType = true; break; + case OpNoLine: *hasResult = false; *hasResultType = false; break; + case OpAtomicFlagTestAndSet: *hasResult = true; *hasResultType = true; break; + case OpAtomicFlagClear: *hasResult = false; *hasResultType = false; break; + case OpImageSparseRead: *hasResult = true; *hasResultType = true; break; + case OpSizeOf: *hasResult = true; *hasResultType = true; break; + case OpTypePipeStorage: *hasResult = true; *hasResultType = false; break; + case OpConstantPipeStorage: *hasResult = true; *hasResultType = true; break; + case OpCreatePipeFromPipeStorage: *hasResult = true; *hasResultType = true; break; + case OpGetKernelLocalSizeForSubgroupCount: *hasResult = true; *hasResultType = true; break; + case OpGetKernelMaxNumSubgroups: *hasResult = true; *hasResultType = true; break; + case OpTypeNamedBarrier: *hasResult = true; *hasResultType = false; break; + case OpNamedBarrierInitialize: *hasResult = true; *hasResultType = true; break; + case OpMemoryNamedBarrier: *hasResult = false; *hasResultType = false; break; + case OpModuleProcessed: *hasResult = false; *hasResultType = false; break; + case OpExecutionModeId: *hasResult = false; *hasResultType = false; break; + case OpDecorateId: *hasResult = false; *hasResultType = false; break; + case OpGroupNonUniformElect: *hasResult = true; *hasResultType = true; break; + case OpGroupNonUniformAll: *hasResult = true; *hasResultType = true; break; + case OpGroupNonUniformAny: *hasResult = true; 
*hasResultType = true; break; + case OpGroupNonUniformAllEqual: *hasResult = true; *hasResultType = true; break; + case OpGroupNonUniformBroadcast: *hasResult = true; *hasResultType = true; break; + case OpGroupNonUniformBroadcastFirst: *hasResult = true; *hasResultType = true; break; + case OpGroupNonUniformBallot: *hasResult = true; *hasResultType = true; break; + case OpGroupNonUniformInverseBallot: *hasResult = true; *hasResultType = true; break; + case OpGroupNonUniformBallotBitExtract: *hasResult = true; *hasResultType = true; break; + case OpGroupNonUniformBallotBitCount: *hasResult = true; *hasResultType = true; break; + case OpGroupNonUniformBallotFindLSB: *hasResult = true; *hasResultType = true; break; + case OpGroupNonUniformBallotFindMSB: *hasResult = true; *hasResultType = true; break; + case OpGroupNonUniformShuffle: *hasResult = true; *hasResultType = true; break; + case OpGroupNonUniformShuffleXor: *hasResult = true; *hasResultType = true; break; + case OpGroupNonUniformShuffleUp: *hasResult = true; *hasResultType = true; break; + case OpGroupNonUniformShuffleDown: *hasResult = true; *hasResultType = true; break; + case OpGroupNonUniformIAdd: *hasResult = true; *hasResultType = true; break; + case OpGroupNonUniformFAdd: *hasResult = true; *hasResultType = true; break; + case OpGroupNonUniformIMul: *hasResult = true; *hasResultType = true; break; + case OpGroupNonUniformFMul: *hasResult = true; *hasResultType = true; break; + case OpGroupNonUniformSMin: *hasResult = true; *hasResultType = true; break; + case OpGroupNonUniformUMin: *hasResult = true; *hasResultType = true; break; + case OpGroupNonUniformFMin: *hasResult = true; *hasResultType = true; break; + case OpGroupNonUniformSMax: *hasResult = true; *hasResultType = true; break; + case OpGroupNonUniformUMax: *hasResult = true; *hasResultType = true; break; + case OpGroupNonUniformFMax: *hasResult = true; *hasResultType = true; break; + case OpGroupNonUniformBitwiseAnd: *hasResult = true; 
*hasResultType = true; break; + case OpGroupNonUniformBitwiseOr: *hasResult = true; *hasResultType = true; break; + case OpGroupNonUniformBitwiseXor: *hasResult = true; *hasResultType = true; break; + case OpGroupNonUniformLogicalAnd: *hasResult = true; *hasResultType = true; break; + case OpGroupNonUniformLogicalOr: *hasResult = true; *hasResultType = true; break; + case OpGroupNonUniformLogicalXor: *hasResult = true; *hasResultType = true; break; + case OpGroupNonUniformQuadBroadcast: *hasResult = true; *hasResultType = true; break; + case OpGroupNonUniformQuadSwap: *hasResult = true; *hasResultType = true; break; + case OpCopyLogical: *hasResult = true; *hasResultType = true; break; + case OpPtrEqual: *hasResult = true; *hasResultType = true; break; + case OpPtrNotEqual: *hasResult = true; *hasResultType = true; break; + case OpPtrDiff: *hasResult = true; *hasResultType = true; break; + case OpSubgroupBallotKHR: *hasResult = true; *hasResultType = true; break; + case OpSubgroupFirstInvocationKHR: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAllKHR: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAnyKHR: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAllEqualKHR: *hasResult = true; *hasResultType = true; break; + case OpSubgroupReadInvocationKHR: *hasResult = true; *hasResultType = true; break; + case OpTypeRayQueryProvisionalKHR: *hasResult = true; *hasResultType = false; break; + case OpRayQueryInitializeKHR: *hasResult = false; *hasResultType = false; break; + case OpRayQueryTerminateKHR: *hasResult = false; *hasResultType = false; break; + case OpRayQueryGenerateIntersectionKHR: *hasResult = false; *hasResultType = false; break; + case OpRayQueryConfirmIntersectionKHR: *hasResult = false; *hasResultType = false; break; + case OpRayQueryProceedKHR: *hasResult = true; *hasResultType = true; break; + case OpRayQueryGetIntersectionTypeKHR: *hasResult = true; *hasResultType = true; break; + case 
OpGroupIAddNonUniformAMD: *hasResult = true; *hasResultType = true; break; + case OpGroupFAddNonUniformAMD: *hasResult = true; *hasResultType = true; break; + case OpGroupFMinNonUniformAMD: *hasResult = true; *hasResultType = true; break; + case OpGroupUMinNonUniformAMD: *hasResult = true; *hasResultType = true; break; + case OpGroupSMinNonUniformAMD: *hasResult = true; *hasResultType = true; break; + case OpGroupFMaxNonUniformAMD: *hasResult = true; *hasResultType = true; break; + case OpGroupUMaxNonUniformAMD: *hasResult = true; *hasResultType = true; break; + case OpGroupSMaxNonUniformAMD: *hasResult = true; *hasResultType = true; break; + case OpFragmentMaskFetchAMD: *hasResult = true; *hasResultType = true; break; + case OpFragmentFetchAMD: *hasResult = true; *hasResultType = true; break; + case OpReadClockKHR: *hasResult = true; *hasResultType = true; break; + case OpImageSampleFootprintNV: *hasResult = true; *hasResultType = true; break; + case OpGroupNonUniformPartitionNV: *hasResult = true; *hasResultType = true; break; + case OpWritePackedPrimitiveIndices4x8NV: *hasResult = false; *hasResultType = false; break; + case OpReportIntersectionNV: *hasResult = true; *hasResultType = true; break; + case OpIgnoreIntersectionNV: *hasResult = false; *hasResultType = false; break; + case OpTerminateRayNV: *hasResult = false; *hasResultType = false; break; + case OpTraceNV: *hasResult = false; *hasResultType = false; break; + case OpTypeAccelerationStructureNV: *hasResult = true; *hasResultType = false; break; + case OpExecuteCallableNV: *hasResult = false; *hasResultType = false; break; + case OpTypeCooperativeMatrixNV: *hasResult = true; *hasResultType = false; break; + case OpCooperativeMatrixLoadNV: *hasResult = true; *hasResultType = true; break; + case OpCooperativeMatrixStoreNV: *hasResult = false; *hasResultType = false; break; + case OpCooperativeMatrixMulAddNV: *hasResult = true; *hasResultType = true; break; + case OpCooperativeMatrixLengthNV: *hasResult = 
true; *hasResultType = true; break; + case OpBeginInvocationInterlockEXT: *hasResult = false; *hasResultType = false; break; + case OpEndInvocationInterlockEXT: *hasResult = false; *hasResultType = false; break; + case OpDemoteToHelperInvocationEXT: *hasResult = false; *hasResultType = false; break; + case OpIsHelperInvocationEXT: *hasResult = true; *hasResultType = true; break; + case OpSubgroupShuffleINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupShuffleDownINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupShuffleUpINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupShuffleXorINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupBlockReadINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupBlockWriteINTEL: *hasResult = false; *hasResultType = false; break; + case OpSubgroupImageBlockReadINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupImageBlockWriteINTEL: *hasResult = false; *hasResultType = false; break; + case OpSubgroupImageMediaBlockReadINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupImageMediaBlockWriteINTEL: *hasResult = false; *hasResultType = false; break; + case OpUCountLeadingZerosINTEL: *hasResult = true; *hasResultType = true; break; + case OpUCountTrailingZerosINTEL: *hasResult = true; *hasResultType = true; break; + case OpAbsISubINTEL: *hasResult = true; *hasResultType = true; break; + case OpAbsUSubINTEL: *hasResult = true; *hasResultType = true; break; + case OpIAddSatINTEL: *hasResult = true; *hasResultType = true; break; + case OpUAddSatINTEL: *hasResult = true; *hasResultType = true; break; + case OpIAverageINTEL: *hasResult = true; *hasResultType = true; break; + case OpUAverageINTEL: *hasResult = true; *hasResultType = true; break; + case OpIAverageRoundedINTEL: *hasResult = true; *hasResultType = true; break; + case OpUAverageRoundedINTEL: *hasResult = true; *hasResultType = true; 
break; + case OpISubSatINTEL: *hasResult = true; *hasResultType = true; break; + case OpUSubSatINTEL: *hasResult = true; *hasResultType = true; break; + case OpIMul32x16INTEL: *hasResult = true; *hasResultType = true; break; + case OpUMul32x16INTEL: *hasResult = true; *hasResultType = true; break; + case OpDecorateString: *hasResult = false; *hasResultType = false; break; + case OpMemberDecorateString: *hasResult = false; *hasResultType = false; break; + case OpVmeImageINTEL: *hasResult = true; *hasResultType = true; break; + case OpTypeVmeImageINTEL: *hasResult = true; *hasResultType = false; break; + case OpTypeAvcImePayloadINTEL: *hasResult = true; *hasResultType = false; break; + case OpTypeAvcRefPayloadINTEL: *hasResult = true; *hasResultType = false; break; + case OpTypeAvcSicPayloadINTEL: *hasResult = true; *hasResultType = false; break; + case OpTypeAvcMcePayloadINTEL: *hasResult = true; *hasResultType = false; break; + case OpTypeAvcMceResultINTEL: *hasResult = true; *hasResultType = false; break; + case OpTypeAvcImeResultINTEL: *hasResult = true; *hasResultType = false; break; + case OpTypeAvcImeResultSingleReferenceStreamoutINTEL: *hasResult = true; *hasResultType = false; break; + case OpTypeAvcImeResultDualReferenceStreamoutINTEL: *hasResult = true; *hasResultType = false; break; + case OpTypeAvcImeSingleReferenceStreaminINTEL: *hasResult = true; *hasResultType = false; break; + case OpTypeAvcImeDualReferenceStreaminINTEL: *hasResult = true; *hasResultType = false; break; + case OpTypeAvcRefResultINTEL: *hasResult = true; *hasResultType = false; break; + case OpTypeAvcSicResultINTEL: *hasResult = true; *hasResultType = false; break; + case OpSubgroupAvcMceGetDefaultInterBaseMultiReferencePenaltyINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcMceSetInterBaseMultiReferencePenaltyINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcMceGetDefaultInterShapePenaltyINTEL: *hasResult = true; *hasResultType = 
true; break; + case OpSubgroupAvcMceSetInterShapePenaltyINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcMceGetDefaultInterDirectionPenaltyINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcMceSetInterDirectionPenaltyINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcMceGetDefaultIntraLumaShapePenaltyINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcMceGetDefaultInterMotionVectorCostTableINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcMceGetDefaultHighPenaltyCostTableINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcMceGetDefaultMediumPenaltyCostTableINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcMceGetDefaultLowPenaltyCostTableINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcMceSetMotionVectorCostFunctionINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcMceGetDefaultIntraLumaModePenaltyINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcMceGetDefaultNonDcLumaIntraPenaltyINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcMceGetDefaultIntraChromaModeBasePenaltyINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcMceSetAcOnlyHaarINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcMceSetSourceInterlacedFieldPolarityINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcMceSetSingleReferenceInterlacedFieldPolarityINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcMceSetDualReferenceInterlacedFieldPolaritiesINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcMceConvertToImePayloadINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcMceConvertToImeResultINTEL: *hasResult = true; *hasResultType = true; break; + case 
OpSubgroupAvcMceConvertToRefPayloadINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcMceConvertToRefResultINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcMceConvertToSicPayloadINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcMceConvertToSicResultINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcMceGetMotionVectorsINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcMceGetInterDistortionsINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcMceGetBestInterDistortionsINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcMceGetInterMajorShapeINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcMceGetInterMinorShapeINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcMceGetInterDirectionsINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcMceGetInterMotionVectorCountINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcMceGetInterReferenceIdsINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcMceGetInterReferenceInterlacedFieldPolaritiesINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcImeInitializeINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcImeSetSingleReferenceINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcImeSetDualReferenceINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcImeRefWindowSizeINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcImeAdjustRefOffsetINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcImeConvertToMcePayloadINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcImeSetMaxMotionVectorCountINTEL: *hasResult = true; *hasResultType = true; break; + case 
OpSubgroupAvcImeSetUnidirectionalMixDisableINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcImeSetEarlySearchTerminationThresholdINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcImeSetWeightedSadINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcImeEvaluateWithSingleReferenceINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcImeEvaluateWithDualReferenceINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcImeEvaluateWithSingleReferenceStreaminINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcImeEvaluateWithDualReferenceStreaminINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcImeEvaluateWithSingleReferenceStreamoutINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcImeEvaluateWithDualReferenceStreamoutINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcImeEvaluateWithSingleReferenceStreaminoutINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcImeEvaluateWithDualReferenceStreaminoutINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcImeConvertToMceResultINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcImeGetSingleReferenceStreaminINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcImeGetDualReferenceStreaminINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcImeStripSingleReferenceStreamoutINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcImeStripDualReferenceStreamoutINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcImeGetStreamoutSingleReferenceMajorShapeMotionVectorsINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcImeGetStreamoutSingleReferenceMajorShapeDistortionsINTEL: *hasResult = true; *hasResultType = true; break; + case 
OpSubgroupAvcImeGetStreamoutSingleReferenceMajorShapeReferenceIdsINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcImeGetStreamoutDualReferenceMajorShapeMotionVectorsINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcImeGetStreamoutDualReferenceMajorShapeDistortionsINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcImeGetStreamoutDualReferenceMajorShapeReferenceIdsINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcImeGetBorderReachedINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcImeGetTruncatedSearchIndicationINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcImeGetUnidirectionalEarlySearchTerminationINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcImeGetWeightingPatternMinimumMotionVectorINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcImeGetWeightingPatternMinimumDistortionINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcFmeInitializeINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcBmeInitializeINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcRefConvertToMcePayloadINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcRefSetBidirectionalMixDisableINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcRefSetBilinearFilterEnableINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcRefEvaluateWithSingleReferenceINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcRefEvaluateWithDualReferenceINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcRefEvaluateWithMultiReferenceINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcRefEvaluateWithMultiReferenceInterlacedINTEL: *hasResult = true; *hasResultType = true; break; + case 
OpSubgroupAvcRefConvertToMceResultINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcSicInitializeINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcSicConfigureSkcINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcSicConfigureIpeLumaINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcSicConfigureIpeLumaChromaINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcSicGetMotionVectorMaskINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcSicConvertToMcePayloadINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcSicSetIntraLumaShapePenaltyINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcSicSetIntraLumaModeCostFunctionINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcSicSetIntraChromaModeCostFunctionINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcSicSetBilinearFilterEnableINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcSicSetSkcForwardTransformEnableINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcSicSetBlockBasedRawSkipSadINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcSicEvaluateIpeINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcSicEvaluateWithSingleReferenceINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcSicEvaluateWithDualReferenceINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcSicEvaluateWithMultiReferenceINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcSicEvaluateWithMultiReferenceInterlacedINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcSicConvertToMceResultINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcSicGetIpeLumaShapeINTEL: *hasResult = true; *hasResultType = true; 
break; + case OpSubgroupAvcSicGetBestIpeLumaDistortionINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcSicGetBestIpeChromaDistortionINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcSicGetPackedIpeLumaModesINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcSicGetIpeChromaModeINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcSicGetPackedSkcLumaCountThresholdINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcSicGetPackedSkcLumaSumThresholdINTEL: *hasResult = true; *hasResultType = true; break; + case OpSubgroupAvcSicGetInterRawSadsINTEL: *hasResult = true; *hasResultType = true; break; + case OpRayQueryGetRayTMinKHR: *hasResult = true; *hasResultType = true; break; + case OpRayQueryGetRayFlagsKHR: *hasResult = true; *hasResultType = true; break; + case OpRayQueryGetIntersectionTKHR: *hasResult = true; *hasResultType = true; break; + case OpRayQueryGetIntersectionInstanceCustomIndexKHR: *hasResult = true; *hasResultType = true; break; + case OpRayQueryGetIntersectionInstanceIdKHR: *hasResult = true; *hasResultType = true; break; + case OpRayQueryGetIntersectionInstanceShaderBindingTableRecordOffsetKHR: *hasResult = true; *hasResultType = true; break; + case OpRayQueryGetIntersectionGeometryIndexKHR: *hasResult = true; *hasResultType = true; break; + case OpRayQueryGetIntersectionPrimitiveIndexKHR: *hasResult = true; *hasResultType = true; break; + case OpRayQueryGetIntersectionBarycentricsKHR: *hasResult = true; *hasResultType = true; break; + case OpRayQueryGetIntersectionFrontFaceKHR: *hasResult = true; *hasResultType = true; break; + case OpRayQueryGetIntersectionCandidateAABBOpaqueKHR: *hasResult = true; *hasResultType = true; break; + case OpRayQueryGetIntersectionObjectRayDirectionKHR: *hasResult = true; *hasResultType = true; break; + case OpRayQueryGetIntersectionObjectRayOriginKHR: *hasResult = true; *hasResultType = true; break; + 
case OpRayQueryGetWorldRayDirectionKHR: *hasResult = true; *hasResultType = true; break; + case OpRayQueryGetWorldRayOriginKHR: *hasResult = true; *hasResultType = true; break; + case OpRayQueryGetIntersectionObjectToWorldKHR: *hasResult = true; *hasResultType = true; break; + case OpRayQueryGetIntersectionWorldToObjectKHR: *hasResult = true; *hasResultType = true; break; + } +} +#endif /* SPV_ENABLE_UTILITY_CODE */ + +// Overload operator| for mask bit combining + +inline ImageOperandsMask operator|(ImageOperandsMask a, ImageOperandsMask b) { return ImageOperandsMask(unsigned(a) | unsigned(b)); } +inline FPFastMathModeMask operator|(FPFastMathModeMask a, FPFastMathModeMask b) { return FPFastMathModeMask(unsigned(a) | unsigned(b)); } +inline SelectionControlMask operator|(SelectionControlMask a, SelectionControlMask b) { return SelectionControlMask(unsigned(a) | unsigned(b)); } +inline LoopControlMask operator|(LoopControlMask a, LoopControlMask b) { return LoopControlMask(unsigned(a) | unsigned(b)); } +inline FunctionControlMask operator|(FunctionControlMask a, FunctionControlMask b) { return FunctionControlMask(unsigned(a) | unsigned(b)); } +inline MemorySemanticsMask operator|(MemorySemanticsMask a, MemorySemanticsMask b) { return MemorySemanticsMask(unsigned(a) | unsigned(b)); } +inline MemoryAccessMask operator|(MemoryAccessMask a, MemoryAccessMask b) { return MemoryAccessMask(unsigned(a) | unsigned(b)); } +inline KernelProfilingInfoMask operator|(KernelProfilingInfoMask a, KernelProfilingInfoMask b) { return KernelProfilingInfoMask(unsigned(a) | unsigned(b)); } +inline RayFlagsMask operator|(RayFlagsMask a, RayFlagsMask b) { return RayFlagsMask(unsigned(a) | unsigned(b)); } + +} // end namespace spv + +#endif // #ifndef spirv_HPP + diff --git a/dep/spirv-cross/spirv_cfg.cpp b/dep/spirv-cross/spirv_cfg.cpp new file mode 100644 index 000000000..fa9f3d173 --- /dev/null +++ b/dep/spirv-cross/spirv_cfg.cpp @@ -0,0 +1,404 @@ +/* + * Copyright 2016-2020 Arm Limited 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * At your option, you may choose to accept this material under either: + * 1. The Apache License, Version 2.0, found at , or + * 2. The MIT License, found at . + * SPDX-License-Identifier: Apache-2.0 OR MIT. + */ + +#include "spirv_cfg.hpp" +#include "spirv_cross.hpp" +#include +#include + +using namespace std; + +namespace SPIRV_CROSS_NAMESPACE +{ +CFG::CFG(Compiler &compiler_, const SPIRFunction &func_) + : compiler(compiler_) + , func(func_) +{ + build_post_order_visit_order(); + build_immediate_dominators(); +} + +uint32_t CFG::find_common_dominator(uint32_t a, uint32_t b) const +{ + while (a != b) + { + if (get_visit_order(a) < get_visit_order(b)) + a = get_immediate_dominator(a); + else + b = get_immediate_dominator(b); + } + return a; +} + +void CFG::build_immediate_dominators() +{ + // Traverse the post-order in reverse and build up the immediate dominator tree. + immediate_dominators.clear(); + immediate_dominators[func.entry_block] = func.entry_block; + + for (auto i = post_order.size(); i; i--) + { + uint32_t block = post_order[i - 1]; + auto &pred = preceding_edges[block]; + if (pred.empty()) // This is for the entry block, but we've already set up the dominators. 
+ continue; + + for (auto &edge : pred) + { + if (immediate_dominators[block]) + { + assert(immediate_dominators[edge]); + immediate_dominators[block] = find_common_dominator(immediate_dominators[block], edge); + } + else + immediate_dominators[block] = edge; + } + } +} + +bool CFG::is_back_edge(uint32_t to) const +{ + // We have a back edge if the visit order is set with the temporary magic value 0. + // Crossing edges will have already been recorded with a visit order. + auto itr = visit_order.find(to); + return itr != end(visit_order) && itr->second.get() == 0; +} + +bool CFG::has_visited_forward_edge(uint32_t to) const +{ + // If > 0, we have visited the edge already, and this is not a back edge branch. + auto itr = visit_order.find(to); + return itr != end(visit_order) && itr->second.get() > 0; +} + +bool CFG::post_order_visit(uint32_t block_id) +{ + // If we have already branched to this block (back edge), stop recursion. + // If our branches are back-edges, we do not record them. + // We have to record crossing edges however. + if (has_visited_forward_edge(block_id)) + return true; + else if (is_back_edge(block_id)) + return false; + + // Block back-edges from recursively revisiting ourselves. + visit_order[block_id].get() = 0; + + auto &block = compiler.get(block_id); + + // If this is a loop header, add an implied branch to the merge target. + // This is needed to avoid annoying cases with do { ... } while(false) loops often generated by inliners. + // To the CFG, this is linear control flow, but we risk picking the do/while scope as our dominating block. + // This makes sure that if we are accessing a variable outside the do/while, we choose the loop header as dominator. + // We could use has_visited_forward_edge, but this break code-gen where the merge block is unreachable in the CFG. + + // Make a point out of visiting merge target first. 
This is to make sure that post visit order outside the loop + // is lower than inside the loop, which is going to be key for some traversal algorithms like post-dominance analysis. + // For selection constructs true/false blocks will end up visiting the merge block directly and it works out fine, + // but for loops, only the header might end up actually branching to merge block. + if (block.merge == SPIRBlock::MergeLoop && post_order_visit(block.merge_block)) + add_branch(block_id, block.merge_block); + + // First visit our branch targets. + switch (block.terminator) + { + case SPIRBlock::Direct: + if (post_order_visit(block.next_block)) + add_branch(block_id, block.next_block); + break; + + case SPIRBlock::Select: + if (post_order_visit(block.true_block)) + add_branch(block_id, block.true_block); + if (post_order_visit(block.false_block)) + add_branch(block_id, block.false_block); + break; + + case SPIRBlock::MultiSelect: + for (auto &target : block.cases) + { + if (post_order_visit(target.block)) + add_branch(block_id, target.block); + } + if (block.default_block && post_order_visit(block.default_block)) + add_branch(block_id, block.default_block); + break; + + default: + break; + } + + // If this is a selection merge, add an implied branch to the merge target. + // This is needed to avoid cases where an inner branch dominates the outer branch. + // This can happen if one of the branches exit early, e.g.: + // if (cond) { ...; break; } else { var = 100 } use_var(var); + // We can use the variable without a Phi since there is only one possible parent here. + // However, in this case, we need to hoist out the inner variable to outside the branch. + // Use same strategy as loops. + if (block.merge == SPIRBlock::MergeSelection && post_order_visit(block.next_block)) + { + // If there is only one preceding edge to the merge block and it's not ourselves, we need a fixup. 
+ // Add a fake branch so any dominator in either the if (), or else () block, or a lone case statement + // will be hoisted out to outside the selection merge. + // If size > 1, the variable will be automatically hoisted, so we should not mess with it. + // The exception here is switch blocks, where we can have multiple edges to merge block, + // all coming from same scope, so be more conservative in this case. + // Adding fake branches unconditionally breaks parameter preservation analysis, + // which looks at how variables are accessed through the CFG. + auto pred_itr = preceding_edges.find(block.next_block); + if (pred_itr != end(preceding_edges)) + { + auto &pred = pred_itr->second; + auto succ_itr = succeeding_edges.find(block_id); + size_t num_succeeding_edges = 0; + if (succ_itr != end(succeeding_edges)) + num_succeeding_edges = succ_itr->second.size(); + + if (block.terminator == SPIRBlock::MultiSelect && num_succeeding_edges == 1) + { + // Multiple branches can come from the same scope due to "break;", so we need to assume that all branches + // come from same case scope in worst case, even if there are multiple preceding edges. + // If we have more than one succeeding edge from the block header, it should be impossible + // to have a dominator be inside the block. + // Only case this can go wrong is if we have 2 or more edges from block header and + // 2 or more edges to merge block, and still have dominator be inside a case label. + if (!pred.empty()) + add_branch(block_id, block.next_block); + } + else + { + if (pred.size() == 1 && *pred.begin() != block_id) + add_branch(block_id, block.next_block); + } + } + else + { + // If the merge block does not have any preceding edges, i.e. unreachable, hallucinate it. + // We're going to do code-gen for it, and domination analysis requires that we have at least one preceding edge. + add_branch(block_id, block.next_block); + } + } + + // Then visit ourselves. 
Start counting at one, to let 0 be a magic value for testing back vs. crossing edges. + visit_order[block_id].get() = ++visit_count; + post_order.push_back(block_id); + return true; +} + +void CFG::build_post_order_visit_order() +{ + uint32_t block = func.entry_block; + visit_count = 0; + visit_order.clear(); + post_order.clear(); + post_order_visit(block); +} + +void CFG::add_branch(uint32_t from, uint32_t to) +{ + const auto add_unique = [](SmallVector &l, uint32_t value) { + auto itr = find(begin(l), end(l), value); + if (itr == end(l)) + l.push_back(value); + }; + add_unique(preceding_edges[to], from); + add_unique(succeeding_edges[from], to); +} + +uint32_t CFG::find_loop_dominator(uint32_t block_id) const +{ + while (block_id != SPIRBlock::NoDominator) + { + auto itr = preceding_edges.find(block_id); + if (itr == end(preceding_edges)) + return SPIRBlock::NoDominator; + if (itr->second.empty()) + return SPIRBlock::NoDominator; + + uint32_t pred_block_id = SPIRBlock::NoDominator; + bool ignore_loop_header = false; + + // If we are a merge block, go directly to the header block. + // Only consider a loop dominator if we are branching from inside a block to a loop header. + // NOTE: In the CFG we forced an edge from header to merge block always to support variable scopes properly. + for (auto &pred : itr->second) + { + auto &pred_block = compiler.get(pred); + if (pred_block.merge == SPIRBlock::MergeLoop && pred_block.merge_block == ID(block_id)) + { + pred_block_id = pred; + ignore_loop_header = true; + break; + } + else if (pred_block.merge == SPIRBlock::MergeSelection && pred_block.next_block == ID(block_id)) + { + pred_block_id = pred; + break; + } + } + + // No merge block means we can just pick any edge. Loop headers dominate the inner loop, so any path we + // take will lead there. 
+ if (pred_block_id == SPIRBlock::NoDominator) + pred_block_id = itr->second.front(); + + block_id = pred_block_id; + + if (!ignore_loop_header && block_id) + { + auto &block = compiler.get(block_id); + if (block.merge == SPIRBlock::MergeLoop) + return block_id; + } + } + + return block_id; +} + +bool CFG::node_terminates_control_flow_in_sub_graph(BlockID from, BlockID to) const +{ + // Walk backwards, starting from "to" block. + // Only follow pred edges if they have a 1:1 relationship, or a merge relationship. + // If we cannot find a path to "from", we must assume that to is inside control flow in some way. + + auto &from_block = compiler.get(from); + BlockID ignore_block_id = 0; + if (from_block.merge == SPIRBlock::MergeLoop) + ignore_block_id = from_block.merge_block; + + while (to != from) + { + auto pred_itr = preceding_edges.find(to); + if (pred_itr == end(preceding_edges)) + return false; + + DominatorBuilder builder(*this); + for (auto &edge : pred_itr->second) + builder.add_block(edge); + + uint32_t dominator = builder.get_dominator(); + if (dominator == 0) + return false; + + auto &dom = compiler.get(dominator); + + bool true_path_ignore = false; + bool false_path_ignore = false; + if (ignore_block_id && dom.terminator == SPIRBlock::Select) + { + auto &true_block = compiler.get(dom.true_block); + auto &false_block = compiler.get(dom.false_block); + auto &ignore_block = compiler.get(ignore_block_id); + true_path_ignore = compiler.execution_is_branchless(true_block, ignore_block); + false_path_ignore = compiler.execution_is_branchless(false_block, ignore_block); + } + + if ((dom.merge == SPIRBlock::MergeSelection && dom.next_block == to) || + (dom.merge == SPIRBlock::MergeLoop && dom.merge_block == to) || + (dom.terminator == SPIRBlock::Direct && dom.next_block == to) || + (dom.terminator == SPIRBlock::Select && dom.true_block == to && false_path_ignore) || + (dom.terminator == SPIRBlock::Select && dom.false_block == to && true_path_ignore)) + { + // 
Allow walking selection constructs if the other branch reaches out of a loop construct. + // It cannot be in-scope anymore. + to = dominator; + } + else + return false; + } + + return true; +} + +DominatorBuilder::DominatorBuilder(const CFG &cfg_) + : cfg(cfg_) +{ +} + +void DominatorBuilder::add_block(uint32_t block) +{ + if (!cfg.get_immediate_dominator(block)) + { + // Unreachable block via the CFG, we will never emit this code anyways. + return; + } + + if (!dominator) + { + dominator = block; + return; + } + + if (block != dominator) + dominator = cfg.find_common_dominator(block, dominator); +} + +void DominatorBuilder::lift_continue_block_dominator() +{ + // It is possible for a continue block to be the dominator of a variable which is only accessed inside the while block of a do-while loop. + // We cannot safely declare variables inside a continue block, so move any variable declared + // in a continue block to the entry block to simplify. + // It makes very little sense for a continue block to ever be a dominator, so fall back to the simplest + // solution. + + if (!dominator) + return; + + auto &block = cfg.get_compiler().get(dominator); + auto post_order = cfg.get_visit_order(dominator); + + // If we are branching to a block with a higher post-order traversal index (continue blocks), we have a problem + // since we cannot create sensible GLSL code for this, so fall back to the entry block. 
+ bool back_edge_dominator = false; + switch (block.terminator) + { + case SPIRBlock::Direct: + if (cfg.get_visit_order(block.next_block) > post_order) + back_edge_dominator = true; + break; + + case SPIRBlock::Select: + if (cfg.get_visit_order(block.true_block) > post_order) + back_edge_dominator = true; + if (cfg.get_visit_order(block.false_block) > post_order) + back_edge_dominator = true; + break; + + case SPIRBlock::MultiSelect: + for (auto &target : block.cases) + { + if (cfg.get_visit_order(target.block) > post_order) + back_edge_dominator = true; + } + if (block.default_block && cfg.get_visit_order(block.default_block) > post_order) + back_edge_dominator = true; + break; + + default: + break; + } + + if (back_edge_dominator) + dominator = cfg.get_function().entry_block; +} +} // namespace SPIRV_CROSS_NAMESPACE diff --git a/dep/spirv-cross/spirv_cfg.hpp b/dep/spirv-cross/spirv_cfg.hpp new file mode 100644 index 000000000..b54599339 --- /dev/null +++ b/dep/spirv-cross/spirv_cfg.hpp @@ -0,0 +1,163 @@ +/* + * Copyright 2016-2020 Arm Limited + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * At your option, you may choose to accept this material under either: + * 1. The Apache License, Version 2.0, found at <http://www.apache.org/licenses/LICENSE-2.0>, or + * 2. The MIT License, found at <http://opensource.org/licenses/MIT>. + * SPDX-License-Identifier: Apache-2.0 OR MIT. 
+ */ + +#ifndef SPIRV_CROSS_CFG_HPP +#define SPIRV_CROSS_CFG_HPP + +#include "spirv_common.hpp" +#include + +namespace SPIRV_CROSS_NAMESPACE +{ +class Compiler; +class CFG +{ +public: + CFG(Compiler &compiler, const SPIRFunction &function); + + Compiler &get_compiler() + { + return compiler; + } + + const Compiler &get_compiler() const + { + return compiler; + } + + const SPIRFunction &get_function() const + { + return func; + } + + uint32_t get_immediate_dominator(uint32_t block) const + { + auto itr = immediate_dominators.find(block); + if (itr != std::end(immediate_dominators)) + return itr->second; + else + return 0; + } + + uint32_t get_visit_order(uint32_t block) const + { + auto itr = visit_order.find(block); + assert(itr != std::end(visit_order)); + int v = itr->second.get(); + assert(v > 0); + return uint32_t(v); + } + + uint32_t find_common_dominator(uint32_t a, uint32_t b) const; + + const SmallVector &get_preceding_edges(uint32_t block) const + { + auto itr = preceding_edges.find(block); + if (itr != std::end(preceding_edges)) + return itr->second; + else + return empty_vector; + } + + const SmallVector &get_succeeding_edges(uint32_t block) const + { + auto itr = succeeding_edges.find(block); + if (itr != std::end(succeeding_edges)) + return itr->second; + else + return empty_vector; + } + + template + void walk_from(std::unordered_set &seen_blocks, uint32_t block, const Op &op) const + { + if (seen_blocks.count(block)) + return; + seen_blocks.insert(block); + + if (op(block)) + { + for (auto b : get_succeeding_edges(block)) + walk_from(seen_blocks, b, op); + } + } + + uint32_t find_loop_dominator(uint32_t block) const; + + bool node_terminates_control_flow_in_sub_graph(BlockID from, BlockID to) const; + +private: + struct VisitOrder + { + int &get() + { + return v; + } + + const int &get() const + { + return v; + } + + int v = -1; + }; + + Compiler &compiler; + const SPIRFunction &func; + std::unordered_map> preceding_edges; + std::unordered_map> 
succeeding_edges; + std::unordered_map immediate_dominators; + std::unordered_map visit_order; + SmallVector post_order; + SmallVector empty_vector; + + void add_branch(uint32_t from, uint32_t to); + void build_post_order_visit_order(); + void build_immediate_dominators(); + bool post_order_visit(uint32_t block); + uint32_t visit_count = 0; + + bool is_back_edge(uint32_t to) const; + bool has_visited_forward_edge(uint32_t to) const; +}; + +class DominatorBuilder +{ +public: + DominatorBuilder(const CFG &cfg); + + void add_block(uint32_t block); + uint32_t get_dominator() const + { + return dominator; + } + + void lift_continue_block_dominator(); + +private: + const CFG &cfg; + uint32_t dominator = 0; +}; +} // namespace SPIRV_CROSS_NAMESPACE + +#endif diff --git a/dep/spirv-cross/spirv_common.hpp b/dep/spirv-cross/spirv_common.hpp new file mode 100644 index 000000000..ecb840772 --- /dev/null +++ b/dep/spirv-cross/spirv_common.hpp @@ -0,0 +1,1805 @@ +/* + * Copyright 2015-2020 Arm Limited + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * At your option, you may choose to accept this material under either: + * 1. The Apache License, Version 2.0, found at <http://www.apache.org/licenses/LICENSE-2.0>, or + * 2. The MIT License, found at <http://opensource.org/licenses/MIT>. + * SPDX-License-Identifier: Apache-2.0 OR MIT. 
+ */ + +#ifndef SPIRV_CROSS_COMMON_HPP +#define SPIRV_CROSS_COMMON_HPP + +#include "spirv.hpp" +#include "spirv_cross_containers.hpp" +#include "spirv_cross_error_handling.hpp" +#include + +// A bit crude, but allows projects which embed SPIRV-Cross statically to +// effectively hide all the symbols from other projects. +// There is a case where we have: +// - Project A links against SPIRV-Cross statically. +// - Project A links against Project B statically. +// - Project B links against SPIRV-Cross statically (might be a different version). +// This leads to a conflict with extremely bizarre results. +// By overriding the namespace in one of the project builds, we can work around this. +// If SPIRV-Cross is embedded in dynamic libraries, +// prefer using -fvisibility=hidden on GCC/Clang instead. +#ifdef SPIRV_CROSS_NAMESPACE_OVERRIDE +#define SPIRV_CROSS_NAMESPACE SPIRV_CROSS_NAMESPACE_OVERRIDE +#else +#define SPIRV_CROSS_NAMESPACE spirv_cross +#endif + +namespace SPIRV_CROSS_NAMESPACE +{ +namespace inner +{ +template +void join_helper(StringStream<> &stream, T &&t) +{ + stream << std::forward(t); +} + +template +void join_helper(StringStream<> &stream, T &&t, Ts &&... 
ts) +{ + stream << std::forward(t); + join_helper(stream, std::forward(ts)...); +} +} // namespace inner + +class Bitset +{ +public: + Bitset() = default; + explicit inline Bitset(uint64_t lower_) + : lower(lower_) + { + } + + inline bool get(uint32_t bit) const + { + if (bit < 64) + return (lower & (1ull << bit)) != 0; + else + return higher.count(bit) != 0; + } + + inline void set(uint32_t bit) + { + if (bit < 64) + lower |= 1ull << bit; + else + higher.insert(bit); + } + + inline void clear(uint32_t bit) + { + if (bit < 64) + lower &= ~(1ull << bit); + else + higher.erase(bit); + } + + inline uint64_t get_lower() const + { + return lower; + } + + inline void reset() + { + lower = 0; + higher.clear(); + } + + inline void merge_and(const Bitset &other) + { + lower &= other.lower; + std::unordered_set tmp_set; + for (auto &v : higher) + if (other.higher.count(v) != 0) + tmp_set.insert(v); + higher = std::move(tmp_set); + } + + inline void merge_or(const Bitset &other) + { + lower |= other.lower; + for (auto &v : other.higher) + higher.insert(v); + } + + inline bool operator==(const Bitset &other) const + { + if (lower != other.lower) + return false; + + if (higher.size() != other.higher.size()) + return false; + + for (auto &v : higher) + if (other.higher.count(v) == 0) + return false; + + return true; + } + + inline bool operator!=(const Bitset &other) const + { + return !(*this == other); + } + + template + void for_each_bit(const Op &op) const + { + // TODO: Add ctz-based iteration. + for (uint32_t i = 0; i < 64; i++) + { + if (lower & (1ull << i)) + op(i); + } + + if (higher.empty()) + return; + + // Need to enforce an order here for reproducible results, + // but hitting this path should happen extremely rarely, so having this slow path is fine. 
+ SmallVector bits; + bits.reserve(higher.size()); + for (auto &v : higher) + bits.push_back(v); + std::sort(std::begin(bits), std::end(bits)); + + for (auto &v : bits) + op(v); + } + + inline bool empty() const + { + return lower == 0 && higher.empty(); + } + +private: + // The most common bits to set are all lower than 64, + // so optimize for this case. Bits spilling outside 64 go into a slower data structure. + // In almost all cases, higher data structure will not be used. + uint64_t lower = 0; + std::unordered_set higher; +}; + +// Helper template to avoid lots of nasty string temporary munging. +template +std::string join(Ts &&... ts) +{ + StringStream<> stream; + inner::join_helper(stream, std::forward(ts)...); + return stream.str(); +} + +inline std::string merge(const SmallVector &list, const char *between = ", ") +{ + StringStream<> stream; + for (auto &elem : list) + { + stream << elem; + if (&elem != &list.back()) + stream << between; + } + return stream.str(); +} + +// Make sure we don't accidentally call this with float or doubles with SFINAE. +// Have to use the radix-aware overload. +template ::value, int>::type = 0> +inline std::string convert_to_string(const T &t) +{ + return std::to_string(t); +} + +// Allow implementations to set a convenient standard precision +#ifndef SPIRV_CROSS_FLT_FMT +#define SPIRV_CROSS_FLT_FMT "%.32g" +#endif + +// Disable sprintf and strcat warnings. +// We cannot rely on snprintf and family existing because, ..., MSVC. +#if defined(__clang__) || defined(__GNUC__) +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wdeprecated-declarations" +#elif defined(_MSC_VER) +#pragma warning(push) +#pragma warning(disable : 4996) +#endif + +static inline void fixup_radix_point(char *str, char radix_point) +{ + // Setting locales is a very risky business in a multi-threaded program, + // so just fixup locales instead. We only need to care about the radix point. 
+ if (radix_point != '.') + { + while (*str != '\0') + { + if (*str == radix_point) + *str = '.'; + str++; + } + } +} + +inline std::string convert_to_string(float t, char locale_radix_point) +{ + // std::to_string for floating point values is broken. + // Fallback to something more sane. + char buf[64]; + sprintf(buf, SPIRV_CROSS_FLT_FMT, t); + fixup_radix_point(buf, locale_radix_point); + + // Ensure that the literal is float. + if (!strchr(buf, '.') && !strchr(buf, 'e')) + strcat(buf, ".0"); + return buf; +} + +inline std::string convert_to_string(double t, char locale_radix_point) +{ + // std::to_string for floating point values is broken. + // Fallback to something more sane. + char buf[64]; + sprintf(buf, SPIRV_CROSS_FLT_FMT, t); + fixup_radix_point(buf, locale_radix_point); + + // Ensure that the literal is float. + if (!strchr(buf, '.') && !strchr(buf, 'e')) + strcat(buf, ".0"); + return buf; +} + +template +struct ValueSaver +{ + explicit ValueSaver(T ¤t_) + : current(current_) + , saved(current_) + { + } + + void release() + { + current = saved; + } + + ~ValueSaver() + { + release(); + } + + T ¤t; + T saved; +}; + +#if defined(__clang__) || defined(__GNUC__) +#pragma GCC diagnostic pop +#elif defined(_MSC_VER) +#pragma warning(pop) +#endif + +struct Instruction +{ + uint16_t op = 0; + uint16_t count = 0; + uint32_t offset = 0; + uint32_t length = 0; +}; + +enum Types +{ + TypeNone, + TypeType, + TypeVariable, + TypeConstant, + TypeFunction, + TypeFunctionPrototype, + TypeBlock, + TypeExtension, + TypeExpression, + TypeConstantOp, + TypeCombinedImageSampler, + TypeAccessChain, + TypeUndef, + TypeString, + TypeCount +}; + +template +class TypedID; + +template <> +class TypedID +{ +public: + TypedID() = default; + TypedID(uint32_t id_) + : id(id_) + { + } + + template + TypedID(const TypedID &other) + { + *this = other; + } + + template + TypedID &operator=(const TypedID &other) + { + id = uint32_t(other); + return *this; + } + + // Implicit conversion to 
u32 is desired here. + // As long as we block implicit conversion between TypedID<A> and TypedID<B> we're good. + operator uint32_t() const + { + return id; + } + + template + operator TypedID() const + { + return TypedID(*this); + } + +private: + uint32_t id = 0; +}; + +template +class TypedID +{ +public: + TypedID() = default; + TypedID(uint32_t id_) + : id(id_) + { + } + + explicit TypedID(const TypedID &other) + : id(uint32_t(other)) + { + } + + operator uint32_t() const + { + return id; + } + +private: + uint32_t id = 0; +}; + +using VariableID = TypedID; +using TypeID = TypedID; +using ConstantID = TypedID; +using FunctionID = TypedID; +using BlockID = TypedID; +using ID = TypedID; + +// Helper for Variant interface. +struct IVariant +{ + virtual ~IVariant() = default; + virtual IVariant *clone(ObjectPoolBase *pool) = 0; + ID self = 0; +}; + +#define SPIRV_CROSS_DECLARE_CLONE(T) \ + IVariant *clone(ObjectPoolBase *pool) override \ + { \ + return static_cast *>(pool)->allocate(*this); \ + } + +struct SPIRUndef : IVariant +{ + enum + { + type = TypeUndef + }; + + explicit SPIRUndef(TypeID basetype_) + : basetype(basetype_) + { + } + TypeID basetype; + + SPIRV_CROSS_DECLARE_CLONE(SPIRUndef) +}; + +struct SPIRString : IVariant +{ + enum + { + type = TypeString + }; + + explicit SPIRString(std::string str_) + : str(std::move(str_)) + { + } + + std::string str; + + SPIRV_CROSS_DECLARE_CLONE(SPIRString) +}; + +// This type is only used by backends which need to access the combined image and sampler IDs separately after +// the OpSampledImage opcode. 
+struct SPIRCombinedImageSampler : IVariant +{ + enum + { + type = TypeCombinedImageSampler + }; + SPIRCombinedImageSampler(TypeID type_, VariableID image_, VariableID sampler_) + : combined_type(type_) + , image(image_) + , sampler(sampler_) + { + } + TypeID combined_type; + VariableID image; + VariableID sampler; + + SPIRV_CROSS_DECLARE_CLONE(SPIRCombinedImageSampler) +}; + +struct SPIRConstantOp : IVariant +{ + enum + { + type = TypeConstantOp + }; + + SPIRConstantOp(TypeID result_type, spv::Op op, const uint32_t *args, uint32_t length) + : opcode(op) + , basetype(result_type) + { + arguments.reserve(length); + for (uint32_t i = 0; i < length; i++) + arguments.push_back(args[i]); + } + + spv::Op opcode; + SmallVector arguments; + TypeID basetype; + + SPIRV_CROSS_DECLARE_CLONE(SPIRConstantOp) +}; + +struct SPIRType : IVariant +{ + enum + { + type = TypeType + }; + + enum BaseType + { + Unknown, + Void, + Boolean, + SByte, + UByte, + Short, + UShort, + Int, + UInt, + Int64, + UInt64, + AtomicCounter, + Half, + Float, + Double, + Struct, + Image, + SampledImage, + Sampler, + AccelerationStructure, + RayQuery, + + // Keep internal types at the end. + ControlPointArray, + Interpolant, + Char + }; + + // Scalar/vector/matrix support. + BaseType basetype = Unknown; + uint32_t width = 0; + uint32_t vecsize = 1; + uint32_t columns = 1; + + // Arrays, support array of arrays by having a vector of array sizes. + SmallVector array; + + // Array elements can be either specialization constants or specialization ops. + // This array determines how to interpret the array size. + // If an element is true, the element is a literal, + // otherwise, it's an expression, which must be resolved on demand. + // The actual size is not really known until runtime. + SmallVector array_size_literal; + + // Pointers + // Keep track of how many pointer layers we have. 
+ uint32_t pointer_depth = 0; + bool pointer = false; + bool forward_pointer = false; + + spv::StorageClass storage = spv::StorageClassGeneric; + + SmallVector member_types; + + // If member order has been rewritten to handle certain scenarios with Offset, + // allow codegen to rewrite the index. + SmallVector member_type_index_redirection; + + // NOTE(review): image description fields; presumably only meaningful when basetype is Image or SampledImage - confirm. + struct ImageType + { + TypeID type; + spv::Dim dim; + bool depth; + bool arrayed; + bool ms; + uint32_t sampled; + spv::ImageFormat format; + spv::AccessQualifier access; + } image; + + // Structs can be declared multiple times if they are used as part of interface blocks. + // We want to detect this so that we only emit the struct definition once. + // Since we cannot rely on OpName to be equal, we need to figure out aliases. + TypeID type_alias = 0; + + // Denotes the type which this type is based on. + // Allows the backend to traverse how a complex type is built up during access chains. + TypeID parent_type = 0; + + // Used in backends to avoid emitting members with conflicting names. + std::unordered_set member_name_cache; + + SPIRV_CROSS_DECLARE_CLONE(SPIRType) +}; + +// Variant wrapping a single Extension enumerator. +struct SPIRExtension : IVariant +{ + enum + { + type = TypeExtension + }; + + enum Extension + { + Unsupported, + GLSL, + SPV_debug_info, + SPV_AMD_shader_ballot, + SPV_AMD_shader_explicit_vertex_parameter, + SPV_AMD_shader_trinary_minmax, + SPV_AMD_gcn_shader + }; + + explicit SPIRExtension(Extension ext_) + : ext(ext_) + { + } + + Extension ext; + SPIRV_CROSS_DECLARE_CLONE(SPIRExtension) +}; + +// SPIREntryPoint is not a variant since its IDs are used to decorate OpFunction, +// so in order to avoid conflicts, we can't stick them in the ids array. 
+struct SPIREntryPoint +{ + SPIREntryPoint(FunctionID self_, spv::ExecutionModel execution_model, const std::string &entry_name) + : self(self_) + , name(entry_name) + , orig_name(entry_name) + , model(execution_model) + { + } + SPIREntryPoint() = default; + + FunctionID self = 0; + std::string name; + std::string orig_name; + SmallVector interface_variables; + + Bitset flags; + struct WorkgroupSize + { + uint32_t x = 0, y = 0, z = 0; + uint32_t constant = 0; // Workgroup size can be expressed as a constant/spec-constant instead. + } workgroup_size; + uint32_t invocations = 0; + uint32_t output_vertices = 0; + spv::ExecutionModel model = spv::ExecutionModelMax; + bool geometry_passthrough = false; +}; + +struct SPIRExpression : IVariant +{ + enum + { + type = TypeExpression + }; + + // Only created by the backend target to avoid creating tons of temporaries. + SPIRExpression(std::string expr, TypeID expression_type_, bool immutable_) + : expression(move(expr)) + , expression_type(expression_type_) + , immutable(immutable_) + { + } + + // If non-zero, prepend expression with to_expression(base_expression). + // Used in amortizing multiple calls to to_expression() + // where in certain cases that would quickly force a temporary when not needed. + ID base_expression = 0; + + std::string expression; + TypeID expression_type = 0; + + // If this expression is a forwarded load, + // allow us to reference the original variable. + ID loaded_from = 0; + + // If this expression will never change, we can avoid lots of temporaries + // in high level source. + // An expression being immutable can be speculative, + // it is assumed that this is true almost always. + bool immutable = false; + + // Before use, this expression must be transposed. + // This is needed for targets which don't support row_major layouts. + bool need_transpose = false; + + // Whether or not this is an access chain expression. 
+ bool access_chain = false; + + // A list of expressions which this expression depends on. + SmallVector expression_dependencies; + + // By reading this expression, we implicitly read these expressions as well. + // Used by access chain Store and Load since we read multiple expressions in this case. + SmallVector implied_read_expressions; + + // The expression was emitted at a certain scope. Lets us track when an expression read means multiple reads. + uint32_t emitted_loop_level = 0; + + SPIRV_CROSS_DECLARE_CLONE(SPIRExpression) +}; + +struct SPIRFunctionPrototype : IVariant +{ + enum + { + type = TypeFunctionPrototype + }; + + explicit SPIRFunctionPrototype(TypeID return_type_) + : return_type(return_type_) + { + } + + TypeID return_type; + SmallVector parameter_types; + + SPIRV_CROSS_DECLARE_CLONE(SPIRFunctionPrototype) +}; + +struct SPIRBlock : IVariant +{ + enum + { + type = TypeBlock + }; + + enum Terminator + { + Unknown, + Direct, // Emit next block directly without a particular condition. + + Select, // Block ends with an if/else block. + MultiSelect, // Block ends with switch statement. + + Return, // Block ends with return. + Unreachable, // Noop + Kill // Discard + }; + + enum Merge + { + MergeNone, + MergeLoop, + MergeSelection + }; + + enum Hints + { + HintNone, + HintUnroll, + HintDontUnroll, + HintFlatten, + HintDontFlatten + }; + + enum Method + { + MergeToSelectForLoop, + MergeToDirectForLoop, + MergeToSelectContinueForLoop + }; + + enum ContinueBlockType + { + ContinueNone, + + // Continue block is branchless and has at least one instruction. + ForLoop, + + // Noop continue block. + WhileLoop, + + // Continue block is conditional. + DoWhileLoop, + + // Highly unlikely that anything will use this, + // since it is really awkward/impossible to express in GLSL. 
+ ComplexLoop + }; + + enum : uint32_t + { + NoDominator = 0xffffffffu + }; + + Terminator terminator = Unknown; + Merge merge = MergeNone; + Hints hint = HintNone; + BlockID next_block = 0; + BlockID merge_block = 0; + BlockID continue_block = 0; + + ID return_value = 0; // If 0, return nothing (void). + ID condition = 0; + BlockID true_block = 0; + BlockID false_block = 0; + BlockID default_block = 0; + + SmallVector ops; + + struct Phi + { + ID local_variable; // flush local variable ... + BlockID parent; // If we're in from_block and want to branch into this block ... + VariableID function_variable; // to this function-global "phi" variable first. + }; + + // Before entering this block flush out local variables to magical "phi" variables. + SmallVector phi_variables; + + // Declare these temporaries before beginning the block. + // Used for handling complex continue blocks which have side effects. + SmallVector> declare_temporary; + + // Declare these temporaries, but only conditionally if this block turns out to be + // a complex loop header. + SmallVector> potential_declare_temporary; + + struct Case + { + uint32_t value; + BlockID block; + }; + SmallVector cases; + + // If we have tried to optimize code for this block but failed, + // keep track of this. + bool disable_block_optimization = false; + + // If the continue block is complex, fallback to "dumb" for loops. + bool complex_continue = false; + + // Do we need a ladder variable to defer breaking out of a loop construct after a switch block? + bool need_ladder_break = false; + + // If marked, we have explicitly handled Phi from this block, so skip any flushes related to that on a branch. + // Used to handle an edge case with switch and case-label fallthrough where fall-through writes to Phi. + BlockID ignore_phi_from_block = 0; + + // The dominating block which this block might be within. + // Used in continue; blocks to determine if we really need to write continue. 
+ BlockID loop_dominator = 0; + + // All accesses to these variables are dominated by this block, + // so before branching anywhere we need to make sure that we declare these variables. + SmallVector dominated_variables; + + // These are variables which should be declared in a for loop header, if we + // fail to use a classic for-loop, + // we remove these variables, and fall back to regular variables outside the loop. + SmallVector loop_variables; + + // Some expressions are control-flow dependent, i.e. any instruction which relies on derivatives or + // sub-group-like operations. + // Make sure that we only use these expressions in the original block. + SmallVector invalidate_expressions; + + SPIRV_CROSS_DECLARE_CLONE(SPIRBlock) +}; + +struct SPIRFunction : IVariant +{ + enum + { + type = TypeFunction + }; + + SPIRFunction(TypeID return_type_, TypeID function_type_) + : return_type(return_type_) + , function_type(function_type_) + { + } + + struct Parameter + { + TypeID type; + ID id; + uint32_t read_count; + uint32_t write_count; + + // Set to true if this parameter aliases a global variable, + // used mostly in Metal where global variables + // have to be passed down to functions as regular arguments. + // However, for this kind of variable, we should not care about + // read and write counts as access to the function arguments + // is not local to the function in question. + bool alias_global_variable; + }; + + // When calling a function, and we're remapping separate image samplers, + // resolve these arguments into combined image samplers and pass them + // as additional arguments in this order. + // It gets more complicated as functions can pull in their own globals + // and combine them with parameters, + // so we need to distinguish if something is local parameter index + // or a global ID. 
+ struct CombinedImageSamplerParameter + { + VariableID id; + VariableID image_id; + VariableID sampler_id; + bool global_image; + bool global_sampler; + bool depth; + }; + + TypeID return_type; + TypeID function_type; + SmallVector arguments; + + // Can be used by backends to add magic arguments. + // Currently used by combined image/sampler implementation. + + SmallVector shadow_arguments; + SmallVector local_variables; + BlockID entry_block = 0; + SmallVector blocks; + SmallVector combined_parameters; + + struct EntryLine + { + uint32_t file_id = 0; + uint32_t line_literal = 0; + }; + EntryLine entry_line; + + void add_local_variable(VariableID id) + { + local_variables.push_back(id); + } + + void add_parameter(TypeID parameter_type, ID id, bool alias_global_variable = false) + { + // Arguments are read-only until proven otherwise. + arguments.push_back({ parameter_type, id, 0u, 0u, alias_global_variable }); + } + + // Hooks to be run when the function returns. + // Mostly used for lowering internal data structures onto flattened structures. + // Need to defer this, because they might rely on things which change during compilation. + // Intentionally not a small vector, this one is rare, and std::function can be large. + Vector> fixup_hooks_out; + + // Hooks to be run when the function begins. + // Mostly used for populating internal data structures from flattened structures. + // Need to defer this, because they might rely on things which change during compilation. + // Intentionally not a small vector, this one is rare, and std::function can be large. + Vector> fixup_hooks_in; + + // On function entry, make sure to copy a constant array into thread addr space to work around + // the case where we are passing a constant array by value to a function on backends which do not + // consider arrays value types. 
+ SmallVector constant_arrays_needed_on_stack; + + bool active = false; + bool flush_undeclared = true; + bool do_combined_parameters = true; + + SPIRV_CROSS_DECLARE_CLONE(SPIRFunction) +}; + +struct SPIRAccessChain : IVariant +{ + enum + { + type = TypeAccessChain + }; + + SPIRAccessChain(TypeID basetype_, spv::StorageClass storage_, std::string base_, std::string dynamic_index_, + int32_t static_index_) + : basetype(basetype_) + , storage(storage_) + , base(std::move(base_)) + , dynamic_index(std::move(dynamic_index_)) + , static_index(static_index_) + { + } + + // The access chain represents an offset into a buffer. + // Some backends need more complicated handling of access chains to be able to use buffers, like HLSL + // which has no usable buffer type ala GLSL SSBOs. + // StructuredBuffer<T> is too limited, so our only option is to deal with ByteAddressBuffer which works with raw addresses. + + TypeID basetype; + spv::StorageClass storage; + std::string base; + std::string dynamic_index; + int32_t static_index; + + VariableID loaded_from = 0; + uint32_t matrix_stride = 0; + uint32_t array_stride = 0; + bool row_major_matrix = false; + bool immutable = false; + + // By reading this expression, we implicitly read these expressions as well. + // Used by access chain Store and Load since we read multiple expressions in this case. 
+ SmallVector implied_read_expressions; + + SPIRV_CROSS_DECLARE_CLONE(SPIRAccessChain) +}; + +struct SPIRVariable : IVariant +{ + enum + { + type = TypeVariable + }; + + SPIRVariable() = default; + SPIRVariable(TypeID basetype_, spv::StorageClass storage_, ID initializer_ = 0, VariableID basevariable_ = 0) + : basetype(basetype_) + , storage(storage_) + , initializer(initializer_) + , basevariable(basevariable_) + { + } + + TypeID basetype = 0; + spv::StorageClass storage = spv::StorageClassGeneric; + uint32_t decoration = 0; + ID initializer = 0; + VariableID basevariable = 0; + + SmallVector dereference_chain; + bool compat_builtin = false; + + // If a variable is shadowed, we only statically assign to it + // and never actually emit a statement for it. + // When we read the variable as an expression, just forward + // shadowed_id as the expression. + bool statically_assigned = false; + ID static_expression = 0; + + // Temporaries which can remain forwarded as long as this variable is not modified. + SmallVector dependees; + bool forwardable = true; + + bool deferred_declaration = false; + bool phi_variable = false; + + // Used to deal with Phi variable flushes. See flush_phi(). + bool allocate_temporary_copy = false; + + bool remapped_variable = false; + uint32_t remapped_components = 0; + + // The block which dominates all access to this variable. + BlockID dominator = 0; + // If true, this variable is a loop variable, when accessing the variable + // outside a loop, + // we should statically forward it. + bool loop_variable = false; + // Set to true while we're inside the for loop. 
+ bool loop_variable_enable = false; + + SPIRFunction::Parameter *parameter = nullptr; + + SPIRV_CROSS_DECLARE_CLONE(SPIRVariable) +}; + +struct SPIRConstant : IVariant +{ + enum + { + type = TypeConstant + }; + + union Constant + { + uint32_t u32; + int32_t i32; + float f32; + + uint64_t u64; + int64_t i64; + double f64; + }; + + struct ConstantVector + { + Constant r[4]; + // If != 0, this element is a specialization constant, and we should keep track of it as such. + ID id[4]; + uint32_t vecsize = 1; + + ConstantVector() + { + memset(r, 0, sizeof(r)); + } + }; + + struct ConstantMatrix + { + ConstantVector c[4]; + // If != 0, this column is a specialization constant, and we should keep track of it as such. + ID id[4]; + uint32_t columns = 1; + }; + + static inline float f16_to_f32(uint16_t u16_value) + { + // Based on the GLM implementation. + int s = (u16_value >> 15) & 0x1; + int e = (u16_value >> 10) & 0x1f; + int m = (u16_value >> 0) & 0x3ff; + + union + { + float f32; + uint32_t u32; + } u; + + if (e == 0) + { + if (m == 0) + { + u.u32 = uint32_t(s) << 31; + return u.f32; + } + else + { + while ((m & 0x400) == 0) + { + m <<= 1; + e--; + } + + e++; + m &= ~0x400; + } + } + else if (e == 31) + { + if (m == 0) + { + u.u32 = (uint32_t(s) << 31) | 0x7f800000u; + return u.f32; + } + else + { + u.u32 = (uint32_t(s) << 31) | 0x7f800000u | (m << 13); + return u.f32; + } + } + + e += 127 - 15; + m <<= 13; + u.u32 = (uint32_t(s) << 31) | (e << 23) | m; + return u.f32; + } + + inline uint32_t specialization_constant_id(uint32_t col, uint32_t row) const + { + return m.c[col].id[row]; + } + + inline uint32_t specialization_constant_id(uint32_t col) const + { + return m.id[col]; + } + + inline uint32_t scalar(uint32_t col = 0, uint32_t row = 0) const + { + return m.c[col].r[row].u32; + } + + inline int16_t scalar_i16(uint32_t col = 0, uint32_t row = 0) const + { + return int16_t(m.c[col].r[row].u32 & 0xffffu); + } + + inline uint16_t scalar_u16(uint32_t col = 0, uint32_t 
row = 0) const + { + return uint16_t(m.c[col].r[row].u32 & 0xffffu); + } + + inline int8_t scalar_i8(uint32_t col = 0, uint32_t row = 0) const + { + return int8_t(m.c[col].r[row].u32 & 0xffu); + } + + inline uint8_t scalar_u8(uint32_t col = 0, uint32_t row = 0) const + { + return uint8_t(m.c[col].r[row].u32 & 0xffu); + } + + inline float scalar_f16(uint32_t col = 0, uint32_t row = 0) const + { + return f16_to_f32(scalar_u16(col, row)); + } + + inline float scalar_f32(uint32_t col = 0, uint32_t row = 0) const + { + return m.c[col].r[row].f32; + } + + inline int32_t scalar_i32(uint32_t col = 0, uint32_t row = 0) const + { + return m.c[col].r[row].i32; + } + + inline double scalar_f64(uint32_t col = 0, uint32_t row = 0) const + { + return m.c[col].r[row].f64; + } + + inline int64_t scalar_i64(uint32_t col = 0, uint32_t row = 0) const + { + return m.c[col].r[row].i64; + } + + inline uint64_t scalar_u64(uint32_t col = 0, uint32_t row = 0) const + { + return m.c[col].r[row].u64; + } + + inline const ConstantVector &vector() const + { + return m.c[0]; + } + + inline uint32_t vector_size() const + { + return m.c[0].vecsize; + } + + inline uint32_t columns() const + { + return m.columns; + } + + inline void make_null(const SPIRType &constant_type_) + { + m = {}; + m.columns = constant_type_.columns; + for (auto &c : m.c) + c.vecsize = constant_type_.vecsize; + } + + inline bool constant_is_null() const + { + if (specialization) + return false; + if (!subconstants.empty()) + return false; + + for (uint32_t col = 0; col < columns(); col++) + for (uint32_t row = 0; row < vector_size(); row++) + if (scalar_u64(col, row) != 0) + return false; + + return true; + } + + explicit SPIRConstant(uint32_t constant_type_) + : constant_type(constant_type_) + { + } + + SPIRConstant() = default; + + SPIRConstant(TypeID constant_type_, const uint32_t *elements, uint32_t num_elements, bool specialized) + : constant_type(constant_type_) + , specialization(specialized) + { + 
subconstants.reserve(num_elements); + for (uint32_t i = 0; i < num_elements; i++) + subconstants.push_back(elements[i]); + specialization = specialized; + } + + // Construct scalar (32-bit). + SPIRConstant(TypeID constant_type_, uint32_t v0, bool specialized) + : constant_type(constant_type_) + , specialization(specialized) + { + m.c[0].r[0].u32 = v0; + m.c[0].vecsize = 1; + m.columns = 1; + } + + // Construct scalar (64-bit). + SPIRConstant(TypeID constant_type_, uint64_t v0, bool specialized) + : constant_type(constant_type_) + , specialization(specialized) + { + m.c[0].r[0].u64 = v0; + m.c[0].vecsize = 1; + m.columns = 1; + } + + // Construct vectors and matrices. + SPIRConstant(TypeID constant_type_, const SPIRConstant *const *vector_elements, uint32_t num_elements, + bool specialized) + : constant_type(constant_type_) + , specialization(specialized) + { + bool matrix = vector_elements[0]->m.c[0].vecsize > 1; + + if (matrix) + { + m.columns = num_elements; + + for (uint32_t i = 0; i < num_elements; i++) + { + m.c[i] = vector_elements[i]->m.c[0]; + if (vector_elements[i]->specialization) + m.id[i] = vector_elements[i]->self; + } + } + else + { + m.c[0].vecsize = num_elements; + m.columns = 1; + + for (uint32_t i = 0; i < num_elements; i++) + { + m.c[0].r[i] = vector_elements[i]->m.c[0].r[0]; + if (vector_elements[i]->specialization) + m.c[0].id[i] = vector_elements[i]->self; + } + } + } + + TypeID constant_type = 0; + ConstantMatrix m; + + // If this constant is a specialization constant (i.e. created with OpSpecConstant*). + bool specialization = false; + // If this constant is used as an array length which creates specialization restrictions on some backends. + bool is_used_as_array_length = false; + + // If true, this is a LUT, and should always be declared in the outer scope. + bool is_used_as_lut = false; + + // For composites which are constant arrays, etc. 
+ SmallVector subconstants; + + // Non-Vulkan GLSL, HLSL and sometimes MSL emits defines for each specialization constant, + // and uses them to initialize the constant. This allows the user + // to still be able to specialize the value by supplying corresponding + // preprocessor directives before compiling the shader. + std::string specialization_constant_macro_name; + + SPIRV_CROSS_DECLARE_CLONE(SPIRConstant) +}; + +// Variants have a very specific allocation scheme. +struct ObjectPoolGroup +{ + std::unique_ptr pools[TypeCount]; +}; + +class Variant +{ +public: + explicit Variant(ObjectPoolGroup *group_) + : group(group_) + { + } + + ~Variant() + { + if (holder) + group->pools[type]->free_opaque(holder); + } + + // Marking custom move constructor as noexcept is important. + Variant(Variant &&other) SPIRV_CROSS_NOEXCEPT + { + *this = std::move(other); + } + + // We cannot copy from other variant without our own pool group. + // Have to explicitly copy. + Variant(const Variant &variant) = delete; + + // Marking custom move assignment operator as noexcept is important. + Variant &operator=(Variant &&other) SPIRV_CROSS_NOEXCEPT + { + if (this != &other) + { + if (holder) + group->pools[type]->free_opaque(holder); + holder = other.holder; + group = other.group; + type = other.type; + allow_type_rewrite = other.allow_type_rewrite; + + other.holder = nullptr; + other.type = TypeNone; + } + return *this; + } + + // This copy/clone should only be called in the Compiler constructor. + // If this is called inside ::compile(), we invalidate any references we took higher in the stack. + // This should never happen. 
+ Variant &operator=(const Variant &other) + { +//#define SPIRV_CROSS_COPY_CONSTRUCTOR_SANITIZE +#ifdef SPIRV_CROSS_COPY_CONSTRUCTOR_SANITIZE + abort(); +#endif + if (this != &other) + { + if (holder) + group->pools[type]->free_opaque(holder); + + if (other.holder) + holder = other.holder->clone(group->pools[other.type].get()); + else + holder = nullptr; + + type = other.type; + allow_type_rewrite = other.allow_type_rewrite; + } + return *this; + } + + void set(IVariant *val, Types new_type) + { + if (holder) + group->pools[type]->free_opaque(holder); + holder = nullptr; + + if (!allow_type_rewrite && type != TypeNone && type != new_type) + { + if (val) + group->pools[new_type]->free_opaque(val); + SPIRV_CROSS_THROW("Overwriting a variant with new type."); + } + + holder = val; + type = new_type; + allow_type_rewrite = false; + } + + template + T *allocate_and_set(Types new_type, Ts &&... ts) + { + T *val = static_cast &>(*group->pools[new_type]).allocate(std::forward(ts)...); + set(val, new_type); + return val; + } + + template + T &get() + { + if (!holder) + SPIRV_CROSS_THROW("nullptr"); + if (static_cast(T::type) != type) + SPIRV_CROSS_THROW("Bad cast"); + return *static_cast(holder); + } + + template + const T &get() const + { + if (!holder) + SPIRV_CROSS_THROW("nullptr"); + if (static_cast(T::type) != type) + SPIRV_CROSS_THROW("Bad cast"); + return *static_cast(holder); + } + + Types get_type() const + { + return type; + } + + ID get_id() const + { + return holder ? 
holder->self : ID(0); + } + + bool empty() const + { + return !holder; + } + + void reset() + { + if (holder) + group->pools[type]->free_opaque(holder); + holder = nullptr; + type = TypeNone; + } + + void set_allow_type_rewrite() + { + allow_type_rewrite = true; + } + +private: + ObjectPoolGroup *group = nullptr; + IVariant *holder = nullptr; + Types type = TypeNone; + bool allow_type_rewrite = false; +}; + +template +T &variant_get(Variant &var) +{ + return var.get(); +} + +template +const T &variant_get(const Variant &var) +{ + return var.get(); +} + +template +T &variant_set(Variant &var, P &&... args) +{ + auto *ptr = var.allocate_and_set(static_cast(T::type), std::forward

(args)...); + return *ptr; +} + +struct AccessChainMeta +{ + uint32_t storage_physical_type = 0; + bool need_transpose = false; + bool storage_is_packed = false; + bool storage_is_invariant = false; + bool flattened_struct = false; +}; + +enum ExtendedDecorations +{ + // Marks if a buffer block is re-packed, i.e. member declaration might be subject to PhysicalTypeID remapping and padding. + SPIRVCrossDecorationBufferBlockRepacked = 0, + + // A type in a buffer block might be declared with a different physical type than the logical type. + // If this is not set, PhysicalTypeID == the SPIR-V type as declared. + SPIRVCrossDecorationPhysicalTypeID, + + // Marks if the physical type is to be declared with tight packing rules, i.e. packed_floatN on MSL and friends. + // If this is set, PhysicalTypeID might also be set. It can be set to same as logical type if all we're doing + // is converting float3 to packed_float3 for example. + // If this is marked on a struct, it means the struct itself must use only Packed types for all its members. + SPIRVCrossDecorationPhysicalTypePacked, + + // The padding in bytes before declaring this struct member. + // If used on a struct type, marks the target size of a struct. + SPIRVCrossDecorationPaddingTarget, + + SPIRVCrossDecorationInterfaceMemberIndex, + SPIRVCrossDecorationInterfaceOrigID, + SPIRVCrossDecorationResourceIndexPrimary, + // Used for decorations like resource indices for samplers when part of combined image samplers. + // A variable might need to hold two resource indices in this case. + SPIRVCrossDecorationResourceIndexSecondary, + // Used for resource indices for multiplanar images when part of combined image samplers. + SPIRVCrossDecorationResourceIndexTertiary, + SPIRVCrossDecorationResourceIndexQuaternary, + + // Marks a buffer block for using explicit offsets (GLSL/HLSL). 
+ SPIRVCrossDecorationExplicitOffset, + + // Apply to a variable in the Input storage class; marks it as holding the base group passed to vkCmdDispatchBase(), + // or the base vertex and instance indices passed to vkCmdDrawIndexed(). + // In MSL, this is used to adjust the WorkgroupId and GlobalInvocationId variables in compute shaders, + // and to hold the BaseVertex and BaseInstance variables in vertex shaders. + SPIRVCrossDecorationBuiltInDispatchBase, + + // Apply to a variable that is a function parameter; marks it as being a "dynamic" + // combined image-sampler. In MSL, this is used when a function parameter might hold + // either a regular combined image-sampler or one that has an attached sampler + // Y'CbCr conversion. + SPIRVCrossDecorationDynamicImageSampler, + + // Apply to a variable in the Input storage class; marks it as holding the size of the stage + // input grid. + // In MSL, this is used to hold the vertex and instance counts in a tessellation pipeline + // vertex shader. + SPIRVCrossDecorationBuiltInStageInputSize, + + // Apply to any access chain of a tessellation I/O variable; stores the type of the sub-object + // that was chained to, as recorded in the input variable itself. This is used in case the pointer + // is itself used as the base of an access chain, to calculate the original type of the sub-object + // chained to, in case a swizzle needs to be applied. This should not happen normally with valid + // SPIR-V, but the MSL backend can change the type of input variables, necessitating the + // addition of swizzles to keep the generated code compiling. + SPIRVCrossDecorationTessIOOriginalInputTypeID, + + // Apply to any access chain of an interface variable used with pull-model interpolation, where the variable is a + // vector but the resulting pointer is a scalar; stores the component index that is to be accessed by the chain. 
+ // This is used when emitting calls to interpolation functions on the chain in MSL: in this case, the component + // must be applied to the result, since pull-model interpolants in MSL cannot be swizzled directly, but the + // results of interpolation can. + SPIRVCrossDecorationInterpolantComponentExpr, + + SPIRVCrossDecorationCount +}; + +struct Meta +{ + struct Decoration + { + std::string alias; + std::string qualified_alias; + std::string hlsl_semantic; + Bitset decoration_flags; + spv::BuiltIn builtin_type = spv::BuiltInMax; + uint32_t location = 0; + uint32_t component = 0; + uint32_t set = 0; + uint32_t binding = 0; + uint32_t offset = 0; + uint32_t xfb_buffer = 0; + uint32_t xfb_stride = 0; + uint32_t stream = 0; + uint32_t array_stride = 0; + uint32_t matrix_stride = 0; + uint32_t input_attachment = 0; + uint32_t spec_id = 0; + uint32_t index = 0; + spv::FPRoundingMode fp_rounding_mode = spv::FPRoundingModeMax; + bool builtin = false; + + struct Extended + { + Extended() + { + // MSVC 2013 workaround to init like this. + for (auto &v : values) + v = 0; + } + + Bitset flags; + uint32_t values[SPIRVCrossDecorationCount]; + } extended; + }; + + Decoration decoration; + + // Intentionally not a SmallVector. Decoration is large and somewhat rare. + Vector members; + + std::unordered_map decoration_word_offset; + + // For SPV_GOOGLE_hlsl_functionality1. + bool hlsl_is_magic_counter_buffer = false; + // ID for the sibling counter buffer. + uint32_t hlsl_magic_counter_buffer = 0; +}; + +// A user callback that remaps the type of any variable. +// var_name is the declared name of the variable. +// name_of_type is the textual name of the type which will be used in the code unless written to by the callback. 
+using VariableTypeRemapCallback = + std::function; + +class Hasher +{ +public: + inline void u32(uint32_t value) + { + h = (h * 0x100000001b3ull) ^ value; + } + + inline uint64_t get() const + { + return h; + } + +private: + uint64_t h = 0xcbf29ce484222325ull; +}; + +static inline bool type_is_floating_point(const SPIRType &type) +{ + return type.basetype == SPIRType::Half || type.basetype == SPIRType::Float || type.basetype == SPIRType::Double; +} + +static inline bool type_is_integral(const SPIRType &type) +{ + return type.basetype == SPIRType::SByte || type.basetype == SPIRType::UByte || type.basetype == SPIRType::Short || + type.basetype == SPIRType::UShort || type.basetype == SPIRType::Int || type.basetype == SPIRType::UInt || + type.basetype == SPIRType::Int64 || type.basetype == SPIRType::UInt64; +} + +static inline SPIRType::BaseType to_signed_basetype(uint32_t width) +{ + switch (width) + { + case 8: + return SPIRType::SByte; + case 16: + return SPIRType::Short; + case 32: + return SPIRType::Int; + case 64: + return SPIRType::Int64; + default: + SPIRV_CROSS_THROW("Invalid bit width."); + } +} + +static inline SPIRType::BaseType to_unsigned_basetype(uint32_t width) +{ + switch (width) + { + case 8: + return SPIRType::UByte; + case 16: + return SPIRType::UShort; + case 32: + return SPIRType::UInt; + case 64: + return SPIRType::UInt64; + default: + SPIRV_CROSS_THROW("Invalid bit width."); + } +} + +// Returns true if an arithmetic operation does not change behavior depending on signedness. 
+static inline bool opcode_is_sign_invariant(spv::Op opcode) +{ + switch (opcode) + { + case spv::OpIEqual: + case spv::OpINotEqual: + case spv::OpISub: + case spv::OpIAdd: + case spv::OpIMul: + case spv::OpShiftLeftLogical: + case spv::OpBitwiseOr: + case spv::OpBitwiseXor: + case spv::OpBitwiseAnd: + return true; + + default: + return false; + } +} + +struct SetBindingPair +{ + uint32_t desc_set; + uint32_t binding; + + inline bool operator==(const SetBindingPair &other) const + { + return desc_set == other.desc_set && binding == other.binding; + } + + inline bool operator<(const SetBindingPair &other) const + { + return desc_set < other.desc_set || (desc_set == other.desc_set && binding < other.binding); + } +}; + +struct StageSetBinding +{ + spv::ExecutionModel model; + uint32_t desc_set; + uint32_t binding; + + inline bool operator==(const StageSetBinding &other) const + { + return model == other.model && desc_set == other.desc_set && binding == other.binding; + } +}; + +struct InternalHasher +{ + inline size_t operator()(const SetBindingPair &value) const + { + // Quality of hash doesn't really matter here. + auto hash_set = std::hash()(value.desc_set); + auto hash_binding = std::hash()(value.binding); + return (hash_set * 0x10001b31) ^ hash_binding; + } + + inline size_t operator()(const StageSetBinding &value) const + { + // Quality of hash doesn't really matter here. + auto hash_model = std::hash()(value.model); + auto hash_set = std::hash()(value.desc_set); + auto tmp_hash = (hash_model * 0x10001b31) ^ hash_set; + return (tmp_hash * 0x10001b31) ^ value.binding; + } +}; + +// Special constant used in a {MSL,HLSL}ResourceBinding desc_set +// element to indicate the bindings for the push constants. +static const uint32_t ResourceBindingPushConstantDescriptorSet = ~(0u); + +// Special constant used in a {MSL,HLSL}ResourceBinding binding +// element to indicate the bindings for the push constants. 
+static const uint32_t ResourceBindingPushConstantBinding = 0; +} // namespace SPIRV_CROSS_NAMESPACE + +namespace std +{ +template +struct hash> +{ + size_t operator()(const SPIRV_CROSS_NAMESPACE::TypedID &value) const + { + return std::hash()(value); + } +}; +} // namespace std + +#endif diff --git a/dep/spirv-cross/spirv_cpp.cpp b/dep/spirv-cross/spirv_cpp.cpp new file mode 100644 index 000000000..492044314 --- /dev/null +++ b/dep/spirv-cross/spirv_cpp.cpp @@ -0,0 +1,558 @@ +/* + * Copyright 2015-2020 Arm Limited + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * At your option, you may choose to accept this material under either: + * 1. The Apache License, Version 2.0, found at , or + * 2. The MIT License, found at . + * SPDX-License-Identifier: Apache-2.0 OR MIT. 
+ */ + +#include "spirv_cpp.hpp" + +using namespace spv; +using namespace SPIRV_CROSS_NAMESPACE; +using namespace std; + +void CompilerCPP::emit_buffer_block(const SPIRVariable &var) +{ + add_resource_name(var.self); + + auto &type = get(var.basetype); + auto instance_name = to_name(var.self); + + uint32_t descriptor_set = ir.meta[var.self].decoration.set; + uint32_t binding = ir.meta[var.self].decoration.binding; + + emit_block_struct(type); + auto buffer_name = to_name(type.self); + + statement("internal::Resource<", buffer_name, type_to_array_glsl(type), "> ", instance_name, "__;"); + statement_no_indent("#define ", instance_name, " __res->", instance_name, "__.get()"); + resource_registrations.push_back( + join("s.register_resource(", instance_name, "__", ", ", descriptor_set, ", ", binding, ");")); + statement(""); +} + +void CompilerCPP::emit_interface_block(const SPIRVariable &var) +{ + add_resource_name(var.self); + + auto &type = get(var.basetype); + + const char *qual = var.storage == StorageClassInput ? "StageInput" : "StageOutput"; + const char *lowerqual = var.storage == StorageClassInput ? 
"stage_input" : "stage_output"; + auto instance_name = to_name(var.self); + uint32_t location = ir.meta[var.self].decoration.location; + + string buffer_name; + auto flags = ir.meta[type.self].decoration.decoration_flags; + if (flags.get(DecorationBlock)) + { + emit_block_struct(type); + buffer_name = to_name(type.self); + } + else + buffer_name = type_to_glsl(type); + + statement("internal::", qual, "<", buffer_name, type_to_array_glsl(type), "> ", instance_name, "__;"); + statement_no_indent("#define ", instance_name, " __res->", instance_name, "__.get()"); + resource_registrations.push_back(join("s.register_", lowerqual, "(", instance_name, "__", ", ", location, ");")); + statement(""); +} + +void CompilerCPP::emit_shared(const SPIRVariable &var) +{ + add_resource_name(var.self); + + auto instance_name = to_name(var.self); + statement(CompilerGLSL::variable_decl(var), ";"); + statement_no_indent("#define ", instance_name, " __res->", instance_name); +} + +void CompilerCPP::emit_uniform(const SPIRVariable &var) +{ + add_resource_name(var.self); + + auto &type = get(var.basetype); + auto instance_name = to_name(var.self); + + uint32_t descriptor_set = ir.meta[var.self].decoration.set; + uint32_t binding = ir.meta[var.self].decoration.binding; + uint32_t location = ir.meta[var.self].decoration.location; + + string type_name = type_to_glsl(type); + remap_variable_type_name(type, instance_name, type_name); + + if (type.basetype == SPIRType::Image || type.basetype == SPIRType::SampledImage || + type.basetype == SPIRType::AtomicCounter) + { + statement("internal::Resource<", type_name, type_to_array_glsl(type), "> ", instance_name, "__;"); + statement_no_indent("#define ", instance_name, " __res->", instance_name, "__.get()"); + resource_registrations.push_back( + join("s.register_resource(", instance_name, "__", ", ", descriptor_set, ", ", binding, ");")); + } + else + { + statement("internal::UniformConstant<", type_name, type_to_array_glsl(type), "> ", 
instance_name, "__;"); + statement_no_indent("#define ", instance_name, " __res->", instance_name, "__.get()"); + resource_registrations.push_back( + join("s.register_uniform_constant(", instance_name, "__", ", ", location, ");")); + } + + statement(""); +} + +void CompilerCPP::emit_push_constant_block(const SPIRVariable &var) +{ + add_resource_name(var.self); + + auto &type = get(var.basetype); + auto &flags = ir.meta[var.self].decoration.decoration_flags; + if (flags.get(DecorationBinding) || flags.get(DecorationDescriptorSet)) + SPIRV_CROSS_THROW("Push constant blocks cannot be compiled to GLSL with Binding or Set syntax. " + "Remap to location with reflection API first or disable these decorations."); + + emit_block_struct(type); + auto buffer_name = to_name(type.self); + auto instance_name = to_name(var.self); + + statement("internal::PushConstant<", buffer_name, type_to_array_glsl(type), "> ", instance_name, ";"); + statement_no_indent("#define ", instance_name, " __res->", instance_name, ".get()"); + resource_registrations.push_back(join("s.register_push_constant(", instance_name, "__", ");")); + statement(""); +} + +void CompilerCPP::emit_block_struct(SPIRType &type) +{ + // C++ can't do interface blocks, so we fake it by emitting a separate struct. + // However, these structs are not allowed to alias anything, so remove it before + // emitting the struct. + // + // The type we have here needs to be resolved to the non-pointer type so we can remove aliases. 
+ auto &self = get(type.self); + self.type_alias = 0; + emit_struct(self); +} + +void CompilerCPP::emit_resources() +{ + for (auto &id : ir.ids) + { + if (id.get_type() == TypeConstant) + { + auto &c = id.get(); + + bool needs_declaration = c.specialization || c.is_used_as_lut; + + if (needs_declaration) + { + if (!options.vulkan_semantics && c.specialization) + { + c.specialization_constant_macro_name = + constant_value_macro_name(get_decoration(c.self, DecorationSpecId)); + } + emit_constant(c); + } + } + else if (id.get_type() == TypeConstantOp) + { + emit_specialization_constant_op(id.get()); + } + } + + // Output all basic struct types which are not Block or BufferBlock as these are declared inplace + // when such variables are instantiated. + for (auto &id : ir.ids) + { + if (id.get_type() == TypeType) + { + auto &type = id.get(); + if (type.basetype == SPIRType::Struct && type.array.empty() && !type.pointer && + (!ir.meta[type.self].decoration.decoration_flags.get(DecorationBlock) && + !ir.meta[type.self].decoration.decoration_flags.get(DecorationBufferBlock))) + { + emit_struct(type); + } + } + } + + statement("struct Resources : ", resource_type); + begin_scope(); + + // Output UBOs and SSBOs + for (auto &id : ir.ids) + { + if (id.get_type() == TypeVariable) + { + auto &var = id.get(); + auto &type = get(var.basetype); + + if (var.storage != StorageClassFunction && type.pointer && type.storage == StorageClassUniform && + !is_hidden_variable(var) && + (ir.meta[type.self].decoration.decoration_flags.get(DecorationBlock) || + ir.meta[type.self].decoration.decoration_flags.get(DecorationBufferBlock))) + { + emit_buffer_block(var); + } + } + } + + // Output push constant blocks + for (auto &id : ir.ids) + { + if (id.get_type() == TypeVariable) + { + auto &var = id.get(); + auto &type = get(var.basetype); + if (!is_hidden_variable(var) && var.storage != StorageClassFunction && type.pointer && + type.storage == StorageClassPushConstant) + { + 
emit_push_constant_block(var); + } + } + } + + // Output in/out interfaces. + for (auto &id : ir.ids) + { + if (id.get_type() == TypeVariable) + { + auto &var = id.get(); + auto &type = get(var.basetype); + + if (var.storage != StorageClassFunction && !is_hidden_variable(var) && type.pointer && + (var.storage == StorageClassInput || var.storage == StorageClassOutput) && + interface_variable_exists_in_entry_point(var.self)) + { + emit_interface_block(var); + } + } + } + + // Output Uniform Constants (values, samplers, images, etc). + for (auto &id : ir.ids) + { + if (id.get_type() == TypeVariable) + { + auto &var = id.get(); + auto &type = get(var.basetype); + + if (var.storage != StorageClassFunction && !is_hidden_variable(var) && type.pointer && + (type.storage == StorageClassUniformConstant || type.storage == StorageClassAtomicCounter)) + { + emit_uniform(var); + } + } + } + + // Global variables. + bool emitted = false; + for (auto global : global_variables) + { + auto &var = get(global); + if (var.storage == StorageClassWorkgroup) + { + emit_shared(var); + emitted = true; + } + } + + if (emitted) + statement(""); + + declare_undefined_values(); + + statement("inline void init(spirv_cross_shader& s)"); + begin_scope(); + statement(resource_type, "::init(s);"); + for (auto ® : resource_registrations) + statement(reg); + end_scope(); + resource_registrations.clear(); + + end_scope_decl(); + + statement(""); + statement("Resources* __res;"); + if (get_entry_point().model == ExecutionModelGLCompute) + statement("ComputePrivateResources __priv_res;"); + statement(""); + + // Emit regular globals which are allocated per invocation. 
+ emitted = false; + for (auto global : global_variables) + { + auto &var = get(global); + if (var.storage == StorageClassPrivate) + { + if (var.storage == StorageClassWorkgroup) + emit_shared(var); + else + statement(CompilerGLSL::variable_decl(var), ";"); + emitted = true; + } + } + + if (emitted) + statement(""); +} + +string CompilerCPP::compile() +{ + ir.fixup_reserved_names(); + + // Do not deal with ES-isms like precision, older extensions and such. + options.es = false; + options.version = 450; + backend.float_literal_suffix = true; + backend.double_literal_suffix = false; + backend.long_long_literal_suffix = true; + backend.uint32_t_literal_suffix = true; + backend.basic_int_type = "int32_t"; + backend.basic_uint_type = "uint32_t"; + backend.swizzle_is_function = true; + backend.shared_is_implied = true; + backend.unsized_array_supported = false; + backend.explicit_struct_type = true; + backend.use_initializer_list = true; + + fixup_type_alias(); + reorder_type_alias(); + build_function_control_flow_graphs_and_analyze(); + update_active_builtins(); + + uint32_t pass_count = 0; + do + { + if (pass_count >= 3) + SPIRV_CROSS_THROW("Over 3 compilation loops detected. Must be a bug!"); + + resource_registrations.clear(); + reset(); + + // Move constructor for this type is broken on GCC 4.9 ... + buffer.reset(); + + emit_header(); + emit_resources(); + + emit_function(get(ir.default_entry_point), Bitset()); + + pass_count++; + } while (is_forcing_recompilation()); + + // Match opening scope of emit_header(). + end_scope_decl(); + // namespace + end_scope(); + + // Emit C entry points + emit_c_linkage(); + + // Entry point in CPP is always main() for the time being. 
+ get_entry_point().name = "main"; + + return buffer.str(); +} + +void CompilerCPP::emit_c_linkage() +{ + statement(""); + + statement("spirv_cross_shader_t *spirv_cross_construct(void)"); + begin_scope(); + statement("return new ", impl_type, "();"); + end_scope(); + + statement(""); + statement("void spirv_cross_destruct(spirv_cross_shader_t *shader)"); + begin_scope(); + statement("delete static_cast<", impl_type, "*>(shader);"); + end_scope(); + + statement(""); + statement("void spirv_cross_invoke(spirv_cross_shader_t *shader)"); + begin_scope(); + statement("static_cast<", impl_type, "*>(shader)->invoke();"); + end_scope(); + + statement(""); + statement("static const struct spirv_cross_interface vtable ="); + begin_scope(); + statement("spirv_cross_construct,"); + statement("spirv_cross_destruct,"); + statement("spirv_cross_invoke,"); + end_scope_decl(); + + statement(""); + statement("const struct spirv_cross_interface *", + interface_name.empty() ? string("spirv_cross_get_interface") : interface_name, "(void)"); + begin_scope(); + statement("return &vtable;"); + end_scope(); +} + +void CompilerCPP::emit_function_prototype(SPIRFunction &func, const Bitset &) +{ + if (func.self != ir.default_entry_point) + add_function_overload(func); + + local_variable_names = resource_names; + string decl; + + auto &type = get(func.return_type); + decl += "inline "; + decl += type_to_glsl(type); + decl += " "; + + if (func.self == ir.default_entry_point) + { + decl += "main"; + processing_entry_point = true; + } + else + decl += to_name(func.self); + + decl += "("; + for (auto &arg : func.arguments) + { + add_local_variable_name(arg.id); + + decl += argument_decl(arg); + if (&arg != &func.arguments.back()) + decl += ", "; + + // Hold a pointer to the parameter so we can invalidate the readonly field if needed. 
+ auto *var = maybe_get(arg.id); + if (var) + var->parameter = &arg; + } + + decl += ")"; + statement(decl); +} + +string CompilerCPP::argument_decl(const SPIRFunction::Parameter &arg) +{ + auto &type = expression_type(arg.id); + bool constref = !type.pointer || arg.write_count == 0; + + auto &var = get(arg.id); + + string base = type_to_glsl(type); + string variable_name = to_name(var.self); + remap_variable_type_name(type, variable_name, base); + + for (uint32_t i = 0; i < type.array.size(); i++) + base = join("std::array<", base, ", ", to_array_size(type, i), ">"); + + return join(constref ? "const " : "", base, " &", variable_name); +} + +string CompilerCPP::variable_decl(const SPIRType &type, const string &name, uint32_t /* id */) +{ + string base = type_to_glsl(type); + remap_variable_type_name(type, name, base); + bool runtime = false; + + for (uint32_t i = 0; i < type.array.size(); i++) + { + auto &array = type.array[i]; + if (!array && type.array_size_literal[i]) + { + // Avoid using runtime arrays with std::array since this is undefined. + // Runtime arrays cannot be passed around as values, so this is fine. + runtime = true; + } + else + base = join("std::array<", base, ", ", to_array_size(type, i), ">"); + } + base += ' '; + return base + name + (runtime ? "[1]" : ""); +} + +void CompilerCPP::emit_header() +{ + auto &execution = get_entry_point(); + + statement("// This C++ shader is autogenerated by spirv-cross."); + statement("#include \"spirv_cross/internal_interface.hpp\""); + statement("#include \"spirv_cross/external_interface.h\""); + // Needed to properly implement GLSL-style arrays. 
+ statement("#include "); + statement("#include "); + statement(""); + statement("using namespace spirv_cross;"); + statement("using namespace glm;"); + statement(""); + + statement("namespace Impl"); + begin_scope(); + + switch (execution.model) + { + case ExecutionModelGeometry: + case ExecutionModelTessellationControl: + case ExecutionModelTessellationEvaluation: + case ExecutionModelGLCompute: + case ExecutionModelFragment: + case ExecutionModelVertex: + statement("struct Shader"); + begin_scope(); + break; + + default: + SPIRV_CROSS_THROW("Unsupported execution model."); + } + + switch (execution.model) + { + case ExecutionModelGeometry: + impl_type = "GeometryShader"; + resource_type = "GeometryResources"; + break; + + case ExecutionModelVertex: + impl_type = "VertexShader"; + resource_type = "VertexResources"; + break; + + case ExecutionModelFragment: + impl_type = "FragmentShader"; + resource_type = "FragmentResources"; + break; + + case ExecutionModelGLCompute: + impl_type = join("ComputeShader"); + resource_type = "ComputeResources"; + break; + + case ExecutionModelTessellationControl: + impl_type = "TessControlShader"; + resource_type = "TessControlResources"; + break; + + case ExecutionModelTessellationEvaluation: + impl_type = "TessEvaluationShader"; + resource_type = "TessEvaluationResources"; + break; + + default: + SPIRV_CROSS_THROW("Unsupported execution model."); + } +} diff --git a/dep/spirv-cross/spirv_cpp.hpp b/dep/spirv-cross/spirv_cpp.hpp new file mode 100644 index 000000000..686624063 --- /dev/null +++ b/dep/spirv-cross/spirv_cpp.hpp @@ -0,0 +1,93 @@ +/* + * Copyright 2015-2020 Arm Limited + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * At your option, you may choose to accept this material under either: + * 1. The Apache License, Version 2.0, found at , or + * 2. The MIT License, found at . + * SPDX-License-Identifier: Apache-2.0 OR MIT. + */ + +#ifndef SPIRV_CROSS_CPP_HPP +#define SPIRV_CROSS_CPP_HPP + +#include "spirv_glsl.hpp" +#include + +namespace SPIRV_CROSS_NAMESPACE +{ +class CompilerCPP : public CompilerGLSL +{ +public: + explicit CompilerCPP(std::vector spirv_) + : CompilerGLSL(std::move(spirv_)) + { + } + + CompilerCPP(const uint32_t *ir_, size_t word_count) + : CompilerGLSL(ir_, word_count) + { + } + + explicit CompilerCPP(const ParsedIR &ir_) + : CompilerGLSL(ir_) + { + } + + explicit CompilerCPP(ParsedIR &&ir_) + : CompilerGLSL(std::move(ir_)) + { + } + + std::string compile() override; + + // Sets a custom symbol name that can override + // spirv_cross_get_interface. + // + // Useful when several shader interfaces are linked + // statically into the same binary. 
+ void set_interface_name(std::string name) + { + interface_name = std::move(name); + } + +private: + void emit_header() override; + void emit_c_linkage(); + void emit_function_prototype(SPIRFunction &func, const Bitset &return_flags) override; + + void emit_resources(); + void emit_buffer_block(const SPIRVariable &type) override; + void emit_push_constant_block(const SPIRVariable &var) override; + void emit_interface_block(const SPIRVariable &type); + void emit_block_chain(SPIRBlock &block); + void emit_uniform(const SPIRVariable &var) override; + void emit_shared(const SPIRVariable &var); + void emit_block_struct(SPIRType &type); + std::string variable_decl(const SPIRType &type, const std::string &name, uint32_t id) override; + + std::string argument_decl(const SPIRFunction::Parameter &arg); + + SmallVector resource_registrations; + std::string impl_type; + std::string resource_type; + uint32_t shared_counter = 0; + + std::string interface_name; +}; +} // namespace SPIRV_CROSS_NAMESPACE + +#endif diff --git a/dep/spirv-cross/spirv_cross.cpp b/dep/spirv-cross/spirv_cross.cpp new file mode 100644 index 000000000..5c1b3ebc5 --- /dev/null +++ b/dep/spirv-cross/spirv_cross.cpp @@ -0,0 +1,4877 @@ +/* + * Copyright 2015-2020 Arm Limited + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * At your option, you may choose to accept this material under either: + * 1. The Apache License, Version 2.0, found at , or + * 2. The MIT License, found at . 
+ * SPDX-License-Identifier: Apache-2.0 OR MIT. + */ + +#include "spirv_cross.hpp" +#include "GLSL.std.450.h" +#include "spirv_cfg.hpp" +#include "spirv_common.hpp" +#include "spirv_parser.hpp" +#include +#include +#include + +using namespace std; +using namespace spv; +using namespace SPIRV_CROSS_NAMESPACE; + +Compiler::Compiler(vector ir_) +{ + Parser parser(move(ir_)); + parser.parse(); + set_ir(move(parser.get_parsed_ir())); +} + +Compiler::Compiler(const uint32_t *ir_, size_t word_count) +{ + Parser parser(ir_, word_count); + parser.parse(); + set_ir(move(parser.get_parsed_ir())); +} + +Compiler::Compiler(const ParsedIR &ir_) +{ + set_ir(ir_); +} + +Compiler::Compiler(ParsedIR &&ir_) +{ + set_ir(move(ir_)); +} + +void Compiler::set_ir(ParsedIR &&ir_) +{ + ir = move(ir_); + parse_fixup(); +} + +void Compiler::set_ir(const ParsedIR &ir_) +{ + ir = ir_; + parse_fixup(); +} + +string Compiler::compile() +{ + return ""; +} + +bool Compiler::variable_storage_is_aliased(const SPIRVariable &v) +{ + auto &type = get(v.basetype); + bool ssbo = v.storage == StorageClassStorageBuffer || + ir.meta[type.self].decoration.decoration_flags.get(DecorationBufferBlock); + bool image = type.basetype == SPIRType::Image; + bool counter = type.basetype == SPIRType::AtomicCounter; + bool buffer_reference = type.storage == StorageClassPhysicalStorageBufferEXT; + + bool is_restrict; + if (ssbo) + is_restrict = ir.get_buffer_block_flags(v).get(DecorationRestrict); + else + is_restrict = has_decoration(v.self, DecorationRestrict); + + return !is_restrict && (ssbo || image || counter || buffer_reference); +} + +bool Compiler::block_is_pure(const SPIRBlock &block) +{ + // This is a global side effect of the function. 
+ if (block.terminator == SPIRBlock::Kill) + return false; + + for (auto &i : block.ops) + { + auto ops = stream(i); + auto op = static_cast(i.op); + + switch (op) + { + case OpFunctionCall: + { + uint32_t func = ops[2]; + if (!function_is_pure(get(func))) + return false; + break; + } + + case OpCopyMemory: + case OpStore: + { + auto &type = expression_type(ops[0]); + if (type.storage != StorageClassFunction) + return false; + break; + } + + case OpImageWrite: + return false; + + // Atomics are impure. + case OpAtomicLoad: + case OpAtomicStore: + case OpAtomicExchange: + case OpAtomicCompareExchange: + case OpAtomicCompareExchangeWeak: + case OpAtomicIIncrement: + case OpAtomicIDecrement: + case OpAtomicIAdd: + case OpAtomicISub: + case OpAtomicSMin: + case OpAtomicUMin: + case OpAtomicSMax: + case OpAtomicUMax: + case OpAtomicAnd: + case OpAtomicOr: + case OpAtomicXor: + return false; + + // Geometry shader builtins modify global state. + case OpEndPrimitive: + case OpEmitStreamVertex: + case OpEndStreamPrimitive: + case OpEmitVertex: + return false; + + // Barriers disallow any reordering, so we should treat blocks with barrier as writing. + case OpControlBarrier: + case OpMemoryBarrier: + return false; + + // Ray tracing builtins are impure. + case OpReportIntersectionNV: + case OpIgnoreIntersectionNV: + case OpTerminateRayNV: + case OpTraceNV: + case OpExecuteCallableNV: + return false; + + // OpExtInst is potentially impure depending on extension, but GLSL builtins are at least pure. + + case OpDemoteToHelperInvocationEXT: + // This is a global side effect of the function. + return false; + + default: + break; + } + } + + return true; +} + +string Compiler::to_name(uint32_t id, bool allow_alias) const +{ + if (allow_alias && ir.ids[id].get_type() == TypeType) + { + // If this type is a simple alias, emit the + // name of the original type instead. + // We don't want to override the meta alias + // as that can be overridden by the reflection APIs after parse. 
+ auto &type = get(id); + if (type.type_alias) + { + // If the alias master has been specially packed, we will have emitted a clean variant as well, + // so skip the name aliasing here. + if (!has_extended_decoration(type.type_alias, SPIRVCrossDecorationBufferBlockRepacked)) + return to_name(type.type_alias); + } + } + + auto &alias = ir.get_name(id); + if (alias.empty()) + return join("_", id); + else + return alias; +} + +bool Compiler::function_is_pure(const SPIRFunction &func) +{ + for (auto block : func.blocks) + { + if (!block_is_pure(get(block))) + { + //fprintf(stderr, "Function %s is impure!\n", to_name(func.self).c_str()); + return false; + } + } + + //fprintf(stderr, "Function %s is pure!\n", to_name(func.self).c_str()); + return true; +} + +void Compiler::register_global_read_dependencies(const SPIRBlock &block, uint32_t id) +{ + for (auto &i : block.ops) + { + auto ops = stream(i); + auto op = static_cast(i.op); + + switch (op) + { + case OpFunctionCall: + { + uint32_t func = ops[2]; + register_global_read_dependencies(get(func), id); + break; + } + + case OpLoad: + case OpImageRead: + { + // If we're in a storage class which does not get invalidated, adding dependencies here is no big deal. + auto *var = maybe_get_backing_variable(ops[2]); + if (var && var->storage != StorageClassFunction) + { + auto &type = get(var->basetype); + + // InputTargets are immutable. 
+ if (type.basetype != SPIRType::Image && type.image.dim != DimSubpassData) + var->dependees.push_back(id); + } + break; + } + + default: + break; + } + } +} + +void Compiler::register_global_read_dependencies(const SPIRFunction &func, uint32_t id) +{ + for (auto block : func.blocks) + register_global_read_dependencies(get(block), id); +} + +SPIRVariable *Compiler::maybe_get_backing_variable(uint32_t chain) +{ + auto *var = maybe_get(chain); + if (!var) + { + auto *cexpr = maybe_get(chain); + if (cexpr) + var = maybe_get(cexpr->loaded_from); + + auto *access_chain = maybe_get(chain); + if (access_chain) + var = maybe_get(access_chain->loaded_from); + } + + return var; +} + +StorageClass Compiler::get_expression_effective_storage_class(uint32_t ptr) +{ + auto *var = maybe_get_backing_variable(ptr); + + // If the expression has been lowered to a temporary, we need to use the Generic storage class. + // We're looking for the effective storage class of a given expression. + // An access chain or forwarded OpLoads from such access chains + // will generally have the storage class of the underlying variable, but if the load was not forwarded + // we have lost any address space qualifiers. + bool forced_temporary = ir.ids[ptr].get_type() == TypeExpression && !get(ptr).access_chain && + (forced_temporaries.count(ptr) != 0 || forwarded_temporaries.count(ptr) == 0); + + if (var && !forced_temporary) + { + // Normalize SSBOs to StorageBuffer here. + if (var->storage == StorageClassUniform && + has_decoration(get(var->basetype).self, DecorationBufferBlock)) + return StorageClassStorageBuffer; + else + return var->storage; + } + else + return expression_type(ptr).storage; +} + +void Compiler::register_read(uint32_t expr, uint32_t chain, bool forwarded) +{ + auto &e = get(expr); + auto *var = maybe_get_backing_variable(chain); + + if (var) + { + e.loaded_from = var->self; + + // If the backing variable is immutable, we do not need to depend on the variable. 
+ if (forwarded && !is_immutable(var->self)) + var->dependees.push_back(e.self); + + // If we load from a parameter, make sure we create "inout" if we also write to the parameter. + // The default is "in" however, so we never invalidate our compilation by reading. + if (var && var->parameter) + var->parameter->read_count++; + } +} + +void Compiler::register_write(uint32_t chain) +{ + auto *var = maybe_get(chain); + if (!var) + { + // If we're storing through an access chain, invalidate the backing variable instead. + auto *expr = maybe_get(chain); + if (expr && expr->loaded_from) + var = maybe_get(expr->loaded_from); + + auto *access_chain = maybe_get(chain); + if (access_chain && access_chain->loaded_from) + var = maybe_get(access_chain->loaded_from); + } + + auto &chain_type = expression_type(chain); + + if (var) + { + bool check_argument_storage_qualifier = true; + auto &type = expression_type(chain); + + // If our variable is in a storage class which can alias with other buffers, + // invalidate all variables which depend on aliased variables. And if this is a + // variable pointer, then invalidate all variables regardless. + if (get_variable_data_type(*var).pointer) + { + flush_all_active_variables(); + + if (type.pointer_depth == 1) + { + // We have a backing variable which is a pointer-to-pointer type. + // We are storing some data through a pointer acquired through that variable, + // but we are not writing to the value of the variable itself, + // i.e., we are not modifying the pointer directly. + // If we are storing a non-pointer type (pointer_depth == 1), + // we know that we are storing some unrelated data. + // A case here would be + // void foo(Foo * const *arg) { + // Foo *bar = *arg; + // bar->unrelated = 42; + // } + // arg, the argument is constant. 
+ check_argument_storage_qualifier = false; + } + } + + if (type.storage == StorageClassPhysicalStorageBufferEXT || variable_storage_is_aliased(*var)) + flush_all_aliased_variables(); + else if (var) + flush_dependees(*var); + + // We tried to write to a parameter which is not marked with out qualifier, force a recompile. + if (check_argument_storage_qualifier && var->parameter && var->parameter->write_count == 0) + { + var->parameter->write_count++; + force_recompile(); + } + } + else if (chain_type.pointer) + { + // If we stored through a variable pointer, then we don't know which + // variable we stored to. So *all* expressions after this point need to + // be invalidated. + // FIXME: If we can prove that the variable pointer will point to + // only certain variables, we can invalidate only those. + flush_all_active_variables(); + } + + // If chain_type.pointer is false, we're not writing to memory backed variables, but temporaries instead. + // This can happen in copy_logical_type where we unroll complex reads and writes to temporaries. +} + +void Compiler::flush_dependees(SPIRVariable &var) +{ + for (auto expr : var.dependees) + invalid_expressions.insert(expr); + var.dependees.clear(); +} + +void Compiler::flush_all_aliased_variables() +{ + for (auto aliased : aliased_variables) + flush_dependees(get(aliased)); +} + +void Compiler::flush_all_atomic_capable_variables() +{ + for (auto global : global_variables) + flush_dependees(get(global)); + flush_all_aliased_variables(); +} + +void Compiler::flush_control_dependent_expressions(uint32_t block_id) +{ + auto &block = get(block_id); + for (auto &expr : block.invalidate_expressions) + invalid_expressions.insert(expr); + block.invalidate_expressions.clear(); +} + +void Compiler::flush_all_active_variables() +{ + // Invalidate all temporaries we read from variables in this block since they were forwarded. + // Invalidate all temporaries we read from globals. 
+ for (auto &v : current_function->local_variables) + flush_dependees(get(v)); + for (auto &arg : current_function->arguments) + flush_dependees(get(arg.id)); + for (auto global : global_variables) + flush_dependees(get(global)); + + flush_all_aliased_variables(); +} + +uint32_t Compiler::expression_type_id(uint32_t id) const +{ + switch (ir.ids[id].get_type()) + { + case TypeVariable: + return get(id).basetype; + + case TypeExpression: + return get(id).expression_type; + + case TypeConstant: + return get(id).constant_type; + + case TypeConstantOp: + return get(id).basetype; + + case TypeUndef: + return get(id).basetype; + + case TypeCombinedImageSampler: + return get(id).combined_type; + + case TypeAccessChain: + return get(id).basetype; + + default: + SPIRV_CROSS_THROW("Cannot resolve expression type."); + } +} + +const SPIRType &Compiler::expression_type(uint32_t id) const +{ + return get(expression_type_id(id)); +} + +bool Compiler::expression_is_lvalue(uint32_t id) const +{ + auto &type = expression_type(id); + switch (type.basetype) + { + case SPIRType::SampledImage: + case SPIRType::Image: + case SPIRType::Sampler: + return false; + + default: + return true; + } +} + +bool Compiler::is_immutable(uint32_t id) const +{ + if (ir.ids[id].get_type() == TypeVariable) + { + auto &var = get(id); + + // Anything we load from the UniformConstant address space is guaranteed to be immutable. 
+ bool pointer_to_const = var.storage == StorageClassUniformConstant; + return pointer_to_const || var.phi_variable || !expression_is_lvalue(id); + } + else if (ir.ids[id].get_type() == TypeAccessChain) + return get(id).immutable; + else if (ir.ids[id].get_type() == TypeExpression) + return get(id).immutable; + else if (ir.ids[id].get_type() == TypeConstant || ir.ids[id].get_type() == TypeConstantOp || + ir.ids[id].get_type() == TypeUndef) + return true; + else + return false; +} + +static inline bool storage_class_is_interface(spv::StorageClass storage) +{ + switch (storage) + { + case StorageClassInput: + case StorageClassOutput: + case StorageClassUniform: + case StorageClassUniformConstant: + case StorageClassAtomicCounter: + case StorageClassPushConstant: + case StorageClassStorageBuffer: + return true; + + default: + return false; + } +} + +bool Compiler::is_hidden_variable(const SPIRVariable &var, bool include_builtins) const +{ + if ((is_builtin_variable(var) && !include_builtins) || var.remapped_variable) + return true; + + // Combined image samplers are always considered active as they are "magic" variables. + if (find_if(begin(combined_image_samplers), end(combined_image_samplers), [&var](const CombinedImageSampler &samp) { + return samp.combined_id == var.self; + }) != end(combined_image_samplers)) + { + return false; + } + + bool hidden = false; + if (check_active_interface_variables && storage_class_is_interface(var.storage)) + hidden = active_interface_variables.find(var.self) == end(active_interface_variables); + return hidden; +} + +bool Compiler::is_builtin_type(const SPIRType &type) const +{ + auto *type_meta = ir.find_meta(type.self); + + // We can have builtin structs as well. If one member of a struct is builtin, the struct must also be builtin. 
+ if (type_meta) + for (auto &m : type_meta->members) + if (m.builtin) + return true; + + return false; +} + +bool Compiler::is_builtin_variable(const SPIRVariable &var) const +{ + auto *m = ir.find_meta(var.self); + + if (var.compat_builtin || (m && m->decoration.builtin)) + return true; + else + return is_builtin_type(get(var.basetype)); +} + +bool Compiler::is_member_builtin(const SPIRType &type, uint32_t index, BuiltIn *builtin) const +{ + auto *type_meta = ir.find_meta(type.self); + + if (type_meta) + { + auto &memb = type_meta->members; + if (index < memb.size() && memb[index].builtin) + { + if (builtin) + *builtin = memb[index].builtin_type; + return true; + } + } + + return false; +} + +bool Compiler::is_scalar(const SPIRType &type) const +{ + return type.basetype != SPIRType::Struct && type.vecsize == 1 && type.columns == 1; +} + +bool Compiler::is_vector(const SPIRType &type) const +{ + return type.vecsize > 1 && type.columns == 1; +} + +bool Compiler::is_matrix(const SPIRType &type) const +{ + return type.vecsize > 1 && type.columns > 1; +} + +bool Compiler::is_array(const SPIRType &type) const +{ + return !type.array.empty(); +} + +ShaderResources Compiler::get_shader_resources() const +{ + return get_shader_resources(nullptr); +} + +ShaderResources Compiler::get_shader_resources(const unordered_set &active_variables) const +{ + return get_shader_resources(&active_variables); +} + +bool Compiler::InterfaceVariableAccessHandler::handle(Op opcode, const uint32_t *args, uint32_t length) +{ + uint32_t variable = 0; + switch (opcode) + { + // Need this first, otherwise, GCC complains about unhandled switch statements. + default: + break; + + case OpFunctionCall: + { + // Invalid SPIR-V. 
+ if (length < 3) + return false; + + uint32_t count = length - 3; + args += 3; + for (uint32_t i = 0; i < count; i++) + { + auto *var = compiler.maybe_get(args[i]); + if (var && storage_class_is_interface(var->storage)) + variables.insert(args[i]); + } + break; + } + + case OpSelect: + { + // Invalid SPIR-V. + if (length < 5) + return false; + + uint32_t count = length - 3; + args += 3; + for (uint32_t i = 0; i < count; i++) + { + auto *var = compiler.maybe_get(args[i]); + if (var && storage_class_is_interface(var->storage)) + variables.insert(args[i]); + } + break; + } + + case OpPhi: + { + // Invalid SPIR-V. + if (length < 2) + return false; + + uint32_t count = length - 2; + args += 2; + for (uint32_t i = 0; i < count; i += 2) + { + auto *var = compiler.maybe_get(args[i]); + if (var && storage_class_is_interface(var->storage)) + variables.insert(args[i]); + } + break; + } + + case OpAtomicStore: + case OpStore: + // Invalid SPIR-V. + if (length < 1) + return false; + variable = args[0]; + break; + + case OpCopyMemory: + { + if (length < 2) + return false; + + auto *var = compiler.maybe_get(args[0]); + if (var && storage_class_is_interface(var->storage)) + variables.insert(args[0]); + + var = compiler.maybe_get(args[1]); + if (var && storage_class_is_interface(var->storage)) + variables.insert(args[1]); + break; + } + + case OpExtInst: + { + if (length < 5) + return false; + auto &extension_set = compiler.get(args[2]); + switch (extension_set.ext) + { + case SPIRExtension::GLSL: + { + auto op = static_cast(args[3]); + + switch (op) + { + case GLSLstd450InterpolateAtCentroid: + case GLSLstd450InterpolateAtSample: + case GLSLstd450InterpolateAtOffset: + { + auto *var = compiler.maybe_get(args[4]); + if (var && storage_class_is_interface(var->storage)) + variables.insert(args[4]); + break; + } + + default: + break; + } + break; + } + case SPIRExtension::SPV_AMD_shader_explicit_vertex_parameter: + { + enum AMDShaderExplicitVertexParameter + { + 
InterpolateAtVertexAMD = 1 + }; + + auto op = static_cast(args[3]); + + switch (op) + { + case InterpolateAtVertexAMD: + { + auto *var = compiler.maybe_get(args[4]); + if (var && storage_class_is_interface(var->storage)) + variables.insert(args[4]); + break; + } + + default: + break; + } + break; + } + default: + break; + } + break; + } + + case OpAccessChain: + case OpInBoundsAccessChain: + case OpPtrAccessChain: + case OpLoad: + case OpCopyObject: + case OpImageTexelPointer: + case OpAtomicLoad: + case OpAtomicExchange: + case OpAtomicCompareExchange: + case OpAtomicCompareExchangeWeak: + case OpAtomicIIncrement: + case OpAtomicIDecrement: + case OpAtomicIAdd: + case OpAtomicISub: + case OpAtomicSMin: + case OpAtomicUMin: + case OpAtomicSMax: + case OpAtomicUMax: + case OpAtomicAnd: + case OpAtomicOr: + case OpAtomicXor: + case OpArrayLength: + // Invalid SPIR-V. + if (length < 3) + return false; + variable = args[2]; + break; + } + + if (variable) + { + auto *var = compiler.maybe_get(variable); + if (var && storage_class_is_interface(var->storage)) + variables.insert(variable); + } + return true; +} + +unordered_set Compiler::get_active_interface_variables() const +{ + // Traverse the call graph and find all interface variables which are in use. + unordered_set variables; + InterfaceVariableAccessHandler handler(*this, variables); + traverse_all_reachable_opcodes(get(ir.default_entry_point), handler); + + // Make sure we preserve output variables which are only initialized, but never accessed by any code. + ir.for_each_typed_id([&](uint32_t, const SPIRVariable &var) { + if (var.storage == StorageClassOutput && var.initializer != ID(0)) + variables.insert(var.self); + }); + + // If we needed to create one, we'll need it. 
+ if (dummy_sampler_id) + variables.insert(dummy_sampler_id); + + return variables; +} + +void Compiler::set_enabled_interface_variables(std::unordered_set active_variables) +{ + active_interface_variables = move(active_variables); + check_active_interface_variables = true; +} + +ShaderResources Compiler::get_shader_resources(const unordered_set *active_variables) const +{ + ShaderResources res; + + bool ssbo_instance_name = reflection_ssbo_instance_name_is_significant(); + + ir.for_each_typed_id([&](uint32_t, const SPIRVariable &var) { + auto &type = this->get(var.basetype); + + // It is possible for uniform storage classes to be passed as function parameters, so detect + // that. To detect function parameters, check of StorageClass of variable is function scope. + if (var.storage == StorageClassFunction || !type.pointer || is_builtin_variable(var)) + return; + + if (active_variables && active_variables->find(var.self) == end(*active_variables)) + return; + + // Input + if (var.storage == StorageClassInput && interface_variable_exists_in_entry_point(var.self)) + { + if (has_decoration(type.self, DecorationBlock)) + { + res.stage_inputs.push_back( + { var.self, var.basetype, type.self, get_remapped_declared_block_name(var.self, false) }); + } + else + res.stage_inputs.push_back({ var.self, var.basetype, type.self, get_name(var.self) }); + } + // Subpass inputs + else if (var.storage == StorageClassUniformConstant && type.image.dim == DimSubpassData) + { + res.subpass_inputs.push_back({ var.self, var.basetype, type.self, get_name(var.self) }); + } + // Outputs + else if (var.storage == StorageClassOutput && interface_variable_exists_in_entry_point(var.self)) + { + if (has_decoration(type.self, DecorationBlock)) + { + res.stage_outputs.push_back( + { var.self, var.basetype, type.self, get_remapped_declared_block_name(var.self, false) }); + } + else + res.stage_outputs.push_back({ var.self, var.basetype, type.self, get_name(var.self) }); + } + // UBOs + else if 
(type.storage == StorageClassUniform && has_decoration(type.self, DecorationBlock)) + { + res.uniform_buffers.push_back( + { var.self, var.basetype, type.self, get_remapped_declared_block_name(var.self, false) }); + } + // Old way to declare SSBOs. + else if (type.storage == StorageClassUniform && has_decoration(type.self, DecorationBufferBlock)) + { + res.storage_buffers.push_back( + { var.self, var.basetype, type.self, get_remapped_declared_block_name(var.self, ssbo_instance_name) }); + } + // Modern way to declare SSBOs. + else if (type.storage == StorageClassStorageBuffer) + { + res.storage_buffers.push_back( + { var.self, var.basetype, type.self, get_remapped_declared_block_name(var.self, ssbo_instance_name) }); + } + // Push constant blocks + else if (type.storage == StorageClassPushConstant) + { + // There can only be one push constant block, but keep the vector in case this restriction is lifted + // in the future. + res.push_constant_buffers.push_back({ var.self, var.basetype, type.self, get_name(var.self) }); + } + // Images + else if (type.storage == StorageClassUniformConstant && type.basetype == SPIRType::Image && + type.image.sampled == 2) + { + res.storage_images.push_back({ var.self, var.basetype, type.self, get_name(var.self) }); + } + // Separate images + else if (type.storage == StorageClassUniformConstant && type.basetype == SPIRType::Image && + type.image.sampled == 1) + { + res.separate_images.push_back({ var.self, var.basetype, type.self, get_name(var.self) }); + } + // Separate samplers + else if (type.storage == StorageClassUniformConstant && type.basetype == SPIRType::Sampler) + { + res.separate_samplers.push_back({ var.self, var.basetype, type.self, get_name(var.self) }); + } + // Textures + else if (type.storage == StorageClassUniformConstant && type.basetype == SPIRType::SampledImage) + { + res.sampled_images.push_back({ var.self, var.basetype, type.self, get_name(var.self) }); + } + // Atomic counters + else if (type.storage == 
StorageClassAtomicCounter) + { + res.atomic_counters.push_back({ var.self, var.basetype, type.self, get_name(var.self) }); + } + // Acceleration structures + else if (type.storage == StorageClassUniformConstant && type.basetype == SPIRType::AccelerationStructure) + { + res.acceleration_structures.push_back({ var.self, var.basetype, type.self, get_name(var.self) }); + } + }); + + return res; +} + +bool Compiler::type_is_block_like(const SPIRType &type) const +{ + if (type.basetype != SPIRType::Struct) + return false; + + if (has_decoration(type.self, DecorationBlock) || has_decoration(type.self, DecorationBufferBlock)) + { + return true; + } + + // Block-like types may have Offset decorations. + for (uint32_t i = 0; i < uint32_t(type.member_types.size()); i++) + if (has_member_decoration(type.self, i, DecorationOffset)) + return true; + + return false; +} + +void Compiler::parse_fixup() +{ + // Figure out specialization constants for work group sizes. + for (auto id_ : ir.ids_for_constant_or_variable) + { + auto &id = ir.ids[id_]; + + if (id.get_type() == TypeConstant) + { + auto &c = id.get(); + if (ir.meta[c.self].decoration.builtin && ir.meta[c.self].decoration.builtin_type == BuiltInWorkgroupSize) + { + // In current SPIR-V, there can be just one constant like this. + // All entry points will receive the constant value. 
+ for (auto &entry : ir.entry_points) + { + entry.second.workgroup_size.constant = c.self; + entry.second.workgroup_size.x = c.scalar(0, 0); + entry.second.workgroup_size.y = c.scalar(0, 1); + entry.second.workgroup_size.z = c.scalar(0, 2); + } + } + } + else if (id.get_type() == TypeVariable) + { + auto &var = id.get(); + if (var.storage == StorageClassPrivate || var.storage == StorageClassWorkgroup || + var.storage == StorageClassOutput) + global_variables.push_back(var.self); + if (variable_storage_is_aliased(var)) + aliased_variables.push_back(var.self); + } + } +} + +void Compiler::update_name_cache(unordered_set &cache_primary, const unordered_set &cache_secondary, + string &name) +{ + if (name.empty()) + return; + + const auto find_name = [&](const string &n) -> bool { + if (cache_primary.find(n) != end(cache_primary)) + return true; + + if (&cache_primary != &cache_secondary) + if (cache_secondary.find(n) != end(cache_secondary)) + return true; + + return false; + }; + + const auto insert_name = [&](const string &n) { cache_primary.insert(n); }; + + if (!find_name(name)) + { + insert_name(name); + return; + } + + uint32_t counter = 0; + auto tmpname = name; + + bool use_linked_underscore = true; + + if (tmpname == "_") + { + // We cannot just append numbers, as we will end up creating internally reserved names. + // Make it like _0_ instead. + tmpname += "0"; + } + else if (tmpname.back() == '_') + { + // The last_character is an underscore, so we don't need to link in underscore. + // This would violate double underscore rules. + use_linked_underscore = false; + } + + // If there is a collision (very rare), + // keep tacking on extra identifier until it's unique. + do + { + counter++; + name = tmpname + (use_linked_underscore ? 
"_" : "") + convert_to_string(counter); + } while (find_name(name)); + insert_name(name); +} + +void Compiler::update_name_cache(unordered_set &cache, string &name) +{ + update_name_cache(cache, cache, name); +} + +void Compiler::set_name(ID id, const std::string &name) +{ + ir.set_name(id, name); +} + +const SPIRType &Compiler::get_type(TypeID id) const +{ + return get(id); +} + +const SPIRType &Compiler::get_type_from_variable(VariableID id) const +{ + return get(get(id).basetype); +} + +uint32_t Compiler::get_pointee_type_id(uint32_t type_id) const +{ + auto *p_type = &get(type_id); + if (p_type->pointer) + { + assert(p_type->parent_type); + type_id = p_type->parent_type; + } + return type_id; +} + +const SPIRType &Compiler::get_pointee_type(const SPIRType &type) const +{ + auto *p_type = &type; + if (p_type->pointer) + { + assert(p_type->parent_type); + p_type = &get(p_type->parent_type); + } + return *p_type; +} + +const SPIRType &Compiler::get_pointee_type(uint32_t type_id) const +{ + return get_pointee_type(get(type_id)); +} + +uint32_t Compiler::get_variable_data_type_id(const SPIRVariable &var) const +{ + if (var.phi_variable) + return var.basetype; + return get_pointee_type_id(var.basetype); +} + +SPIRType &Compiler::get_variable_data_type(const SPIRVariable &var) +{ + return get(get_variable_data_type_id(var)); +} + +const SPIRType &Compiler::get_variable_data_type(const SPIRVariable &var) const +{ + return get(get_variable_data_type_id(var)); +} + +SPIRType &Compiler::get_variable_element_type(const SPIRVariable &var) +{ + SPIRType *type = &get_variable_data_type(var); + if (is_array(*type)) + type = &get(type->parent_type); + return *type; +} + +const SPIRType &Compiler::get_variable_element_type(const SPIRVariable &var) const +{ + const SPIRType *type = &get_variable_data_type(var); + if (is_array(*type)) + type = &get(type->parent_type); + return *type; +} + +bool Compiler::is_sampled_image_type(const SPIRType &type) +{ + return (type.basetype == 
SPIRType::Image || type.basetype == SPIRType::SampledImage) && type.image.sampled == 1 && + type.image.dim != DimBuffer; +} + +void Compiler::set_member_decoration_string(TypeID id, uint32_t index, spv::Decoration decoration, + const std::string &argument) +{ + ir.set_member_decoration_string(id, index, decoration, argument); +} + +void Compiler::set_member_decoration(TypeID id, uint32_t index, Decoration decoration, uint32_t argument) +{ + ir.set_member_decoration(id, index, decoration, argument); +} + +void Compiler::set_member_name(TypeID id, uint32_t index, const std::string &name) +{ + ir.set_member_name(id, index, name); +} + +const std::string &Compiler::get_member_name(TypeID id, uint32_t index) const +{ + return ir.get_member_name(id, index); +} + +void Compiler::set_qualified_name(uint32_t id, const string &name) +{ + ir.meta[id].decoration.qualified_alias = name; +} + +void Compiler::set_member_qualified_name(uint32_t type_id, uint32_t index, const std::string &name) +{ + ir.meta[type_id].members.resize(max(ir.meta[type_id].members.size(), size_t(index) + 1)); + ir.meta[type_id].members[index].qualified_alias = name; +} + +const string &Compiler::get_member_qualified_name(TypeID type_id, uint32_t index) const +{ + auto *m = ir.find_meta(type_id); + if (m && index < m->members.size()) + return m->members[index].qualified_alias; + else + return ir.get_empty_string(); +} + +uint32_t Compiler::get_member_decoration(TypeID id, uint32_t index, Decoration decoration) const +{ + return ir.get_member_decoration(id, index, decoration); +} + +const Bitset &Compiler::get_member_decoration_bitset(TypeID id, uint32_t index) const +{ + return ir.get_member_decoration_bitset(id, index); +} + +bool Compiler::has_member_decoration(TypeID id, uint32_t index, Decoration decoration) const +{ + return ir.has_member_decoration(id, index, decoration); +} + +void Compiler::unset_member_decoration(TypeID id, uint32_t index, Decoration decoration) +{ + 
ir.unset_member_decoration(id, index, decoration); +} + +void Compiler::set_decoration_string(ID id, spv::Decoration decoration, const std::string &argument) +{ + ir.set_decoration_string(id, decoration, argument); +} + +void Compiler::set_decoration(ID id, Decoration decoration, uint32_t argument) +{ + ir.set_decoration(id, decoration, argument); +} + +void Compiler::set_extended_decoration(uint32_t id, ExtendedDecorations decoration, uint32_t value) +{ + auto &dec = ir.meta[id].decoration; + dec.extended.flags.set(decoration); + dec.extended.values[decoration] = value; +} + +void Compiler::set_extended_member_decoration(uint32_t type, uint32_t index, ExtendedDecorations decoration, + uint32_t value) +{ + ir.meta[type].members.resize(max(ir.meta[type].members.size(), size_t(index) + 1)); + auto &dec = ir.meta[type].members[index]; + dec.extended.flags.set(decoration); + dec.extended.values[decoration] = value; +} + +static uint32_t get_default_extended_decoration(ExtendedDecorations decoration) +{ + switch (decoration) + { + case SPIRVCrossDecorationResourceIndexPrimary: + case SPIRVCrossDecorationResourceIndexSecondary: + case SPIRVCrossDecorationResourceIndexTertiary: + case SPIRVCrossDecorationResourceIndexQuaternary: + case SPIRVCrossDecorationInterfaceMemberIndex: + return ~(0u); + + default: + return 0; + } +} + +uint32_t Compiler::get_extended_decoration(uint32_t id, ExtendedDecorations decoration) const +{ + auto *m = ir.find_meta(id); + if (!m) + return 0; + + auto &dec = m->decoration; + + if (!dec.extended.flags.get(decoration)) + return get_default_extended_decoration(decoration); + + return dec.extended.values[decoration]; +} + +uint32_t Compiler::get_extended_member_decoration(uint32_t type, uint32_t index, ExtendedDecorations decoration) const +{ + auto *m = ir.find_meta(type); + if (!m) + return 0; + + if (index >= m->members.size()) + return 0; + + auto &dec = m->members[index]; + if (!dec.extended.flags.get(decoration)) + return 
get_default_extended_decoration(decoration); + return dec.extended.values[decoration]; +} + +bool Compiler::has_extended_decoration(uint32_t id, ExtendedDecorations decoration) const +{ + auto *m = ir.find_meta(id); + if (!m) + return false; + + auto &dec = m->decoration; + return dec.extended.flags.get(decoration); +} + +bool Compiler::has_extended_member_decoration(uint32_t type, uint32_t index, ExtendedDecorations decoration) const +{ + auto *m = ir.find_meta(type); + if (!m) + return false; + + if (index >= m->members.size()) + return false; + + auto &dec = m->members[index]; + return dec.extended.flags.get(decoration); +} + +void Compiler::unset_extended_decoration(uint32_t id, ExtendedDecorations decoration) +{ + auto &dec = ir.meta[id].decoration; + dec.extended.flags.clear(decoration); + dec.extended.values[decoration] = 0; +} + +void Compiler::unset_extended_member_decoration(uint32_t type, uint32_t index, ExtendedDecorations decoration) +{ + ir.meta[type].members.resize(max(ir.meta[type].members.size(), size_t(index) + 1)); + auto &dec = ir.meta[type].members[index]; + dec.extended.flags.clear(decoration); + dec.extended.values[decoration] = 0; +} + +StorageClass Compiler::get_storage_class(VariableID id) const +{ + return get(id).storage; +} + +const std::string &Compiler::get_name(ID id) const +{ + return ir.get_name(id); +} + +const std::string Compiler::get_fallback_name(ID id) const +{ + return join("_", id); +} + +const std::string Compiler::get_block_fallback_name(VariableID id) const +{ + auto &var = get(id); + if (get_name(id).empty()) + return join("_", get(var.basetype).self, "_", id); + else + return get_name(id); +} + +const Bitset &Compiler::get_decoration_bitset(ID id) const +{ + return ir.get_decoration_bitset(id); +} + +bool Compiler::has_decoration(ID id, Decoration decoration) const +{ + return ir.has_decoration(id, decoration); +} + +const string &Compiler::get_decoration_string(ID id, Decoration decoration) const +{ + return 
ir.get_decoration_string(id, decoration); +} + +const string &Compiler::get_member_decoration_string(TypeID id, uint32_t index, Decoration decoration) const +{ + return ir.get_member_decoration_string(id, index, decoration); +} + +uint32_t Compiler::get_decoration(ID id, Decoration decoration) const +{ + return ir.get_decoration(id, decoration); +} + +void Compiler::unset_decoration(ID id, Decoration decoration) +{ + ir.unset_decoration(id, decoration); +} + +bool Compiler::get_binary_offset_for_decoration(VariableID id, spv::Decoration decoration, uint32_t &word_offset) const +{ + auto *m = ir.find_meta(id); + if (!m) + return false; + + auto &word_offsets = m->decoration_word_offset; + auto itr = word_offsets.find(decoration); + if (itr == end(word_offsets)) + return false; + + word_offset = itr->second; + return true; +} + +bool Compiler::block_is_loop_candidate(const SPIRBlock &block, SPIRBlock::Method method) const +{ + // Tried and failed. + if (block.disable_block_optimization || block.complex_continue) + return false; + + if (method == SPIRBlock::MergeToSelectForLoop || method == SPIRBlock::MergeToSelectContinueForLoop) + { + // Try to detect common for loop pattern + // which the code backend can use to create cleaner code. + // for(;;) { if (cond) { some_body; } else { break; } } + // is the pattern we're looking for. 
+ const auto *false_block = maybe_get(block.false_block); + const auto *true_block = maybe_get(block.true_block); + const auto *merge_block = maybe_get(block.merge_block); + + bool false_block_is_merge = block.false_block == block.merge_block || + (false_block && merge_block && execution_is_noop(*false_block, *merge_block)); + + bool true_block_is_merge = block.true_block == block.merge_block || + (true_block && merge_block && execution_is_noop(*true_block, *merge_block)); + + bool positive_candidate = + block.true_block != block.merge_block && block.true_block != block.self && false_block_is_merge; + + bool negative_candidate = + block.false_block != block.merge_block && block.false_block != block.self && true_block_is_merge; + + bool ret = block.terminator == SPIRBlock::Select && block.merge == SPIRBlock::MergeLoop && + (positive_candidate || negative_candidate); + + if (ret && positive_candidate && method == SPIRBlock::MergeToSelectContinueForLoop) + ret = block.true_block == block.continue_block; + else if (ret && negative_candidate && method == SPIRBlock::MergeToSelectContinueForLoop) + ret = block.false_block == block.continue_block; + + // If we have OpPhi which depends on branches which came from our own block, + // we need to flush phi variables in else block instead of a trivial break, + // so we cannot assume this is a for loop candidate. + if (ret) + { + for (auto &phi : block.phi_variables) + if (phi.parent == block.self) + return false; + + auto *merge = maybe_get(block.merge_block); + if (merge) + for (auto &phi : merge->phi_variables) + if (phi.parent == block.self) + return false; + } + return ret; + } + else if (method == SPIRBlock::MergeToDirectForLoop) + { + // Empty loop header that just sets up merge target + // and branches to loop body. 
+ bool ret = block.terminator == SPIRBlock::Direct && block.merge == SPIRBlock::MergeLoop && block.ops.empty(); + + if (!ret) + return false; + + auto &child = get(block.next_block); + + const auto *false_block = maybe_get(child.false_block); + const auto *true_block = maybe_get(child.true_block); + const auto *merge_block = maybe_get(block.merge_block); + + bool false_block_is_merge = child.false_block == block.merge_block || + (false_block && merge_block && execution_is_noop(*false_block, *merge_block)); + + bool true_block_is_merge = child.true_block == block.merge_block || + (true_block && merge_block && execution_is_noop(*true_block, *merge_block)); + + bool positive_candidate = + child.true_block != block.merge_block && child.true_block != block.self && false_block_is_merge; + + bool negative_candidate = + child.false_block != block.merge_block && child.false_block != block.self && true_block_is_merge; + + ret = child.terminator == SPIRBlock::Select && child.merge == SPIRBlock::MergeNone && + (positive_candidate || negative_candidate); + + // If we have OpPhi which depends on branches which came from our own block, + // we need to flush phi variables in else block instead of a trivial break, + // so we cannot assume this is a for loop candidate. 
+ if (ret) + { + for (auto &phi : block.phi_variables) + if (phi.parent == block.self || phi.parent == child.self) + return false; + + for (auto &phi : child.phi_variables) + if (phi.parent == block.self) + return false; + + auto *merge = maybe_get(block.merge_block); + if (merge) + for (auto &phi : merge->phi_variables) + if (phi.parent == block.self || phi.parent == child.false_block) + return false; + } + + return ret; + } + else + return false; +} + +bool Compiler::execution_is_noop(const SPIRBlock &from, const SPIRBlock &to) const +{ + if (!execution_is_branchless(from, to)) + return false; + + auto *start = &from; + for (;;) + { + if (start->self == to.self) + return true; + + if (!start->ops.empty()) + return false; + + auto &next = get(start->next_block); + // Flushing phi variables does not count as noop. + for (auto &phi : next.phi_variables) + if (phi.parent == start->self) + return false; + + start = &next; + } +} + +bool Compiler::execution_is_branchless(const SPIRBlock &from, const SPIRBlock &to) const +{ + auto *start = &from; + for (;;) + { + if (start->self == to.self) + return true; + + if (start->terminator == SPIRBlock::Direct && start->merge == SPIRBlock::MergeNone) + start = &get(start->next_block); + else + return false; + } +} + +bool Compiler::execution_is_direct_branch(const SPIRBlock &from, const SPIRBlock &to) const +{ + return from.terminator == SPIRBlock::Direct && from.merge == SPIRBlock::MergeNone && from.next_block == to.self; +} + +SPIRBlock::ContinueBlockType Compiler::continue_block_type(const SPIRBlock &block) const +{ + // The block was deemed too complex during code emit, pick conservative fallback paths. + if (block.complex_continue) + return SPIRBlock::ComplexLoop; + + // In older glslang output continue block can be equal to the loop header. + // In this case, execution is clearly branchless, so just assume a while loop header here. 
+ if (block.merge == SPIRBlock::MergeLoop) + return SPIRBlock::WhileLoop; + + if (block.loop_dominator == BlockID(SPIRBlock::NoDominator)) + { + // Continue block is never reached from CFG. + return SPIRBlock::ComplexLoop; + } + + auto &dominator = get(block.loop_dominator); + + if (execution_is_noop(block, dominator)) + return SPIRBlock::WhileLoop; + else if (execution_is_branchless(block, dominator)) + return SPIRBlock::ForLoop; + else + { + const auto *false_block = maybe_get(block.false_block); + const auto *true_block = maybe_get(block.true_block); + const auto *merge_block = maybe_get(dominator.merge_block); + + // If we need to flush Phi in this block, we cannot have a DoWhile loop. + bool flush_phi_to_false = false_block && flush_phi_required(block.self, block.false_block); + bool flush_phi_to_true = true_block && flush_phi_required(block.self, block.true_block); + if (flush_phi_to_false || flush_phi_to_true) + return SPIRBlock::ComplexLoop; + + bool positive_do_while = block.true_block == dominator.self && + (block.false_block == dominator.merge_block || + (false_block && merge_block && execution_is_noop(*false_block, *merge_block))); + + bool negative_do_while = block.false_block == dominator.self && + (block.true_block == dominator.merge_block || + (true_block && merge_block && execution_is_noop(*true_block, *merge_block))); + + if (block.merge == SPIRBlock::MergeNone && block.terminator == SPIRBlock::Select && + (positive_do_while || negative_do_while)) + { + return SPIRBlock::DoWhileLoop; + } + else + return SPIRBlock::ComplexLoop; + } +} + +bool Compiler::traverse_all_reachable_opcodes(const SPIRBlock &block, OpcodeHandler &handler) const +{ + handler.set_current_block(block); + handler.rearm_current_block(block); + + // Ideally, perhaps traverse the CFG instead of all blocks in order to eliminate dead blocks, + // but this shouldn't be a problem in practice unless the SPIR-V is doing insane things like recursing + // inside dead blocks ... 
+ for (auto &i : block.ops) + { + auto ops = stream(i); + auto op = static_cast(i.op); + + if (!handler.handle(op, ops, i.length)) + return false; + + if (op == OpFunctionCall) + { + auto &func = get(ops[2]); + if (handler.follow_function_call(func)) + { + if (!handler.begin_function_scope(ops, i.length)) + return false; + if (!traverse_all_reachable_opcodes(get(ops[2]), handler)) + return false; + if (!handler.end_function_scope(ops, i.length)) + return false; + + handler.rearm_current_block(block); + } + } + } + + return true; +} + +bool Compiler::traverse_all_reachable_opcodes(const SPIRFunction &func, OpcodeHandler &handler) const +{ + for (auto block : func.blocks) + if (!traverse_all_reachable_opcodes(get(block), handler)) + return false; + + return true; +} + +uint32_t Compiler::type_struct_member_offset(const SPIRType &type, uint32_t index) const +{ + auto *type_meta = ir.find_meta(type.self); + if (type_meta) + { + // Decoration must be set in valid SPIR-V, otherwise throw. + auto &dec = type_meta->members[index]; + if (dec.decoration_flags.get(DecorationOffset)) + return dec.offset; + else + SPIRV_CROSS_THROW("Struct member does not have Offset set."); + } + else + SPIRV_CROSS_THROW("Struct member does not have Offset set."); +} + +uint32_t Compiler::type_struct_member_array_stride(const SPIRType &type, uint32_t index) const +{ + auto *type_meta = ir.find_meta(type.member_types[index]); + if (type_meta) + { + // Decoration must be set in valid SPIR-V, otherwise throw. + // ArrayStride is part of the array type not OpMemberDecorate. 
+ auto &dec = type_meta->decoration; + if (dec.decoration_flags.get(DecorationArrayStride)) + return dec.array_stride; + else + SPIRV_CROSS_THROW("Struct member does not have ArrayStride set."); + } + else + SPIRV_CROSS_THROW("Struct member does not have ArrayStride set."); +} + +uint32_t Compiler::type_struct_member_matrix_stride(const SPIRType &type, uint32_t index) const +{ + auto *type_meta = ir.find_meta(type.self); + if (type_meta) + { + // Decoration must be set in valid SPIR-V, otherwise throw. + // MatrixStride is part of OpMemberDecorate. + auto &dec = type_meta->members[index]; + if (dec.decoration_flags.get(DecorationMatrixStride)) + return dec.matrix_stride; + else + SPIRV_CROSS_THROW("Struct member does not have MatrixStride set."); + } + else + SPIRV_CROSS_THROW("Struct member does not have MatrixStride set."); +} + +size_t Compiler::get_declared_struct_size(const SPIRType &type) const +{ + if (type.member_types.empty()) + SPIRV_CROSS_THROW("Declared struct in block cannot be empty."); + + uint32_t last = uint32_t(type.member_types.size() - 1); + size_t offset = type_struct_member_offset(type, last); + size_t size = get_declared_struct_member_size(type, last); + return offset + size; +} + +size_t Compiler::get_declared_struct_size_runtime_array(const SPIRType &type, size_t array_size) const +{ + if (type.member_types.empty()) + SPIRV_CROSS_THROW("Declared struct in block cannot be empty."); + + size_t size = get_declared_struct_size(type); + auto &last_type = get(type.member_types.back()); + if (!last_type.array.empty() && last_type.array_size_literal[0] && last_type.array[0] == 0) // Runtime array + size += array_size * type_struct_member_array_stride(type, uint32_t(type.member_types.size() - 1)); + + return size; +} + +uint32_t Compiler::evaluate_spec_constant_u32(const SPIRConstantOp &spec) const +{ + auto &result_type = get(spec.basetype); + if (result_type.basetype != SPIRType::UInt && result_type.basetype != SPIRType::Int && + 
result_type.basetype != SPIRType::Boolean) + { + SPIRV_CROSS_THROW( + "Only 32-bit integers and booleans are currently supported when evaluating specialization constants.\n"); + } + + if (!is_scalar(result_type)) + SPIRV_CROSS_THROW("Spec constant evaluation must be a scalar.\n"); + + uint32_t value = 0; + + const auto eval_u32 = [&](uint32_t id) -> uint32_t { + auto &type = expression_type(id); + if (type.basetype != SPIRType::UInt && type.basetype != SPIRType::Int && type.basetype != SPIRType::Boolean) + { + SPIRV_CROSS_THROW("Only 32-bit integers and booleans are currently supported when evaluating " + "specialization constants.\n"); + } + + if (!is_scalar(type)) + SPIRV_CROSS_THROW("Spec constant evaluation must be a scalar.\n"); + if (const auto *c = this->maybe_get(id)) + return c->scalar(); + else + return evaluate_spec_constant_u32(this->get(id)); + }; + +#define binary_spec_op(op, binary_op) \ + case Op##op: \ + value = eval_u32(spec.arguments[0]) binary_op eval_u32(spec.arguments[1]); \ + break +#define binary_spec_op_cast(op, binary_op, type) \ + case Op##op: \ + value = uint32_t(type(eval_u32(spec.arguments[0])) binary_op type(eval_u32(spec.arguments[1]))); \ + break + + // Support the basic opcodes which are typically used when computing array sizes. 
+ switch (spec.opcode) + { + binary_spec_op(IAdd, +); + binary_spec_op(ISub, -); + binary_spec_op(IMul, *); + binary_spec_op(BitwiseAnd, &); + binary_spec_op(BitwiseOr, |); + binary_spec_op(BitwiseXor, ^); + binary_spec_op(LogicalAnd, &); + binary_spec_op(LogicalOr, |); + binary_spec_op(ShiftLeftLogical, <<); + binary_spec_op(ShiftRightLogical, >>); + binary_spec_op_cast(ShiftRightArithmetic, >>, int32_t); + binary_spec_op(LogicalEqual, ==); + binary_spec_op(LogicalNotEqual, !=); + binary_spec_op(IEqual, ==); + binary_spec_op(INotEqual, !=); + binary_spec_op(ULessThan, <); + binary_spec_op(ULessThanEqual, <=); + binary_spec_op(UGreaterThan, >); + binary_spec_op(UGreaterThanEqual, >=); + binary_spec_op_cast(SLessThan, <, int32_t); + binary_spec_op_cast(SLessThanEqual, <=, int32_t); + binary_spec_op_cast(SGreaterThan, >, int32_t); + binary_spec_op_cast(SGreaterThanEqual, >=, int32_t); +#undef binary_spec_op +#undef binary_spec_op_cast + + case OpLogicalNot: + value = uint32_t(!eval_u32(spec.arguments[0])); + break; + + case OpNot: + value = ~eval_u32(spec.arguments[0]); + break; + + case OpSNegate: + value = uint32_t(-int32_t(eval_u32(spec.arguments[0]))); + break; + + case OpSelect: + value = eval_u32(spec.arguments[0]) ? 
eval_u32(spec.arguments[1]) : eval_u32(spec.arguments[2]); + break; + + case OpUMod: + { + uint32_t a = eval_u32(spec.arguments[0]); + uint32_t b = eval_u32(spec.arguments[1]); + if (b == 0) + SPIRV_CROSS_THROW("Undefined behavior in UMod, b == 0.\n"); + value = a % b; + break; + } + + case OpSRem: + { + auto a = int32_t(eval_u32(spec.arguments[0])); + auto b = int32_t(eval_u32(spec.arguments[1])); + if (b == 0) + SPIRV_CROSS_THROW("Undefined behavior in SRem, b == 0.\n"); + value = a % b; + break; + } + + case OpSMod: + { + auto a = int32_t(eval_u32(spec.arguments[0])); + auto b = int32_t(eval_u32(spec.arguments[1])); + if (b == 0) + SPIRV_CROSS_THROW("Undefined behavior in SMod, b == 0.\n"); + auto v = a % b; + + // Makes sure we match the sign of b, not a. + if ((b < 0 && v > 0) || (b > 0 && v < 0)) + v += b; + value = v; + break; + } + + case OpUDiv: + { + uint32_t a = eval_u32(spec.arguments[0]); + uint32_t b = eval_u32(spec.arguments[1]); + if (b == 0) + SPIRV_CROSS_THROW("Undefined behavior in UDiv, b == 0.\n"); + value = a / b; + break; + } + + case OpSDiv: + { + auto a = int32_t(eval_u32(spec.arguments[0])); + auto b = int32_t(eval_u32(spec.arguments[1])); + if (b == 0) + SPIRV_CROSS_THROW("Undefined behavior in SDiv, b == 0.\n"); + value = a / b; + break; + } + + default: + SPIRV_CROSS_THROW("Unsupported spec constant opcode for evaluation.\n"); + } + + return value; +} + +uint32_t Compiler::evaluate_constant_u32(uint32_t id) const +{ + if (const auto *c = maybe_get(id)) + return c->scalar(); + else + return evaluate_spec_constant_u32(get(id)); +} + +size_t Compiler::get_declared_struct_member_size(const SPIRType &struct_type, uint32_t index) const +{ + if (struct_type.member_types.empty()) + SPIRV_CROSS_THROW("Declared struct in block cannot be empty."); + + auto &flags = get_member_decoration_bitset(struct_type.self, index); + auto &type = get(struct_type.member_types[index]); + + switch (type.basetype) + { + case SPIRType::Unknown: + case 
SPIRType::Void: + case SPIRType::Boolean: // Bools are purely logical, and cannot be used for externally visible types. + case SPIRType::AtomicCounter: + case SPIRType::Image: + case SPIRType::SampledImage: + case SPIRType::Sampler: + SPIRV_CROSS_THROW("Querying size for object with opaque size."); + + default: + break; + } + + if (type.pointer && type.storage == StorageClassPhysicalStorageBuffer) + { + // Check if this is a top-level pointer type, and not an array of pointers. + if (type.pointer_depth > get(type.parent_type).pointer_depth) + return 8; + } + + if (!type.array.empty()) + { + // For arrays, we can use ArrayStride to get an easy check. + bool array_size_literal = type.array_size_literal.back(); + uint32_t array_size = array_size_literal ? type.array.back() : evaluate_constant_u32(type.array.back()); + return type_struct_member_array_stride(struct_type, index) * array_size; + } + else if (type.basetype == SPIRType::Struct) + { + return get_declared_struct_size(type); + } + else + { + unsigned vecsize = type.vecsize; + unsigned columns = type.columns; + + // Vectors. + if (columns == 1) + { + size_t component_size = type.width / 8; + return vecsize * component_size; + } + else + { + uint32_t matrix_stride = type_struct_member_matrix_stride(struct_type, index); + + // Per SPIR-V spec, matrices must be tightly packed and aligned up for vec3 accesses. + if (flags.get(DecorationRowMajor)) + return matrix_stride * vecsize; + else if (flags.get(DecorationColMajor)) + return matrix_stride * columns; + else + SPIRV_CROSS_THROW("Either row-major or column-major must be declared for matrices."); + } + } +} + +bool Compiler::BufferAccessHandler::handle(Op opcode, const uint32_t *args, uint32_t length) +{ + if (opcode != OpAccessChain && opcode != OpInBoundsAccessChain && opcode != OpPtrAccessChain) + return true; + + bool ptr_chain = (opcode == OpPtrAccessChain); + + // Invalid SPIR-V. + if (length < (ptr_chain ? 
5u : 4u)) + return false; + + if (args[2] != id) + return true; + + // Don't bother traversing the entire access chain tree yet. + // If we access a struct member, assume we access the entire member. + uint32_t index = compiler.get(args[ptr_chain ? 4 : 3]).scalar(); + + // Seen this index already. + if (seen.find(index) != end(seen)) + return true; + seen.insert(index); + + auto &type = compiler.expression_type(id); + uint32_t offset = compiler.type_struct_member_offset(type, index); + + size_t range; + // If we have another member in the struct, deduce the range by looking at the next member. + // This is okay since structs in SPIR-V can have padding, but Offset decoration must be + // monotonically increasing. + // Of course, this doesn't take into account if the SPIR-V for some reason decided to add + // very large amounts of padding, but that's not really a big deal. + if (index + 1 < type.member_types.size()) + { + range = compiler.type_struct_member_offset(type, index + 1) - offset; + } + else + { + // No padding, so just deduce it from the size of the member directly. 
+ range = compiler.get_declared_struct_member_size(type, index); + } + + ranges.push_back({ index, offset, range }); + return true; +} + +SmallVector Compiler::get_active_buffer_ranges(VariableID id) const +{ + SmallVector ranges; + BufferAccessHandler handler(*this, ranges, id); + traverse_all_reachable_opcodes(get(ir.default_entry_point), handler); + return ranges; +} + +bool Compiler::types_are_logically_equivalent(const SPIRType &a, const SPIRType &b) const +{ + if (a.basetype != b.basetype) + return false; + if (a.width != b.width) + return false; + if (a.vecsize != b.vecsize) + return false; + if (a.columns != b.columns) + return false; + if (a.array.size() != b.array.size()) + return false; + + size_t array_count = a.array.size(); + if (array_count && memcmp(a.array.data(), b.array.data(), array_count * sizeof(uint32_t)) != 0) + return false; + + if (a.basetype == SPIRType::Image || a.basetype == SPIRType::SampledImage) + { + if (memcmp(&a.image, &b.image, sizeof(SPIRType::Image)) != 0) + return false; + } + + if (a.member_types.size() != b.member_types.size()) + return false; + + size_t member_types = a.member_types.size(); + for (size_t i = 0; i < member_types; i++) + { + if (!types_are_logically_equivalent(get(a.member_types[i]), get(b.member_types[i]))) + return false; + } + + return true; +} + +const Bitset &Compiler::get_execution_mode_bitset() const +{ + return get_entry_point().flags; +} + +void Compiler::set_execution_mode(ExecutionMode mode, uint32_t arg0, uint32_t arg1, uint32_t arg2) +{ + auto &execution = get_entry_point(); + + execution.flags.set(mode); + switch (mode) + { + case ExecutionModeLocalSize: + execution.workgroup_size.x = arg0; + execution.workgroup_size.y = arg1; + execution.workgroup_size.z = arg2; + break; + + case ExecutionModeInvocations: + execution.invocations = arg0; + break; + + case ExecutionModeOutputVertices: + execution.output_vertices = arg0; + break; + + default: + break; + } +} + +void 
Compiler::unset_execution_mode(ExecutionMode mode) +{ + auto &execution = get_entry_point(); + execution.flags.clear(mode); +} + +uint32_t Compiler::get_work_group_size_specialization_constants(SpecializationConstant &x, SpecializationConstant &y, + SpecializationConstant &z) const +{ + auto &execution = get_entry_point(); + x = { 0, 0 }; + y = { 0, 0 }; + z = { 0, 0 }; + + if (execution.workgroup_size.constant != 0) + { + auto &c = get(execution.workgroup_size.constant); + + if (c.m.c[0].id[0] != ID(0)) + { + x.id = c.m.c[0].id[0]; + x.constant_id = get_decoration(c.m.c[0].id[0], DecorationSpecId); + } + + if (c.m.c[0].id[1] != ID(0)) + { + y.id = c.m.c[0].id[1]; + y.constant_id = get_decoration(c.m.c[0].id[1], DecorationSpecId); + } + + if (c.m.c[0].id[2] != ID(0)) + { + z.id = c.m.c[0].id[2]; + z.constant_id = get_decoration(c.m.c[0].id[2], DecorationSpecId); + } + } + + return execution.workgroup_size.constant; +} + +uint32_t Compiler::get_execution_mode_argument(spv::ExecutionMode mode, uint32_t index) const +{ + auto &execution = get_entry_point(); + switch (mode) + { + case ExecutionModeLocalSize: + switch (index) + { + case 0: + return execution.workgroup_size.x; + case 1: + return execution.workgroup_size.y; + case 2: + return execution.workgroup_size.z; + default: + return 0; + } + + case ExecutionModeInvocations: + return execution.invocations; + + case ExecutionModeOutputVertices: + return execution.output_vertices; + + default: + return 0; + } +} + +ExecutionModel Compiler::get_execution_model() const +{ + auto &execution = get_entry_point(); + return execution.model; +} + +bool Compiler::is_tessellation_shader(ExecutionModel model) +{ + return model == ExecutionModelTessellationControl || model == ExecutionModelTessellationEvaluation; +} + +bool Compiler::is_vertex_like_shader() const +{ + auto model = get_execution_model(); + return model == ExecutionModelVertex || model == ExecutionModelGeometry || + model == ExecutionModelTessellationControl || 
model == ExecutionModelTessellationEvaluation; +} + +bool Compiler::is_tessellation_shader() const +{ + return is_tessellation_shader(get_execution_model()); +} + +void Compiler::set_remapped_variable_state(VariableID id, bool remap_enable) +{ + get(id).remapped_variable = remap_enable; +} + +bool Compiler::get_remapped_variable_state(VariableID id) const +{ + return get(id).remapped_variable; +} + +void Compiler::set_subpass_input_remapped_components(VariableID id, uint32_t components) +{ + get(id).remapped_components = components; +} + +uint32_t Compiler::get_subpass_input_remapped_components(VariableID id) const +{ + return get(id).remapped_components; +} + +void Compiler::add_implied_read_expression(SPIRExpression &e, uint32_t source) +{ + auto itr = find(begin(e.implied_read_expressions), end(e.implied_read_expressions), ID(source)); + if (itr == end(e.implied_read_expressions)) + e.implied_read_expressions.push_back(source); +} + +void Compiler::add_implied_read_expression(SPIRAccessChain &e, uint32_t source) +{ + auto itr = find(begin(e.implied_read_expressions), end(e.implied_read_expressions), ID(source)); + if (itr == end(e.implied_read_expressions)) + e.implied_read_expressions.push_back(source); +} + +void Compiler::inherit_expression_dependencies(uint32_t dst, uint32_t source_expression) +{ + // Don't inherit any expression dependencies if the expression in dst + // is not a forwarded temporary. + if (forwarded_temporaries.find(dst) == end(forwarded_temporaries) || + forced_temporaries.find(dst) != end(forced_temporaries)) + { + return; + } + + auto &e = get(dst); + auto *phi = maybe_get(source_expression); + if (phi && phi->phi_variable) + { + // We have used a phi variable, which can change at the end of the block, + // so make sure we take a dependency on this phi variable. 
+ phi->dependees.push_back(dst); + } + + auto *s = maybe_get(source_expression); + if (!s) + return; + + auto &e_deps = e.expression_dependencies; + auto &s_deps = s->expression_dependencies; + + // If we depend on a expression, we also depend on all sub-dependencies from source. + e_deps.push_back(source_expression); + e_deps.insert(end(e_deps), begin(s_deps), end(s_deps)); + + // Eliminate duplicated dependencies. + sort(begin(e_deps), end(e_deps)); + e_deps.erase(unique(begin(e_deps), end(e_deps)), end(e_deps)); +} + +SmallVector Compiler::get_entry_points_and_stages() const +{ + SmallVector entries; + for (auto &entry : ir.entry_points) + entries.push_back({ entry.second.orig_name, entry.second.model }); + return entries; +} + +void Compiler::rename_entry_point(const std::string &old_name, const std::string &new_name, spv::ExecutionModel model) +{ + auto &entry = get_entry_point(old_name, model); + entry.orig_name = new_name; + entry.name = new_name; +} + +void Compiler::set_entry_point(const std::string &name, spv::ExecutionModel model) +{ + auto &entry = get_entry_point(name, model); + ir.default_entry_point = entry.self; +} + +SPIREntryPoint &Compiler::get_first_entry_point(const std::string &name) +{ + auto itr = find_if( + begin(ir.entry_points), end(ir.entry_points), + [&](const std::pair &entry) -> bool { return entry.second.orig_name == name; }); + + if (itr == end(ir.entry_points)) + SPIRV_CROSS_THROW("Entry point does not exist."); + + return itr->second; +} + +const SPIREntryPoint &Compiler::get_first_entry_point(const std::string &name) const +{ + auto itr = find_if( + begin(ir.entry_points), end(ir.entry_points), + [&](const std::pair &entry) -> bool { return entry.second.orig_name == name; }); + + if (itr == end(ir.entry_points)) + SPIRV_CROSS_THROW("Entry point does not exist."); + + return itr->second; +} + +SPIREntryPoint &Compiler::get_entry_point(const std::string &name, ExecutionModel model) +{ + auto itr = find_if(begin(ir.entry_points), 
end(ir.entry_points), + [&](const std::pair &entry) -> bool { + return entry.second.orig_name == name && entry.second.model == model; + }); + + if (itr == end(ir.entry_points)) + SPIRV_CROSS_THROW("Entry point does not exist."); + + return itr->second; +} + +const SPIREntryPoint &Compiler::get_entry_point(const std::string &name, ExecutionModel model) const +{ + auto itr = find_if(begin(ir.entry_points), end(ir.entry_points), + [&](const std::pair &entry) -> bool { + return entry.second.orig_name == name && entry.second.model == model; + }); + + if (itr == end(ir.entry_points)) + SPIRV_CROSS_THROW("Entry point does not exist."); + + return itr->second; +} + +const string &Compiler::get_cleansed_entry_point_name(const std::string &name, ExecutionModel model) const +{ + return get_entry_point(name, model).name; +} + +const SPIREntryPoint &Compiler::get_entry_point() const +{ + return ir.entry_points.find(ir.default_entry_point)->second; +} + +SPIREntryPoint &Compiler::get_entry_point() +{ + return ir.entry_points.find(ir.default_entry_point)->second; +} + +bool Compiler::interface_variable_exists_in_entry_point(uint32_t id) const +{ + auto &var = get(id); + if (var.storage != StorageClassInput && var.storage != StorageClassOutput && + var.storage != StorageClassUniformConstant) + SPIRV_CROSS_THROW("Only Input, Output variables and Uniform constants are part of a shader linking interface."); + + // This is to avoid potential problems with very old glslang versions which did + // not emit input/output interfaces properly. + // We can assume they only had a single entry point, and single entry point + // shaders could easily be assumed to use every interface variable anyways. 
+ if (ir.entry_points.size() <= 1) + return true; + + auto &execution = get_entry_point(); + return find(begin(execution.interface_variables), end(execution.interface_variables), VariableID(id)) != + end(execution.interface_variables); +} + +void Compiler::CombinedImageSamplerHandler::push_remap_parameters(const SPIRFunction &func, const uint32_t *args, + uint32_t length) +{ + // If possible, pipe through a remapping table so that parameters know + // which variables they actually bind to in this scope. + unordered_map remapping; + for (uint32_t i = 0; i < length; i++) + remapping[func.arguments[i].id] = remap_parameter(args[i]); + parameter_remapping.push(move(remapping)); +} + +void Compiler::CombinedImageSamplerHandler::pop_remap_parameters() +{ + parameter_remapping.pop(); +} + +uint32_t Compiler::CombinedImageSamplerHandler::remap_parameter(uint32_t id) +{ + auto *var = compiler.maybe_get_backing_variable(id); + if (var) + id = var->self; + + if (parameter_remapping.empty()) + return id; + + auto &remapping = parameter_remapping.top(); + auto itr = remapping.find(id); + if (itr != end(remapping)) + return itr->second; + else + return id; +} + +bool Compiler::CombinedImageSamplerHandler::begin_function_scope(const uint32_t *args, uint32_t length) +{ + if (length < 3) + return false; + + auto &callee = compiler.get(args[2]); + args += 3; + length -= 3; + push_remap_parameters(callee, args, length); + functions.push(&callee); + return true; +} + +bool Compiler::CombinedImageSamplerHandler::end_function_scope(const uint32_t *args, uint32_t length) +{ + if (length < 3) + return false; + + auto &callee = compiler.get(args[2]); + args += 3; + + // There are two types of cases we have to handle, + // a callee might call sampler2D(texture2D, sampler) directly where + // one or more parameters originate from parameters. + // Alternatively, we need to provide combined image samplers to our callees, + // and in this case we need to add those as well. 
+ + pop_remap_parameters(); + + // Our callee has now been processed at least once. + // No point in doing it again. + callee.do_combined_parameters = false; + + // Take the combined parameters of the function on top of the stack before popping it. + auto &params = functions.top()->combined_parameters; + functions.pop(); + if (functions.empty()) + return true; + + auto &caller = *functions.top(); + if (caller.do_combined_parameters) + { + for (auto &param : params) + { + // A global image/sampler carries its ID directly; otherwise the stored value is used as an index into the call arguments. + VariableID image_id = param.global_image ? param.image_id : VariableID(args[param.image_id]); + VariableID sampler_id = param.global_sampler ? param.sampler_id : VariableID(args[param.sampler_id]); + + auto *i = compiler.maybe_get_backing_variable(image_id); + auto *s = compiler.maybe_get_backing_variable(sampler_id); + if (i) + image_id = i->self; + if (s) + sampler_id = s->self; + + register_combined_image_sampler(caller, 0, image_id, sampler_id, param.depth); + } + } + + return true; +} + +// Registers an (image, sampler) pair used inside caller; if at least one of the two is a parameter of caller, +// a combined image sampler parameter is added to caller unless an identical one is already present. +void Compiler::CombinedImageSamplerHandler::register_combined_image_sampler(SPIRFunction &caller, + VariableID combined_module_id, + VariableID image_id, VariableID sampler_id, + bool depth) +{ + // We now have a texture ID and a sampler ID which will either be found as a global + // or a parameter in our own function. If both are global, they will not need a parameter, + // otherwise, add it to our list. 
+ SPIRFunction::CombinedImageSamplerParameter param = { + 0u, image_id, sampler_id, true, true, depth, + }; + + // Look the image and the sampler up among the caller's own arguments; a hit means it is a parameter rather than a global. + auto texture_itr = find_if(begin(caller.arguments), end(caller.arguments), + [image_id](const SPIRFunction::Parameter &p) { return p.id == image_id; }); + auto sampler_itr = find_if(begin(caller.arguments), end(caller.arguments), + [sampler_id](const SPIRFunction::Parameter &p) { return p.id == sampler_id; }); + + if (texture_itr != end(caller.arguments)) + { + // For parameters, image_id is replaced by the argument index. + param.global_image = false; + param.image_id = uint32_t(texture_itr - begin(caller.arguments)); + } + + if (sampler_itr != end(caller.arguments)) + { + // For parameters, sampler_id is replaced by the argument index. + param.global_sampler = false; + param.sampler_id = uint32_t(sampler_itr - begin(caller.arguments)); + } + + if (param.global_image && param.global_sampler) + return; + + // Only add each image/sampler combination once per caller. + auto itr = find_if(begin(caller.combined_parameters), end(caller.combined_parameters), + [&param](const SPIRFunction::CombinedImageSamplerParameter &p) { + return param.image_id == p.image_id && param.sampler_id == p.sampler_id && + param.global_image == p.global_image && param.global_sampler == p.global_sampler; + }); + + if (itr == end(caller.combined_parameters)) + { + // Reserve three fresh IDs: the sampled image type, a pointer to that type, and the combined variable. + // NOTE(review): the compiler.set(...) calls below carry no explicit template argument in this text; presumably stripped during extraction - confirm against upstream. + uint32_t id = compiler.ir.increase_bound_by(3); + auto type_id = id + 0; + auto ptr_type_id = id + 1; + auto combined_id = id + 2; + auto &base = compiler.expression_type(image_id); + auto &type = compiler.set(type_id); + auto &ptr_type = compiler.set(ptr_type_id); + + type = base; + type.self = type_id; + type.basetype = SPIRType::SampledImage; + type.pointer = false; + type.storage = StorageClassGeneric; + type.image.depth = depth; + + ptr_type = type; + ptr_type.pointer = true; + ptr_type.storage = StorageClassUniformConstant; + ptr_type.parent_type = type_id; + + // Build new variable. + compiler.set(combined_id, ptr_type_id, StorageClassFunction, 0); + + // Inherit RelaxedPrecision. + // If any of OpSampledImage, underlying image or sampler are marked, inherit the decoration. 
+ bool relaxed_precision = + compiler.has_decoration(sampler_id, DecorationRelaxedPrecision) || + compiler.has_decoration(image_id, DecorationRelaxedPrecision) || + (combined_module_id && compiler.has_decoration(combined_module_id, DecorationRelaxedPrecision)); + + if (relaxed_precision) + compiler.set_decoration(combined_id, DecorationRelaxedPrecision); + + param.id = combined_id; + + compiler.set_name(combined_id, + join("SPIRV_Cross_Combined", compiler.to_name(image_id), compiler.to_name(sampler_id))); + + caller.combined_parameters.push_back(param); + caller.shadow_arguments.push_back({ ptr_type_id, combined_id, 0u, 0u, true }); + } +} + +bool Compiler::DummySamplerForCombinedImageHandler::handle(Op opcode, const uint32_t *args, uint32_t length) +{ + if (need_dummy_sampler) + { + // No need to traverse further, we know the result. + return false; + } + + switch (opcode) + { + case OpLoad: + { + if (length < 3) + return false; + + uint32_t result_type = args[0]; + + auto &type = compiler.get(result_type); + bool separate_image = + type.basetype == SPIRType::Image && type.image.sampled == 1 && type.image.dim != DimBuffer; + + // If not separate image, don't bother. + if (!separate_image) + return true; + + uint32_t id = args[1]; + uint32_t ptr = args[2]; + compiler.set(id, "", result_type, true); + compiler.register_read(id, ptr, true); + break; + } + + case OpImageFetch: + case OpImageQuerySizeLod: + case OpImageQuerySize: + case OpImageQueryLevels: + case OpImageQuerySamples: + { + // If we are fetching or querying LOD from a plain OpTypeImage, we must pre-combine with our dummy sampler. 
+ auto *var = compiler.maybe_get_backing_variable(args[2]); + if (var) + { + auto &type = compiler.get(var->basetype); + if (type.basetype == SPIRType::Image && type.image.sampled == 1 && type.image.dim != DimBuffer) + need_dummy_sampler = true; + } + + break; + } + + case OpInBoundsAccessChain: + case OpAccessChain: + case OpPtrAccessChain: + { + if (length < 3) + return false; + + uint32_t result_type = args[0]; + auto &type = compiler.get(result_type); + bool separate_image = + type.basetype == SPIRType::Image && type.image.sampled == 1 && type.image.dim != DimBuffer; + if (!separate_image) + return true; + + uint32_t id = args[1]; + uint32_t ptr = args[2]; + compiler.set(id, "", result_type, true); + compiler.register_read(id, ptr, true); + + // Other backends might use SPIRAccessChain for this later. + compiler.ir.ids[id].set_allow_type_rewrite(); + break; + } + + default: + break; + } + + return true; +} + +bool Compiler::CombinedImageSamplerHandler::handle(Op opcode, const uint32_t *args, uint32_t length) +{ + // We need to figure out where samplers and images are loaded from, so do only the bare bones compilation we need. + bool is_fetch = false; + + switch (opcode) + { + case OpLoad: + { + if (length < 3) + return false; + + uint32_t result_type = args[0]; + + auto &type = compiler.get(result_type); + bool separate_image = type.basetype == SPIRType::Image && type.image.sampled == 1; + bool separate_sampler = type.basetype == SPIRType::Sampler; + + // If not separate image or sampler, don't bother. 
+ if (!separate_image && !separate_sampler) + return true; + + uint32_t id = args[1]; + uint32_t ptr = args[2]; + compiler.set(id, "", result_type, true); + compiler.register_read(id, ptr, true); + return true; + } + + case OpInBoundsAccessChain: + case OpAccessChain: + case OpPtrAccessChain: + { + if (length < 3) + return false; + + // Technically, it is possible to have arrays of textures and arrays of samplers and combine them, but this becomes essentially + // impossible to implement, since we don't know which concrete sampler we are accessing. + // One potential way is to create a combinatorial explosion where N textures and M samplers are combined into N * M sampler2Ds, + // but this seems ridiculously complicated for a problem which is easy to work around. + // Checking access chains like this assumes we don't have samplers or textures inside uniform structs, but this makes no sense. + + uint32_t result_type = args[0]; + + auto &type = compiler.get(result_type); + bool separate_image = type.basetype == SPIRType::Image && type.image.sampled == 1; + bool separate_sampler = type.basetype == SPIRType::Sampler; + if (separate_sampler) + SPIRV_CROSS_THROW( + "Attempting to use arrays or structs of separate samplers. This is not possible to statically " + "remap to plain GLSL."); + + if (separate_image) + { + uint32_t id = args[1]; + uint32_t ptr = args[2]; + compiler.set(id, "", result_type, true); + compiler.register_read(id, ptr, true); + } + return true; + } + + case OpImageFetch: + case OpImageQuerySizeLod: + case OpImageQuerySize: + case OpImageQueryLevels: + case OpImageQuerySamples: + { + // If we are fetching from a plain OpTypeImage or querying LOD, we must pre-combine with our dummy sampler. 
+ auto *var = compiler.maybe_get_backing_variable(args[2]); + if (!var) + return true; + + auto &type = compiler.get(var->basetype); + if (type.basetype == SPIRType::Image && type.image.sampled == 1 && type.image.dim != DimBuffer) + { + if (compiler.dummy_sampler_id == 0) + SPIRV_CROSS_THROW("texelFetch without sampler was found, but no dummy sampler has been created with " + "build_dummy_sampler_for_combined_images()."); + + // Do it outside. + is_fetch = true; + break; + } + + return true; + } + + case OpSampledImage: + // Do it outside. + break; + + default: + return true; + } + + // Registers sampler2D calls used in case they are parameters so + // that their callees know which combined image samplers to propagate down the call stack. + if (!functions.empty()) + { + auto &callee = *functions.top(); + if (callee.do_combined_parameters) + { + uint32_t image_id = args[2]; + + auto *image = compiler.maybe_get_backing_variable(image_id); + if (image) + image_id = image->self; + + uint32_t sampler_id = is_fetch ? compiler.dummy_sampler_id : args[3]; + auto *sampler = compiler.maybe_get_backing_variable(sampler_id); + if (sampler) + sampler_id = sampler->self; + + uint32_t combined_id = args[1]; + + auto &combined_type = compiler.get(args[0]); + register_combined_image_sampler(callee, combined_id, image_id, sampler_id, combined_type.image.depth); + } + } + + // For function calls, we need to remap IDs which are function parameters into global variables. + // This information is statically known from the current place in the call stack. + // Function parameters are not necessarily pointers, so if we don't have a backing variable, remapping will know + // which backing variable the image/sample came from. + VariableID image_id = remap_parameter(args[2]); + VariableID sampler_id = is_fetch ? 
compiler.dummy_sampler_id : remap_parameter(args[3]); + + auto itr = find_if(begin(compiler.combined_image_samplers), end(compiler.combined_image_samplers), + [image_id, sampler_id](const CombinedImageSampler &combined) { + return combined.image_id == image_id && combined.sampler_id == sampler_id; + }); + + if (itr == end(compiler.combined_image_samplers)) + { + uint32_t sampled_type; + uint32_t combined_module_id; + if (is_fetch) + { + // Have to invent the sampled image type. + sampled_type = compiler.ir.increase_bound_by(1); + auto &type = compiler.set(sampled_type); + type = compiler.expression_type(args[2]); + type.self = sampled_type; + type.basetype = SPIRType::SampledImage; + type.image.depth = false; + combined_module_id = 0; + } + else + { + sampled_type = args[0]; + combined_module_id = args[1]; + } + + auto id = compiler.ir.increase_bound_by(2); + auto type_id = id + 0; + auto combined_id = id + 1; + + // Make a new type, pointer to OpTypeSampledImage, so we can make a variable of this type. + // We will probably have this type lying around, but it doesn't hurt to make duplicates for internal purposes. + auto &type = compiler.set(type_id); + auto &base = compiler.get(sampled_type); + type = base; + type.pointer = true; + type.storage = StorageClassUniformConstant; + type.parent_type = type_id; + + // Build new variable. + compiler.set(combined_id, type_id, StorageClassUniformConstant, 0); + + // Inherit RelaxedPrecision (and potentially other useful flags if deemed relevant). + // If any of OpSampledImage, underlying image or sampler are marked, inherit the decoration. 
+ bool relaxed_precision = + (sampler_id && compiler.has_decoration(sampler_id, DecorationRelaxedPrecision)) || + (image_id && compiler.has_decoration(image_id, DecorationRelaxedPrecision)) || + (combined_module_id && compiler.has_decoration(combined_module_id, DecorationRelaxedPrecision)); + + if (relaxed_precision) + compiler.set_decoration(combined_id, DecorationRelaxedPrecision); + + // Propagate the array type for the original image as well. + auto *var = compiler.maybe_get_backing_variable(image_id); + if (var) + { + auto &parent_type = compiler.get(var->basetype); + type.array = parent_type.array; + type.array_size_literal = parent_type.array_size_literal; + } + + compiler.combined_image_samplers.push_back({ combined_id, image_id, sampler_id }); + } + + return true; +} + +VariableID Compiler::build_dummy_sampler_for_combined_images() +{ + DummySamplerForCombinedImageHandler handler(*this); + traverse_all_reachable_opcodes(get(ir.default_entry_point), handler); + if (handler.need_dummy_sampler) + { + uint32_t offset = ir.increase_bound_by(3); + auto type_id = offset + 0; + auto ptr_type_id = offset + 1; + auto var_id = offset + 2; + + SPIRType sampler_type; + auto &sampler = set(type_id); + sampler.basetype = SPIRType::Sampler; + + auto &ptr_sampler = set(ptr_type_id); + ptr_sampler = sampler; + ptr_sampler.self = type_id; + ptr_sampler.storage = StorageClassUniformConstant; + ptr_sampler.pointer = true; + ptr_sampler.parent_type = type_id; + + set(var_id, ptr_type_id, StorageClassUniformConstant, 0); + set_name(var_id, "SPIRV_Cross_DummySampler"); + dummy_sampler_id = var_id; + return var_id; + } + else + return 0; +} + +void Compiler::build_combined_image_samplers() +{ + ir.for_each_typed_id([&](uint32_t, SPIRFunction &func) { + func.combined_parameters.clear(); + func.shadow_arguments.clear(); + func.do_combined_parameters = true; + }); + + combined_image_samplers.clear(); + CombinedImageSamplerHandler handler(*this); + 
traverse_all_reachable_opcodes(get(ir.default_entry_point), handler); +} + +SmallVector Compiler::get_specialization_constants() const +{ + SmallVector spec_consts; + ir.for_each_typed_id([&](uint32_t, const SPIRConstant &c) { + if (c.specialization && has_decoration(c.self, DecorationSpecId)) + spec_consts.push_back({ c.self, get_decoration(c.self, DecorationSpecId) }); + }); + return spec_consts; +} + +SPIRConstant &Compiler::get_constant(ConstantID id) +{ + return get(id); +} + +const SPIRConstant &Compiler::get_constant(ConstantID id) const +{ + return get(id); +} + +static bool exists_unaccessed_path_to_return(const CFG &cfg, uint32_t block, const unordered_set &blocks) +{ + // This block accesses the variable. + if (blocks.find(block) != end(blocks)) + return false; + + // We are at the end of the CFG. + if (cfg.get_succeeding_edges(block).empty()) + return true; + + // If any of our successors have a path to the end, there exists a path from block. + for (auto &succ : cfg.get_succeeding_edges(block)) + if (exists_unaccessed_path_to_return(cfg, succ, blocks)) + return true; + + return false; +} + +void Compiler::analyze_parameter_preservation( + SPIRFunction &entry, const CFG &cfg, const unordered_map> &variable_to_blocks, + const unordered_map> &complete_write_blocks) +{ + for (auto &arg : entry.arguments) + { + // Non-pointers are always inputs. + auto &type = get(arg.type); + if (!type.pointer) + continue; + + // Opaque argument types are always in + bool potential_preserve; + switch (type.basetype) + { + case SPIRType::Sampler: + case SPIRType::Image: + case SPIRType::SampledImage: + case SPIRType::AtomicCounter: + potential_preserve = false; + break; + + default: + potential_preserve = true; + break; + } + + if (!potential_preserve) + continue; + + auto itr = variable_to_blocks.find(arg.id); + if (itr == end(variable_to_blocks)) + { + // Variable is never accessed. 
+ continue; + } + + // We have accessed a variable, but there was no complete writes to that variable. + // We deduce that we must preserve the argument. + itr = complete_write_blocks.find(arg.id); + if (itr == end(complete_write_blocks)) + { + arg.read_count++; + continue; + } + + // If there is a path through the CFG where no block completely writes to the variable, the variable will be in an undefined state + // when the function returns. We therefore need to implicitly preserve the variable in case there are writers in the function. + // Major case here is if a function is + // void foo(int &var) { if (cond) var = 10; } + // Using read/write counts, we will think it's just an out variable, but it really needs to be inout, + // because if we don't write anything whatever we put into the function must return back to the caller. + if (exists_unaccessed_path_to_return(cfg, entry.entry_block, itr->second)) + arg.read_count++; + } +} + +Compiler::AnalyzeVariableScopeAccessHandler::AnalyzeVariableScopeAccessHandler(Compiler &compiler_, + SPIRFunction &entry_) + : compiler(compiler_) + , entry(entry_) +{ +} + +bool Compiler::AnalyzeVariableScopeAccessHandler::follow_function_call(const SPIRFunction &) +{ + // Only analyze within this function. + return false; +} + +void Compiler::AnalyzeVariableScopeAccessHandler::set_current_block(const SPIRBlock &block) +{ + current_block = █ + + // If we're branching to a block which uses OpPhi, in GLSL + // this will be a variable write when we branch, + // so we need to track access to these variables as well to + // have a complete picture. + const auto test_phi = [this, &block](uint32_t to) { + auto &next = compiler.get(to); + for (auto &phi : next.phi_variables) + { + if (phi.parent == block.self) + { + accessed_variables_to_block[phi.function_variable].insert(block.self); + // Phi variables are also accessed in our target branch block. 
+ accessed_variables_to_block[phi.function_variable].insert(next.self); + + notify_variable_access(phi.local_variable, block.self); + } + } + }; + + switch (block.terminator) + { + case SPIRBlock::Direct: + notify_variable_access(block.condition, block.self); + test_phi(block.next_block); + break; + + case SPIRBlock::Select: + notify_variable_access(block.condition, block.self); + test_phi(block.true_block); + test_phi(block.false_block); + break; + + case SPIRBlock::MultiSelect: + notify_variable_access(block.condition, block.self); + for (auto &target : block.cases) + test_phi(target.block); + if (block.default_block) + test_phi(block.default_block); + break; + + default: + break; + } +} + +void Compiler::AnalyzeVariableScopeAccessHandler::notify_variable_access(uint32_t id, uint32_t block) +{ + if (id == 0) + return; + + // Access chains used in multiple blocks mean hoisting all the variables used to construct the access chain as not all backends can use pointers. + auto itr = access_chain_children.find(id); + if (itr != end(access_chain_children)) + for (auto child_id : itr->second) + notify_variable_access(child_id, block); + + if (id_is_phi_variable(id)) + accessed_variables_to_block[id].insert(block); + else if (id_is_potential_temporary(id)) + accessed_temporaries_to_block[id].insert(block); +} + +bool Compiler::AnalyzeVariableScopeAccessHandler::id_is_phi_variable(uint32_t id) const +{ + if (id >= compiler.get_current_id_bound()) + return false; + auto *var = compiler.maybe_get(id); + return var && var->phi_variable; +} + +bool Compiler::AnalyzeVariableScopeAccessHandler::id_is_potential_temporary(uint32_t id) const +{ + if (id >= compiler.get_current_id_bound()) + return false; + + // Temporaries are not created before we start emitting code. 
+ return compiler.ir.ids[id].empty() || (compiler.ir.ids[id].get_type() == TypeExpression); +} + +bool Compiler::AnalyzeVariableScopeAccessHandler::handle(spv::Op op, const uint32_t *args, uint32_t length) +{ + // Keep track of the types of temporaries, so we can hoist them out as necessary. + uint32_t result_type, result_id; + if (compiler.instruction_to_result_type(result_type, result_id, op, args, length)) + result_id_to_type[result_id] = result_type; + + switch (op) + { + case OpStore: + { + if (length < 2) + return false; + + ID ptr = args[0]; + auto *var = compiler.maybe_get_backing_variable(ptr); + + // If we store through an access chain, we have a partial write. + if (var) + { + accessed_variables_to_block[var->self].insert(current_block->self); + if (var->self == ptr) + complete_write_variables_to_block[var->self].insert(current_block->self); + else + partial_write_variables_to_block[var->self].insert(current_block->self); + } + + // args[0] might be an access chain we have to track use of. + notify_variable_access(args[0], current_block->self); + // Might try to store a Phi variable here. + notify_variable_access(args[1], current_block->self); + break; + } + + case OpAccessChain: + case OpInBoundsAccessChain: + case OpPtrAccessChain: + { + if (length < 3) + return false; + + // Access chains used in multiple blocks mean hoisting all the variables used to construct the access chain as not all backends can use pointers. + uint32_t ptr = args[2]; + auto *var = compiler.maybe_get(ptr); + if (var) + { + accessed_variables_to_block[var->self].insert(current_block->self); + access_chain_children[args[1]].insert(var->self); + } + + // args[2] might be another access chain we have to track use of. + for (uint32_t i = 2; i < length; i++) + { + notify_variable_access(args[i], current_block->self); + access_chain_children[args[1]].insert(args[i]); + } + + // Also keep track of the access chain pointer itself. 
+ // In exceptionally rare cases, we can end up with a case where + // the access chain is generated in the loop body, but is consumed in continue block. + // This means we need complex loop workarounds, and we must detect this via CFG analysis. + notify_variable_access(args[1], current_block->self); + + // The result of an access chain is a fixed expression and is not really considered a temporary. + auto &e = compiler.set(args[1], "", args[0], true); + auto *backing_variable = compiler.maybe_get_backing_variable(ptr); + e.loaded_from = backing_variable ? VariableID(backing_variable->self) : VariableID(0); + + // Other backends might use SPIRAccessChain for this later. + compiler.ir.ids[args[1]].set_allow_type_rewrite(); + access_chain_expressions.insert(args[1]); + break; + } + + case OpCopyMemory: + { + if (length < 2) + return false; + + ID lhs = args[0]; + ID rhs = args[1]; + auto *var = compiler.maybe_get_backing_variable(lhs); + + // If we store through an access chain, we have a partial write. + if (var) + { + accessed_variables_to_block[var->self].insert(current_block->self); + if (var->self == lhs) + complete_write_variables_to_block[var->self].insert(current_block->self); + else + partial_write_variables_to_block[var->self].insert(current_block->self); + } + + // args[0:1] might be access chains we have to track use of. + for (uint32_t i = 0; i < 2; i++) + notify_variable_access(args[i], current_block->self); + + var = compiler.maybe_get_backing_variable(rhs); + if (var) + accessed_variables_to_block[var->self].insert(current_block->self); + break; + } + + case OpCopyObject: + { + if (length < 3) + return false; + + auto *var = compiler.maybe_get_backing_variable(args[2]); + if (var) + accessed_variables_to_block[var->self].insert(current_block->self); + + // Might be an access chain which we have to keep track of. 
+ notify_variable_access(args[1], current_block->self); + if (access_chain_expressions.count(args[2])) + access_chain_expressions.insert(args[1]); + + // Might try to copy a Phi variable here. + notify_variable_access(args[2], current_block->self); + break; + } + + case OpLoad: + { + if (length < 3) + return false; + uint32_t ptr = args[2]; + auto *var = compiler.maybe_get_backing_variable(ptr); + if (var) + accessed_variables_to_block[var->self].insert(current_block->self); + + // Loaded value is a temporary. + notify_variable_access(args[1], current_block->self); + + // Might be an access chain we have to track use of. + notify_variable_access(args[2], current_block->self); + break; + } + + case OpFunctionCall: + { + if (length < 3) + return false; + + // Return value may be a temporary. + if (compiler.get_type(args[0]).basetype != SPIRType::Void) + notify_variable_access(args[1], current_block->self); + + length -= 3; + args += 3; + + for (uint32_t i = 0; i < length; i++) + { + auto *var = compiler.maybe_get_backing_variable(args[i]); + if (var) + { + accessed_variables_to_block[var->self].insert(current_block->self); + // Assume we can get partial writes to this variable. + partial_write_variables_to_block[var->self].insert(current_block->self); + } + + // Cannot easily prove if argument we pass to a function is completely written. + // Usually, functions write to a dummy variable, + // which is then copied to in full to the real argument. + + // Might try to copy a Phi variable here. + notify_variable_access(args[i], current_block->self); + } + break; + } + + case OpExtInst: + { + for (uint32_t i = 4; i < length; i++) + notify_variable_access(args[i], current_block->self); + notify_variable_access(args[1], current_block->self); + break; + } + + case OpArrayLength: + case OpLine: + case OpNoLine: + // Uses literals, but cannot be a phi variable or temporary, so ignore. + break; + + // Atomics shouldn't be able to access function-local variables. 
+ // Some GLSL builtins access a pointer. + + case OpCompositeInsert: + case OpVectorShuffle: + // Specialize for opcode which contains literals. + for (uint32_t i = 1; i < 4; i++) + notify_variable_access(args[i], current_block->self); + break; + + case OpCompositeExtract: + // Specialize for opcode which contains literals. + for (uint32_t i = 1; i < 3; i++) + notify_variable_access(args[i], current_block->self); + break; + + case OpImageWrite: + for (uint32_t i = 0; i < length; i++) + { + // Argument 3 is a literal. + if (i != 3) + notify_variable_access(args[i], current_block->self); + } + break; + + case OpImageSampleImplicitLod: + case OpImageSampleExplicitLod: + case OpImageSparseSampleImplicitLod: + case OpImageSparseSampleExplicitLod: + case OpImageSampleProjImplicitLod: + case OpImageSampleProjExplicitLod: + case OpImageSparseSampleProjImplicitLod: + case OpImageSparseSampleProjExplicitLod: + case OpImageFetch: + case OpImageSparseFetch: + case OpImageRead: + case OpImageSparseRead: + for (uint32_t i = 1; i < length; i++) + { + // Argument 4 is a literal. + if (i != 4) + notify_variable_access(args[i], current_block->self); + } + break; + + case OpImageSampleDrefImplicitLod: + case OpImageSampleDrefExplicitLod: + case OpImageSparseSampleDrefImplicitLod: + case OpImageSparseSampleDrefExplicitLod: + case OpImageSampleProjDrefImplicitLod: + case OpImageSampleProjDrefExplicitLod: + case OpImageSparseSampleProjDrefImplicitLod: + case OpImageSparseSampleProjDrefExplicitLod: + case OpImageGather: + case OpImageSparseGather: + case OpImageDrefGather: + case OpImageSparseDrefGather: + for (uint32_t i = 1; i < length; i++) + { + // Argument 5 is a literal. + if (i != 5) + notify_variable_access(args[i], current_block->self); + } + break; + + default: + { + // Rather dirty way of figuring out where Phi variables are used. + // As long as only IDs are used, we can scan through instructions and try to find any evidence that + // the ID of a variable has been used. 
+ // There are potential false positives here where a literal is used in-place of an ID, + // but worst case, it does not affect the correctness of the compile. + // Exhaustive analysis would be better here, but it's not worth it for now. + for (uint32_t i = 0; i < length; i++) + notify_variable_access(args[i], current_block->self); + break; + } + } + return true; +} + +Compiler::StaticExpressionAccessHandler::StaticExpressionAccessHandler(Compiler &compiler_, uint32_t variable_id_) + : compiler(compiler_) + , variable_id(variable_id_) +{ +} + +bool Compiler::StaticExpressionAccessHandler::follow_function_call(const SPIRFunction &) +{ + return false; +} + +bool Compiler::StaticExpressionAccessHandler::handle(spv::Op op, const uint32_t *args, uint32_t length) +{ + switch (op) + { + case OpStore: + if (length < 2) + return false; + if (args[0] == variable_id) + { + static_expression = args[1]; + write_count++; + } + break; + + case OpLoad: + if (length < 3) + return false; + if (args[2] == variable_id && static_expression == 0) // Tried to read from variable before it was initialized. + return false; + break; + + case OpAccessChain: + case OpInBoundsAccessChain: + case OpPtrAccessChain: + if (length < 3) + return false; + if (args[2] == variable_id) // If we try to access chain our candidate variable before we store to it, bail. + return false; + break; + + default: + break; + } + + return true; +} + +void Compiler::find_function_local_luts(SPIRFunction &entry, const AnalyzeVariableScopeAccessHandler &handler, + bool single_function) +{ + auto &cfg = *function_cfgs.find(entry.self)->second; + + // For each variable which is statically accessed. + for (auto &accessed_var : handler.accessed_variables_to_block) + { + auto &blocks = accessed_var.second; + auto &var = get(accessed_var.first); + auto &type = expression_type(accessed_var.first); + + // Only consider function local variables here. 
+ // If we only have a single function in our CFG, private storage is also fine, + // since it behaves like a function local variable. + bool allow_lut = var.storage == StorageClassFunction || (single_function && var.storage == StorageClassPrivate); + if (!allow_lut) + continue; + + // We cannot be a phi variable. + if (var.phi_variable) + continue; + + // Only consider arrays here. + if (type.array.empty()) + continue; + + // If the variable has an initializer, make sure it is a constant expression. + uint32_t static_constant_expression = 0; + if (var.initializer) + { + if (ir.ids[var.initializer].get_type() != TypeConstant) + continue; + static_constant_expression = var.initializer; + + // There can be no stores to this variable, we have now proved we have a LUT. + if (handler.complete_write_variables_to_block.count(var.self) != 0 || + handler.partial_write_variables_to_block.count(var.self) != 0) + continue; + } + else + { + // We can have one, and only one write to the variable, and that write needs to be a constant. + + // No partial writes allowed. + if (handler.partial_write_variables_to_block.count(var.self) != 0) + continue; + + auto itr = handler.complete_write_variables_to_block.find(var.self); + + // No writes? + if (itr == end(handler.complete_write_variables_to_block)) + continue; + + // We write to the variable in more than one block. + auto &write_blocks = itr->second; + if (write_blocks.size() != 1) + continue; + + // The write needs to happen in the dominating block. + DominatorBuilder builder(cfg); + for (auto &block : blocks) + builder.add_block(block); + uint32_t dominator = builder.get_dominator(); + + // The complete write happened in a branch or similar, cannot deduce static expression. + if (write_blocks.count(dominator) == 0) + continue; + + // Find the static expression for this variable. 
+ StaticExpressionAccessHandler static_expression_handler(*this, var.self); + traverse_all_reachable_opcodes(get(dominator), static_expression_handler); + + // We want one, and exactly one write + if (static_expression_handler.write_count != 1 || static_expression_handler.static_expression == 0) + continue; + + // Is it a constant expression? + if (ir.ids[static_expression_handler.static_expression].get_type() != TypeConstant) + continue; + + // We found a LUT! + static_constant_expression = static_expression_handler.static_expression; + } + + get(static_constant_expression).is_used_as_lut = true; + var.static_expression = static_constant_expression; + var.statically_assigned = true; + var.remapped_variable = true; + } +} + +void Compiler::analyze_variable_scope(SPIRFunction &entry, AnalyzeVariableScopeAccessHandler &handler) +{ + // First, we map out all variable access within a function. + // Essentially a map of block -> { variables accessed in the basic block } + traverse_all_reachable_opcodes(entry, handler); + + auto &cfg = *function_cfgs.find(entry.self)->second; + + // Analyze if there are parameters which need to be implicitly preserved with an "in" qualifier. + analyze_parameter_preservation(entry, cfg, handler.accessed_variables_to_block, + handler.complete_write_variables_to_block); + + unordered_map potential_loop_variables; + + // Find the loop dominator block for each block. + for (auto &block_id : entry.blocks) + { + auto &block = get(block_id); + + auto itr = ir.continue_block_to_loop_header.find(block_id); + if (itr != end(ir.continue_block_to_loop_header) && itr->second != block_id) + { + // Continue block might be unreachable in the CFG, but we still like to know the loop dominator. + // Edge case is when continue block is also the loop header, don't set the dominator in this case. 
+ block.loop_dominator = itr->second; + } + else + { + uint32_t loop_dominator = cfg.find_loop_dominator(block_id); + if (loop_dominator != block_id) + block.loop_dominator = loop_dominator; + else + block.loop_dominator = SPIRBlock::NoDominator; + } + } + + // For each variable which is statically accessed. + for (auto &var : handler.accessed_variables_to_block) + { + // Only deal with variables which are considered local variables in this function. + if (find(begin(entry.local_variables), end(entry.local_variables), VariableID(var.first)) == + end(entry.local_variables)) + continue; + + DominatorBuilder builder(cfg); + auto &blocks = var.second; + auto &type = expression_type(var.first); + + // Figure out which block is dominating all accesses of those variables. + for (auto &block : blocks) + { + // If we're accessing a variable inside a continue block, this variable might be a loop variable. + // We can only use loop variables with scalars, as we cannot track static expressions for vectors. + if (is_continue(block)) + { + // Potentially awkward case to check for. + // We might have a variable inside a loop, which is touched by the continue block, + // but is not actually a loop variable. + // The continue block is dominated by the inner part of the loop, which does not make sense in high-level + // language output because it will be declared before the body, + // so we will have to lift the dominator up to the relevant loop header instead. + builder.add_block(ir.continue_block_to_loop_header[block]); + + // Arrays or structs cannot be loop variables. + if (type.vecsize == 1 && type.columns == 1 && type.basetype != SPIRType::Struct && type.array.empty()) + { + // The variable is used in multiple continue blocks, this is not a loop + // candidate, signal that by setting block to -1u. 
+ auto &potential = potential_loop_variables[var.first]; + + if (potential == 0) + potential = block; + else + potential = ~(0u); + } + } + builder.add_block(block); + } + + builder.lift_continue_block_dominator(); + + // Add it to a per-block list of variables. + BlockID dominating_block = builder.get_dominator(); + + // For variables whose dominating block is inside a loop, there is a risk that these variables + // actually need to be preserved across loop iterations. We can express this by adding + // a "read" access to the loop header. + // In the dominating block, we must see an OpStore or equivalent as the first access of an OpVariable. + // Should that fail, we look for the outermost loop header and tack on an access there. + // Phi nodes cannot have this problem. + if (dominating_block) + { + auto &variable = get(var.first); + if (!variable.phi_variable) + { + auto *block = &get(dominating_block); + bool preserve = may_read_undefined_variable_in_block(*block, var.first); + if (preserve) + { + // Find the outermost loop scope. + while (block->loop_dominator != BlockID(SPIRBlock::NoDominator)) + block = &get(block->loop_dominator); + + if (block->self != dominating_block) + { + builder.add_block(block->self); + dominating_block = builder.get_dominator(); + } + } + } + } + + // If all blocks here are dead code, this will be 0, so the variable in question + // will be completely eliminated. + if (dominating_block) + { + auto &block = get(dominating_block); + block.dominated_variables.push_back(var.first); + get(var.first).dominator = dominating_block; + } + } + + for (auto &var : handler.accessed_temporaries_to_block) + { + auto itr = handler.result_id_to_type.find(var.first); + + if (itr == end(handler.result_id_to_type)) + { + // We found a false positive ID being used, ignore. + // This should probably be an assert. + continue; + } + + // There is no point in doing domination analysis for opaque types. 
+ auto &type = get(itr->second); + if (type_is_opaque_value(type)) + continue; + + DominatorBuilder builder(cfg); + bool force_temporary = false; + bool used_in_header_hoisted_continue_block = false; + + // Figure out which block is dominating all accesses of those temporaries. + auto &blocks = var.second; + for (auto &block : blocks) + { + builder.add_block(block); + + if (blocks.size() != 1 && is_continue(block)) + { + // The risk here is that inner loop can dominate the continue block. + // Any temporary we access in the continue block must be declared before the loop. + // This is moot for complex loops however. + auto &loop_header_block = get(ir.continue_block_to_loop_header[block]); + assert(loop_header_block.merge == SPIRBlock::MergeLoop); + builder.add_block(loop_header_block.self); + used_in_header_hoisted_continue_block = true; + } + } + + uint32_t dominating_block = builder.get_dominator(); + + if (blocks.size() != 1 && is_single_block_loop(dominating_block)) + { + // Awkward case, because the loop header is also the continue block, + // so hoisting to loop header does not help. + force_temporary = true; + } + + if (dominating_block) + { + // If we touch a variable in the dominating block, this is the expected setup. + // SPIR-V normally mandates this, but we have extra cases for temporary use inside loops. + bool first_use_is_dominator = blocks.count(dominating_block) != 0; + + if (!first_use_is_dominator || force_temporary) + { + if (handler.access_chain_expressions.count(var.first)) + { + // Exceptionally rare case. + // We cannot declare temporaries of access chains (except on MSL perhaps with pointers). + // Rather than do that, we force the indexing expressions to be declared in the right scope by + // tracking their usage to that end. There is no temporary to hoist. + // However, we still need to observe declaration order of the access chain. 
+ + if (used_in_header_hoisted_continue_block) + { + // For this scenario, we used an access chain inside a continue block where we also registered an access to header block. + // This is a problem as we need to declare an access chain properly first with full definition. + // We cannot use temporaries for these expressions, + // so we must make sure the access chain is declared ahead of time. + // Force a complex for loop to deal with this. + // TODO: Out-of-order declaring for loops where continue blocks are emitted last might be another option. + auto &loop_header_block = get(dominating_block); + assert(loop_header_block.merge == SPIRBlock::MergeLoop); + loop_header_block.complex_continue = true; + } + } + else + { + // This should be very rare, but if we try to declare a temporary inside a loop, + // and that temporary is used outside the loop as well (spirv-opt inliner likes this) + // we should actually emit the temporary outside the loop. + hoisted_temporaries.insert(var.first); + forced_temporaries.insert(var.first); + + auto &block_temporaries = get(dominating_block).declare_temporary; + block_temporaries.emplace_back(handler.result_id_to_type[var.first], var.first); + } + } + else if (blocks.size() > 1) + { + // Keep track of the temporary as we might have to declare this temporary. + // This can happen if the loop header dominates a temporary, but we have a complex fallback loop. + // In this case, the header is actually inside the for (;;) {} block, and we have problems. + // What we need to do is hoist the temporaries outside the for (;;) {} block in case the header block + // declares the temporary. + auto &block_temporaries = get(dominating_block).potential_declare_temporary; + block_temporaries.emplace_back(handler.result_id_to_type[var.first], var.first); + } + } + } + + unordered_set seen_blocks; + + // Now, try to analyze whether or not these variables are actually loop variables. 
+ for (auto &loop_variable : potential_loop_variables) + { + auto &var = get(loop_variable.first); + auto dominator = var.dominator; + BlockID block = loop_variable.second; + + // The variable was accessed in multiple continue blocks, ignore. + if (block == BlockID(~(0u)) || block == BlockID(0)) + continue; + + // Dead code. + if (dominator == ID(0)) + continue; + + BlockID header = 0; + + // Find the loop header for this block if we are a continue block. + { + auto itr = ir.continue_block_to_loop_header.find(block); + if (itr != end(ir.continue_block_to_loop_header)) + { + header = itr->second; + } + else if (get(block).continue_block == block) + { + // Also check for self-referential continue block. + header = block; + } + } + + assert(header); + auto &header_block = get(header); + auto &blocks = handler.accessed_variables_to_block[loop_variable.first]; + + // If a loop variable is not used before the loop, it's probably not a loop variable. + bool has_accessed_variable = blocks.count(header) != 0; + + // Now, there are two conditions we need to meet for the variable to be a loop variable. + // 1. The dominating block must have a branch-free path to the loop header, + // this way we statically know which expression should be part of the loop variable initializer. + + // Walk from the dominator, if there is one straight edge connecting + // dominator and loop header, we statically know the loop initializer. 
+ bool static_loop_init = true; + while (dominator != header) + { + if (blocks.count(dominator) != 0) + has_accessed_variable = true; + + auto &succ = cfg.get_succeeding_edges(dominator); + if (succ.size() != 1) + { + static_loop_init = false; + break; + } + + auto &pred = cfg.get_preceding_edges(succ.front()); + if (pred.size() != 1 || pred.front() != dominator) + { + static_loop_init = false; + break; + } + + dominator = succ.front(); + } + + if (!static_loop_init || !has_accessed_variable) + continue; + + // The second condition we need to meet is that no access after the loop + // merge can occur. Walk the CFG to see if we find anything. + + seen_blocks.clear(); + cfg.walk_from(seen_blocks, header_block.merge_block, [&](uint32_t walk_block) -> bool { + // We found a block which accesses the variable outside the loop. + if (blocks.find(walk_block) != end(blocks)) + static_loop_init = false; + return true; + }); + + if (!static_loop_init) + continue; + + // We have a loop variable. + header_block.loop_variables.push_back(loop_variable.first); + // Need to sort here as variables come from an unordered container, and pushing stuff in wrong order + // will break reproducibility in regression runs. + sort(begin(header_block.loop_variables), end(header_block.loop_variables)); + get(loop_variable.first).loop_variable = true; + } +} + +// Walks the instructions of block in order and reports whether var may be read before it is written there. +// Returns false only if an OpStore / OpCopyMemory targeting var is seen before any potential read. +// Returns true on the first potential read (access chain, select, phi, copy / load, call argument), +// and also when the block never touches var in any of these ways. +bool Compiler::may_read_undefined_variable_in_block(const SPIRBlock &block, uint32_t var) +{ + for (auto &op : block.ops) + { + auto *ops = stream(op); + switch (op.op) + { + // The op.length checks keep every operand read inside the instruction, + // matching the guards already used for OpPhi and OpFunctionCall below. + case OpStore: + case OpCopyMemory: + if (op.length >= 1 && ops[0] == var) + return false; + break; + + case OpAccessChain: + case OpInBoundsAccessChain: + case OpPtrAccessChain: + // Access chains are generally used to partially read and write. It's too hard to analyze + // if all constituents are written fully before continuing, so just assume it's preserved. + // This is the same as the parameter preservation analysis.
+ if (op.length >= 3 && ops[2] == var) + return true; + break; + + case OpSelect: + // Variable pointers. + // We might read before writing. + if (op.length >= 5 && (ops[3] == var || ops[4] == var)) + return true; + break; + + case OpPhi: + { + // Variable pointers. + // We might read before writing. + if (op.length < 2) + break; + + uint32_t count = op.length - 2; + for (uint32_t i = 0; i < count; i += 2) + if (ops[i + 2] == var) + return true; + break; + } + + case OpCopyObject: + case OpLoad: + if (op.length >= 3 && ops[2] == var) + return true; + break; + + case OpFunctionCall: + { + if (op.length < 3) + break; + + // May read before writing. + uint32_t count = op.length - 3; + for (uint32_t i = 0; i < count; i++) + if (ops[i + 3] == var) + return true; + break; + } + + default: + break; + } + } + + // Not accessed somehow, at least not in a usual fashion. + // It's likely accessed in a branch, so assume we must preserve. + return true; +} + +Bitset Compiler::get_buffer_block_flags(VariableID id) const +{ + return ir.get_buffer_block_flags(get(id)); +} + +bool Compiler::get_common_basic_type(const SPIRType &type, SPIRType::BaseType &base_type) +{ + if (type.basetype == SPIRType::Struct) + { + base_type = SPIRType::Unknown; + for (auto &member_type : type.member_types) + { + SPIRType::BaseType member_base; + if (!get_common_basic_type(get(member_type), member_base)) + return false; + + if (base_type == SPIRType::Unknown) + base_type = member_base; + else if (base_type != member_base) + return false; + } + return true; + } + else + { + base_type = type.basetype; + return true; + } +} + +void Compiler::ActiveBuiltinHandler::handle_builtin(const SPIRType &type, BuiltIn builtin, + const Bitset &decoration_flags) +{ + // If used, we will need to explicitly declare a new array size for these builtins.
+ + if (builtin == BuiltInClipDistance) + { + if (!type.array_size_literal[0]) + SPIRV_CROSS_THROW("Array size for ClipDistance must be a literal."); + uint32_t array_size = type.array[0]; + if (array_size == 0) + SPIRV_CROSS_THROW("Array size for ClipDistance must not be unsized."); + compiler.clip_distance_count = array_size; + } + else if (builtin == BuiltInCullDistance) + { + if (!type.array_size_literal[0]) + SPIRV_CROSS_THROW("Array size for CullDistance must be a literal."); + uint32_t array_size = type.array[0]; + if (array_size == 0) + SPIRV_CROSS_THROW("Array size for CullDistance must not be unsized."); + compiler.cull_distance_count = array_size; + } + else if (builtin == BuiltInPosition) + { + if (decoration_flags.get(DecorationInvariant)) + compiler.position_invariant = true; + } +} + +bool Compiler::ActiveBuiltinHandler::handle(spv::Op opcode, const uint32_t *args, uint32_t length) +{ + const auto add_if_builtin = [&](uint32_t id) { + // Only handles variables here. + // Builtins which are part of a block are handled in AccessChain. + auto *var = compiler.maybe_get(id); + auto &decorations = compiler.ir.meta[id].decoration; + if (var && decorations.builtin) + { + auto &type = compiler.get(var->basetype); + auto &flags = + type.storage == StorageClassInput ? 
compiler.active_input_builtins : compiler.active_output_builtins; + flags.set(decorations.builtin_type); + handle_builtin(type, decorations.builtin_type, decorations.decoration_flags); + } + }; + + switch (opcode) + { + case OpStore: + if (length < 1) + return false; + + add_if_builtin(args[0]); + break; + + case OpCopyMemory: + if (length < 2) + return false; + + add_if_builtin(args[0]); + add_if_builtin(args[1]); + break; + + case OpCopyObject: + case OpLoad: + if (length < 3) + return false; + + add_if_builtin(args[2]); + break; + + case OpSelect: + if (length < 5) + return false; + + add_if_builtin(args[3]); + add_if_builtin(args[4]); + break; + + case OpPhi: + { + if (length < 2) + return false; + + uint32_t count = length - 2; + args += 2; + for (uint32_t i = 0; i < count; i += 2) + add_if_builtin(args[i]); + break; + } + + case OpFunctionCall: + { + if (length < 3) + return false; + + uint32_t count = length - 3; + args += 3; + for (uint32_t i = 0; i < count; i++) + add_if_builtin(args[i]); + break; + } + + case OpAccessChain: + case OpInBoundsAccessChain: + case OpPtrAccessChain: + { + if (length < 4) + return false; + + // Only consider global variables, cannot consider variables in functions yet, or other + // access chains as they have not been created yet. + auto *var = compiler.maybe_get(args[2]); + if (!var) + break; + + // Required if we access chain into builtins like gl_GlobalInvocationID. + add_if_builtin(args[2]); + + // Start traversing type hierarchy at the proper non-pointer types. + auto *type = &compiler.get_variable_data_type(*var); + + auto &flags = + var->storage == StorageClassInput ? 
compiler.active_input_builtins : compiler.active_output_builtins; + + uint32_t count = length - 3; + args += 3; + for (uint32_t i = 0; i < count; i++) + { + // Pointers + if (opcode == OpPtrAccessChain && i == 0) + { + type = &compiler.get(type->parent_type); + continue; + } + + // Arrays + if (!type->array.empty()) + { + type = &compiler.get(type->parent_type); + } + // Structs + else if (type->basetype == SPIRType::Struct) + { + uint32_t index = compiler.get(args[i]).scalar(); + + if (index < uint32_t(compiler.ir.meta[type->self].members.size())) + { + auto &decorations = compiler.ir.meta[type->self].members[index]; + if (decorations.builtin) + { + flags.set(decorations.builtin_type); + handle_builtin(compiler.get(type->member_types[index]), decorations.builtin_type, + decorations.decoration_flags); + } + } + + type = &compiler.get(type->member_types[index]); + } + else + { + // No point in traversing further. We won't find any extra builtins. + break; + } + } + break; + } + + default: + break; + } + + return true; +} + +void Compiler::update_active_builtins() +{ + active_input_builtins.reset(); + active_output_builtins.reset(); + cull_distance_count = 0; + clip_distance_count = 0; + ActiveBuiltinHandler handler(*this); + traverse_all_reachable_opcodes(get(ir.default_entry_point), handler); +} + +// Returns whether this shader uses a builtin of the storage class +bool Compiler::has_active_builtin(BuiltIn builtin, StorageClass storage) +{ + const Bitset *flags; + switch (storage) + { + case StorageClassInput: + flags = &active_input_builtins; + break; + case StorageClassOutput: + flags = &active_output_builtins; + break; + + default: + return false; + } + return flags->get(builtin); +} + +void Compiler::analyze_image_and_sampler_usage() +{ + CombinedImageSamplerDrefHandler dref_handler(*this); + traverse_all_reachable_opcodes(get(ir.default_entry_point), dref_handler); + + CombinedImageSamplerUsageHandler handler(*this, dref_handler.dref_combined_samplers); + 
traverse_all_reachable_opcodes(get(ir.default_entry_point), handler); + + // Need to run this traversal twice. First time, we propagate any comparison sampler usage from leaf functions + // down to main(). + // In the second pass, we can propagate up forced depth state coming from main() up into leaf functions. + handler.dependency_hierarchy.clear(); + traverse_all_reachable_opcodes(get(ir.default_entry_point), handler); + + comparison_ids = move(handler.comparison_ids); + need_subpass_input = handler.need_subpass_input; + + // Forward information from separate images and samplers into combined image samplers. + for (auto &combined : combined_image_samplers) + if (comparison_ids.count(combined.sampler_id)) + comparison_ids.insert(combined.combined_id); +} + +bool Compiler::CombinedImageSamplerDrefHandler::handle(spv::Op opcode, const uint32_t *args, uint32_t length) +{ + // Mark all sampled images which are used with Dref. + switch (opcode) + { + case OpImageSampleDrefExplicitLod: + case OpImageSampleDrefImplicitLod: + case OpImageSampleProjDrefExplicitLod: + case OpImageSampleProjDrefImplicitLod: + case OpImageSparseSampleProjDrefImplicitLod: + case OpImageSparseSampleDrefImplicitLod: + case OpImageSparseSampleProjDrefExplicitLod: + case OpImageSparseSampleDrefExplicitLod: + case OpImageDrefGather: + case OpImageSparseDrefGather: + // args[2] is read below; reject truncated instructions the same way the other opcode handlers do. + if (length < 3) + return false; + + dref_combined_samplers.insert(args[2]); + return true; + + default: + break; + } + + return true; +} + +const CFG &Compiler::get_cfg_for_current_function() const +{ + assert(current_function); + return get_cfg_for_function(current_function->self); +} + +const CFG &Compiler::get_cfg_for_function(uint32_t id) const +{ + auto cfg_itr = function_cfgs.find(id); + assert(cfg_itr != end(function_cfgs)); + assert(cfg_itr->second); + return *cfg_itr->second; +} + +// Creates a CFG for the default entry point and, through CFGBuilder::follow_function_call, for every function +// visited while traversing from it. Then runs analyze_variable_scope and find_function_local_luts per function, +// and finally discards the loop variables of any block whose candidates do not all share decorations and type. +void Compiler::build_function_control_flow_graphs_and_analyze() +{ + CFGBuilder handler(*this); + handler.function_cfgs[ir.default_entry_point].reset(new CFG(*this,
get(ir.default_entry_point))); + traverse_all_reachable_opcodes(get(ir.default_entry_point), handler); + function_cfgs = move(handler.function_cfgs); + bool single_function = function_cfgs.size() <= 1; + + for (auto &f : function_cfgs) + { + auto &func = get(f.first); + AnalyzeVariableScopeAccessHandler scope_handler(*this, func); + analyze_variable_scope(func, scope_handler); + find_function_local_luts(func, scope_handler, single_function); + + // Check if we can actually use the loop variables we found in analyze_variable_scope. + // To use multiple initializers, we need the same type and qualifiers. + for (auto block : func.blocks) + { + auto &b = get(block); + if (b.loop_variables.size() < 2) + continue; + + auto &flags = get_decoration_bitset(b.loop_variables.front()); + uint32_t type = get(b.loop_variables.front()).basetype; + bool invalid_initializers = false; + for (auto loop_variable : b.loop_variables) + { + // Compare every candidate against the first one. The type must be read from loop_variable itself; + // reading it from front() again would make the type check always pass. + if (flags != get_decoration_bitset(loop_variable) || + type != get(loop_variable).basetype) + { + invalid_initializers = true; + break; + } + } + + if (invalid_initializers) + { + for (auto loop_variable : b.loop_variables) + get(loop_variable).loop_variable = false; + b.loop_variables.clear(); + } + } + } +} + +Compiler::CFGBuilder::CFGBuilder(Compiler &compiler_) + : compiler(compiler_) +{ +} + +bool Compiler::CFGBuilder::handle(spv::Op, const uint32_t *, uint32_t) +{ + return true; +} + +bool Compiler::CFGBuilder::follow_function_call(const SPIRFunction &func) +{ + if (function_cfgs.find(func.self) == end(function_cfgs)) + { + function_cfgs[func.self].reset(new CFG(compiler, func)); + return true; + } + else + return false; +} + +void Compiler::CombinedImageSamplerUsageHandler::add_dependency(uint32_t dst, uint32_t src) +{ + dependency_hierarchy[dst].insert(src); + // Propagate up any comparison state if we're loading from one such variable.
+ if (comparison_ids.count(src)) + comparison_ids.insert(dst); +} + +bool Compiler::CombinedImageSamplerUsageHandler::begin_function_scope(const uint32_t *args, uint32_t length) +{ + if (length < 3) + return false; + + auto &func = compiler.get(args[2]); + const auto *arg = &args[3]; + length -= 3; + + for (uint32_t i = 0; i < length; i++) + { + auto &argument = func.arguments[i]; + add_dependency(argument.id, arg[i]); + } + + return true; +} + +void Compiler::CombinedImageSamplerUsageHandler::add_hierarchy_to_comparison_ids(uint32_t id) +{ + // Traverse the variable dependency hierarchy and tag everything in its path with comparison ids. + comparison_ids.insert(id); + + for (auto &dep_id : dependency_hierarchy[id]) + add_hierarchy_to_comparison_ids(dep_id); +} + +bool Compiler::CombinedImageSamplerUsageHandler::handle(Op opcode, const uint32_t *args, uint32_t length) +{ + switch (opcode) + { + case OpAccessChain: + case OpInBoundsAccessChain: + case OpPtrAccessChain: + case OpLoad: + { + if (length < 3) + return false; + + add_dependency(args[1], args[2]); + + // Ideally defer this to OpImageRead, but then we'd need to track loaded IDs. + // If we load an image, we're going to use it and there is little harm in declaring an unused gl_FragCoord. + auto &type = compiler.get(args[0]); + if (type.image.dim == DimSubpassData) + need_subpass_input = true; + + // If we load a SampledImage and it will be used with Dref, propagate the state up. + if (dref_combined_samplers.count(args[1]) != 0) + add_hierarchy_to_comparison_ids(args[1]); + break; + } + + case OpSampledImage: + { + if (length < 4) + return false; + + uint32_t result_type = args[0]; + uint32_t result_id = args[1]; + auto &type = compiler.get(result_type); + + // If the underlying resource has been used for comparison then duplicate loads of that resource must be too. + // This image must be a depth image. 
+ uint32_t image = args[2]; + uint32_t sampler = args[3]; + + if (type.image.depth || dref_combined_samplers.count(result_id) != 0) + { + add_hierarchy_to_comparison_ids(image); + + // This sampler must be a SamplerComparisonState, and not a regular SamplerState. + add_hierarchy_to_comparison_ids(sampler); + + // Mark the OpSampledImage itself as being comparison state. + comparison_ids.insert(result_id); + } + return true; + } + + default: + break; + } + + return true; +} + +bool Compiler::buffer_is_hlsl_counter_buffer(VariableID id) const +{ + auto *m = ir.find_meta(id); + return m && m->hlsl_is_magic_counter_buffer; +} + +bool Compiler::buffer_get_hlsl_counter_buffer(VariableID id, uint32_t &counter_id) const +{ + auto *m = ir.find_meta(id); + + // First, check for the proper decoration. + if (m && m->hlsl_magic_counter_buffer != 0) + { + counter_id = m->hlsl_magic_counter_buffer; + return true; + } + else + return false; +} + +void Compiler::make_constant_null(uint32_t id, uint32_t type) +{ + auto &constant_type = get(type); + + if (constant_type.pointer) + { + auto &constant = set(id, type); + constant.make_null(constant_type); + } + else if (!constant_type.array.empty()) + { + assert(constant_type.parent_type); + uint32_t parent_id = ir.increase_bound_by(1); + make_constant_null(parent_id, constant_type.parent_type); + + if (!constant_type.array_size_literal.back()) + SPIRV_CROSS_THROW("Array size of OpConstantNull must be a literal."); + + SmallVector elements(constant_type.array.back()); + for (uint32_t i = 0; i < constant_type.array.back(); i++) + elements[i] = parent_id; + set(id, type, elements.data(), uint32_t(elements.size()), false); + } + else if (!constant_type.member_types.empty()) + { + uint32_t member_ids = ir.increase_bound_by(uint32_t(constant_type.member_types.size())); + SmallVector elements(constant_type.member_types.size()); + for (uint32_t i = 0; i < constant_type.member_types.size(); i++) + { + make_constant_null(member_ids + i, 
constant_type.member_types[i]); + elements[i] = member_ids + i; + } + set(id, type, elements.data(), uint32_t(elements.size()), false); + } + else + { + auto &constant = set(id, type); + constant.make_null(constant_type); + } +} + +const SmallVector &Compiler::get_declared_capabilities() const +{ + return ir.declared_capabilities; +} + +const SmallVector &Compiler::get_declared_extensions() const +{ + return ir.declared_extensions; +} + +std::string Compiler::get_remapped_declared_block_name(VariableID id) const +{ + return get_remapped_declared_block_name(id, false); +} + +std::string Compiler::get_remapped_declared_block_name(uint32_t id, bool fallback_prefer_instance_name) const +{ + auto itr = declared_block_names.find(id); + if (itr != end(declared_block_names)) + { + return itr->second; + } + else + { + auto &var = get(id); + + if (fallback_prefer_instance_name) + { + return to_name(var.self); + } + else + { + auto &type = get(var.basetype); + auto *type_meta = ir.find_meta(type.self); + auto *block_name = type_meta ? &type_meta->decoration.alias : nullptr; + return (!block_name || block_name->empty()) ? get_block_fallback_name(id) : *block_name; + } + } +} + +bool Compiler::reflection_ssbo_instance_name_is_significant() const +{ + if (ir.source.known) + { + // UAVs from HLSL source tend to be declared in a way where the type is reused + // but the instance name is significant, and that's the name we should report. + // For GLSL, SSBOs each have their own block type as that's how GLSL is written. + return ir.source.hlsl; + } + + unordered_set ssbo_type_ids; + bool aliased_ssbo_types = false; + + // If we don't have any OpSource information, we need to perform some shaky heuristics. 
+ ir.for_each_typed_id([&](uint32_t, const SPIRVariable &var) { + auto &type = this->get(var.basetype); + if (!type.pointer || var.storage == StorageClassFunction) + return; + + bool ssbo = var.storage == StorageClassStorageBuffer || + (var.storage == StorageClassUniform && has_decoration(type.self, DecorationBufferBlock)); + + if (ssbo) + { + if (ssbo_type_ids.count(type.self)) + aliased_ssbo_types = true; + else + ssbo_type_ids.insert(type.self); + } + }); + + // If the block name is aliased, assume we have HLSL-style UAV declarations. + return aliased_ssbo_types; +} + +bool Compiler::instruction_to_result_type(uint32_t &result_type, uint32_t &result_id, spv::Op op, const uint32_t *args, + uint32_t length) +{ + // Most instructions follow the pattern of . + // There are some exceptions. + switch (op) + { + case OpStore: + case OpCopyMemory: + case OpCopyMemorySized: + case OpImageWrite: + case OpAtomicStore: + case OpAtomicFlagClear: + case OpEmitStreamVertex: + case OpEndStreamPrimitive: + case OpControlBarrier: + case OpMemoryBarrier: + case OpGroupWaitEvents: + case OpRetainEvent: + case OpReleaseEvent: + case OpSetUserEventStatus: + case OpCaptureEventProfilingInfo: + case OpCommitReadPipe: + case OpCommitWritePipe: + case OpGroupCommitReadPipe: + case OpGroupCommitWritePipe: + case OpLine: + case OpNoLine: + return false; + + default: + if (length > 1 && maybe_get(args[0]) != nullptr) + { + result_type = args[0]; + result_id = args[1]; + return true; + } + else + return false; + } +} + +Bitset Compiler::combined_decoration_for_member(const SPIRType &type, uint32_t index) const +{ + Bitset flags; + auto *type_meta = ir.find_meta(type.self); + + if (type_meta) + { + auto &members = type_meta->members; + if (index >= members.size()) + return flags; + auto &dec = members[index]; + + flags.merge_or(dec.decoration_flags); + + auto &member_type = get(type.member_types[index]); + + // If our member type is a struct, traverse all the child members as well 
recursively. + auto &member_childs = member_type.member_types; + for (uint32_t i = 0; i < member_childs.size(); i++) + { + auto &child_member_type = get(member_childs[i]); + if (!child_member_type.pointer) + flags.merge_or(combined_decoration_for_member(member_type, i)); + } + } + + return flags; +} + +bool Compiler::is_desktop_only_format(spv::ImageFormat format) +{ + switch (format) + { + // Desktop-only formats + case ImageFormatR11fG11fB10f: + case ImageFormatR16f: + case ImageFormatRgb10A2: + case ImageFormatR8: + case ImageFormatRg8: + case ImageFormatR16: + case ImageFormatRg16: + case ImageFormatRgba16: + case ImageFormatR16Snorm: + case ImageFormatRg16Snorm: + case ImageFormatRgba16Snorm: + case ImageFormatR8Snorm: + case ImageFormatRg8Snorm: + case ImageFormatR8ui: + case ImageFormatRg8ui: + case ImageFormatR16ui: + case ImageFormatRgb10a2ui: + case ImageFormatR8i: + case ImageFormatRg8i: + case ImageFormatR16i: + return true; + default: + break; + } + + return false; +} + +bool Compiler::image_is_comparison(const SPIRType &type, uint32_t id) const +{ + return type.image.depth || (comparison_ids.count(id) != 0); +} + +bool Compiler::type_is_opaque_value(const SPIRType &type) const +{ + return !type.pointer && (type.basetype == SPIRType::SampledImage || type.basetype == SPIRType::Image || + type.basetype == SPIRType::Sampler); +} + +// Make these member functions so we can easily break on any force_recompile events. 
+void Compiler::force_recompile() +{ + is_force_recompile = true; +} + +bool Compiler::is_forcing_recompilation() const +{ + return is_force_recompile; +} + +void Compiler::clear_force_recompile() +{ + is_force_recompile = false; +} + +Compiler::PhysicalStorageBufferPointerHandler::PhysicalStorageBufferPointerHandler(Compiler &compiler_) + : compiler(compiler_) +{ +} + +bool Compiler::PhysicalStorageBufferPointerHandler::handle(Op op, const uint32_t *args, uint32_t) +{ + if (op == OpConvertUToPtr || op == OpBitcast) + { + auto &type = compiler.get(args[0]); + if (type.storage == StorageClassPhysicalStorageBufferEXT && type.pointer && type.pointer_depth == 1) + { + // If we need to cast to a pointer type which is not a block, we might need to synthesize ourselves + // a block type which wraps this POD type. + if (type.basetype != SPIRType::Struct) + types.insert(args[0]); + } + } + + return true; +} + +void Compiler::analyze_non_block_pointer_types() +{ + PhysicalStorageBufferPointerHandler handler(*this); + traverse_all_reachable_opcodes(get(ir.default_entry_point), handler); + physical_storage_non_block_pointer_types.reserve(handler.types.size()); + for (auto type : handler.types) + physical_storage_non_block_pointer_types.push_back(type); + sort(begin(physical_storage_non_block_pointer_types), end(physical_storage_non_block_pointer_types)); +} + +bool Compiler::InterlockedResourceAccessPrepassHandler::handle(Op op, const uint32_t *, uint32_t) +{ + if (op == OpBeginInvocationInterlockEXT || op == OpEndInvocationInterlockEXT) + { + if (interlock_function_id != 0 && interlock_function_id != call_stack.back()) + { + // Most complex case, we have no sensible way of dealing with this + // other than taking the 100% conservative approach, exit early. + split_function_case = true; + return false; + } + else + { + interlock_function_id = call_stack.back(); + // If this call is performed inside control flow we have a problem. 
+ auto &cfg = compiler.get_cfg_for_function(interlock_function_id); + + uint32_t from_block_id = compiler.get(interlock_function_id).entry_block; + bool outside_control_flow = cfg.node_terminates_control_flow_in_sub_graph(from_block_id, current_block_id); + if (!outside_control_flow) + control_flow_interlock = true; + } + } + return true; +} + +void Compiler::InterlockedResourceAccessPrepassHandler::rearm_current_block(const SPIRBlock &block) +{ + current_block_id = block.self; +} + +bool Compiler::InterlockedResourceAccessPrepassHandler::begin_function_scope(const uint32_t *args, uint32_t length) +{ + if (length < 3) + return false; + call_stack.push_back(args[2]); + return true; +} + +bool Compiler::InterlockedResourceAccessPrepassHandler::end_function_scope(const uint32_t *, uint32_t) +{ + call_stack.pop_back(); + return true; +} + +bool Compiler::InterlockedResourceAccessHandler::begin_function_scope(const uint32_t *args, uint32_t length) +{ + if (length < 3) + return false; + + if (args[2] == interlock_function_id) + call_stack_is_interlocked = true; + + call_stack.push_back(args[2]); + return true; +} + +bool Compiler::InterlockedResourceAccessHandler::end_function_scope(const uint32_t *, uint32_t) +{ + if (call_stack.back() == interlock_function_id) + call_stack_is_interlocked = false; + + call_stack.pop_back(); + return true; +} + +void Compiler::InterlockedResourceAccessHandler::access_potential_resource(uint32_t id) +{ + if ((use_critical_section && in_crit_sec) || (control_flow_interlock && call_stack_is_interlocked) || + split_function_case) + { + compiler.interlocked_resources.insert(id); + } +} + +bool Compiler::InterlockedResourceAccessHandler::handle(Op opcode, const uint32_t *args, uint32_t length) +{ + // Only care about critical section analysis if we have simple case. 
+ if (use_critical_section) + { + if (opcode == OpBeginInvocationInterlockEXT) + { + in_crit_sec = true; + return true; + } + + if (opcode == OpEndInvocationInterlockEXT) + { + // End critical section--nothing more to do. + return false; + } + } + + // We need to figure out where images and buffers are loaded from, so do only the bare bones compilation we need. + switch (opcode) + { + case OpLoad: + { + if (length < 3) + return false; + + uint32_t ptr = args[2]; + auto *var = compiler.maybe_get_backing_variable(ptr); + + // We're only concerned with buffer and image memory here. + if (!var) + break; + + switch (var->storage) + { + default: + break; + + case StorageClassUniformConstant: + { + uint32_t result_type = args[0]; + uint32_t id = args[1]; + compiler.set(id, "", result_type, true); + compiler.register_read(id, ptr, true); + break; + } + + case StorageClassUniform: + // Must have BufferBlock; we only care about SSBOs. + if (!compiler.has_decoration(compiler.get(var->basetype).self, DecorationBufferBlock)) + break; + // fallthrough + case StorageClassStorageBuffer: + access_potential_resource(var->self); + break; + } + break; + } + + case OpInBoundsAccessChain: + case OpAccessChain: + case OpPtrAccessChain: + { + if (length < 3) + return false; + + uint32_t result_type = args[0]; + + auto &type = compiler.get(result_type); + if (type.storage == StorageClassUniform || type.storage == StorageClassUniformConstant || + type.storage == StorageClassStorageBuffer) + { + uint32_t id = args[1]; + uint32_t ptr = args[2]; + compiler.set(id, "", result_type, true); + compiler.register_read(id, ptr, true); + compiler.ir.ids[id].set_allow_type_rewrite(); + } + break; + } + + case OpImageTexelPointer: + { + if (length < 3) + return false; + + uint32_t result_type = args[0]; + uint32_t id = args[1]; + uint32_t ptr = args[2]; + auto &e = compiler.set(id, "", result_type, true); + auto *var = compiler.maybe_get_backing_variable(ptr); + if (var) + e.loaded_from = var->self; + 
break; + } + + case OpStore: + case OpImageWrite: + case OpAtomicStore: + { + if (length < 1) + return false; + + uint32_t ptr = args[0]; + auto *var = compiler.maybe_get_backing_variable(ptr); + if (var && (var->storage == StorageClassUniform || var->storage == StorageClassUniformConstant || + var->storage == StorageClassStorageBuffer)) + { + access_potential_resource(var->self); + } + + break; + } + + case OpCopyMemory: + { + if (length < 2) + return false; + + uint32_t dst = args[0]; + uint32_t src = args[1]; + auto *dst_var = compiler.maybe_get_backing_variable(dst); + auto *src_var = compiler.maybe_get_backing_variable(src); + + if (dst_var && (dst_var->storage == StorageClassUniform || dst_var->storage == StorageClassStorageBuffer)) + access_potential_resource(dst_var->self); + + if (src_var) + { + if (src_var->storage != StorageClassUniform && src_var->storage != StorageClassStorageBuffer) + break; + + if (src_var->storage == StorageClassUniform && + !compiler.has_decoration(compiler.get(src_var->basetype).self, DecorationBufferBlock)) + { + break; + } + + access_potential_resource(src_var->self); + } + + break; + } + + case OpImageRead: + case OpAtomicLoad: + { + if (length < 3) + return false; + + uint32_t ptr = args[2]; + auto *var = compiler.maybe_get_backing_variable(ptr); + + // We're only concerned with buffer and image memory here. + if (!var) + break; + + switch (var->storage) + { + default: + break; + + case StorageClassUniform: + // Must have BufferBlock; we only care about SSBOs. 
+ if (!compiler.has_decoration(compiler.get(var->basetype).self, DecorationBufferBlock)) + break; + // fallthrough + case StorageClassUniformConstant: + case StorageClassStorageBuffer: + access_potential_resource(var->self); + break; + } + break; + } + + case OpAtomicExchange: + case OpAtomicCompareExchange: + case OpAtomicIIncrement: + case OpAtomicIDecrement: + case OpAtomicIAdd: + case OpAtomicISub: + case OpAtomicSMin: + case OpAtomicUMin: + case OpAtomicSMax: + case OpAtomicUMax: + case OpAtomicAnd: + case OpAtomicOr: + case OpAtomicXor: + { + if (length < 3) + return false; + + uint32_t ptr = args[2]; + auto *var = compiler.maybe_get_backing_variable(ptr); + if (var && (var->storage == StorageClassUniform || var->storage == StorageClassUniformConstant || + var->storage == StorageClassStorageBuffer)) + { + access_potential_resource(var->self); + } + + break; + } + + default: + break; + } + + return true; +} + +void Compiler::analyze_interlocked_resource_usage() +{ + if (get_execution_model() == ExecutionModelFragment && + (get_entry_point().flags.get(ExecutionModePixelInterlockOrderedEXT) || + get_entry_point().flags.get(ExecutionModePixelInterlockUnorderedEXT) || + get_entry_point().flags.get(ExecutionModeSampleInterlockOrderedEXT) || + get_entry_point().flags.get(ExecutionModeSampleInterlockUnorderedEXT))) + { + InterlockedResourceAccessPrepassHandler prepass_handler(*this, ir.default_entry_point); + traverse_all_reachable_opcodes(get(ir.default_entry_point), prepass_handler); + + InterlockedResourceAccessHandler handler(*this, ir.default_entry_point); + handler.interlock_function_id = prepass_handler.interlock_function_id; + handler.split_function_case = prepass_handler.split_function_case; + handler.control_flow_interlock = prepass_handler.control_flow_interlock; + handler.use_critical_section = !handler.split_function_case && !handler.control_flow_interlock; + + traverse_all_reachable_opcodes(get(ir.default_entry_point), handler); + + // For GLSL. 
If we hit any of these cases, we have to fall back to conservative approach. + interlocked_is_complex = + !handler.use_critical_section || handler.interlock_function_id != ir.default_entry_point; + } +} + +bool Compiler::type_is_array_of_pointers(const SPIRType &type) const +{ + if (!type.pointer) + return false; + + // If parent type has same pointer depth, we must have an array of pointers. + return type.pointer_depth == get(type.parent_type).pointer_depth; +} + +bool Compiler::type_is_top_level_physical_pointer(const SPIRType &type) const +{ + return type.pointer && type.storage == StorageClassPhysicalStorageBuffer && + type.pointer_depth > get(type.parent_type).pointer_depth; +} + +bool Compiler::flush_phi_required(BlockID from, BlockID to) const +{ + auto &child = get(to); + for (auto &phi : child.phi_variables) + if (phi.parent == from) + return true; + return false; +} + +void Compiler::add_loop_level() +{ + current_loop_level++; +} diff --git a/dep/spirv-cross/spirv_cross.hpp b/dep/spirv-cross/spirv_cross.hpp new file mode 100644 index 000000000..47f1d7949 --- /dev/null +++ b/dep/spirv-cross/spirv_cross.hpp @@ -0,0 +1,1082 @@ +/* + * Copyright 2015-2020 Arm Limited + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * At your option, you may choose to accept this material under either: + * 1. The Apache License, Version 2.0, found at , or + * 2. The MIT License, found at . + * SPDX-License-Identifier: Apache-2.0 OR MIT. 
+ */ + +#ifndef SPIRV_CROSS_HPP +#define SPIRV_CROSS_HPP + +#include "spirv.hpp" +#include "spirv_cfg.hpp" +#include "spirv_cross_parsed_ir.hpp" + +namespace SPIRV_CROSS_NAMESPACE +{ +struct Resource +{ + // Resources are identified with their SPIR-V ID. + // This is the ID of the OpVariable. + ID id; + + // The type ID of the variable which includes arrays and all type modifications. + // This type ID is not suitable for parsing OpMemberDecoration of a struct and other decorations in general + // since these modifications typically happen on the base_type_id. + TypeID type_id; + + // The base type of the declared resource. + // This type is the base type which ignores pointers and arrays of the type_id. + // This is mostly useful to parse decorations of the underlying type. + // base_type_id can also be obtained with get_type(get_type(type_id).self). + TypeID base_type_id; + + // The declared name (OpName) of the resource. + // For Buffer blocks, the name actually reflects the externally + // visible Block name. + // + // This name can be retrieved again by using either + // get_name(id) or get_name(base_type_id) depending if it's a buffer block or not. + // + // This name can be an empty string in which case get_fallback_name(id) can be + // used which obtains a suitable fallback identifier for an ID. + std::string name; +}; + +struct ShaderResources +{ + SmallVector uniform_buffers; + SmallVector storage_buffers; + SmallVector stage_inputs; + SmallVector stage_outputs; + SmallVector subpass_inputs; + SmallVector storage_images; + SmallVector sampled_images; + SmallVector atomic_counters; + SmallVector acceleration_structures; + + // There can only be one push constant block, + // but keep the vector in case this restriction is lifted in the future. + SmallVector push_constant_buffers; + + // For Vulkan GLSL and HLSL source, + // these correspond to separate texture2D and samplers respectively. 
+ SmallVector separate_images; + SmallVector separate_samplers; +}; + +struct CombinedImageSampler +{ + // The ID of the sampler2D variable. + VariableID combined_id; + // The ID of the texture2D variable. + VariableID image_id; + // The ID of the sampler variable. + VariableID sampler_id; +}; + +struct SpecializationConstant +{ + // The ID of the specialization constant. + ConstantID id; + // The constant ID of the constant, used in Vulkan during pipeline creation. + uint32_t constant_id; +}; + +struct BufferRange +{ + unsigned index; + size_t offset; + size_t range; +}; + +enum BufferPackingStandard +{ + BufferPackingStd140, + BufferPackingStd430, + BufferPackingStd140EnhancedLayout, + BufferPackingStd430EnhancedLayout, + BufferPackingHLSLCbuffer, + BufferPackingHLSLCbufferPackOffset, + BufferPackingScalar, + BufferPackingScalarEnhancedLayout +}; + +struct EntryPoint +{ + std::string name; + spv::ExecutionModel execution_model; +}; + +class Compiler +{ +public: + friend class CFG; + friend class DominatorBuilder; + + // The constructor takes a buffer of SPIR-V words and parses it. + // It will create its own parser, parse the SPIR-V and move the parsed IR + // as if you had called the constructors taking ParsedIR directly. + explicit Compiler(std::vector ir); + Compiler(const uint32_t *ir, size_t word_count); + + // This is more modular. We can also consume a ParsedIR structure directly, either as a move, or copy. + // With copy, we can reuse the same parsed IR for multiple Compiler instances. + explicit Compiler(const ParsedIR &ir); + explicit Compiler(ParsedIR &&ir); + + virtual ~Compiler() = default; + + // After parsing, API users can modify the SPIR-V via reflection and call this + // to disassemble the SPIR-V into the desired langauage. + // Sub-classes actually implement this. + virtual std::string compile(); + + // Gets the identifier (OpName) of an ID. If not defined, an empty string will be returned. 
+ const std::string &get_name(ID id) const; + + // Applies a decoration to an ID. Effectively injects OpDecorate. + void set_decoration(ID id, spv::Decoration decoration, uint32_t argument = 0); + void set_decoration_string(ID id, spv::Decoration decoration, const std::string &argument); + + // Overrides the identifier OpName of an ID. + // Identifiers beginning with underscores or identifiers which contain double underscores + // are reserved by the implementation. + void set_name(ID id, const std::string &name); + + // Gets a bitmask for the decorations which are applied to ID. + // I.e. (1ull << spv::DecorationFoo) | (1ull << spv::DecorationBar) + const Bitset &get_decoration_bitset(ID id) const; + + // Returns whether the decoration has been applied to the ID. + bool has_decoration(ID id, spv::Decoration decoration) const; + + // Gets the value for decorations which take arguments. + // If the decoration is a boolean (i.e. spv::DecorationNonWritable), + // 1 will be returned. + // If decoration doesn't exist or decoration is not recognized, + // 0 will be returned. + uint32_t get_decoration(ID id, spv::Decoration decoration) const; + const std::string &get_decoration_string(ID id, spv::Decoration decoration) const; + + // Removes the decoration for an ID. + void unset_decoration(ID id, spv::Decoration decoration); + + // Gets the SPIR-V type associated with ID. + // Mostly used with Resource::type_id and Resource::base_type_id to parse the underlying type of a resource. + const SPIRType &get_type(TypeID id) const; + + // Gets the SPIR-V type of a variable. + const SPIRType &get_type_from_variable(VariableID id) const; + + // Gets the underlying storage class for an OpVariable. + spv::StorageClass get_storage_class(VariableID id) const; + + // If get_name() is an empty string, get the fallback name which will be used + // instead in the disassembled source. 
+ virtual const std::string get_fallback_name(ID id) const; + + // If get_name() of a Block struct is an empty string, get the fallback name. + // This needs to be per-variable as multiple variables can use the same block type. + virtual const std::string get_block_fallback_name(VariableID id) const; + + // Given an OpTypeStruct in ID, obtain the identifier for member number "index". + // This may be an empty string. + const std::string &get_member_name(TypeID id, uint32_t index) const; + + // Given an OpTypeStruct in ID, obtain the OpMemberDecoration for member number "index". + uint32_t get_member_decoration(TypeID id, uint32_t index, spv::Decoration decoration) const; + const std::string &get_member_decoration_string(TypeID id, uint32_t index, spv::Decoration decoration) const; + + // Sets the member identifier for OpTypeStruct ID, member number "index". + void set_member_name(TypeID id, uint32_t index, const std::string &name); + + // Returns the qualified member identifier for OpTypeStruct ID, member number "index", + // or an empty string if no qualified alias exists + const std::string &get_member_qualified_name(TypeID type_id, uint32_t index) const; + + // Gets the decoration mask for a member of a struct, similar to get_decoration_mask. + const Bitset &get_member_decoration_bitset(TypeID id, uint32_t index) const; + + // Returns whether the decoration has been applied to a member of a struct. + bool has_member_decoration(TypeID id, uint32_t index, spv::Decoration decoration) const; + + // Similar to set_decoration, but for struct members. + void set_member_decoration(TypeID id, uint32_t index, spv::Decoration decoration, uint32_t argument = 0); + void set_member_decoration_string(TypeID id, uint32_t index, spv::Decoration decoration, + const std::string &argument); + + // Unsets a member decoration, similar to unset_decoration. 
+ void unset_member_decoration(TypeID id, uint32_t index, spv::Decoration decoration); + + // Gets the fallback name for a member, similar to get_fallback_name. + virtual const std::string get_fallback_member_name(uint32_t index) const + { + return join("_", index); + } + + // Returns a vector of which members of a struct are potentially in use by a + // SPIR-V shader. The granularity of this analysis is per-member of a struct. + // This can be used for Buffer (UBO), BufferBlock/StorageBuffer (SSBO) and PushConstant blocks. + // ID is the Resource::id obtained from get_shader_resources(). + SmallVector get_active_buffer_ranges(VariableID id) const; + + // Returns the effective size of a buffer block. + size_t get_declared_struct_size(const SPIRType &struct_type) const; + + // Returns the effective size of a buffer block, with a given array size + // for a runtime array. + // SSBOs are typically declared as runtime arrays. get_declared_struct_size() will return 0 for the size. + // This is not very helpful for applications which might need to know the array stride of its last member. + // This can be done through the API, but it is not very intuitive how to accomplish this, so here we provide a helper function + // to query the size of the buffer, assuming that the last member has a certain size. + // If the buffer does not contain a runtime array, array_size is ignored, and the function will behave as + // get_declared_struct_size(). + // To get the array stride of the last member, something like: + // get_declared_struct_size_runtime_array(type, 1) - get_declared_struct_size_runtime_array(type, 0) will work. + size_t get_declared_struct_size_runtime_array(const SPIRType &struct_type, size_t array_size) const; + + // Returns the effective size of a buffer block struct member. 
+ size_t get_declared_struct_member_size(const SPIRType &struct_type, uint32_t index) const; + + // Returns a set of all global variables which are statically accessed + // by the control flow graph from the current entry point. + // Only variables which change the interface for a shader are returned, that is, + // variables with storage class of Input, Output, Uniform, UniformConstant, PushConstant and AtomicCounter + // storage classes are returned. + // + // To use the returned set as the filter for which variables are used during compilation, + // this set can be moved to set_enabled_interface_variables(). + std::unordered_set get_active_interface_variables() const; + + // Sets the interface variables which are used during compilation. + // By default, all variables are used. + // Once set, compile() will only consider the set in active_variables. + void set_enabled_interface_variables(std::unordered_set active_variables); + + // Query shader resources, use ids with reflection interface to modify or query binding points, etc. + ShaderResources get_shader_resources() const; + + // Query shader resources, but only return the variables which are part of active_variables. + // E.g.: get_shader_resources(get_active_variables()) to only return the variables which are statically + // accessed. + ShaderResources get_shader_resources(const std::unordered_set &active_variables) const; + + // Remapped variables are considered built-in variables and a backend will + // not emit a declaration for this variable. + // This is mostly useful for making use of builtins which are dependent on extensions. + void set_remapped_variable_state(VariableID id, bool remap_enable); + bool get_remapped_variable_state(VariableID id) const; + + // For subpassInput variables which are remapped to plain variables, + // the number of components in the remapped + // variable must be specified as the backing type of subpass inputs are opaque. 
+ void set_subpass_input_remapped_components(VariableID id, uint32_t components); + uint32_t get_subpass_input_remapped_components(VariableID id) const; + + // All operations work on the current entry point. + // Entry points can be swapped out with set_entry_point(). + // Entry points should be set right after the constructor completes as some reflection functions traverse the graph from the entry point. + // Resource reflection also depends on the entry point. + // By default, the current entry point is set to the first OpEntryPoint which appears in the SPIR-V module. + + // Some shader languages restrict the names that can be given to entry points, and the + // corresponding backend will automatically rename an entry point name, during the call + // to compile() if it is illegal. For example, the common entry point name main() is + // illegal in MSL, and is renamed to an alternate name by the MSL backend. + // Given the original entry point name contained in the SPIR-V, this function returns + // the name, as updated by the backend during the call to compile(). If the name is not + // illegal, and has not been renamed, or if this function is called before compile(), + // this function will simply return the same name. + + // New variants of entry point query and reflection. + // Names for entry points in the SPIR-V module may alias if they belong to different execution models. + // To disambiguate, we must pass along with the entry point names the execution model. + SmallVector get_entry_points_and_stages() const; + void set_entry_point(const std::string &entry, spv::ExecutionModel execution_model); + + // Renames an entry point from old_name to new_name. + // If old_name is currently selected as the current entry point, it will continue to be the current entry point, + // albeit with a new name. + // get_entry_points() is essentially invalidated at this point. 
+ void rename_entry_point(const std::string &old_name, const std::string &new_name, + spv::ExecutionModel execution_model); + const SPIREntryPoint &get_entry_point(const std::string &name, spv::ExecutionModel execution_model) const; + SPIREntryPoint &get_entry_point(const std::string &name, spv::ExecutionModel execution_model); + const std::string &get_cleansed_entry_point_name(const std::string &name, + spv::ExecutionModel execution_model) const; + + // Traverses all reachable opcodes and sets active_builtins to a bitmask of all builtin variables which are accessed in the shader. + void update_active_builtins(); + bool has_active_builtin(spv::BuiltIn builtin, spv::StorageClass storage); + + // Query and modify OpExecutionMode. + const Bitset &get_execution_mode_bitset() const; + + void unset_execution_mode(spv::ExecutionMode mode); + void set_execution_mode(spv::ExecutionMode mode, uint32_t arg0 = 0, uint32_t arg1 = 0, uint32_t arg2 = 0); + + // Gets argument for an execution mode (LocalSize, Invocations, OutputVertices). + // For LocalSize, the index argument is used to select the dimension (X = 0, Y = 1, Z = 2). + // For execution modes which do not have arguments, 0 is returned. + uint32_t get_execution_mode_argument(spv::ExecutionMode mode, uint32_t index = 0) const; + spv::ExecutionModel get_execution_model() const; + + bool is_tessellation_shader() const; + + // In SPIR-V, the compute work group size can be represented by a constant vector, in which case + // the LocalSize execution mode is ignored. + // + // This constant vector can be a constant vector, specialization constant vector, or partly specialized constant vector. + // To modify and query work group dimensions which are specialization constants, SPIRConstant values must be modified + // directly via get_constant() rather than using LocalSize directly. This function will return which constants should be modified. 
+ // + // To modify dimensions which are *not* specialization constants, set_execution_mode should be used directly. + // Arguments to set_execution_mode which are specialization constants are effectively ignored during compilation. + // NOTE: This is somewhat different from how SPIR-V works. In SPIR-V, the constant vector will completely replace LocalSize, + // while in this interface, LocalSize is only ignored for specialization constants. + // + // The specialization constant will be written to x, y and z arguments. + // If the component is not a specialization constant, a zeroed out struct will be written. + // The return value is the constant ID of the builtin WorkGroupSize, but this is not expected to be useful + // for most use cases. + uint32_t get_work_group_size_specialization_constants(SpecializationConstant &x, SpecializationConstant &y, + SpecializationConstant &z) const; + + // Analyzes all OpImageFetch (texelFetch) opcodes and checks if there are instances where + // said instruction is used without a combined image sampler. + // GLSL targets do not support the use of texelFetch without a sampler. + // To workaround this, we must inject a dummy sampler which can be used to form a sampler2D at the call-site of + // texelFetch as necessary. + // + // This must be called before build_combined_image_samplers(). + // build_combined_image_samplers() may refer to the ID returned by this method if the returned ID is non-zero. + // The return value will be the ID of a sampler object if a dummy sampler is necessary, or 0 if no sampler object + // is required. + // + // If the returned ID is non-zero, it can be decorated with set/bindings as desired before calling compile(). + // Calling this function also invalidates get_active_interface_variables(), so this should be called + // before that function. 
+ VariableID build_dummy_sampler_for_combined_images(); + + // Analyzes all separate image and samplers used from the currently selected entry point, + // and re-routes them all to a combined image sampler instead. + // This is required to "support" separate image samplers in targets which do not natively support + // this feature, like GLSL/ESSL. + // + // This must be called before compile() if such remapping is desired. + // This call will add new sampled images to the SPIR-V, + // so it will appear in reflection if get_shader_resources() is called after build_combined_image_samplers. + // + // If any image/sampler remapping was found, no separate image/samplers will appear in the decompiled output, + // but will still appear in reflection. + // + // The resulting samplers will be void of any decorations like name, descriptor sets and binding points, + // so this can be added before compile() if desired. + // + // Combined image samplers originating from this set are always considered active variables. + // Arrays of separate samplers are not supported, but arrays of separate images are supported. + // Array of images + sampler -> Array of combined image samplers. + void build_combined_image_samplers(); + + // Gets a remapping for the combined image samplers. + const SmallVector &get_combined_image_samplers() const + { + return combined_image_samplers; + } + + // Set a new variable type remap callback. + // The type remapping is designed to allow global interface variable to assume more special types. + // A typical example here is to remap sampler2D into samplerExternalOES, which currently isn't supported + // directly by SPIR-V. + // + // In compile() while emitting code, + // for every variable that is declared, including function parameters, the callback will be called + // and the API user has a chance to change the textual representation of the type used to declare the variable. + // The API user can detect special patterns in names to guide the remapping. 
+ void set_variable_type_remap_callback(VariableTypeRemapCallback cb) + { + variable_remap_callback = std::move(cb); + } + + // API for querying which specialization constants exist. + // To modify a specialization constant before compile(), use get_constant(constant.id), + // then update constants directly in the SPIRConstant data structure. + // For composite types, the subconstants can be iterated over and modified. + // constant_type is the SPIRType for the specialization constant, + // which can be queried to determine which fields in the unions should be poked at. + SmallVector get_specialization_constants() const; + SPIRConstant &get_constant(ConstantID id); + const SPIRConstant &get_constant(ConstantID id) const; + + uint32_t get_current_id_bound() const + { + return uint32_t(ir.ids.size()); + } + + // API for querying buffer objects. + // The type passed in here should be the base type of a resource, i.e. + // get_type(resource.base_type_id) + // as decorations are set in the basic Block type. + // The type passed in here must have these decorations set, or an exception is raised. + // Only UBOs and SSBOs or sub-structs which are part of these buffer types will have these decorations set. + uint32_t type_struct_member_offset(const SPIRType &type, uint32_t index) const; + uint32_t type_struct_member_array_stride(const SPIRType &type, uint32_t index) const; + uint32_t type_struct_member_matrix_stride(const SPIRType &type, uint32_t index) const; + + // Gets the offset in SPIR-V words (uint32_t) for a decoration which was originally declared in the SPIR-V binary. + // The offset will point to one or more uint32_t literals which can be modified in-place before using the SPIR-V binary. + // Note that adding or removing decorations using the reflection API will not change the behavior of this function. + // If the decoration was declared, sets the word_offset to an offset into the provided SPIR-V binary buffer and returns true, + // otherwise, returns false. 
+ // If the decoration does not have any value attached to it (e.g. DecorationRelaxedPrecision), this function will also return false. + bool get_binary_offset_for_decoration(VariableID id, spv::Decoration decoration, uint32_t &word_offset) const; + + // HLSL counter buffer reflection interface. + // Append/Consume/Increment/Decrement in HLSL is implemented as two "neighbor" buffer objects where + // one buffer implements the storage, and a single buffer containing just a lone "int" implements the counter. + // To SPIR-V these will be exposed as two separate buffers, but the glslang HLSL frontend emits a special identifier + // which lets us link the two buffers together. + + // Queries if a variable ID is a counter buffer which "belongs" to a regular buffer object. + + // If SPV_GOOGLE_hlsl_functionality1 is used, this can be used even with a stripped SPIR-V module. + // Otherwise, this query is purely based on OpName identifiers as found in the SPIR-V module, and will + // only return true if OpSource was reported HLSL. + // To rely on this functionality, ensure that the SPIR-V module is not stripped. + + bool buffer_is_hlsl_counter_buffer(VariableID id) const; + + // Queries if a buffer object has a neighbor "counter" buffer. + // If so, the ID of that counter buffer will be returned in counter_id. + // If SPV_GOOGLE_hlsl_functionality1 is used, this can be used even with a stripped SPIR-V module. + // Otherwise, this query is purely based on OpName identifiers as found in the SPIR-V module, and will + // only return true if OpSource was reported HLSL. + // To rely on this functionality, ensure that the SPIR-V module is not stripped. + bool buffer_get_hlsl_counter_buffer(VariableID id, uint32_t &counter_id) const; + + // Gets the list of all SPIR-V Capabilities which were declared in the SPIR-V module. + const SmallVector &get_declared_capabilities() const; + + // Gets the list of all SPIR-V extensions which were declared in the SPIR-V module. 
+ const SmallVector &get_declared_extensions() const; + + // When declaring buffer blocks in GLSL, the name declared in the GLSL source + // might not be the same as the name declared in the SPIR-V module due to naming conflicts. + // In this case, SPIRV-Cross needs to find a fallback-name, and it might only + // be possible to know this name after compiling to GLSL. + // This is particularly important for HLSL input and UAVs which tends to reuse the same block type + // for multiple distinct blocks. For these cases it is not possible to modify the name of the type itself + // because it might be unique. Instead, you can use this interface to check after compilation which + // name was actually used if your input SPIR-V tends to have this problem. + // For other names like remapped names for variables, etc, it's generally enough to query the name of the variables + // after compiling, block names are an exception to this rule. + // ID is the name of a variable as returned by Resource::id, and must be a variable with a Block-like type. + // + // This also applies to HLSL cbuffers. + std::string get_remapped_declared_block_name(VariableID id) const; + + // For buffer block variables, get the decorations for that variable. + // Sometimes, decorations for buffer blocks are found in member decorations instead + // of direct decorations on the variable itself. + // The most common use here is to check if a buffer is readonly or writeonly. + Bitset get_buffer_block_flags(VariableID id) const; + +protected: + const uint32_t *stream(const Instruction &instr) const + { + // If we're not going to use any arguments, just return nullptr. + // We want to avoid case where we return an out of range pointer + // that trips debug assertions on some platforms. 
+ if (!instr.length) + return nullptr; + + if (instr.offset + instr.length > ir.spirv.size()) + SPIRV_CROSS_THROW("Compiler::stream() out of range."); + return &ir.spirv[instr.offset]; + } + + ParsedIR ir; + // Marks variables which have global scope and variables which can alias with other variables + // (SSBO, image load store, etc) + SmallVector global_variables; + SmallVector aliased_variables; + + SPIRFunction *current_function = nullptr; + SPIRBlock *current_block = nullptr; + uint32_t current_loop_level = 0; + std::unordered_set active_interface_variables; + bool check_active_interface_variables = false; + + void add_loop_level(); + + void set_initializers(SPIRExpression &e) + { + e.emitted_loop_level = current_loop_level; + } + + template + void set_initializers(const T &) + { + } + + // If our IDs are out of range here as part of opcodes, throw instead of + // undefined behavior. + template + T &set(uint32_t id, P &&... args) + { + ir.add_typed_id(static_cast(T::type), id); + auto &var = variant_set(ir.ids[id], std::forward

(args)...); + var.self = id; + set_initializers(var); + return var; + } + + template + T &get(uint32_t id) + { + return variant_get(ir.ids[id]); + } + + template + T *maybe_get(uint32_t id) + { + if (id >= ir.ids.size()) + return nullptr; + else if (ir.ids[id].get_type() == static_cast(T::type)) + return &get(id); + else + return nullptr; + } + + template + const T &get(uint32_t id) const + { + return variant_get(ir.ids[id]); + } + + template + const T *maybe_get(uint32_t id) const + { + if (id >= ir.ids.size()) + return nullptr; + else if (ir.ids[id].get_type() == static_cast(T::type)) + return &get(id); + else + return nullptr; + } + + // Gets the id of SPIR-V type underlying the given type_id, which might be a pointer. + uint32_t get_pointee_type_id(uint32_t type_id) const; + + // Gets the SPIR-V type underlying the given type, which might be a pointer. + const SPIRType &get_pointee_type(const SPIRType &type) const; + + // Gets the SPIR-V type underlying the given type_id, which might be a pointer. + const SPIRType &get_pointee_type(uint32_t type_id) const; + + // Gets the ID of the SPIR-V type underlying a variable. + uint32_t get_variable_data_type_id(const SPIRVariable &var) const; + + // Gets the SPIR-V type underlying a variable. + SPIRType &get_variable_data_type(const SPIRVariable &var); + + // Gets the SPIR-V type underlying a variable. + const SPIRType &get_variable_data_type(const SPIRVariable &var) const; + + // Gets the SPIR-V element type underlying an array variable. + SPIRType &get_variable_element_type(const SPIRVariable &var); + + // Gets the SPIR-V element type underlying an array variable. + const SPIRType &get_variable_element_type(const SPIRVariable &var) const; + + // Sets the qualified member identifier for OpTypeStruct ID, member number "index". 
+ void set_member_qualified_name(uint32_t type_id, uint32_t index, const std::string &name); + void set_qualified_name(uint32_t id, const std::string &name); + + // Returns if the given type refers to a sampled image. + bool is_sampled_image_type(const SPIRType &type); + + const SPIREntryPoint &get_entry_point() const; + SPIREntryPoint &get_entry_point(); + static bool is_tessellation_shader(spv::ExecutionModel model); + + virtual std::string to_name(uint32_t id, bool allow_alias = true) const; + bool is_builtin_variable(const SPIRVariable &var) const; + bool is_builtin_type(const SPIRType &type) const; + bool is_hidden_variable(const SPIRVariable &var, bool include_builtins = false) const; + bool is_immutable(uint32_t id) const; + bool is_member_builtin(const SPIRType &type, uint32_t index, spv::BuiltIn *builtin) const; + bool is_scalar(const SPIRType &type) const; + bool is_vector(const SPIRType &type) const; + bool is_matrix(const SPIRType &type) const; + bool is_array(const SPIRType &type) const; + uint32_t expression_type_id(uint32_t id) const; + const SPIRType &expression_type(uint32_t id) const; + bool expression_is_lvalue(uint32_t id) const; + bool variable_storage_is_aliased(const SPIRVariable &var); + SPIRVariable *maybe_get_backing_variable(uint32_t chain); + spv::StorageClass get_expression_effective_storage_class(uint32_t ptr); + + void register_read(uint32_t expr, uint32_t chain, bool forwarded); + void register_write(uint32_t chain); + + inline bool is_continue(uint32_t next) const + { + return (ir.block_meta[next] & ParsedIR::BLOCK_META_CONTINUE_BIT) != 0; + } + + inline bool is_single_block_loop(uint32_t next) const + { + auto &block = get(next); + return block.merge == SPIRBlock::MergeLoop && block.continue_block == ID(next); + } + + inline bool is_break(uint32_t next) const + { + return (ir.block_meta[next] & + (ParsedIR::BLOCK_META_LOOP_MERGE_BIT | ParsedIR::BLOCK_META_MULTISELECT_MERGE_BIT)) != 0; + } + + inline bool is_loop_break(uint32_t 
next) const + { + return (ir.block_meta[next] & ParsedIR::BLOCK_META_LOOP_MERGE_BIT) != 0; + } + + inline bool is_conditional(uint32_t next) const + { + return (ir.block_meta[next] & + (ParsedIR::BLOCK_META_SELECTION_MERGE_BIT | ParsedIR::BLOCK_META_MULTISELECT_MERGE_BIT)) != 0; + } + + // Dependency tracking for temporaries read from variables. + void flush_dependees(SPIRVariable &var); + void flush_all_active_variables(); + void flush_control_dependent_expressions(uint32_t block); + void flush_all_atomic_capable_variables(); + void flush_all_aliased_variables(); + void register_global_read_dependencies(const SPIRBlock &func, uint32_t id); + void register_global_read_dependencies(const SPIRFunction &func, uint32_t id); + std::unordered_set invalid_expressions; + + void update_name_cache(std::unordered_set &cache, std::string &name); + + // A variant which takes two sets of names. The secondary is only used to verify there are no collisions, + // but the set is not updated when we have found a new name. + // Used primarily when adding block interface names. 
+ void update_name_cache(std::unordered_set &cache_primary, + const std::unordered_set &cache_secondary, std::string &name); + + bool function_is_pure(const SPIRFunction &func); + bool block_is_pure(const SPIRBlock &block); + + bool execution_is_branchless(const SPIRBlock &from, const SPIRBlock &to) const; + bool execution_is_direct_branch(const SPIRBlock &from, const SPIRBlock &to) const; + bool execution_is_noop(const SPIRBlock &from, const SPIRBlock &to) const; + SPIRBlock::ContinueBlockType continue_block_type(const SPIRBlock &continue_block) const; + + void force_recompile(); + void clear_force_recompile(); + bool is_forcing_recompilation() const; + bool is_force_recompile = false; + + bool block_is_loop_candidate(const SPIRBlock &block, SPIRBlock::Method method) const; + + bool types_are_logically_equivalent(const SPIRType &a, const SPIRType &b) const; + void inherit_expression_dependencies(uint32_t dst, uint32_t source); + void add_implied_read_expression(SPIRExpression &e, uint32_t source); + void add_implied_read_expression(SPIRAccessChain &e, uint32_t source); + + // For proper multiple entry point support, allow querying if an Input or Output + // variable is part of that entry points interface. + bool interface_variable_exists_in_entry_point(uint32_t id) const; + + SmallVector combined_image_samplers; + + void remap_variable_type_name(const SPIRType &type, const std::string &var_name, std::string &type_name) const + { + if (variable_remap_callback) + variable_remap_callback(type, var_name, type_name); + } + + void set_ir(const ParsedIR &parsed); + void set_ir(ParsedIR &&parsed); + void parse_fixup(); + + // Used internally to implement various traversals for queries. + struct OpcodeHandler + { + virtual ~OpcodeHandler() = default; + + // Return true if traversal should continue. + // If false, traversal will end immediately. 
+ virtual bool handle(spv::Op opcode, const uint32_t *args, uint32_t length) = 0; + + virtual bool follow_function_call(const SPIRFunction &) + { + return true; + } + + virtual void set_current_block(const SPIRBlock &) + { + } + + // Called after returning from a function or when entering a block, + // can be called multiple times per block, + // while set_current_block is only called on block entry. + virtual void rearm_current_block(const SPIRBlock &) + { + } + + virtual bool begin_function_scope(const uint32_t *, uint32_t) + { + return true; + } + + virtual bool end_function_scope(const uint32_t *, uint32_t) + { + return true; + } + }; + + struct BufferAccessHandler : OpcodeHandler + { + BufferAccessHandler(const Compiler &compiler_, SmallVector &ranges_, uint32_t id_) + : compiler(compiler_) + , ranges(ranges_) + , id(id_) + { + } + + bool handle(spv::Op opcode, const uint32_t *args, uint32_t length) override; + + const Compiler &compiler; + SmallVector &ranges; + uint32_t id; + + std::unordered_set seen; + }; + + struct InterfaceVariableAccessHandler : OpcodeHandler + { + InterfaceVariableAccessHandler(const Compiler &compiler_, std::unordered_set &variables_) + : compiler(compiler_) + , variables(variables_) + { + } + + bool handle(spv::Op opcode, const uint32_t *args, uint32_t length) override; + + const Compiler &compiler; + std::unordered_set &variables; + }; + + struct CombinedImageSamplerHandler : OpcodeHandler + { + CombinedImageSamplerHandler(Compiler &compiler_) + : compiler(compiler_) + { + } + bool handle(spv::Op opcode, const uint32_t *args, uint32_t length) override; + bool begin_function_scope(const uint32_t *args, uint32_t length) override; + bool end_function_scope(const uint32_t *args, uint32_t length) override; + + Compiler &compiler; + + // Each function in the call stack needs its own remapping for parameters so we can deduce which global variable each texture/sampler the parameter is statically bound to. 
+ std::stack> parameter_remapping; + std::stack functions; + + uint32_t remap_parameter(uint32_t id); + void push_remap_parameters(const SPIRFunction &func, const uint32_t *args, uint32_t length); + void pop_remap_parameters(); + void register_combined_image_sampler(SPIRFunction &caller, VariableID combined_id, VariableID texture_id, + VariableID sampler_id, bool depth); + }; + + struct DummySamplerForCombinedImageHandler : OpcodeHandler + { + DummySamplerForCombinedImageHandler(Compiler &compiler_) + : compiler(compiler_) + { + } + bool handle(spv::Op opcode, const uint32_t *args, uint32_t length) override; + + Compiler &compiler; + bool need_dummy_sampler = false; + }; + + struct ActiveBuiltinHandler : OpcodeHandler + { + ActiveBuiltinHandler(Compiler &compiler_) + : compiler(compiler_) + { + } + + bool handle(spv::Op opcode, const uint32_t *args, uint32_t length) override; + Compiler &compiler; + + void handle_builtin(const SPIRType &type, spv::BuiltIn builtin, const Bitset &decoration_flags); + }; + + bool traverse_all_reachable_opcodes(const SPIRBlock &block, OpcodeHandler &handler) const; + bool traverse_all_reachable_opcodes(const SPIRFunction &block, OpcodeHandler &handler) const; + // This must be an ordered data structure so we always pick the same type aliases. 
+ SmallVector global_struct_cache; + + ShaderResources get_shader_resources(const std::unordered_set *active_variables) const; + + VariableTypeRemapCallback variable_remap_callback; + + bool get_common_basic_type(const SPIRType &type, SPIRType::BaseType &base_type); + + std::unordered_set forced_temporaries; + std::unordered_set forwarded_temporaries; + std::unordered_set suppressed_usage_tracking; + std::unordered_set hoisted_temporaries; + std::unordered_set forced_invariant_temporaries; + + Bitset active_input_builtins; + Bitset active_output_builtins; + uint32_t clip_distance_count = 0; + uint32_t cull_distance_count = 0; + bool position_invariant = false; + + void analyze_parameter_preservation( + SPIRFunction &entry, const CFG &cfg, + const std::unordered_map> &variable_to_blocks, + const std::unordered_map> &complete_write_blocks); + + // If a variable ID or parameter ID is found in this set, a sampler is actually a shadow/comparison sampler. + // SPIR-V does not support this distinction, so we must keep track of this information outside the type system. + // There might be unrelated IDs found in this set which do not correspond to actual variables. + // This set should only be queried for the existence of samplers which are already known to be variables or parameter IDs. + // Similar tracking is implemented for images, as well as for whether subpass inputs are needed. + std::unordered_set comparison_ids; + bool need_subpass_input = false; + + // In certain backends, we will need to use a dummy sampler to be able to emit code. + // GLSL does not support texelFetch on texture2D objects, but SPIR-V does, + // so we need to work around this by having the application inject a dummy sampler. 
+ uint32_t dummy_sampler_id = 0; + + void analyze_image_and_sampler_usage(); + + struct CombinedImageSamplerDrefHandler : OpcodeHandler + { + CombinedImageSamplerDrefHandler(Compiler &compiler_) + : compiler(compiler_) + { + } + bool handle(spv::Op opcode, const uint32_t *args, uint32_t length) override; + + Compiler &compiler; + std::unordered_set dref_combined_samplers; + }; + + struct CombinedImageSamplerUsageHandler : OpcodeHandler + { + CombinedImageSamplerUsageHandler(Compiler &compiler_, + const std::unordered_set &dref_combined_samplers_) + : compiler(compiler_) + , dref_combined_samplers(dref_combined_samplers_) + { + } + + bool begin_function_scope(const uint32_t *args, uint32_t length) override; + bool handle(spv::Op opcode, const uint32_t *args, uint32_t length) override; + Compiler &compiler; + const std::unordered_set &dref_combined_samplers; + + std::unordered_map> dependency_hierarchy; + std::unordered_set comparison_ids; + + void add_hierarchy_to_comparison_ids(uint32_t ids); + bool need_subpass_input = false; + void add_dependency(uint32_t dst, uint32_t src); + }; + + void build_function_control_flow_graphs_and_analyze(); + std::unordered_map> function_cfgs; + const CFG &get_cfg_for_current_function() const; + const CFG &get_cfg_for_function(uint32_t id) const; + + struct CFGBuilder : OpcodeHandler + { + explicit CFGBuilder(Compiler &compiler_); + + bool follow_function_call(const SPIRFunction &func) override; + bool handle(spv::Op op, const uint32_t *args, uint32_t length) override; + Compiler &compiler; + std::unordered_map> function_cfgs; + }; + + struct AnalyzeVariableScopeAccessHandler : OpcodeHandler + { + AnalyzeVariableScopeAccessHandler(Compiler &compiler_, SPIRFunction &entry_); + + bool follow_function_call(const SPIRFunction &) override; + void set_current_block(const SPIRBlock &block) override; + + void notify_variable_access(uint32_t id, uint32_t block); + bool id_is_phi_variable(uint32_t id) const; + bool 
id_is_potential_temporary(uint32_t id) const; + bool handle(spv::Op op, const uint32_t *args, uint32_t length) override; + + Compiler &compiler; + SPIRFunction &entry; + std::unordered_map> accessed_variables_to_block; + std::unordered_map> accessed_temporaries_to_block; + std::unordered_map result_id_to_type; + std::unordered_map> complete_write_variables_to_block; + std::unordered_map> partial_write_variables_to_block; + std::unordered_set access_chain_expressions; + // Access chains used in multiple blocks mean hoisting all the variables used to construct the access chain as not all backends can use pointers. + std::unordered_map> access_chain_children; + const SPIRBlock *current_block = nullptr; + }; + + struct StaticExpressionAccessHandler : OpcodeHandler + { + StaticExpressionAccessHandler(Compiler &compiler_, uint32_t variable_id_); + bool follow_function_call(const SPIRFunction &) override; + bool handle(spv::Op op, const uint32_t *args, uint32_t length) override; + + Compiler &compiler; + uint32_t variable_id; + uint32_t static_expression = 0; + uint32_t write_count = 0; + }; + + struct PhysicalStorageBufferPointerHandler : OpcodeHandler + { + explicit PhysicalStorageBufferPointerHandler(Compiler &compiler_); + bool handle(spv::Op op, const uint32_t *args, uint32_t length) override; + Compiler &compiler; + std::unordered_set types; + }; + void analyze_non_block_pointer_types(); + SmallVector physical_storage_non_block_pointer_types; + + void analyze_variable_scope(SPIRFunction &function, AnalyzeVariableScopeAccessHandler &handler); + void find_function_local_luts(SPIRFunction &function, const AnalyzeVariableScopeAccessHandler &handler, + bool single_function); + bool may_read_undefined_variable_in_block(const SPIRBlock &block, uint32_t var); + + // Finds all resources that are written to from inside the critical section, if present. + // The critical section is delimited by OpBeginInvocationInterlockEXT and + // OpEndInvocationInterlockEXT instructions. 
In MSL and HLSL, any resources written + // while inside the critical section must be placed in a raster order group. + struct InterlockedResourceAccessHandler : OpcodeHandler + { + InterlockedResourceAccessHandler(Compiler &compiler_, uint32_t entry_point_id) + : compiler(compiler_) + { + call_stack.push_back(entry_point_id); + } + + bool handle(spv::Op op, const uint32_t *args, uint32_t length) override; + bool begin_function_scope(const uint32_t *args, uint32_t length) override; + bool end_function_scope(const uint32_t *args, uint32_t length) override; + + Compiler &compiler; + bool in_crit_sec = false; + + uint32_t interlock_function_id = 0; + bool split_function_case = false; + bool control_flow_interlock = false; + bool use_critical_section = false; + bool call_stack_is_interlocked = false; + SmallVector call_stack; + + void access_potential_resource(uint32_t id); + }; + + struct InterlockedResourceAccessPrepassHandler : OpcodeHandler + { + InterlockedResourceAccessPrepassHandler(Compiler &compiler_, uint32_t entry_point_id) + : compiler(compiler_) + { + call_stack.push_back(entry_point_id); + } + + void rearm_current_block(const SPIRBlock &block) override; + bool handle(spv::Op op, const uint32_t *args, uint32_t length) override; + bool begin_function_scope(const uint32_t *args, uint32_t length) override; + bool end_function_scope(const uint32_t *args, uint32_t length) override; + + Compiler &compiler; + uint32_t interlock_function_id = 0; + uint32_t current_block_id = 0; + bool split_function_case = false; + bool control_flow_interlock = false; + SmallVector call_stack; + }; + + void analyze_interlocked_resource_usage(); + // The set of all resources written while inside the critical section, if present. 
+ std::unordered_set interlocked_resources; + bool interlocked_is_complex = false; + + void make_constant_null(uint32_t id, uint32_t type); + + std::unordered_map declared_block_names; + + bool instruction_to_result_type(uint32_t &result_type, uint32_t &result_id, spv::Op op, const uint32_t *args, + uint32_t length); + + Bitset combined_decoration_for_member(const SPIRType &type, uint32_t index) const; + static bool is_desktop_only_format(spv::ImageFormat format); + + bool image_is_comparison(const SPIRType &type, uint32_t id) const; + + void set_extended_decoration(uint32_t id, ExtendedDecorations decoration, uint32_t value = 0); + uint32_t get_extended_decoration(uint32_t id, ExtendedDecorations decoration) const; + bool has_extended_decoration(uint32_t id, ExtendedDecorations decoration) const; + void unset_extended_decoration(uint32_t id, ExtendedDecorations decoration); + + void set_extended_member_decoration(uint32_t type, uint32_t index, ExtendedDecorations decoration, + uint32_t value = 0); + uint32_t get_extended_member_decoration(uint32_t type, uint32_t index, ExtendedDecorations decoration) const; + bool has_extended_member_decoration(uint32_t type, uint32_t index, ExtendedDecorations decoration) const; + void unset_extended_member_decoration(uint32_t type, uint32_t index, ExtendedDecorations decoration); + + bool type_is_array_of_pointers(const SPIRType &type) const; + bool type_is_top_level_physical_pointer(const SPIRType &type) const; + bool type_is_block_like(const SPIRType &type) const; + bool type_is_opaque_value(const SPIRType &type) const; + + bool reflection_ssbo_instance_name_is_significant() const; + std::string get_remapped_declared_block_name(uint32_t id, bool fallback_prefer_instance_name) const; + + bool flush_phi_required(BlockID from, BlockID to) const; + + uint32_t evaluate_spec_constant_u32(const SPIRConstantOp &spec) const; + uint32_t evaluate_constant_u32(uint32_t id) const; + + bool is_vertex_like_shader() const; + +private: + // 
Used only to implement the old deprecated get_entry_point() interface. + const SPIREntryPoint &get_first_entry_point(const std::string &name) const; + SPIREntryPoint &get_first_entry_point(const std::string &name); +}; +} // namespace SPIRV_CROSS_NAMESPACE + +#endif diff --git a/dep/spirv-cross/spirv_cross_c.cpp b/dep/spirv-cross/spirv_cross_c.cpp new file mode 100644 index 000000000..a3a302105 --- /dev/null +++ b/dep/spirv-cross/spirv_cross_c.cpp @@ -0,0 +1,2514 @@ +/* + * Copyright 2019-2020 Hans-Kristian Arntzen + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * At your option, you may choose to accept this material under either: + * 1. The Apache License, Version 2.0, found at , or + * 2. The MIT License, found at . + * SPDX-License-Identifier: Apache-2.0 OR MIT. 
+ */ + +#include "spirv_cross_c.h" + +#if SPIRV_CROSS_C_API_CPP +#include "spirv_cpp.hpp" +#endif +#if SPIRV_CROSS_C_API_GLSL +#include "spirv_glsl.hpp" +#else +#include "spirv_cross.hpp" +#endif +#if SPIRV_CROSS_C_API_HLSL +#include "spirv_hlsl.hpp" +#endif +#if SPIRV_CROSS_C_API_MSL +#include "spirv_msl.hpp" +#endif +#if SPIRV_CROSS_C_API_REFLECT +#include "spirv_reflect.hpp" +#endif + +#ifdef HAVE_SPIRV_CROSS_GIT_VERSION +#include "gitversion.h" +#endif + +#include "spirv_parser.hpp" +#include +#include +#include + +// clang-format off + +#ifdef _MSC_VER +#pragma warning(push) +#pragma warning(disable : 4996) +#endif + +#ifndef SPIRV_CROSS_EXCEPTIONS_TO_ASSERTIONS +#define SPVC_BEGIN_SAFE_SCOPE try +#else +#define SPVC_BEGIN_SAFE_SCOPE +#endif + +#ifndef SPIRV_CROSS_EXCEPTIONS_TO_ASSERTIONS +#define SPVC_END_SAFE_SCOPE(context, error) \ + catch (const std::exception &e) \ + { \ + (context)->report_error(e.what()); \ + return (error); \ + } +#else +#define SPVC_END_SAFE_SCOPE(context, error) +#endif + +using namespace std; +using namespace SPIRV_CROSS_NAMESPACE; + +struct ScratchMemoryAllocation +{ + virtual ~ScratchMemoryAllocation() = default; +}; + +struct StringAllocation : ScratchMemoryAllocation +{ + explicit StringAllocation(const char *name) + : str(name) + { + } + + explicit StringAllocation(std::string name) + : str(std::move(name)) + { + } + + std::string str; +}; + +template +struct TemporaryBuffer : ScratchMemoryAllocation +{ + SmallVector buffer; +}; + +template +static inline std::unique_ptr spvc_allocate(Ts &&... 
ts) +{ + return std::unique_ptr(new T(std::forward(ts)...)); +} + +struct spvc_context_s +{ + string last_error; + SmallVector> allocations; + const char *allocate_name(const std::string &name); + + spvc_error_callback callback = nullptr; + void *callback_userdata = nullptr; + void report_error(std::string msg); +}; + +void spvc_context_s::report_error(std::string msg) +{ + last_error = std::move(msg); + if (callback) + callback(callback_userdata, last_error.c_str()); +} + +const char *spvc_context_s::allocate_name(const std::string &name) +{ + SPVC_BEGIN_SAFE_SCOPE + { + auto alloc = spvc_allocate(name); + auto *ret = alloc->str.c_str(); + allocations.emplace_back(std::move(alloc)); + return ret; + } + SPVC_END_SAFE_SCOPE(this, nullptr) +} + +struct spvc_parsed_ir_s : ScratchMemoryAllocation +{ + spvc_context context = nullptr; + ParsedIR parsed; +}; + +struct spvc_compiler_s : ScratchMemoryAllocation +{ + spvc_context context = nullptr; + unique_ptr compiler; + spvc_backend backend = SPVC_BACKEND_NONE; +}; + +struct spvc_compiler_options_s : ScratchMemoryAllocation +{ + spvc_context context = nullptr; + uint32_t backend_flags = 0; +#if SPIRV_CROSS_C_API_GLSL + CompilerGLSL::Options glsl; +#endif +#if SPIRV_CROSS_C_API_MSL + CompilerMSL::Options msl; +#endif +#if SPIRV_CROSS_C_API_HLSL + CompilerHLSL::Options hlsl; +#endif +}; + +struct spvc_set_s : ScratchMemoryAllocation +{ + std::unordered_set set; +}; + +// Dummy-inherit to we can keep our opaque type handle type safe in C-land as well, +// and avoid just throwing void * around. 
+struct spvc_type_s : SPIRType
+{
+};
+
+struct spvc_constant_s : SPIRConstant
+{
+};
+
+struct spvc_resources_s : ScratchMemoryAllocation
+{
+	spvc_context context = nullptr; // Context this reflection result was created from.
+	SmallVector<spvc_reflected_resource> uniform_buffers; // One list per resource category, in C-API form.
+	SmallVector<spvc_reflected_resource> storage_buffers;
+	SmallVector<spvc_reflected_resource> stage_inputs;
+	SmallVector<spvc_reflected_resource> stage_outputs;
+	SmallVector<spvc_reflected_resource> subpass_inputs;
+	SmallVector<spvc_reflected_resource> storage_images;
+	SmallVector<spvc_reflected_resource> sampled_images;
+	SmallVector<spvc_reflected_resource> atomic_counters;
+	SmallVector<spvc_reflected_resource> push_constant_buffers;
+	SmallVector<spvc_reflected_resource> separate_images;
+	SmallVector<spvc_reflected_resource> separate_samplers;
+	SmallVector<spvc_reflected_resource> acceleration_structures;
+
+	bool copy_resources(SmallVector<spvc_reflected_resource> &outputs, const SmallVector<Resource> &inputs);
+	bool copy_resources(const ShaderResources &resources);
+};
+
+spvc_result spvc_context_create(spvc_context *context)
+{
+	auto *ctx = new (std::nothrow) spvc_context_s; // nothrow: failure yields nullptr instead of an exception.
+	if (!ctx)
+		return SPVC_ERROR_OUT_OF_MEMORY;
+
+	*context = ctx;
+	return SPVC_SUCCESS;
+}
+
+void spvc_context_destroy(spvc_context context)
+{
+	delete context;
+}
+
+void spvc_context_release_allocations(spvc_context context)
+{
+	context->allocations.clear(); // Drops every object currently held in the context's allocation list.
+}
+
+const char *spvc_context_get_last_error_string(spvc_context context)
+{
+	return context->last_error.c_str(); // Points into the context's own string storage.
+}
+
+SPVC_PUBLIC_API void spvc_context_set_error_callback(spvc_context context, spvc_error_callback cb, void *userdata)
+{
+	context->callback = cb;
+	context->callback_userdata = userdata;
+}
+
+spvc_result spvc_context_parse_spirv(spvc_context context, const SpvId *spirv, size_t word_count,
+                                     spvc_parsed_ir *parsed_ir)
+{
+	SPVC_BEGIN_SAFE_SCOPE
+	{
+		std::unique_ptr<spvc_parsed_ir_s> pir(new (std::nothrow) spvc_parsed_ir_s);
+		if (!pir)
+		{
+			context->report_error("Out of memory.");
+			return SPVC_ERROR_OUT_OF_MEMORY;
+		}
+
+		pir->context = context;
+		Parser parser(spirv, word_count);
+		parser.parse();
+		pir->parsed = move(parser.get_parsed_ir());
+		*parsed_ir = pir.get(); // Raw handle for the caller; ownership moves to the context on the next line.
+		context->allocations.push_back(std::move(pir));
+	}
+	SPVC_END_SAFE_SCOPE(context, SPVC_ERROR_INVALID_SPIRV)
+	return SPVC_SUCCESS;
+}
+
+spvc_result spvc_context_create_compiler(spvc_context context, spvc_backend backend, spvc_parsed_ir parsed_ir, + spvc_capture_mode mode, spvc_compiler *compiler) +{ + SPVC_BEGIN_SAFE_SCOPE + { + std::unique_ptr comp(new (std::nothrow) spvc_compiler_s); + if (!comp) + { + context->report_error("Out of memory."); + return SPVC_ERROR_OUT_OF_MEMORY; + } + comp->backend = backend; + comp->context = context; + + if (mode != SPVC_CAPTURE_MODE_COPY && mode != SPVC_CAPTURE_MODE_TAKE_OWNERSHIP) + { + context->report_error("Invalid argument for capture mode."); + return SPVC_ERROR_INVALID_ARGUMENT; + } + + switch (backend) + { + case SPVC_BACKEND_NONE: + if (mode == SPVC_CAPTURE_MODE_TAKE_OWNERSHIP) + comp->compiler.reset(new Compiler(move(parsed_ir->parsed))); + else if (mode == SPVC_CAPTURE_MODE_COPY) + comp->compiler.reset(new Compiler(parsed_ir->parsed)); + break; + +#if SPIRV_CROSS_C_API_GLSL + case SPVC_BACKEND_GLSL: + if (mode == SPVC_CAPTURE_MODE_TAKE_OWNERSHIP) + comp->compiler.reset(new CompilerGLSL(move(parsed_ir->parsed))); + else if (mode == SPVC_CAPTURE_MODE_COPY) + comp->compiler.reset(new CompilerGLSL(parsed_ir->parsed)); + break; +#endif + +#if SPIRV_CROSS_C_API_HLSL + case SPVC_BACKEND_HLSL: + if (mode == SPVC_CAPTURE_MODE_TAKE_OWNERSHIP) + comp->compiler.reset(new CompilerHLSL(move(parsed_ir->parsed))); + else if (mode == SPVC_CAPTURE_MODE_COPY) + comp->compiler.reset(new CompilerHLSL(parsed_ir->parsed)); + break; +#endif + +#if SPIRV_CROSS_C_API_MSL + case SPVC_BACKEND_MSL: + if (mode == SPVC_CAPTURE_MODE_TAKE_OWNERSHIP) + comp->compiler.reset(new CompilerMSL(move(parsed_ir->parsed))); + else if (mode == SPVC_CAPTURE_MODE_COPY) + comp->compiler.reset(new CompilerMSL(parsed_ir->parsed)); + break; +#endif + +#if SPIRV_CROSS_C_API_CPP + case SPVC_BACKEND_CPP: + if (mode == SPVC_CAPTURE_MODE_TAKE_OWNERSHIP) + comp->compiler.reset(new CompilerCPP(move(parsed_ir->parsed))); + else if (mode == SPVC_CAPTURE_MODE_COPY) + comp->compiler.reset(new 
CompilerCPP(parsed_ir->parsed)); + break; +#endif + +#if SPIRV_CROSS_C_API_REFLECT + case SPVC_BACKEND_JSON: + if (mode == SPVC_CAPTURE_MODE_TAKE_OWNERSHIP) + comp->compiler.reset(new CompilerReflection(move(parsed_ir->parsed))); + else if (mode == SPVC_CAPTURE_MODE_COPY) + comp->compiler.reset(new CompilerReflection(parsed_ir->parsed)); + break; +#endif + + default: + context->report_error("Invalid backend."); + return SPVC_ERROR_INVALID_ARGUMENT; + } + + *compiler = comp.get(); + context->allocations.push_back(std::move(comp)); + } + SPVC_END_SAFE_SCOPE(context, SPVC_ERROR_OUT_OF_MEMORY) + return SPVC_SUCCESS; +} + +spvc_result spvc_compiler_create_compiler_options(spvc_compiler compiler, spvc_compiler_options *options) +{ + SPVC_BEGIN_SAFE_SCOPE + { + std::unique_ptr opt(new (std::nothrow) spvc_compiler_options_s); + if (!opt) + { + compiler->context->report_error("Out of memory."); + return SPVC_ERROR_OUT_OF_MEMORY; + } + + opt->context = compiler->context; + opt->backend_flags = 0; + switch (compiler->backend) + { +#if SPIRV_CROSS_C_API_MSL + case SPVC_BACKEND_MSL: + opt->backend_flags |= SPVC_COMPILER_OPTION_MSL_BIT | SPVC_COMPILER_OPTION_COMMON_BIT; + opt->glsl = static_cast(compiler->compiler.get())->get_common_options(); + opt->msl = static_cast(compiler->compiler.get())->get_msl_options(); + break; +#endif + +#if SPIRV_CROSS_C_API_HLSL + case SPVC_BACKEND_HLSL: + opt->backend_flags |= SPVC_COMPILER_OPTION_HLSL_BIT | SPVC_COMPILER_OPTION_COMMON_BIT; + opt->glsl = static_cast(compiler->compiler.get())->get_common_options(); + opt->hlsl = static_cast(compiler->compiler.get())->get_hlsl_options(); + break; +#endif + +#if SPIRV_CROSS_C_API_GLSL + case SPVC_BACKEND_GLSL: + opt->backend_flags |= SPVC_COMPILER_OPTION_GLSL_BIT | SPVC_COMPILER_OPTION_COMMON_BIT; + opt->glsl = static_cast(compiler->compiler.get())->get_common_options(); + break; +#endif + + default: + break; + } + + *options = opt.get(); + compiler->context->allocations.push_back(std::move(opt)); + 
} + SPVC_END_SAFE_SCOPE(compiler->context, SPVC_ERROR_OUT_OF_MEMORY) + return SPVC_SUCCESS; +} + +spvc_result spvc_compiler_options_set_bool(spvc_compiler_options options, spvc_compiler_option option, + spvc_bool value) +{ + return spvc_compiler_options_set_uint(options, option, value ? 1 : 0); +} + +spvc_result spvc_compiler_options_set_uint(spvc_compiler_options options, spvc_compiler_option option, unsigned value) +{ + (void)value; + (void)option; + uint32_t supported_mask = options->backend_flags; + uint32_t required_mask = option & SPVC_COMPILER_OPTION_LANG_BITS; + if ((required_mask | supported_mask) != supported_mask) + { + options->context->report_error("Option is not supported by current backend."); + return SPVC_ERROR_INVALID_ARGUMENT; + } + + switch (option) + { +#if SPIRV_CROSS_C_API_GLSL + case SPVC_COMPILER_OPTION_FORCE_TEMPORARY: + options->glsl.force_temporary = value != 0; + break; + case SPVC_COMPILER_OPTION_FLATTEN_MULTIDIMENSIONAL_ARRAYS: + options->glsl.flatten_multidimensional_arrays = value != 0; + break; + case SPVC_COMPILER_OPTION_FIXUP_DEPTH_CONVENTION: + options->glsl.vertex.fixup_clipspace = value != 0; + break; + case SPVC_COMPILER_OPTION_FLIP_VERTEX_Y: + options->glsl.vertex.flip_vert_y = value != 0; + break; + case SPVC_COMPILER_OPTION_EMIT_LINE_DIRECTIVES: + options->glsl.emit_line_directives = value != 0; + break; + case SPVC_COMPILER_OPTION_ENABLE_STORAGE_IMAGE_QUALIFIER_DEDUCTION: + options->glsl.enable_storage_image_qualifier_deduction = value != 0; + break; + case SPVC_COMPILER_OPTION_FORCE_ZERO_INITIALIZED_VARIABLES: + options->glsl.force_zero_initialized_variables = value != 0; + break; + + case SPVC_COMPILER_OPTION_GLSL_SUPPORT_NONZERO_BASE_INSTANCE: + options->glsl.vertex.support_nonzero_base_instance = value != 0; + break; + case SPVC_COMPILER_OPTION_GLSL_SEPARATE_SHADER_OBJECTS: + options->glsl.separate_shader_objects = value != 0; + break; + case SPVC_COMPILER_OPTION_GLSL_ENABLE_420PACK_EXTENSION: + 
options->glsl.enable_420pack_extension = value != 0; + break; + case SPVC_COMPILER_OPTION_GLSL_VERSION: + options->glsl.version = value; + break; + case SPVC_COMPILER_OPTION_GLSL_ES: + options->glsl.es = value != 0; + break; + case SPVC_COMPILER_OPTION_GLSL_VULKAN_SEMANTICS: + options->glsl.vulkan_semantics = value != 0; + break; + case SPVC_COMPILER_OPTION_GLSL_ES_DEFAULT_FLOAT_PRECISION_HIGHP: + options->glsl.fragment.default_float_precision = + value != 0 ? CompilerGLSL::Options::Precision::Highp : CompilerGLSL::Options::Precision::Mediump; + break; + case SPVC_COMPILER_OPTION_GLSL_ES_DEFAULT_INT_PRECISION_HIGHP: + options->glsl.fragment.default_int_precision = + value != 0 ? CompilerGLSL::Options::Precision::Highp : CompilerGLSL::Options::Precision::Mediump; + break; + case SPVC_COMPILER_OPTION_GLSL_EMIT_PUSH_CONSTANT_AS_UNIFORM_BUFFER: + options->glsl.emit_push_constant_as_uniform_buffer = value != 0; + break; + case SPVC_COMPILER_OPTION_GLSL_EMIT_UNIFORM_BUFFER_AS_PLAIN_UNIFORMS: + options->glsl.emit_uniform_buffer_as_plain_uniforms = value != 0; + break; + case SPVC_COMPILER_OPTION_GLSL_FORCE_FLATTENED_IO_BLOCKS: + options->glsl.force_flattened_io_blocks = value != 0; + break; +#endif + +#if SPIRV_CROSS_C_API_HLSL + case SPVC_COMPILER_OPTION_HLSL_SHADER_MODEL: + options->hlsl.shader_model = value; + break; + + case SPVC_COMPILER_OPTION_HLSL_POINT_SIZE_COMPAT: + options->hlsl.point_size_compat = value != 0; + break; + + case SPVC_COMPILER_OPTION_HLSL_POINT_COORD_COMPAT: + options->hlsl.point_coord_compat = value != 0; + break; + + case SPVC_COMPILER_OPTION_HLSL_SUPPORT_NONZERO_BASE_VERTEX_BASE_INSTANCE: + options->hlsl.support_nonzero_base_vertex_base_instance = value != 0; + break; + + case SPVC_COMPILER_OPTION_HLSL_FORCE_STORAGE_BUFFER_AS_UAV: + options->hlsl.force_storage_buffer_as_uav = value != 0; + break; + + case SPVC_COMPILER_OPTION_HLSL_NONWRITABLE_UAV_TEXTURE_AS_SRV: + options->hlsl.nonwritable_uav_texture_as_srv = value != 0; + break; + + case 
SPVC_COMPILER_OPTION_HLSL_ENABLE_16BIT_TYPES: + options->hlsl.enable_16bit_types = value != 0; + break; + + case SPVC_COMPILER_OPTION_HLSL_FLATTEN_MATRIX_VERTEX_INPUT_SEMANTICS: + options->hlsl.flatten_matrix_vertex_input_semantics = value != 0; + break; +#endif + +#if SPIRV_CROSS_C_API_MSL + case SPVC_COMPILER_OPTION_MSL_VERSION: + options->msl.msl_version = value; + break; + + case SPVC_COMPILER_OPTION_MSL_TEXEL_BUFFER_TEXTURE_WIDTH: + options->msl.texel_buffer_texture_width = value; + break; + + case SPVC_COMPILER_OPTION_MSL_SWIZZLE_BUFFER_INDEX: + options->msl.swizzle_buffer_index = value; + break; + + case SPVC_COMPILER_OPTION_MSL_INDIRECT_PARAMS_BUFFER_INDEX: + options->msl.indirect_params_buffer_index = value; + break; + + case SPVC_COMPILER_OPTION_MSL_SHADER_OUTPUT_BUFFER_INDEX: + options->msl.shader_output_buffer_index = value; + break; + + case SPVC_COMPILER_OPTION_MSL_SHADER_PATCH_OUTPUT_BUFFER_INDEX: + options->msl.shader_patch_output_buffer_index = value; + break; + + case SPVC_COMPILER_OPTION_MSL_SHADER_TESS_FACTOR_OUTPUT_BUFFER_INDEX: + options->msl.shader_tess_factor_buffer_index = value; + break; + + case SPVC_COMPILER_OPTION_MSL_SHADER_INPUT_WORKGROUP_INDEX: + options->msl.shader_input_wg_index = value; + break; + + case SPVC_COMPILER_OPTION_MSL_ENABLE_POINT_SIZE_BUILTIN: + options->msl.enable_point_size_builtin = value != 0; + break; + + case SPVC_COMPILER_OPTION_MSL_DISABLE_RASTERIZATION: + options->msl.disable_rasterization = value != 0; + break; + + case SPVC_COMPILER_OPTION_MSL_CAPTURE_OUTPUT_TO_BUFFER: + options->msl.capture_output_to_buffer = value != 0; + break; + + case SPVC_COMPILER_OPTION_MSL_SWIZZLE_TEXTURE_SAMPLES: + options->msl.swizzle_texture_samples = value != 0; + break; + + case SPVC_COMPILER_OPTION_MSL_PAD_FRAGMENT_OUTPUT_COMPONENTS: + options->msl.pad_fragment_output_components = value != 0; + break; + + case SPVC_COMPILER_OPTION_MSL_TESS_DOMAIN_ORIGIN_LOWER_LEFT: + options->msl.tess_domain_origin_lower_left = value != 0; + 
break; + + case SPVC_COMPILER_OPTION_MSL_PLATFORM: + options->msl.platform = static_cast(value); + break; + + case SPVC_COMPILER_OPTION_MSL_ARGUMENT_BUFFERS: + options->msl.argument_buffers = value != 0; + break; + + case SPVC_COMPILER_OPTION_MSL_TEXTURE_BUFFER_NATIVE: + options->msl.texture_buffer_native = value != 0; + break; + + case SPVC_COMPILER_OPTION_MSL_BUFFER_SIZE_BUFFER_INDEX: + options->msl.buffer_size_buffer_index = value; + break; + + case SPVC_COMPILER_OPTION_MSL_MULTIVIEW: + options->msl.multiview = value != 0; + break; + + case SPVC_COMPILER_OPTION_MSL_VIEW_MASK_BUFFER_INDEX: + options->msl.view_mask_buffer_index = value; + break; + + case SPVC_COMPILER_OPTION_MSL_DEVICE_INDEX: + options->msl.device_index = value; + break; + + case SPVC_COMPILER_OPTION_MSL_VIEW_INDEX_FROM_DEVICE_INDEX: + options->msl.view_index_from_device_index = value != 0; + break; + + case SPVC_COMPILER_OPTION_MSL_DISPATCH_BASE: + options->msl.dispatch_base = value != 0; + break; + + case SPVC_COMPILER_OPTION_MSL_DYNAMIC_OFFSETS_BUFFER_INDEX: + options->msl.dynamic_offsets_buffer_index = value; + break; + + case SPVC_COMPILER_OPTION_MSL_TEXTURE_1D_AS_2D: + options->msl.texture_1D_as_2D = value != 0; + break; + + case SPVC_COMPILER_OPTION_MSL_ENABLE_BASE_INDEX_ZERO: + options->msl.enable_base_index_zero = value != 0; + break; + + case SPVC_COMPILER_OPTION_MSL_FRAMEBUFFER_FETCH_SUBPASS: + options->msl.use_framebuffer_fetch_subpasses = value != 0; + break; + + case SPVC_COMPILER_OPTION_MSL_INVARIANT_FP_MATH: + options->msl.invariant_float_math = value != 0; + break; + + case SPVC_COMPILER_OPTION_MSL_EMULATE_CUBEMAP_ARRAY: + options->msl.emulate_cube_array = value != 0; + break; + + case SPVC_COMPILER_OPTION_MSL_ENABLE_DECORATION_BINDING: + options->msl.enable_decoration_binding = value != 0; + break; + + case SPVC_COMPILER_OPTION_MSL_FORCE_ACTIVE_ARGUMENT_BUFFER_RESOURCES: + options->msl.force_active_argument_buffer_resources = value != 0; + break; + + case 
SPVC_COMPILER_OPTION_MSL_FORCE_NATIVE_ARRAYS: + options->msl.force_native_arrays = value != 0; + break; + + case SPVC_COMPILER_OPTION_MSL_ENABLE_FRAG_OUTPUT_MASK: + options->msl.enable_frag_output_mask = value; + break; + + case SPVC_COMPILER_OPTION_MSL_ENABLE_FRAG_DEPTH_BUILTIN: + options->msl.enable_frag_depth_builtin = value != 0; + break; + + case SPVC_COMPILER_OPTION_MSL_ENABLE_FRAG_STENCIL_REF_BUILTIN: + options->msl.enable_frag_stencil_ref_builtin = value != 0; + break; + + case SPVC_COMPILER_OPTION_MSL_ENABLE_CLIP_DISTANCE_USER_VARYING: + options->msl.enable_clip_distance_user_varying = value != 0; + break; + + case SPVC_COMPILER_OPTION_MSL_MULTI_PATCH_WORKGROUP: + options->msl.multi_patch_workgroup = value != 0; + break; + + case SPVC_COMPILER_OPTION_MSL_SHADER_INPUT_BUFFER_INDEX: + options->msl.shader_input_buffer_index = value; + break; + + case SPVC_COMPILER_OPTION_MSL_SHADER_INDEX_BUFFER_INDEX: + options->msl.shader_index_buffer_index = value; + break; + + case SPVC_COMPILER_OPTION_MSL_VERTEX_FOR_TESSELLATION: + options->msl.vertex_for_tessellation = value != 0; + break; + + case SPVC_COMPILER_OPTION_MSL_VERTEX_INDEX_TYPE: + options->msl.vertex_index_type = static_cast(value); + break; + + case SPVC_COMPILER_OPTION_MSL_MULTIVIEW_LAYERED_RENDERING: + options->msl.multiview_layered_rendering = value != 0; + break; + + case SPVC_COMPILER_OPTION_MSL_ARRAYED_SUBPASS_INPUT: + options->msl.arrayed_subpass_input = value != 0; + break; + + case SPVC_COMPILER_OPTION_MSL_R32UI_LINEAR_TEXTURE_ALIGNMENT: + options->msl.r32ui_linear_texture_alignment = value; + break; + + case SPVC_COMPILER_OPTION_MSL_R32UI_ALIGNMENT_CONSTANT_ID: + options->msl.r32ui_alignment_constant_id = value; + break; + + case SPVC_COMPILER_OPTION_MSL_IOS_USE_SIMDGROUP_FUNCTIONS: + options->msl.ios_use_simdgroup_functions = value != 0; + break; + + case SPVC_COMPILER_OPTION_MSL_EMULATE_SUBGROUPS: + options->msl.emulate_subgroups = value != 0; + break; + + case 
SPVC_COMPILER_OPTION_MSL_FIXED_SUBGROUP_SIZE: + options->msl.fixed_subgroup_size = value; + break; + + case SPVC_COMPILER_OPTION_MSL_FORCE_SAMPLE_RATE_SHADING: + options->msl.force_sample_rate_shading = value != 0; + break; +#endif + + default: + options->context->report_error("Unknown option."); + return SPVC_ERROR_INVALID_ARGUMENT; + } + + return SPVC_SUCCESS; +} + +spvc_result spvc_compiler_install_compiler_options(spvc_compiler compiler, spvc_compiler_options options) +{ + (void)options; + switch (compiler->backend) + { +#if SPIRV_CROSS_C_API_GLSL + case SPVC_BACKEND_GLSL: + static_cast(*compiler->compiler).set_common_options(options->glsl); + break; +#endif + +#if SPIRV_CROSS_C_API_HLSL + case SPVC_BACKEND_HLSL: + static_cast(*compiler->compiler).set_common_options(options->glsl); + static_cast(*compiler->compiler).set_hlsl_options(options->hlsl); + break; +#endif + +#if SPIRV_CROSS_C_API_MSL + case SPVC_BACKEND_MSL: + static_cast(*compiler->compiler).set_common_options(options->glsl); + static_cast(*compiler->compiler).set_msl_options(options->msl); + break; +#endif + + default: + break; + } + + return SPVC_SUCCESS; +} + +spvc_result spvc_compiler_add_header_line(spvc_compiler compiler, const char *line) +{ +#if SPIRV_CROSS_C_API_GLSL + if (compiler->backend == SPVC_BACKEND_NONE) + { + compiler->context->report_error("Cross-compilation related option used on NONE backend which only supports reflection."); + return SPVC_ERROR_INVALID_ARGUMENT; + } + + static_cast(compiler->compiler.get())->add_header_line(line); + return SPVC_SUCCESS; +#else + (void)line; + compiler->context->report_error("Cross-compilation related option used on NONE backend which only supports reflection."); + return SPVC_ERROR_INVALID_ARGUMENT; +#endif +} + +spvc_result spvc_compiler_require_extension(spvc_compiler compiler, const char *line) +{ +#if SPIRV_CROSS_C_API_GLSL + if (compiler->backend == SPVC_BACKEND_NONE) + { + compiler->context->report_error("Cross-compilation related option 
used on NONE backend which only supports reflection."); + return SPVC_ERROR_INVALID_ARGUMENT; + } + + static_cast(compiler->compiler.get())->require_extension(line); + return SPVC_SUCCESS; +#else + (void)line; + compiler->context->report_error("Cross-compilation related option used on NONE backend which only supports reflection."); + return SPVC_ERROR_INVALID_ARGUMENT; +#endif +} + +spvc_result spvc_compiler_flatten_buffer_block(spvc_compiler compiler, spvc_variable_id id) +{ +#if SPIRV_CROSS_C_API_GLSL + if (compiler->backend == SPVC_BACKEND_NONE) + { + compiler->context->report_error("Cross-compilation related option used on NONE backend which only supports reflection."); + return SPVC_ERROR_INVALID_ARGUMENT; + } + + static_cast(compiler->compiler.get())->flatten_buffer_block(id); + return SPVC_SUCCESS; +#else + (void)id; + compiler->context->report_error("Cross-compilation related option used on NONE backend which only supports reflection."); + return SPVC_ERROR_INVALID_ARGUMENT; +#endif +} + +spvc_bool spvc_compiler_variable_is_depth_or_compare(spvc_compiler compiler, spvc_variable_id id) +{ +#if SPIRV_CROSS_C_API_GLSL + if (compiler->backend == SPVC_BACKEND_NONE) + { + compiler->context->report_error("Cross-compilation related option used on NONE backend which only supports reflection."); + return SPVC_ERROR_INVALID_ARGUMENT; + } + + return static_cast(compiler->compiler.get())->variable_is_depth_or_compare(id) ? 
SPVC_TRUE : SPVC_FALSE; +#else + (void)id; + compiler->context->report_error("Cross-compilation related option used on NONE backend which only supports reflection."); + return SPVC_FALSE; +#endif +} + +spvc_result spvc_compiler_hlsl_set_root_constants_layout(spvc_compiler compiler, + const spvc_hlsl_root_constants *constant_info, + size_t count) +{ +#if SPIRV_CROSS_C_API_HLSL + if (compiler->backend != SPVC_BACKEND_HLSL) + { + compiler->context->report_error("HLSL function used on a non-HLSL backend."); + return SPVC_ERROR_INVALID_ARGUMENT; + } + + auto &hlsl = *static_cast(compiler->compiler.get()); + vector roots; + roots.reserve(count); + for (size_t i = 0; i < count; i++) + { + RootConstants root; + root.binding = constant_info[i].binding; + root.space = constant_info[i].space; + root.start = constant_info[i].start; + root.end = constant_info[i].end; + roots.push_back(root); + } + + hlsl.set_root_constant_layouts(std::move(roots)); + return SPVC_SUCCESS; +#else + (void)constant_info; + (void)count; + compiler->context->report_error("HLSL function used on a non-HLSL backend."); + return SPVC_ERROR_INVALID_ARGUMENT; +#endif +} + +spvc_result spvc_compiler_hlsl_add_vertex_attribute_remap(spvc_compiler compiler, + const spvc_hlsl_vertex_attribute_remap *remap, + size_t count) +{ +#if SPIRV_CROSS_C_API_HLSL + if (compiler->backend != SPVC_BACKEND_HLSL) + { + compiler->context->report_error("HLSL function used on a non-HLSL backend."); + return SPVC_ERROR_INVALID_ARGUMENT; + } + + HLSLVertexAttributeRemap re; + auto &hlsl = *static_cast(compiler->compiler.get()); + for (size_t i = 0; i < count; i++) + { + re.location = remap[i].location; + re.semantic = remap[i].semantic; + hlsl.add_vertex_attribute_remap(re); + } + + return SPVC_SUCCESS; +#else + (void)remap; + (void)count; + compiler->context->report_error("HLSL function used on a non-HLSL backend."); + return SPVC_ERROR_INVALID_ARGUMENT; +#endif +} + +spvc_variable_id 
spvc_compiler_hlsl_remap_num_workgroups_builtin(spvc_compiler compiler) +{ +#if SPIRV_CROSS_C_API_HLSL + if (compiler->backend != SPVC_BACKEND_HLSL) + { + compiler->context->report_error("HLSL function used on a non-HLSL backend."); + return 0; + } + + auto &hlsl = *static_cast(compiler->compiler.get()); + return hlsl.remap_num_workgroups_builtin(); +#else + compiler->context->report_error("HLSL function used on a non-HLSL backend."); + return 0; +#endif +} + +spvc_result spvc_compiler_hlsl_set_resource_binding_flags(spvc_compiler compiler, + spvc_hlsl_binding_flags flags) +{ +#if SPIRV_CROSS_C_API_HLSL + if (compiler->backend != SPVC_BACKEND_HLSL) + { + compiler->context->report_error("HLSL function used on a non-HLSL backend."); + return SPVC_ERROR_INVALID_ARGUMENT; + } + + auto &hlsl = *static_cast(compiler->compiler.get()); + hlsl.set_resource_binding_flags(flags); + return SPVC_SUCCESS; +#else + (void)flags; + compiler->context->report_error("HLSL function used on a non-HLSL backend."); + return SPVC_ERROR_INVALID_ARGUMENT; +#endif +} + +spvc_result spvc_compiler_hlsl_add_resource_binding(spvc_compiler compiler, + const spvc_hlsl_resource_binding *binding) +{ +#if SPIRV_CROSS_C_API_HLSL + if (compiler->backend != SPVC_BACKEND_HLSL) + { + compiler->context->report_error("HLSL function used on a non-HLSL backend."); + return SPVC_ERROR_INVALID_ARGUMENT; + } + + auto &hlsl = *static_cast(compiler->compiler.get()); + HLSLResourceBinding bind; + bind.binding = binding->binding; + bind.desc_set = binding->desc_set; + bind.stage = static_cast(binding->stage); + bind.cbv.register_binding = binding->cbv.register_binding; + bind.cbv.register_space = binding->cbv.register_space; + bind.uav.register_binding = binding->uav.register_binding; + bind.uav.register_space = binding->uav.register_space; + bind.srv.register_binding = binding->srv.register_binding; + bind.srv.register_space = binding->srv.register_space; + bind.sampler.register_binding = 
binding->sampler.register_binding; + bind.sampler.register_space = binding->sampler.register_space; + hlsl.add_hlsl_resource_binding(bind); + return SPVC_SUCCESS; +#else + (void)binding; + compiler->context->report_error("HLSL function used on a non-HLSL backend."); + return SPVC_ERROR_INVALID_ARGUMENT; +#endif +} + +spvc_bool spvc_compiler_hlsl_is_resource_used(spvc_compiler compiler, SpvExecutionModel model, unsigned set, + unsigned binding) +{ +#if SPIRV_CROSS_C_API_HLSL + if (compiler->backend != SPVC_BACKEND_HLSL) + { + compiler->context->report_error("HLSL function used on a non-HLSL backend."); + return SPVC_FALSE; + } + + auto &hlsl = *static_cast(compiler->compiler.get()); + return hlsl.is_hlsl_resource_binding_used(static_cast(model), set, binding) ? SPVC_TRUE : + SPVC_FALSE; +#else + (void)model; + (void)set; + (void)binding; + compiler->context->report_error("HLSL function used on a non-HLSL backend."); + return SPVC_FALSE; +#endif +} + +spvc_bool spvc_compiler_msl_is_rasterization_disabled(spvc_compiler compiler) +{ +#if SPIRV_CROSS_C_API_MSL + if (compiler->backend != SPVC_BACKEND_MSL) + { + compiler->context->report_error("MSL function used on a non-MSL backend."); + return SPVC_FALSE; + } + + auto &msl = *static_cast(compiler->compiler.get()); + return msl.get_is_rasterization_disabled() ? SPVC_TRUE : SPVC_FALSE; +#else + compiler->context->report_error("MSL function used on a non-MSL backend."); + return SPVC_FALSE; +#endif +} + +spvc_bool spvc_compiler_msl_needs_swizzle_buffer(spvc_compiler compiler) +{ +#if SPIRV_CROSS_C_API_MSL + if (compiler->backend != SPVC_BACKEND_MSL) + { + compiler->context->report_error("MSL function used on a non-MSL backend."); + return SPVC_FALSE; + } + + auto &msl = *static_cast(compiler->compiler.get()); + return msl.needs_swizzle_buffer() ? 
SPVC_TRUE : SPVC_FALSE; +#else + compiler->context->report_error("MSL function used on a non-MSL backend."); + return SPVC_FALSE; +#endif +} + +spvc_bool spvc_compiler_msl_needs_buffer_size_buffer(spvc_compiler compiler) +{ +#if SPIRV_CROSS_C_API_MSL + if (compiler->backend != SPVC_BACKEND_MSL) + { + compiler->context->report_error("MSL function used on a non-MSL backend."); + return SPVC_FALSE; + } + + auto &msl = *static_cast(compiler->compiler.get()); + return msl.needs_buffer_size_buffer() ? SPVC_TRUE : SPVC_FALSE; +#else + compiler->context->report_error("MSL function used on a non-MSL backend."); + return SPVC_FALSE; +#endif +} + +spvc_bool spvc_compiler_msl_needs_aux_buffer(spvc_compiler compiler) +{ + return spvc_compiler_msl_needs_swizzle_buffer(compiler); +} + +spvc_bool spvc_compiler_msl_needs_output_buffer(spvc_compiler compiler) +{ +#if SPIRV_CROSS_C_API_MSL + if (compiler->backend != SPVC_BACKEND_MSL) + { + compiler->context->report_error("MSL function used on a non-MSL backend."); + return SPVC_FALSE; + } + + auto &msl = *static_cast(compiler->compiler.get()); + return msl.needs_output_buffer() ? SPVC_TRUE : SPVC_FALSE; +#else + compiler->context->report_error("MSL function used on a non-MSL backend."); + return SPVC_FALSE; +#endif +} + +spvc_bool spvc_compiler_msl_needs_patch_output_buffer(spvc_compiler compiler) +{ +#if SPIRV_CROSS_C_API_MSL + if (compiler->backend != SPVC_BACKEND_MSL) + { + compiler->context->report_error("MSL function used on a non-MSL backend."); + return SPVC_FALSE; + } + + auto &msl = *static_cast(compiler->compiler.get()); + return msl.needs_patch_output_buffer() ? 
SPVC_TRUE : SPVC_FALSE; +#else + compiler->context->report_error("MSL function used on a non-MSL backend."); + return SPVC_FALSE; +#endif +} + +spvc_bool spvc_compiler_msl_needs_input_threadgroup_mem(spvc_compiler compiler) +{ +#if SPIRV_CROSS_C_API_MSL + if (compiler->backend != SPVC_BACKEND_MSL) + { + compiler->context->report_error("MSL function used on a non-MSL backend."); + return SPVC_FALSE; + } + + auto &msl = *static_cast(compiler->compiler.get()); + return msl.needs_input_threadgroup_mem() ? SPVC_TRUE : SPVC_FALSE; +#else + compiler->context->report_error("MSL function used on a non-MSL backend."); + return SPVC_FALSE; +#endif +} + +spvc_result spvc_compiler_msl_add_vertex_attribute(spvc_compiler compiler, const spvc_msl_vertex_attribute *va) +{ +#if SPIRV_CROSS_C_API_MSL + if (compiler->backend != SPVC_BACKEND_MSL) + { + compiler->context->report_error("MSL function used on a non-MSL backend."); + return SPVC_ERROR_INVALID_ARGUMENT; + } + + auto &msl = *static_cast(compiler->compiler.get()); + MSLShaderInput attr; + attr.location = va->location; + attr.format = static_cast(va->format); + attr.builtin = static_cast(va->builtin); + msl.add_msl_shader_input(attr); + return SPVC_SUCCESS; +#else + (void)va; + compiler->context->report_error("MSL function used on a non-MSL backend."); + return SPVC_ERROR_INVALID_ARGUMENT; +#endif +} + +spvc_result spvc_compiler_msl_add_shader_input(spvc_compiler compiler, const spvc_msl_shader_input *si) +{ +#if SPIRV_CROSS_C_API_MSL + if (compiler->backend != SPVC_BACKEND_MSL) + { + compiler->context->report_error("MSL function used on a non-MSL backend."); + return SPVC_ERROR_INVALID_ARGUMENT; + } + + auto &msl = *static_cast(compiler->compiler.get()); + MSLShaderInput input; + input.location = si->location; + input.format = static_cast(si->format); + input.builtin = static_cast(si->builtin); + input.vecsize = si->vecsize; + msl.add_msl_shader_input(input); + return SPVC_SUCCESS; +#else + (void)si; + 
compiler->context->report_error("MSL function used on a non-MSL backend."); + return SPVC_ERROR_INVALID_ARGUMENT; +#endif +} + +spvc_result spvc_compiler_msl_add_resource_binding(spvc_compiler compiler, + const spvc_msl_resource_binding *binding) +{ +#if SPIRV_CROSS_C_API_MSL + if (compiler->backend != SPVC_BACKEND_MSL) + { + compiler->context->report_error("MSL function used on a non-MSL backend."); + return SPVC_ERROR_INVALID_ARGUMENT; + } + + auto &msl = *static_cast(compiler->compiler.get()); + MSLResourceBinding bind; + bind.binding = binding->binding; + bind.desc_set = binding->desc_set; + bind.stage = static_cast(binding->stage); + bind.msl_buffer = binding->msl_buffer; + bind.msl_texture = binding->msl_texture; + bind.msl_sampler = binding->msl_sampler; + msl.add_msl_resource_binding(bind); + return SPVC_SUCCESS; +#else + (void)binding; + compiler->context->report_error("MSL function used on a non-MSL backend."); + return SPVC_ERROR_INVALID_ARGUMENT; +#endif +} + +spvc_result spvc_compiler_msl_add_dynamic_buffer(spvc_compiler compiler, unsigned desc_set, unsigned binding, unsigned index) +{ +#if SPIRV_CROSS_C_API_MSL + if (compiler->backend != SPVC_BACKEND_MSL) + { + compiler->context->report_error("MSL function used on a non-MSL backend."); + return SPVC_ERROR_INVALID_ARGUMENT; + } + + auto &msl = *static_cast(compiler->compiler.get()); + msl.add_dynamic_buffer(desc_set, binding, index); + return SPVC_SUCCESS; +#else + (void)binding; + (void)desc_set; + (void)index; + compiler->context->report_error("MSL function used on a non-MSL backend."); + return SPVC_ERROR_INVALID_ARGUMENT; +#endif +} + +spvc_result spvc_compiler_msl_add_inline_uniform_block(spvc_compiler compiler, unsigned desc_set, unsigned binding) +{ +#if SPIRV_CROSS_C_API_MSL + if (compiler->backend != SPVC_BACKEND_MSL) + { + compiler->context->report_error("MSL function used on a non-MSL backend."); + return SPVC_ERROR_INVALID_ARGUMENT; + } + + auto &msl = 
*static_cast(compiler->compiler.get()); + msl.add_inline_uniform_block(desc_set, binding); + return SPVC_SUCCESS; +#else + (void)binding; + (void)desc_set; + compiler->context->report_error("MSL function used on a non-MSL backend."); + return SPVC_ERROR_INVALID_ARGUMENT; +#endif +} + +spvc_result spvc_compiler_msl_add_discrete_descriptor_set(spvc_compiler compiler, unsigned desc_set) +{ +#if SPIRV_CROSS_C_API_MSL + if (compiler->backend != SPVC_BACKEND_MSL) + { + compiler->context->report_error("MSL function used on a non-MSL backend."); + return SPVC_ERROR_INVALID_ARGUMENT; + } + + auto &msl = *static_cast(compiler->compiler.get()); + msl.add_discrete_descriptor_set(desc_set); + return SPVC_SUCCESS; +#else + (void)desc_set; + compiler->context->report_error("MSL function used on a non-MSL backend."); + return SPVC_ERROR_INVALID_ARGUMENT; +#endif +} + +spvc_result spvc_compiler_msl_set_argument_buffer_device_address_space(spvc_compiler compiler, unsigned desc_set, spvc_bool device_address) +{ +#if SPIRV_CROSS_C_API_MSL + if (compiler->backend != SPVC_BACKEND_MSL) + { + compiler->context->report_error("MSL function used on a non-MSL backend."); + return SPVC_ERROR_INVALID_ARGUMENT; + } + + auto &msl = *static_cast(compiler->compiler.get()); + msl.set_argument_buffer_device_address_space(desc_set, bool(device_address)); + return SPVC_SUCCESS; +#else + (void)desc_set; + (void)device_address; + compiler->context->report_error("MSL function used on a non-MSL backend."); + return SPVC_ERROR_INVALID_ARGUMENT; +#endif +} + +spvc_bool spvc_compiler_msl_is_shader_input_used(spvc_compiler compiler, unsigned location) +{ +#if SPIRV_CROSS_C_API_MSL + if (compiler->backend != SPVC_BACKEND_MSL) + { + compiler->context->report_error("MSL function used on a non-MSL backend."); + return SPVC_FALSE; + } + + auto &msl = *static_cast(compiler->compiler.get()); + return msl.is_msl_shader_input_used(location) ? 
SPVC_TRUE : SPVC_FALSE; +#else + (void)location; + compiler->context->report_error("MSL function used on a non-MSL backend."); + return SPVC_FALSE; +#endif +} + +spvc_bool spvc_compiler_msl_is_vertex_attribute_used(spvc_compiler compiler, unsigned location) +{ + return spvc_compiler_msl_is_shader_input_used(compiler, location); +} + +spvc_bool spvc_compiler_msl_is_resource_used(spvc_compiler compiler, SpvExecutionModel model, unsigned set, + unsigned binding) +{ +#if SPIRV_CROSS_C_API_MSL + if (compiler->backend != SPVC_BACKEND_MSL) + { + compiler->context->report_error("MSL function used on a non-MSL backend."); + return SPVC_FALSE; + } + + auto &msl = *static_cast(compiler->compiler.get()); + return msl.is_msl_resource_binding_used(static_cast(model), set, binding) ? SPVC_TRUE : + SPVC_FALSE; +#else + (void)model; + (void)set; + (void)binding; + compiler->context->report_error("MSL function used on a non-MSL backend."); + return SPVC_FALSE; +#endif +} + +spvc_result spvc_compiler_msl_set_combined_sampler_suffix(spvc_compiler compiler, const char *suffix) +{ +#if SPIRV_CROSS_C_API_MSL + if (compiler->backend != SPVC_BACKEND_MSL) + { + compiler->context->report_error("MSL function used on a non-MSL backend."); + return SPVC_ERROR_INVALID_ARGUMENT; + } + + auto &msl = *static_cast(compiler->compiler.get()); + msl.set_combined_sampler_suffix(suffix); + return SPVC_SUCCESS; +#else + (void)suffix; + compiler->context->report_error("MSL function used on a non-MSL backend."); + return SPVC_ERROR_INVALID_ARGUMENT; +#endif +} + +const char *spvc_compiler_msl_get_combined_sampler_suffix(spvc_compiler compiler) +{ +#if SPIRV_CROSS_C_API_MSL + if (compiler->backend != SPVC_BACKEND_MSL) + { + compiler->context->report_error("MSL function used on a non-MSL backend."); + return ""; + } + + auto &msl = *static_cast(compiler->compiler.get()); + return msl.get_combined_sampler_suffix(); +#else + compiler->context->report_error("MSL function used on a non-MSL backend."); + return 
""; +#endif +} + +#if SPIRV_CROSS_C_API_MSL +static void spvc_convert_msl_sampler(MSLConstexprSampler &samp, const spvc_msl_constexpr_sampler *sampler) +{ + samp.s_address = static_cast(sampler->s_address); + samp.t_address = static_cast(sampler->t_address); + samp.r_address = static_cast(sampler->r_address); + samp.lod_clamp_min = sampler->lod_clamp_min; + samp.lod_clamp_max = sampler->lod_clamp_max; + samp.lod_clamp_enable = sampler->lod_clamp_enable != 0; + samp.min_filter = static_cast(sampler->min_filter); + samp.mag_filter = static_cast(sampler->mag_filter); + samp.mip_filter = static_cast(sampler->mip_filter); + samp.compare_enable = sampler->compare_enable != 0; + samp.anisotropy_enable = sampler->anisotropy_enable != 0; + samp.max_anisotropy = sampler->max_anisotropy; + samp.compare_func = static_cast(sampler->compare_func); + samp.coord = static_cast(sampler->coord); + samp.border_color = static_cast(sampler->border_color); +} + +static void spvc_convert_msl_sampler_ycbcr_conversion(MSLConstexprSampler &samp, const spvc_msl_sampler_ycbcr_conversion *conv) +{ + samp.ycbcr_conversion_enable = conv != nullptr; + if (conv == nullptr) return; + samp.planes = conv->planes; + samp.resolution = static_cast(conv->resolution); + samp.chroma_filter = static_cast(conv->chroma_filter); + samp.x_chroma_offset = static_cast(conv->x_chroma_offset); + samp.y_chroma_offset = static_cast(conv->y_chroma_offset); + for (int i = 0; i < 4; i++) + samp.swizzle[i] = static_cast(conv->swizzle[i]); + samp.ycbcr_model = static_cast(conv->ycbcr_model); + samp.ycbcr_range = static_cast(conv->ycbcr_range); + samp.bpc = conv->bpc; +} +#endif + +spvc_result spvc_compiler_msl_remap_constexpr_sampler(spvc_compiler compiler, spvc_variable_id id, + const spvc_msl_constexpr_sampler *sampler) +{ +#if SPIRV_CROSS_C_API_MSL + if (compiler->backend != SPVC_BACKEND_MSL) + { + compiler->context->report_error("MSL function used on a non-MSL backend."); + return SPVC_ERROR_INVALID_ARGUMENT; + } + + 
auto &msl = *static_cast(compiler->compiler.get()); + MSLConstexprSampler samp; + spvc_convert_msl_sampler(samp, sampler); + msl.remap_constexpr_sampler(id, samp); + return SPVC_SUCCESS; +#else + (void)id; + (void)sampler; + compiler->context->report_error("MSL function used on a non-MSL backend."); + return SPVC_ERROR_INVALID_ARGUMENT; +#endif +} + +spvc_result spvc_compiler_msl_remap_constexpr_sampler_by_binding(spvc_compiler compiler, + unsigned desc_set, unsigned binding, + const spvc_msl_constexpr_sampler *sampler) +{ +#if SPIRV_CROSS_C_API_MSL + if (compiler->backend != SPVC_BACKEND_MSL) + { + compiler->context->report_error("MSL function used on a non-MSL backend."); + return SPVC_ERROR_INVALID_ARGUMENT; + } + + auto &msl = *static_cast(compiler->compiler.get()); + MSLConstexprSampler samp; + spvc_convert_msl_sampler(samp, sampler); + msl.remap_constexpr_sampler_by_binding(desc_set, binding, samp); + return SPVC_SUCCESS; +#else + (void)desc_set; + (void)binding; + (void)sampler; + compiler->context->report_error("MSL function used on a non-MSL backend."); + return SPVC_ERROR_INVALID_ARGUMENT; +#endif +} + +spvc_result spvc_compiler_msl_remap_constexpr_sampler_ycbcr(spvc_compiler compiler, spvc_variable_id id, + const spvc_msl_constexpr_sampler *sampler, + const spvc_msl_sampler_ycbcr_conversion *conv) +{ +#if SPIRV_CROSS_C_API_MSL + if (compiler->backend != SPVC_BACKEND_MSL) + { + compiler->context->report_error("MSL function used on a non-MSL backend."); + return SPVC_ERROR_INVALID_ARGUMENT; + } + + auto &msl = *static_cast(compiler->compiler.get()); + MSLConstexprSampler samp; + spvc_convert_msl_sampler(samp, sampler); + spvc_convert_msl_sampler_ycbcr_conversion(samp, conv); + msl.remap_constexpr_sampler(id, samp); + return SPVC_SUCCESS; +#else + (void)id; + (void)sampler; + (void)conv; + compiler->context->report_error("MSL function used on a non-MSL backend."); + return SPVC_ERROR_INVALID_ARGUMENT; +#endif +} + +spvc_result 
spvc_compiler_msl_remap_constexpr_sampler_by_binding_ycbcr(spvc_compiler compiler, + unsigned desc_set, unsigned binding, + const spvc_msl_constexpr_sampler *sampler, + const spvc_msl_sampler_ycbcr_conversion *conv) +{ +#if SPIRV_CROSS_C_API_MSL + if (compiler->backend != SPVC_BACKEND_MSL) + { + compiler->context->report_error("MSL function used on a non-MSL backend."); + return SPVC_ERROR_INVALID_ARGUMENT; + } + + auto &msl = *static_cast(compiler->compiler.get()); + MSLConstexprSampler samp; + spvc_convert_msl_sampler(samp, sampler); + spvc_convert_msl_sampler_ycbcr_conversion(samp, conv); + msl.remap_constexpr_sampler_by_binding(desc_set, binding, samp); + return SPVC_SUCCESS; +#else + (void)desc_set; + (void)binding; + (void)sampler; + (void)conv; + compiler->context->report_error("MSL function used on a non-MSL backend."); + return SPVC_ERROR_INVALID_ARGUMENT; +#endif +} + +spvc_result spvc_compiler_msl_set_fragment_output_components(spvc_compiler compiler, unsigned location, + unsigned components) +{ +#if SPIRV_CROSS_C_API_MSL + if (compiler->backend != SPVC_BACKEND_MSL) + { + compiler->context->report_error("MSL function used on a non-MSL backend."); + return SPVC_ERROR_INVALID_ARGUMENT; + } + + auto &msl = *static_cast(compiler->compiler.get()); + msl.set_fragment_output_components(location, components); + return SPVC_SUCCESS; +#else + (void)location; + (void)components; + compiler->context->report_error("MSL function used on a non-MSL backend."); + return SPVC_ERROR_INVALID_ARGUMENT; +#endif +} + +unsigned spvc_compiler_msl_get_automatic_resource_binding(spvc_compiler compiler, spvc_variable_id id) +{ +#if SPIRV_CROSS_C_API_MSL + if (compiler->backend != SPVC_BACKEND_MSL) + { + compiler->context->report_error("MSL function used on a non-MSL backend."); + return uint32_t(-1); + } + + auto &msl = *static_cast(compiler->compiler.get()); + return msl.get_automatic_msl_resource_binding(id); +#else + (void)id; + compiler->context->report_error("MSL function used 
on a non-MSL backend."); + return uint32_t(-1); +#endif +} + +unsigned spvc_compiler_msl_get_automatic_resource_binding_secondary(spvc_compiler compiler, spvc_variable_id id) +{ +#if SPIRV_CROSS_C_API_MSL + if (compiler->backend != SPVC_BACKEND_MSL) + { + compiler->context->report_error("MSL function used on a non-MSL backend."); + return uint32_t(-1); + } + + auto &msl = *static_cast(compiler->compiler.get()); + return msl.get_automatic_msl_resource_binding_secondary(id); +#else + (void)id; + compiler->context->report_error("MSL function used on a non-MSL backend."); + return uint32_t(-1); +#endif +} + +spvc_result spvc_compiler_compile(spvc_compiler compiler, const char **source) +{ + SPVC_BEGIN_SAFE_SCOPE + { + auto result = compiler->compiler->compile(); + if (result.empty()) + { + compiler->context->report_error("Unsupported SPIR-V."); + return SPVC_ERROR_UNSUPPORTED_SPIRV; + } + + *source = compiler->context->allocate_name(result); + if (!*source) + { + compiler->context->report_error("Out of memory."); + return SPVC_ERROR_OUT_OF_MEMORY; + } + return SPVC_SUCCESS; + } + SPVC_END_SAFE_SCOPE(compiler->context, SPVC_ERROR_UNSUPPORTED_SPIRV) +} + +bool spvc_resources_s::copy_resources(SmallVector &outputs, + const SmallVector &inputs) +{ + for (auto &i : inputs) + { + spvc_reflected_resource r; + r.base_type_id = i.base_type_id; + r.type_id = i.type_id; + r.id = i.id; + r.name = context->allocate_name(i.name); + if (!r.name) + return false; + + outputs.push_back(r); + } + + return true; +} + +bool spvc_resources_s::copy_resources(const ShaderResources &resources) +{ + if (!copy_resources(uniform_buffers, resources.uniform_buffers)) + return false; + if (!copy_resources(storage_buffers, resources.storage_buffers)) + return false; + if (!copy_resources(stage_inputs, resources.stage_inputs)) + return false; + if (!copy_resources(stage_outputs, resources.stage_outputs)) + return false; + if (!copy_resources(subpass_inputs, resources.subpass_inputs)) + return false; + 
if (!copy_resources(storage_images, resources.storage_images)) + return false; + if (!copy_resources(sampled_images, resources.sampled_images)) + return false; + if (!copy_resources(atomic_counters, resources.atomic_counters)) + return false; + if (!copy_resources(push_constant_buffers, resources.push_constant_buffers)) + return false; + if (!copy_resources(separate_images, resources.separate_images)) + return false; + if (!copy_resources(separate_samplers, resources.separate_samplers)) + return false; + if (!copy_resources(acceleration_structures, resources.acceleration_structures)) + return false; + + return true; +} + +spvc_result spvc_compiler_get_active_interface_variables(spvc_compiler compiler, spvc_set *set) +{ + SPVC_BEGIN_SAFE_SCOPE + { + std::unique_ptr ptr(new (std::nothrow) spvc_set_s); + if (!ptr) + { + compiler->context->report_error("Out of memory."); + return SPVC_ERROR_OUT_OF_MEMORY; + } + + auto active = compiler->compiler->get_active_interface_variables(); + ptr->set = std::move(active); + *set = ptr.get(); + compiler->context->allocations.push_back(std::move(ptr)); + } + SPVC_END_SAFE_SCOPE(compiler->context, SPVC_ERROR_INVALID_ARGUMENT) + return SPVC_SUCCESS; +} + +spvc_result spvc_compiler_set_enabled_interface_variables(spvc_compiler compiler, spvc_set set) +{ + SPVC_BEGIN_SAFE_SCOPE + { + compiler->compiler->set_enabled_interface_variables(set->set); + } + SPVC_END_SAFE_SCOPE(compiler->context, SPVC_ERROR_INVALID_ARGUMENT) + return SPVC_SUCCESS; +} + +spvc_result spvc_compiler_create_shader_resources_for_active_variables(spvc_compiler compiler, spvc_resources *resources, + spvc_set set) +{ + SPVC_BEGIN_SAFE_SCOPE + { + std::unique_ptr res(new (std::nothrow) spvc_resources_s); + if (!res) + { + compiler->context->report_error("Out of memory."); + return SPVC_ERROR_OUT_OF_MEMORY; + } + + res->context = compiler->context; + auto accessed_resources = compiler->compiler->get_shader_resources(set->set); + + if 
(!res->copy_resources(accessed_resources)) + { + res->context->report_error("Out of memory."); + return SPVC_ERROR_OUT_OF_MEMORY; + } + *resources = res.get(); + compiler->context->allocations.push_back(std::move(res)); + } + SPVC_END_SAFE_SCOPE(compiler->context, SPVC_ERROR_OUT_OF_MEMORY) + return SPVC_SUCCESS; +} + +spvc_result spvc_compiler_create_shader_resources(spvc_compiler compiler, spvc_resources *resources) +{ + SPVC_BEGIN_SAFE_SCOPE + { + std::unique_ptr res(new (std::nothrow) spvc_resources_s); + if (!res) + { + compiler->context->report_error("Out of memory."); + return SPVC_ERROR_OUT_OF_MEMORY; + } + + res->context = compiler->context; + auto accessed_resources = compiler->compiler->get_shader_resources(); + + if (!res->copy_resources(accessed_resources)) + { + res->context->report_error("Out of memory."); + return SPVC_ERROR_OUT_OF_MEMORY; + } + + *resources = res.get(); + compiler->context->allocations.push_back(std::move(res)); + } + SPVC_END_SAFE_SCOPE(compiler->context, SPVC_ERROR_OUT_OF_MEMORY) + return SPVC_SUCCESS; +} + +spvc_result spvc_resources_get_resource_list_for_type(spvc_resources resources, spvc_resource_type type, + const spvc_reflected_resource **resource_list, + size_t *resource_size) +{ + const SmallVector *list = nullptr; + switch (type) + { + case SPVC_RESOURCE_TYPE_UNIFORM_BUFFER: + list = &resources->uniform_buffers; + break; + + case SPVC_RESOURCE_TYPE_STORAGE_BUFFER: + list = &resources->storage_buffers; + break; + + case SPVC_RESOURCE_TYPE_STAGE_INPUT: + list = &resources->stage_inputs; + break; + + case SPVC_RESOURCE_TYPE_STAGE_OUTPUT: + list = &resources->stage_outputs; + break; + + case SPVC_RESOURCE_TYPE_SUBPASS_INPUT: + list = &resources->subpass_inputs; + break; + + case SPVC_RESOURCE_TYPE_STORAGE_IMAGE: + list = &resources->storage_images; + break; + + case SPVC_RESOURCE_TYPE_SAMPLED_IMAGE: + list = &resources->sampled_images; + break; + + case SPVC_RESOURCE_TYPE_ATOMIC_COUNTER: + list = &resources->atomic_counters; 
+ break; + + case SPVC_RESOURCE_TYPE_PUSH_CONSTANT: + list = &resources->push_constant_buffers; + break; + + case SPVC_RESOURCE_TYPE_SEPARATE_IMAGE: + list = &resources->separate_images; + break; + + case SPVC_RESOURCE_TYPE_SEPARATE_SAMPLERS: + list = &resources->separate_samplers; + break; + + case SPVC_RESOURCE_TYPE_ACCELERATION_STRUCTURE: + list = &resources->acceleration_structures; + break; + + default: + break; + } + + if (!list) + { + resources->context->report_error("Invalid argument."); + return SPVC_ERROR_INVALID_ARGUMENT; + } + + *resource_size = list->size(); + *resource_list = list->data(); + return SPVC_SUCCESS; +} + +void spvc_compiler_set_decoration(spvc_compiler compiler, SpvId id, SpvDecoration decoration, unsigned argument) +{ + compiler->compiler->set_decoration(id, static_cast(decoration), argument); +} + +void spvc_compiler_set_decoration_string(spvc_compiler compiler, SpvId id, SpvDecoration decoration, + const char *argument) +{ + compiler->compiler->set_decoration_string(id, static_cast(decoration), argument); +} + +void spvc_compiler_set_name(spvc_compiler compiler, SpvId id, const char *argument) +{ + compiler->compiler->set_name(id, argument); +} + +void spvc_compiler_set_member_decoration(spvc_compiler compiler, spvc_type_id id, unsigned member_index, + SpvDecoration decoration, unsigned argument) +{ + compiler->compiler->set_member_decoration(id, member_index, static_cast(decoration), argument); +} + +void spvc_compiler_set_member_decoration_string(spvc_compiler compiler, spvc_type_id id, unsigned member_index, + SpvDecoration decoration, const char *argument) +{ + compiler->compiler->set_member_decoration_string(id, member_index, static_cast(decoration), + argument); +} + +void spvc_compiler_set_member_name(spvc_compiler compiler, spvc_type_id id, unsigned member_index, const char *argument) +{ + compiler->compiler->set_member_name(id, member_index, argument); +} + +void spvc_compiler_unset_decoration(spvc_compiler compiler, SpvId id, 
SpvDecoration decoration) +{ + compiler->compiler->unset_decoration(id, static_cast(decoration)); +} + +void spvc_compiler_unset_member_decoration(spvc_compiler compiler, spvc_type_id id, unsigned member_index, + SpvDecoration decoration) +{ + compiler->compiler->unset_member_decoration(id, member_index, static_cast(decoration)); +} + +spvc_bool spvc_compiler_has_decoration(spvc_compiler compiler, SpvId id, SpvDecoration decoration) +{ + return compiler->compiler->has_decoration(id, static_cast(decoration)) ? SPVC_TRUE : SPVC_FALSE; +} + +spvc_bool spvc_compiler_has_member_decoration(spvc_compiler compiler, spvc_type_id id, unsigned member_index, + SpvDecoration decoration) +{ + return compiler->compiler->has_member_decoration(id, member_index, static_cast(decoration)) ? + SPVC_TRUE : + SPVC_FALSE; +} + +const char *spvc_compiler_get_name(spvc_compiler compiler, SpvId id) +{ + return compiler->compiler->get_name(id).c_str(); +} + +unsigned spvc_compiler_get_decoration(spvc_compiler compiler, SpvId id, SpvDecoration decoration) +{ + return compiler->compiler->get_decoration(id, static_cast(decoration)); +} + +const char *spvc_compiler_get_decoration_string(spvc_compiler compiler, SpvId id, SpvDecoration decoration) +{ + return compiler->compiler->get_decoration_string(id, static_cast(decoration)).c_str(); +} + +unsigned spvc_compiler_get_member_decoration(spvc_compiler compiler, spvc_type_id id, unsigned member_index, + SpvDecoration decoration) +{ + return compiler->compiler->get_member_decoration(id, member_index, static_cast(decoration)); +} + +const char *spvc_compiler_get_member_decoration_string(spvc_compiler compiler, spvc_type_id id, unsigned member_index, + SpvDecoration decoration) +{ + return compiler->compiler->get_member_decoration_string(id, member_index, static_cast(decoration)) + .c_str(); +} + +const char *spvc_compiler_get_member_name(spvc_compiler compiler, spvc_type_id id, unsigned member_index) +{ + return compiler->compiler->get_member_name(id, 
member_index).c_str();
+}
+
+// Lists every entry point of the module together with its execution model.
+// Each name is duplicated with allocate_name(); the translated array is moved into a
+// buffer that is handed to compiler->context->allocations, and *entry_points /
+// *num_entry_points are pointed at that buffer.
+// Returns SPVC_ERROR_OUT_OF_MEMORY when a name cannot be duplicated.
+spvc_result spvc_compiler_get_entry_points(spvc_compiler compiler, const spvc_entry_point **entry_points,
+                                           size_t *num_entry_points)
+{
+    SPVC_BEGIN_SAFE_SCOPE
+    {
+        auto entries = compiler->compiler->get_entry_points_and_stages();
+        SmallVector translated;
+        translated.reserve(entries.size());
+
+        for (auto &entry : entries)
+        {
+            spvc_entry_point new_entry;
+            new_entry.execution_model = static_cast(entry.execution_model);
+            new_entry.name = compiler->context->allocate_name(entry.name);
+            if (!new_entry.name)
+            {
+                compiler->context->report_error("Out of memory.");
+                return SPVC_ERROR_OUT_OF_MEMORY;
+            }
+            translated.push_back(new_entry);
+        }
+
+        auto ptr = spvc_allocate>();
+        ptr->buffer = std::move(translated);
+        *entry_points = ptr->buffer.data();
+        *num_entry_points = ptr->buffer.size();
+        compiler->context->allocations.push_back(std::move(ptr));
+    }
+    SPVC_END_SAFE_SCOPE(compiler->context, SPVC_ERROR_OUT_OF_MEMORY)
+    return SPVC_SUCCESS;
+}
+
+// Selects the entry point identified by (name, model) on the wrapped compiler.
+// The call sits inside the safe scope, exactly like spvc_compiler_rename_entry_point
+// below, so a failure raised by the C++ side is handled by SPVC_END_SAFE_SCOPE and
+// surfaces as SPVC_ERROR_INVALID_ARGUMENT instead of leaving this C entry point.
+// NOTE(review): presumably the failure case is an unknown name/model pair -- confirm
+// against Compiler::set_entry_point.
+spvc_result spvc_compiler_set_entry_point(spvc_compiler compiler, const char *name, SpvExecutionModel model)
+{
+    SPVC_BEGIN_SAFE_SCOPE
+    {
+        compiler->compiler->set_entry_point(name, static_cast(model));
+    }
+    SPVC_END_SAFE_SCOPE(compiler->context, SPVC_ERROR_INVALID_ARGUMENT)
+    return SPVC_SUCCESS;
+}
+
+// Renames the entry point (old_name, model) to new_name.
+// Returns SPVC_ERROR_INVALID_ARGUMENT through the safe scope if the C++ call fails.
+spvc_result spvc_compiler_rename_entry_point(spvc_compiler compiler, const char *old_name, const char *new_name,
+                                             SpvExecutionModel model)
+{
+    SPVC_BEGIN_SAFE_SCOPE
+    {
+        compiler->compiler->rename_entry_point(old_name, new_name, static_cast(model));
+    }
+    SPVC_END_SAFE_SCOPE(compiler->context, SPVC_ERROR_INVALID_ARGUMENT)
+    return SPVC_SUCCESS;
+}
+
+// Returns a copy, made with allocate_name(), of the cleansed name for the entry
+// point (name, model). nullptr is the fallback value given to SPVC_END_SAFE_SCOPE.
+const char *spvc_compiler_get_cleansed_entry_point_name(spvc_compiler compiler, const char *name,
+                                                        SpvExecutionModel model)
+{
+    SPVC_BEGIN_SAFE_SCOPE
+    {
+        auto cleansed_name =
+            compiler->compiler->get_cleansed_entry_point_name(name, static_cast(model));
+        return compiler->context->allocate_name(cleansed_name);
+    }
+    SPVC_END_SAFE_SCOPE(compiler->context, nullptr)
+}
+
+void spvc_compiler_set_execution_mode(spvc_compiler compiler,
SpvExecutionMode mode) +{ + compiler->compiler->set_execution_mode(static_cast(mode)); +} + +void spvc_compiler_set_execution_mode_with_arguments(spvc_compiler compiler, SpvExecutionMode mode, unsigned arg0, + unsigned arg1, + unsigned arg2) +{ + compiler->compiler->set_execution_mode(static_cast(mode), arg0, arg1, arg2); +} + +void spvc_compiler_unset_execution_mode(spvc_compiler compiler, SpvExecutionMode mode) +{ + compiler->compiler->unset_execution_mode(static_cast(mode)); +} + +spvc_result spvc_compiler_get_execution_modes(spvc_compiler compiler, const SpvExecutionMode **modes, size_t *num_modes) +{ + SPVC_BEGIN_SAFE_SCOPE + { + auto ptr = spvc_allocate>(); + + compiler->compiler->get_execution_mode_bitset().for_each_bit( + [&](uint32_t bit) { ptr->buffer.push_back(static_cast(bit)); }); + + *modes = ptr->buffer.data(); + *num_modes = ptr->buffer.size(); + compiler->context->allocations.push_back(std::move(ptr)); + } + SPVC_END_SAFE_SCOPE(compiler->context, SPVC_ERROR_OUT_OF_MEMORY) + return SPVC_SUCCESS; +} + +unsigned spvc_compiler_get_execution_mode_argument(spvc_compiler compiler, SpvExecutionMode mode) +{ + return compiler->compiler->get_execution_mode_argument(static_cast(mode)); +} + +unsigned spvc_compiler_get_execution_mode_argument_by_index(spvc_compiler compiler, SpvExecutionMode mode, + unsigned index) +{ + return compiler->compiler->get_execution_mode_argument(static_cast(mode), index); +} + +SpvExecutionModel spvc_compiler_get_execution_model(spvc_compiler compiler) +{ + return static_cast(compiler->compiler->get_execution_model()); +} + +spvc_type spvc_compiler_get_type_handle(spvc_compiler compiler, spvc_type_id id) +{ + // Should only throw if an intentionally garbage ID is passed, but the IDs are not type-safe. 
+ SPVC_BEGIN_SAFE_SCOPE + { + return static_cast(&compiler->compiler->get_type(id)); + } + SPVC_END_SAFE_SCOPE(compiler->context, nullptr) +} + +spvc_type_id spvc_type_get_base_type_id(spvc_type type) +{ + return type->self; +} + +static spvc_basetype convert_basetype(SPIRType::BaseType type) +{ + // For now the enums match up. + return static_cast(type); +} + +spvc_basetype spvc_type_get_basetype(spvc_type type) +{ + return convert_basetype(type->basetype); +} + +unsigned spvc_type_get_bit_width(spvc_type type) +{ + return type->width; +} + +unsigned spvc_type_get_vector_size(spvc_type type) +{ + return type->vecsize; +} + +unsigned spvc_type_get_columns(spvc_type type) +{ + return type->columns; +} + +unsigned spvc_type_get_num_array_dimensions(spvc_type type) +{ + return unsigned(type->array.size()); +} + +spvc_bool spvc_type_array_dimension_is_literal(spvc_type type, unsigned dimension) +{ + return type->array_size_literal[dimension] ? SPVC_TRUE : SPVC_FALSE; +} + +SpvId spvc_type_get_array_dimension(spvc_type type, unsigned dimension) +{ + return type->array[dimension]; +} + +unsigned spvc_type_get_num_member_types(spvc_type type) +{ + return unsigned(type->member_types.size()); +} + +spvc_type_id spvc_type_get_member_type(spvc_type type, unsigned index) +{ + return type->member_types[index]; +} + +SpvStorageClass spvc_type_get_storage_class(spvc_type type) +{ + return static_cast(type->storage); +} + +// Image type query. +spvc_type_id spvc_type_get_image_sampled_type(spvc_type type) +{ + return type->image.type; +} + +SpvDim spvc_type_get_image_dimension(spvc_type type) +{ + return static_cast(type->image.dim); +} + +spvc_bool spvc_type_get_image_is_depth(spvc_type type) +{ + return type->image.depth ? SPVC_TRUE : SPVC_FALSE; +} + +spvc_bool spvc_type_get_image_arrayed(spvc_type type) +{ + return type->image.arrayed ? SPVC_TRUE : SPVC_FALSE; +} + +spvc_bool spvc_type_get_image_multisampled(spvc_type type) +{ + return type->image.ms ? 
SPVC_TRUE : SPVC_FALSE; +} + +spvc_bool spvc_type_get_image_is_storage(spvc_type type) +{ + return type->image.sampled == 2 ? SPVC_TRUE : SPVC_FALSE; +} + +SpvImageFormat spvc_type_get_image_storage_format(spvc_type type) +{ + return static_cast(static_cast(type)->image.format); +} + +SpvAccessQualifier spvc_type_get_image_access_qualifier(spvc_type type) +{ + return static_cast(static_cast(type)->image.access); +} + +spvc_result spvc_compiler_get_declared_struct_size(spvc_compiler compiler, spvc_type struct_type, size_t *size) +{ + SPVC_BEGIN_SAFE_SCOPE + { + *size = compiler->compiler->get_declared_struct_size(*static_cast(struct_type)); + } + SPVC_END_SAFE_SCOPE(compiler->context, SPVC_ERROR_INVALID_ARGUMENT) + return SPVC_SUCCESS; +} + +spvc_result spvc_compiler_get_declared_struct_size_runtime_array(spvc_compiler compiler, spvc_type struct_type, + size_t array_size, size_t *size) +{ + SPVC_BEGIN_SAFE_SCOPE + { + *size = compiler->compiler->get_declared_struct_size_runtime_array(*static_cast(struct_type), + array_size); + } + SPVC_END_SAFE_SCOPE(compiler->context, SPVC_ERROR_INVALID_ARGUMENT) + return SPVC_SUCCESS; +} + +spvc_result spvc_compiler_get_declared_struct_member_size(spvc_compiler compiler, spvc_type struct_type, unsigned index, size_t *size) +{ + SPVC_BEGIN_SAFE_SCOPE + { + *size = compiler->compiler->get_declared_struct_member_size(*static_cast(struct_type), index); + } + SPVC_END_SAFE_SCOPE(compiler->context, SPVC_ERROR_INVALID_ARGUMENT) + return SPVC_SUCCESS; +} + +spvc_result spvc_compiler_type_struct_member_offset(spvc_compiler compiler, spvc_type type, unsigned index, unsigned *offset) +{ + SPVC_BEGIN_SAFE_SCOPE + { + *offset = compiler->compiler->type_struct_member_offset(*static_cast(type), index); + } + SPVC_END_SAFE_SCOPE(compiler->context, SPVC_ERROR_INVALID_ARGUMENT) + return SPVC_SUCCESS; +} + +spvc_result spvc_compiler_type_struct_member_array_stride(spvc_compiler compiler, spvc_type type, unsigned index, unsigned *stride) +{ + 
SPVC_BEGIN_SAFE_SCOPE + { + *stride = compiler->compiler->type_struct_member_array_stride(*static_cast(type), index); + } + SPVC_END_SAFE_SCOPE(compiler->context, SPVC_ERROR_INVALID_ARGUMENT) + return SPVC_SUCCESS; +} + +spvc_result spvc_compiler_type_struct_member_matrix_stride(spvc_compiler compiler, spvc_type type, unsigned index, unsigned *stride) +{ + SPVC_BEGIN_SAFE_SCOPE + { + *stride = compiler->compiler->type_struct_member_matrix_stride(*static_cast(type), index); + } + SPVC_END_SAFE_SCOPE(compiler->context, SPVC_ERROR_INVALID_ARGUMENT) + return SPVC_SUCCESS; +} + +spvc_result spvc_compiler_build_dummy_sampler_for_combined_images(spvc_compiler compiler, spvc_variable_id *id) +{ + SPVC_BEGIN_SAFE_SCOPE + { + *id = compiler->compiler->build_dummy_sampler_for_combined_images(); + } + SPVC_END_SAFE_SCOPE(compiler->context, SPVC_ERROR_INVALID_ARGUMENT) + return SPVC_SUCCESS; +} + +spvc_result spvc_compiler_build_combined_image_samplers(spvc_compiler compiler) +{ + SPVC_BEGIN_SAFE_SCOPE + { + compiler->compiler->build_combined_image_samplers(); + } + SPVC_END_SAFE_SCOPE(compiler->context, SPVC_ERROR_UNSUPPORTED_SPIRV) + return SPVC_SUCCESS; +} + +spvc_result spvc_compiler_get_combined_image_samplers(spvc_compiler compiler, + const spvc_combined_image_sampler **samplers, + size_t *num_samplers) +{ + SPVC_BEGIN_SAFE_SCOPE + { + auto combined = compiler->compiler->get_combined_image_samplers(); + SmallVector translated; + translated.reserve(combined.size()); + for (auto &c : combined) + { + spvc_combined_image_sampler trans = { c.combined_id, c.image_id, c.sampler_id }; + translated.push_back(trans); + } + + auto ptr = spvc_allocate>(); + ptr->buffer = std::move(translated); + *samplers = ptr->buffer.data(); + *num_samplers = ptr->buffer.size(); + compiler->context->allocations.push_back(std::move(ptr)); + } + SPVC_END_SAFE_SCOPE(compiler->context, SPVC_ERROR_OUT_OF_MEMORY) + return SPVC_SUCCESS; +} + +spvc_result 
spvc_compiler_get_specialization_constants(spvc_compiler compiler, + const spvc_specialization_constant **constants, + size_t *num_constants) +{ + SPVC_BEGIN_SAFE_SCOPE + { + auto spec_constants = compiler->compiler->get_specialization_constants(); + SmallVector translated; + translated.reserve(spec_constants.size()); + for (auto &c : spec_constants) + { + spvc_specialization_constant trans = { c.id, c.constant_id }; + translated.push_back(trans); + } + + auto ptr = spvc_allocate>(); + ptr->buffer = std::move(translated); + *constants = ptr->buffer.data(); + *num_constants = ptr->buffer.size(); + compiler->context->allocations.push_back(std::move(ptr)); + } + SPVC_END_SAFE_SCOPE(compiler->context, SPVC_ERROR_OUT_OF_MEMORY) + return SPVC_SUCCESS; +} + +spvc_constant spvc_compiler_get_constant_handle(spvc_compiler compiler, spvc_variable_id id) +{ + SPVC_BEGIN_SAFE_SCOPE + { + return static_cast(&compiler->compiler->get_constant(id)); + } + SPVC_END_SAFE_SCOPE(compiler->context, nullptr) +} + +spvc_constant_id spvc_compiler_get_work_group_size_specialization_constants(spvc_compiler compiler, + spvc_specialization_constant *x, + spvc_specialization_constant *y, + spvc_specialization_constant *z) +{ + SpecializationConstant tmpx; + SpecializationConstant tmpy; + SpecializationConstant tmpz; + spvc_constant_id ret = compiler->compiler->get_work_group_size_specialization_constants(tmpx, tmpy, tmpz); + x->id = tmpx.id; + x->constant_id = tmpx.constant_id; + y->id = tmpy.id; + y->constant_id = tmpy.constant_id; + z->id = tmpz.id; + z->constant_id = tmpz.constant_id; + return ret; +} + +spvc_result spvc_compiler_get_active_buffer_ranges(spvc_compiler compiler, + spvc_variable_id id, + const spvc_buffer_range **ranges, + size_t *num_ranges) +{ + SPVC_BEGIN_SAFE_SCOPE + { + auto active_ranges = compiler->compiler->get_active_buffer_ranges(id); + SmallVector translated; + translated.reserve(active_ranges.size()); + for (auto &r : active_ranges) + { + spvc_buffer_range trans = 
{ r.index, r.offset, r.range }; + translated.push_back(trans); + } + + auto ptr = spvc_allocate>(); + ptr->buffer = std::move(translated); + *ranges = ptr->buffer.data(); + *num_ranges = ptr->buffer.size(); + compiler->context->allocations.push_back(std::move(ptr)); + } + SPVC_END_SAFE_SCOPE(compiler->context, SPVC_ERROR_OUT_OF_MEMORY) + return SPVC_SUCCESS; +} + +float spvc_constant_get_scalar_fp16(spvc_constant constant, unsigned column, unsigned row) +{ + return constant->scalar_f16(column, row); +} + +float spvc_constant_get_scalar_fp32(spvc_constant constant, unsigned column, unsigned row) +{ + return constant->scalar_f32(column, row); +} + +double spvc_constant_get_scalar_fp64(spvc_constant constant, unsigned column, unsigned row) +{ + return constant->scalar_f64(column, row); +} + +unsigned spvc_constant_get_scalar_u32(spvc_constant constant, unsigned column, unsigned row) +{ + return constant->scalar(column, row); +} + +int spvc_constant_get_scalar_i32(spvc_constant constant, unsigned column, unsigned row) +{ + return constant->scalar_i32(column, row); +} + +unsigned spvc_constant_get_scalar_u16(spvc_constant constant, unsigned column, unsigned row) +{ + return constant->scalar_u16(column, row); +} + +int spvc_constant_get_scalar_i16(spvc_constant constant, unsigned column, unsigned row) +{ + return constant->scalar_i16(column, row); +} + +unsigned spvc_constant_get_scalar_u8(spvc_constant constant, unsigned column, unsigned row) +{ + return constant->scalar_u8(column, row); +} + +int spvc_constant_get_scalar_i8(spvc_constant constant, unsigned column, unsigned row) +{ + return constant->scalar_i8(column, row); +} + +void spvc_constant_get_subconstants(spvc_constant constant, const spvc_constant_id **constituents, size_t *count) +{ + static_assert(sizeof(spvc_constant_id) == sizeof(constant->subconstants.front()), "ID size is not consistent."); + *constituents = reinterpret_cast(constant->subconstants.data()); + *count = constant->subconstants.size(); +} + 
+// Returns the type ID stored in the constant (constant->constant_type).
+spvc_type_id spvc_constant_get_type(spvc_constant constant)
+{
+    return constant->constant_type;
+}
+
+// Asks the compiler for the binary offset of `decoration` on `id`.
+// On success the offset is written to *word_offset and SPVC_TRUE is returned;
+// otherwise SPVC_FALSE is returned and *word_offset is left untouched.
+spvc_bool spvc_compiler_get_binary_offset_for_decoration(spvc_compiler compiler, spvc_variable_id id,
+                                                         SpvDecoration decoration,
+                                                         unsigned *word_offset)
+{
+    uint32_t off = 0;
+    bool ret = compiler->compiler->get_binary_offset_for_decoration(id, static_cast(decoration), off);
+    if (ret)
+    {
+        *word_offset = off;
+        return SPVC_TRUE;
+    }
+    else
+        return SPVC_FALSE;
+}
+
+// Converts the compiler's buffer_is_hlsl_counter_buffer(id) answer to spvc_bool.
+spvc_bool spvc_compiler_buffer_is_hlsl_counter_buffer(spvc_compiler compiler, spvc_variable_id id)
+{
+    return compiler->compiler->buffer_is_hlsl_counter_buffer(id) ? SPVC_TRUE : SPVC_FALSE;
+}
+
+// Looks up the counter buffer associated with `id`.
+// *counter_id is written only when the lookup succeeds (SPVC_TRUE).
+spvc_bool spvc_compiler_buffer_get_hlsl_counter_buffer(spvc_compiler compiler, spvc_variable_id id,
+                                                       spvc_variable_id *counter_id)
+{
+    uint32_t buffer;
+    bool ret = compiler->compiler->buffer_get_hlsl_counter_buffer(id, buffer);
+    if (ret)
+    {
+        *counter_id = buffer;
+        return SPVC_TRUE;
+    }
+    else
+        return SPVC_FALSE;
+}
+
+// Exposes the compiler's declared-capability list without copying it: the returned
+// pointer is the list's own storage reinterpreted as SpvCapability, which the
+// static_assert on the enum sizes guards.
+spvc_result spvc_compiler_get_declared_capabilities(spvc_compiler compiler, const SpvCapability **capabilities,
+                                                    size_t *num_capabilities)
+{
+    auto &caps = compiler->compiler->get_declared_capabilities();
+    static_assert(sizeof(SpvCapability) == sizeof(spv::Capability), "Enum size mismatch.");
+    *capabilities = reinterpret_cast(caps.data());
+    *num_capabilities = caps.size();
+    return SPVC_SUCCESS;
+}
+
+// Returns copies of the declared extension names.
+// Every name is duplicated with allocate_name(); a failed duplication is reported as
+// "Out of memory." and aborts the call with SPVC_ERROR_OUT_OF_MEMORY, the same way
+// spvc_compiler_get_entry_points treats it, so the caller never receives an array
+// containing null strings.
+spvc_result spvc_compiler_get_declared_extensions(spvc_compiler compiler, const char ***extensions,
+                                                  size_t *num_extensions)
+{
+    SPVC_BEGIN_SAFE_SCOPE
+    {
+        auto &exts = compiler->compiler->get_declared_extensions();
+        SmallVector duped;
+        duped.reserve(exts.size());
+        for (auto &ext : exts)
+        {
+            const char *name = compiler->context->allocate_name(ext);
+            if (!name)
+            {
+                compiler->context->report_error("Out of memory.");
+                return SPVC_ERROR_OUT_OF_MEMORY;
+            }
+            duped.push_back(name);
+        }
+
+        auto ptr = spvc_allocate>();
+        ptr->buffer = std::move(duped);
+        *extensions = ptr->buffer.data();
+        *num_extensions = ptr->buffer.size();
+        compiler->context->allocations.push_back(std::move(ptr));
+    }
+    
SPVC_END_SAFE_SCOPE(compiler->context, SPVC_ERROR_OUT_OF_MEMORY) + return SPVC_SUCCESS; +} + +const char *spvc_compiler_get_remapped_declared_block_name(spvc_compiler compiler, spvc_variable_id id) +{ + SPVC_BEGIN_SAFE_SCOPE + { + auto name = compiler->compiler->get_remapped_declared_block_name(id); + return compiler->context->allocate_name(name); + } + SPVC_END_SAFE_SCOPE(compiler->context, nullptr) +} + +spvc_result spvc_compiler_get_buffer_block_decorations(spvc_compiler compiler, spvc_variable_id id, + const SpvDecoration **decorations, size_t *num_decorations) +{ + SPVC_BEGIN_SAFE_SCOPE + { + auto flags = compiler->compiler->get_buffer_block_flags(id); + auto bitset = spvc_allocate>(); + + flags.for_each_bit([&](uint32_t bit) { bitset->buffer.push_back(static_cast(bit)); }); + + *decorations = bitset->buffer.data(); + *num_decorations = bitset->buffer.size(); + compiler->context->allocations.push_back(std::move(bitset)); + } + SPVC_END_SAFE_SCOPE(compiler->context, SPVC_ERROR_INVALID_ARGUMENT) + return SPVC_SUCCESS; +} + +unsigned spvc_msl_get_aux_buffer_struct_version(void) +{ + return SPVC_MSL_AUX_BUFFER_STRUCT_VERSION; +} + +void spvc_msl_vertex_attribute_init(spvc_msl_vertex_attribute *attr) +{ +#if SPIRV_CROSS_C_API_MSL + // Crude, but works. 
+ MSLShaderInput attr_default; + attr->location = attr_default.location; + attr->format = static_cast(attr_default.format); + attr->builtin = static_cast(attr_default.builtin); +#else + memset(attr, 0, sizeof(*attr)); +#endif +} + +void spvc_msl_shader_input_init(spvc_msl_shader_input *input) +{ +#if SPIRV_CROSS_C_API_MSL + MSLShaderInput input_default; + input->location = input_default.location; + input->format = static_cast(input_default.format); + input->builtin = static_cast(input_default.builtin); + input->vecsize = input_default.vecsize; +#else + memset(input, 0, sizeof(*input)); +#endif +} + +void spvc_msl_resource_binding_init(spvc_msl_resource_binding *binding) +{ +#if SPIRV_CROSS_C_API_MSL + MSLResourceBinding binding_default; + binding->desc_set = binding_default.desc_set; + binding->binding = binding_default.binding; + binding->msl_buffer = binding_default.msl_buffer; + binding->msl_texture = binding_default.msl_texture; + binding->msl_sampler = binding_default.msl_sampler; + binding->stage = static_cast(binding_default.stage); +#else + memset(binding, 0, sizeof(*binding)); +#endif +} + +void spvc_hlsl_resource_binding_init(spvc_hlsl_resource_binding *binding) +{ +#if SPIRV_CROSS_C_API_HLSL + HLSLResourceBinding binding_default; + binding->desc_set = binding_default.desc_set; + binding->binding = binding_default.binding; + binding->cbv.register_binding = binding_default.cbv.register_binding; + binding->cbv.register_space = binding_default.cbv.register_space; + binding->srv.register_binding = binding_default.srv.register_binding; + binding->srv.register_space = binding_default.srv.register_space; + binding->uav.register_binding = binding_default.uav.register_binding; + binding->uav.register_space = binding_default.uav.register_space; + binding->sampler.register_binding = binding_default.sampler.register_binding; + binding->sampler.register_space = binding_default.sampler.register_space; + binding->stage = static_cast(binding_default.stage); +#else + 
memset(binding, 0, sizeof(*binding)); +#endif +} + +void spvc_msl_constexpr_sampler_init(spvc_msl_constexpr_sampler *sampler) +{ +#if SPIRV_CROSS_C_API_MSL + MSLConstexprSampler defaults; + sampler->anisotropy_enable = defaults.anisotropy_enable ? SPVC_TRUE : SPVC_FALSE; + sampler->border_color = static_cast(defaults.border_color); + sampler->compare_enable = defaults.compare_enable ? SPVC_TRUE : SPVC_FALSE; + sampler->coord = static_cast(defaults.coord); + sampler->compare_func = static_cast(defaults.compare_func); + sampler->lod_clamp_enable = defaults.lod_clamp_enable ? SPVC_TRUE : SPVC_FALSE; + sampler->lod_clamp_max = defaults.lod_clamp_max; + sampler->lod_clamp_min = defaults.lod_clamp_min; + sampler->mag_filter = static_cast(defaults.mag_filter); + sampler->min_filter = static_cast(defaults.min_filter); + sampler->mip_filter = static_cast(defaults.mip_filter); + sampler->max_anisotropy = defaults.max_anisotropy; + sampler->s_address = static_cast(defaults.s_address); + sampler->t_address = static_cast(defaults.t_address); + sampler->r_address = static_cast(defaults.r_address); +#else + memset(sampler, 0, sizeof(*sampler)); +#endif +} + +void spvc_msl_sampler_ycbcr_conversion_init(spvc_msl_sampler_ycbcr_conversion *conv) +{ +#if SPIRV_CROSS_C_API_MSL + MSLConstexprSampler defaults; + conv->planes = defaults.planes; + conv->resolution = static_cast(defaults.resolution); + conv->chroma_filter = static_cast(defaults.chroma_filter); + conv->x_chroma_offset = static_cast(defaults.x_chroma_offset); + conv->y_chroma_offset = static_cast(defaults.y_chroma_offset); + for (int i = 0; i < 4; i++) + conv->swizzle[i] = static_cast(defaults.swizzle[i]); + conv->ycbcr_model = static_cast(defaults.ycbcr_model); + conv->ycbcr_range = static_cast(defaults.ycbcr_range); +#else + memset(conv, 0, sizeof(*conv)); +#endif +} + +unsigned spvc_compiler_get_current_id_bound(spvc_compiler compiler) +{ + return compiler->compiler->get_current_id_bound(); +} + +void 
spvc_get_version(unsigned *major, unsigned *minor, unsigned *patch) +{ + *major = SPVC_C_API_VERSION_MAJOR; + *minor = SPVC_C_API_VERSION_MINOR; + *patch = SPVC_C_API_VERSION_PATCH; +} + +const char *spvc_get_commit_revision_and_timestamp(void) +{ +#ifdef HAVE_SPIRV_CROSS_GIT_VERSION + return SPIRV_CROSS_GIT_REVISION; +#else + return ""; +#endif +} + +#ifdef _MSC_VER +#pragma warning(pop) +#endif diff --git a/dep/spirv-cross/spirv_cross_c.h b/dep/spirv-cross/spirv_cross_c.h new file mode 100644 index 000000000..62d91c195 --- /dev/null +++ b/dep/spirv-cross/spirv_cross_c.h @@ -0,0 +1,992 @@ +/* + * Copyright 2019-2020 Hans-Kristian Arntzen + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * At your option, you may choose to accept this material under either: + * 1. The Apache License, Version 2.0, found at <http://www.apache.org/licenses/LICENSE-2.0>, or + * 2. The MIT License, found at <http://opensource.org/licenses/MIT>. + * SPDX-License-Identifier: Apache-2.0 OR MIT. + */ + +#ifndef SPIRV_CROSS_C_API_H +#define SPIRV_CROSS_C_API_H + +#include +#include "spirv.h" + +/* + * C89-compatible wrapper for SPIRV-Cross' API. + * Documentation here is sparse unless the behavior does not map 1:1 with C++ API. + * It is recommended to look at the canonical C++ API for more detailed information. + */ + +#ifdef __cplusplus +extern "C" { +#endif + +/* Bumped if ABI or API breaks backwards compatibility. */ +#define SPVC_C_API_VERSION_MAJOR 0 +/* Bumped if APIs or enumerations are added in a backwards compatible way. 
*/ +#define SPVC_C_API_VERSION_MINOR 44 +/* Bumped if internal implementation details change. */ +#define SPVC_C_API_VERSION_PATCH 0 + +#if !defined(SPVC_PUBLIC_API) +#if defined(SPVC_EXPORT_SYMBOLS) +/* Exports symbols. Standard C calling convention is used. */ +#if defined(__GNUC__) +#define SPVC_PUBLIC_API __attribute__((visibility("default"))) +#elif defined(_MSC_VER) +#define SPVC_PUBLIC_API __declspec(dllexport) +#else +#define SPVC_PUBLIC_API +#endif +#else +#define SPVC_PUBLIC_API +#endif +#endif + +/* + * Gets the SPVC_C_API_VERSION_* used to build this library. + * Can be used to check for ABI mismatch if so-versioning did not catch it. + */ +SPVC_PUBLIC_API void spvc_get_version(unsigned *major, unsigned *minor, unsigned *patch); + +/* Gets a human readable version string to identify which commit a particular binary was created from. */ +SPVC_PUBLIC_API const char *spvc_get_commit_revision_and_timestamp(void); + +/* These types are opaque to the user. */ +typedef struct spvc_context_s *spvc_context; +typedef struct spvc_parsed_ir_s *spvc_parsed_ir; +typedef struct spvc_compiler_s *spvc_compiler; +typedef struct spvc_compiler_options_s *spvc_compiler_options; +typedef struct spvc_resources_s *spvc_resources; +struct spvc_type_s; +typedef const struct spvc_type_s *spvc_type; +typedef struct spvc_constant_s *spvc_constant; +struct spvc_set_s; +typedef const struct spvc_set_s *spvc_set; + +/* + * Shallow typedefs. All SPIR-V IDs are plain 32-bit numbers, but this helps communicate which data is used. + * Maps to a SPIRType. + */ +typedef SpvId spvc_type_id; +/* Maps to a SPIRVariable. */ +typedef SpvId spvc_variable_id; +/* Maps to a SPIRConstant. */ +typedef SpvId spvc_constant_id; + +/* See C++ API. */ +typedef struct spvc_reflected_resource +{ + spvc_variable_id id; + spvc_type_id base_type_id; + spvc_type_id type_id; + const char *name; +} spvc_reflected_resource; + +/* See C++ API. 
*/ +typedef struct spvc_entry_point +{ + SpvExecutionModel execution_model; + const char *name; +} spvc_entry_point; + +/* See C++ API. */ +typedef struct spvc_combined_image_sampler +{ + spvc_variable_id combined_id; + spvc_variable_id image_id; + spvc_variable_id sampler_id; +} spvc_combined_image_sampler; + +/* See C++ API. */ +typedef struct spvc_specialization_constant +{ + spvc_constant_id id; + unsigned constant_id; +} spvc_specialization_constant; + +/* See C++ API. */ +typedef struct spvc_buffer_range +{ + unsigned index; + size_t offset; + size_t range; +} spvc_buffer_range; + +/* See C++ API. */ +typedef struct spvc_hlsl_root_constants +{ + unsigned start; + unsigned end; + unsigned binding; + unsigned space; +} spvc_hlsl_root_constants; + +/* See C++ API. */ +typedef struct spvc_hlsl_vertex_attribute_remap +{ + unsigned location; + const char *semantic; +} spvc_hlsl_vertex_attribute_remap; + +/* + * Be compatible with non-C99 compilers, which do not have stdbool. + * Only recent MSVC compilers support this, for example, and ideally SPIRV-Cross should be linkable + * from a wide range of compilers in its C wrapper. + */ +typedef unsigned char spvc_bool; +#define SPVC_TRUE ((spvc_bool)1) +#define SPVC_FALSE ((spvc_bool)0) + +typedef enum spvc_result +{ + /* Success. */ + SPVC_SUCCESS = 0, + + /* The SPIR-V is invalid. Should have been caught by validation ideally. */ + SPVC_ERROR_INVALID_SPIRV = -1, + + /* The SPIR-V might be valid or invalid, but SPIRV-Cross currently cannot correctly translate this to your target language. */ + SPVC_ERROR_UNSUPPORTED_SPIRV = -2, + + /* If for some reason we hit this, new or malloc failed. */ + SPVC_ERROR_OUT_OF_MEMORY = -3, + + /* Invalid API argument. */ + SPVC_ERROR_INVALID_ARGUMENT = -4, + + SPVC_ERROR_INT_MAX = 0x7fffffff +} spvc_result; + +typedef enum spvc_capture_mode +{ + /* The Parsed IR payload will be copied, and the handle can be reused to create other compiler instances. 
*/ + SPVC_CAPTURE_MODE_COPY = 0, + + /* + * The payload will now be owned by the compiler. + * parsed_ir should now be considered a dead blob and must not be used further. + * This is optimal for performance and should be the go-to option. + */ + SPVC_CAPTURE_MODE_TAKE_OWNERSHIP = 1, + + SPVC_CAPTURE_MODE_INT_MAX = 0x7fffffff +} spvc_capture_mode; + +typedef enum spvc_backend +{ + /* This backend can only perform reflection, no compiler options are supported. Maps to spirv_cross::Compiler. */ + SPVC_BACKEND_NONE = 0, + SPVC_BACKEND_GLSL = 1, /* spirv_cross::CompilerGLSL */ + SPVC_BACKEND_HLSL = 2, /* CompilerHLSL */ + SPVC_BACKEND_MSL = 3, /* CompilerMSL */ + SPVC_BACKEND_CPP = 4, /* CompilerCPP */ + SPVC_BACKEND_JSON = 5, /* CompilerReflection w/ JSON backend */ + SPVC_BACKEND_INT_MAX = 0x7fffffff +} spvc_backend; + +/* Maps to C++ API. */ +typedef enum spvc_resource_type +{ + SPVC_RESOURCE_TYPE_UNKNOWN = 0, + SPVC_RESOURCE_TYPE_UNIFORM_BUFFER = 1, + SPVC_RESOURCE_TYPE_STORAGE_BUFFER = 2, + SPVC_RESOURCE_TYPE_STAGE_INPUT = 3, + SPVC_RESOURCE_TYPE_STAGE_OUTPUT = 4, + SPVC_RESOURCE_TYPE_SUBPASS_INPUT = 5, + SPVC_RESOURCE_TYPE_STORAGE_IMAGE = 6, + SPVC_RESOURCE_TYPE_SAMPLED_IMAGE = 7, + SPVC_RESOURCE_TYPE_ATOMIC_COUNTER = 8, + SPVC_RESOURCE_TYPE_PUSH_CONSTANT = 9, + SPVC_RESOURCE_TYPE_SEPARATE_IMAGE = 10, + SPVC_RESOURCE_TYPE_SEPARATE_SAMPLERS = 11, + SPVC_RESOURCE_TYPE_ACCELERATION_STRUCTURE = 12, + SPVC_RESOURCE_TYPE_RAY_QUERY = 13, + SPVC_RESOURCE_TYPE_INT_MAX = 0x7fffffff +} spvc_resource_type; + +/* Maps to spirv_cross::SPIRType::BaseType. 
*/ +typedef enum spvc_basetype +{ + SPVC_BASETYPE_UNKNOWN = 0, + SPVC_BASETYPE_VOID = 1, + SPVC_BASETYPE_BOOLEAN = 2, + SPVC_BASETYPE_INT8 = 3, + SPVC_BASETYPE_UINT8 = 4, + SPVC_BASETYPE_INT16 = 5, + SPVC_BASETYPE_UINT16 = 6, + SPVC_BASETYPE_INT32 = 7, + SPVC_BASETYPE_UINT32 = 8, + SPVC_BASETYPE_INT64 = 9, + SPVC_BASETYPE_UINT64 = 10, + SPVC_BASETYPE_ATOMIC_COUNTER = 11, + SPVC_BASETYPE_FP16 = 12, + SPVC_BASETYPE_FP32 = 13, + SPVC_BASETYPE_FP64 = 14, + SPVC_BASETYPE_STRUCT = 15, + SPVC_BASETYPE_IMAGE = 16, + SPVC_BASETYPE_SAMPLED_IMAGE = 17, + SPVC_BASETYPE_SAMPLER = 18, + SPVC_BASETYPE_ACCELERATION_STRUCTURE = 19, + + SPVC_BASETYPE_INT_MAX = 0x7fffffff +} spvc_basetype; + +#define SPVC_COMPILER_OPTION_COMMON_BIT 0x1000000 +#define SPVC_COMPILER_OPTION_GLSL_BIT 0x2000000 +#define SPVC_COMPILER_OPTION_HLSL_BIT 0x4000000 +#define SPVC_COMPILER_OPTION_MSL_BIT 0x8000000 +#define SPVC_COMPILER_OPTION_LANG_BITS 0x0f000000 +#define SPVC_COMPILER_OPTION_ENUM_BITS 0xffffff + +#define SPVC_MAKE_MSL_VERSION(major, minor, patch) ((major) * 10000 + (minor) * 100 + (patch)) + +/* Maps to C++ API. */ +typedef enum spvc_msl_platform +{ + SPVC_MSL_PLATFORM_IOS = 0, + SPVC_MSL_PLATFORM_MACOS = 1, + SPVC_MSL_PLATFORM_MAX_INT = 0x7fffffff +} spvc_msl_platform; + +/* Maps to C++ API. */ +typedef enum spvc_msl_index_type +{ + SPVC_MSL_INDEX_TYPE_NONE = 0, + SPVC_MSL_INDEX_TYPE_UINT16 = 1, + SPVC_MSL_INDEX_TYPE_UINT32 = 2, + SPVC_MSL_INDEX_TYPE_MAX_INT = 0x7fffffff +} spvc_msl_index_type; + +/* Maps to C++ API. */ +typedef enum spvc_msl_shader_input_format +{ + SPVC_MSL_SHADER_INPUT_FORMAT_OTHER = 0, + SPVC_MSL_SHADER_INPUT_FORMAT_UINT8 = 1, + SPVC_MSL_SHADER_INPUT_FORMAT_UINT16 = 2, + SPVC_MSL_SHADER_INPUT_FORMAT_ANY16 = 3, + SPVC_MSL_SHADER_INPUT_FORMAT_ANY32 = 4, + + /* Deprecated names. 
*/ + SPVC_MSL_VERTEX_FORMAT_OTHER = SPVC_MSL_SHADER_INPUT_FORMAT_OTHER, + SPVC_MSL_VERTEX_FORMAT_UINT8 = SPVC_MSL_SHADER_INPUT_FORMAT_UINT8, + SPVC_MSL_VERTEX_FORMAT_UINT16 = SPVC_MSL_SHADER_INPUT_FORMAT_UINT16, + + SPVC_MSL_SHADER_INPUT_FORMAT_INT_MAX = 0x7fffffff +} spvc_msl_shader_input_format, spvc_msl_vertex_format; + +/* Maps to C++ API. Deprecated; use spvc_msl_shader_input. */ +typedef struct spvc_msl_vertex_attribute +{ + unsigned location; + + /* Obsolete, do not use. Only lingers on for ABI compatibility. */ + unsigned msl_buffer; + /* Obsolete, do not use. Only lingers on for ABI compatibility. */ + unsigned msl_offset; + /* Obsolete, do not use. Only lingers on for ABI compatibility. */ + unsigned msl_stride; + /* Obsolete, do not use. Only lingers on for ABI compatibility. */ + spvc_bool per_instance; + + spvc_msl_vertex_format format; + SpvBuiltIn builtin; +} spvc_msl_vertex_attribute; + +/* + * Initializes the vertex attribute struct. + */ +SPVC_PUBLIC_API void spvc_msl_vertex_attribute_init(spvc_msl_vertex_attribute *attr); + +/* Maps to C++ API. */ +typedef struct spvc_msl_shader_input +{ + unsigned location; + spvc_msl_vertex_format format; + SpvBuiltIn builtin; + unsigned vecsize; +} spvc_msl_shader_input; + +/* + * Initializes the shader input struct. + */ +SPVC_PUBLIC_API void spvc_msl_shader_input_init(spvc_msl_shader_input *input); + +/* Maps to C++ API. */ +typedef struct spvc_msl_resource_binding +{ + SpvExecutionModel stage; + unsigned desc_set; + unsigned binding; + unsigned msl_buffer; + unsigned msl_texture; + unsigned msl_sampler; +} spvc_msl_resource_binding; + +/* + * Initializes the resource binding struct. + * The defaults are non-zero. 
+ */ +SPVC_PUBLIC_API void spvc_msl_resource_binding_init(spvc_msl_resource_binding *binding); + +#define SPVC_MSL_PUSH_CONSTANT_DESC_SET (~(0u)) +#define SPVC_MSL_PUSH_CONSTANT_BINDING (0) +#define SPVC_MSL_SWIZZLE_BUFFER_BINDING (~(1u)) +#define SPVC_MSL_BUFFER_SIZE_BUFFER_BINDING (~(2u)) +#define SPVC_MSL_ARGUMENT_BUFFER_BINDING (~(3u)) + +/* Obsolete. Sticks around for backwards compatibility. */ +#define SPVC_MSL_AUX_BUFFER_STRUCT_VERSION 1 + +/* Runtime check for incompatibility. Obsolete. */ +SPVC_PUBLIC_API unsigned spvc_msl_get_aux_buffer_struct_version(void); + +/* Maps to C++ API. */ +typedef enum spvc_msl_sampler_coord +{ + SPVC_MSL_SAMPLER_COORD_NORMALIZED = 0, + SPVC_MSL_SAMPLER_COORD_PIXEL = 1, + SPVC_MSL_SAMPLER_INT_MAX = 0x7fffffff +} spvc_msl_sampler_coord; + +/* Maps to C++ API. */ +typedef enum spvc_msl_sampler_filter +{ + SPVC_MSL_SAMPLER_FILTER_NEAREST = 0, + SPVC_MSL_SAMPLER_FILTER_LINEAR = 1, + SPVC_MSL_SAMPLER_FILTER_INT_MAX = 0x7fffffff +} spvc_msl_sampler_filter; + +/* Maps to C++ API. */ +typedef enum spvc_msl_sampler_mip_filter +{ + SPVC_MSL_SAMPLER_MIP_FILTER_NONE = 0, + SPVC_MSL_SAMPLER_MIP_FILTER_NEAREST = 1, + SPVC_MSL_SAMPLER_MIP_FILTER_LINEAR = 2, + SPVC_MSL_SAMPLER_MIP_FILTER_INT_MAX = 0x7fffffff +} spvc_msl_sampler_mip_filter; + +/* Maps to C++ API. */ +typedef enum spvc_msl_sampler_address +{ + SPVC_MSL_SAMPLER_ADDRESS_CLAMP_TO_ZERO = 0, + SPVC_MSL_SAMPLER_ADDRESS_CLAMP_TO_EDGE = 1, + SPVC_MSL_SAMPLER_ADDRESS_CLAMP_TO_BORDER = 2, + SPVC_MSL_SAMPLER_ADDRESS_REPEAT = 3, + SPVC_MSL_SAMPLER_ADDRESS_MIRRORED_REPEAT = 4, + SPVC_MSL_SAMPLER_ADDRESS_INT_MAX = 0x7fffffff +} spvc_msl_sampler_address; + +/* Maps to C++ API. 
*/ +typedef enum spvc_msl_sampler_compare_func +{ + SPVC_MSL_SAMPLER_COMPARE_FUNC_NEVER = 0, + SPVC_MSL_SAMPLER_COMPARE_FUNC_LESS = 1, + SPVC_MSL_SAMPLER_COMPARE_FUNC_LESS_EQUAL = 2, + SPVC_MSL_SAMPLER_COMPARE_FUNC_GREATER = 3, + SPVC_MSL_SAMPLER_COMPARE_FUNC_GREATER_EQUAL = 4, + SPVC_MSL_SAMPLER_COMPARE_FUNC_EQUAL = 5, + SPVC_MSL_SAMPLER_COMPARE_FUNC_NOT_EQUAL = 6, + SPVC_MSL_SAMPLER_COMPARE_FUNC_ALWAYS = 7, + SPVC_MSL_SAMPLER_COMPARE_FUNC_INT_MAX = 0x7fffffff +} spvc_msl_sampler_compare_func; + +/* Maps to C++ API. */ +typedef enum spvc_msl_sampler_border_color +{ + SPVC_MSL_SAMPLER_BORDER_COLOR_TRANSPARENT_BLACK = 0, + SPVC_MSL_SAMPLER_BORDER_COLOR_OPAQUE_BLACK = 1, + SPVC_MSL_SAMPLER_BORDER_COLOR_OPAQUE_WHITE = 2, + SPVC_MSL_SAMPLER_BORDER_COLOR_INT_MAX = 0x7fffffff +} spvc_msl_sampler_border_color; + +/* Maps to C++ API. */ +typedef enum spvc_msl_format_resolution +{ + SPVC_MSL_FORMAT_RESOLUTION_444 = 0, + SPVC_MSL_FORMAT_RESOLUTION_422, + SPVC_MSL_FORMAT_RESOLUTION_420, + SPVC_MSL_FORMAT_RESOLUTION_INT_MAX = 0x7fffffff +} spvc_msl_format_resolution; + +/* Maps to C++ API. */ +typedef enum spvc_msl_chroma_location +{ + SPVC_MSL_CHROMA_LOCATION_COSITED_EVEN = 0, + SPVC_MSL_CHROMA_LOCATION_MIDPOINT, + SPVC_MSL_CHROMA_LOCATION_INT_MAX = 0x7fffffff +} spvc_msl_chroma_location; + +/* Maps to C++ API. */ +typedef enum spvc_msl_component_swizzle +{ + SPVC_MSL_COMPONENT_SWIZZLE_IDENTITY = 0, + SPVC_MSL_COMPONENT_SWIZZLE_ZERO, + SPVC_MSL_COMPONENT_SWIZZLE_ONE, + SPVC_MSL_COMPONENT_SWIZZLE_R, + SPVC_MSL_COMPONENT_SWIZZLE_G, + SPVC_MSL_COMPONENT_SWIZZLE_B, + SPVC_MSL_COMPONENT_SWIZZLE_A, + SPVC_MSL_COMPONENT_SWIZZLE_INT_MAX = 0x7fffffff +} spvc_msl_component_swizzle; + +/* Maps to C++ API. 
*/ +typedef enum spvc_msl_sampler_ycbcr_model_conversion +{ + SPVC_MSL_SAMPLER_YCBCR_MODEL_CONVERSION_RGB_IDENTITY = 0, + SPVC_MSL_SAMPLER_YCBCR_MODEL_CONVERSION_YCBCR_IDENTITY, + SPVC_MSL_SAMPLER_YCBCR_MODEL_CONVERSION_YCBCR_BT_709, + SPVC_MSL_SAMPLER_YCBCR_MODEL_CONVERSION_YCBCR_BT_601, + SPVC_MSL_SAMPLER_YCBCR_MODEL_CONVERSION_YCBCR_BT_2020, + SPVC_MSL_SAMPLER_YCBCR_MODEL_CONVERSION_INT_MAX = 0x7fffffff +} spvc_msl_sampler_ycbcr_model_conversion; + +/* Maps to C++ API. */ +typedef enum spvc_msl_sampler_ycbcr_range +{ + SPVC_MSL_SAMPLER_YCBCR_RANGE_ITU_FULL = 0, + SPVC_MSL_SAMPLER_YCBCR_RANGE_ITU_NARROW, + SPVC_MSL_SAMPLER_YCBCR_RANGE_INT_MAX = 0x7fffffff +} spvc_msl_sampler_ycbcr_range; + +/* Maps to C++ API. */ +typedef struct spvc_msl_constexpr_sampler +{ + spvc_msl_sampler_coord coord; + spvc_msl_sampler_filter min_filter; + spvc_msl_sampler_filter mag_filter; + spvc_msl_sampler_mip_filter mip_filter; + spvc_msl_sampler_address s_address; + spvc_msl_sampler_address t_address; + spvc_msl_sampler_address r_address; + spvc_msl_sampler_compare_func compare_func; + spvc_msl_sampler_border_color border_color; + float lod_clamp_min; + float lod_clamp_max; + int max_anisotropy; + + spvc_bool compare_enable; + spvc_bool lod_clamp_enable; + spvc_bool anisotropy_enable; +} spvc_msl_constexpr_sampler; + +/* + * Initializes the constexpr sampler struct. + * The defaults are non-zero. + */ +SPVC_PUBLIC_API void spvc_msl_constexpr_sampler_init(spvc_msl_constexpr_sampler *sampler); + +/* Maps to the sampler Y'CbCr conversion-related portions of MSLConstexprSampler. See C++ API for defaults and details. 
*/ +typedef struct spvc_msl_sampler_ycbcr_conversion +{ + unsigned planes; + spvc_msl_format_resolution resolution; + spvc_msl_sampler_filter chroma_filter; + spvc_msl_chroma_location x_chroma_offset; + spvc_msl_chroma_location y_chroma_offset; + spvc_msl_component_swizzle swizzle[4]; + spvc_msl_sampler_ycbcr_model_conversion ycbcr_model; + spvc_msl_sampler_ycbcr_range ycbcr_range; + unsigned bpc; +} spvc_msl_sampler_ycbcr_conversion; + +/* + * Initializes the sampler Y'CbCr conversion struct. + * The defaults are non-zero. + */ +SPVC_PUBLIC_API void spvc_msl_sampler_ycbcr_conversion_init(spvc_msl_sampler_ycbcr_conversion *conv); + +/* Maps to C++ API. */ +typedef enum spvc_hlsl_binding_flag_bits +{ + SPVC_HLSL_BINDING_AUTO_NONE_BIT = 0, + SPVC_HLSL_BINDING_AUTO_PUSH_CONSTANT_BIT = 1 << 0, + SPVC_HLSL_BINDING_AUTO_CBV_BIT = 1 << 1, + SPVC_HLSL_BINDING_AUTO_SRV_BIT = 1 << 2, + SPVC_HLSL_BINDING_AUTO_UAV_BIT = 1 << 3, + SPVC_HLSL_BINDING_AUTO_SAMPLER_BIT = 1 << 4, + SPVC_HLSL_BINDING_AUTO_ALL = 0x7fffffff +} spvc_hlsl_binding_flag_bits; +typedef unsigned spvc_hlsl_binding_flags; + +#define SPVC_HLSL_PUSH_CONSTANT_DESC_SET (~(0u)) +#define SPVC_HLSL_PUSH_CONSTANT_BINDING (0) + +/* Maps to C++ API. */ +typedef struct spvc_hlsl_resource_binding_mapping +{ + unsigned register_space; + unsigned register_binding; +} spvc_hlsl_resource_binding_mapping; + +typedef struct spvc_hlsl_resource_binding +{ + SpvExecutionModel stage; + unsigned desc_set; + unsigned binding; + + spvc_hlsl_resource_binding_mapping cbv, uav, srv, sampler; +} spvc_hlsl_resource_binding; + +/* + * Initializes the resource binding struct. + * The defaults are non-zero. + */ +SPVC_PUBLIC_API void spvc_hlsl_resource_binding_init(spvc_hlsl_resource_binding *binding); + +/* Maps to the various spirv_cross::Compiler*::Option structures. See C++ API for defaults and details. 
*/ +typedef enum spvc_compiler_option +{ + SPVC_COMPILER_OPTION_UNKNOWN = 0, + + SPVC_COMPILER_OPTION_FORCE_TEMPORARY = 1 | SPVC_COMPILER_OPTION_COMMON_BIT, + SPVC_COMPILER_OPTION_FLATTEN_MULTIDIMENSIONAL_ARRAYS = 2 | SPVC_COMPILER_OPTION_COMMON_BIT, + SPVC_COMPILER_OPTION_FIXUP_DEPTH_CONVENTION = 3 | SPVC_COMPILER_OPTION_COMMON_BIT, + SPVC_COMPILER_OPTION_FLIP_VERTEX_Y = 4 | SPVC_COMPILER_OPTION_COMMON_BIT, + + SPVC_COMPILER_OPTION_GLSL_SUPPORT_NONZERO_BASE_INSTANCE = 5 | SPVC_COMPILER_OPTION_GLSL_BIT, + SPVC_COMPILER_OPTION_GLSL_SEPARATE_SHADER_OBJECTS = 6 | SPVC_COMPILER_OPTION_GLSL_BIT, + SPVC_COMPILER_OPTION_GLSL_ENABLE_420PACK_EXTENSION = 7 | SPVC_COMPILER_OPTION_GLSL_BIT, + SPVC_COMPILER_OPTION_GLSL_VERSION = 8 | SPVC_COMPILER_OPTION_GLSL_BIT, + SPVC_COMPILER_OPTION_GLSL_ES = 9 | SPVC_COMPILER_OPTION_GLSL_BIT, + SPVC_COMPILER_OPTION_GLSL_VULKAN_SEMANTICS = 10 | SPVC_COMPILER_OPTION_GLSL_BIT, + SPVC_COMPILER_OPTION_GLSL_ES_DEFAULT_FLOAT_PRECISION_HIGHP = 11 | SPVC_COMPILER_OPTION_GLSL_BIT, + SPVC_COMPILER_OPTION_GLSL_ES_DEFAULT_INT_PRECISION_HIGHP = 12 | SPVC_COMPILER_OPTION_GLSL_BIT, + + SPVC_COMPILER_OPTION_HLSL_SHADER_MODEL = 13 | SPVC_COMPILER_OPTION_HLSL_BIT, + SPVC_COMPILER_OPTION_HLSL_POINT_SIZE_COMPAT = 14 | SPVC_COMPILER_OPTION_HLSL_BIT, + SPVC_COMPILER_OPTION_HLSL_POINT_COORD_COMPAT = 15 | SPVC_COMPILER_OPTION_HLSL_BIT, + SPVC_COMPILER_OPTION_HLSL_SUPPORT_NONZERO_BASE_VERTEX_BASE_INSTANCE = 16 | SPVC_COMPILER_OPTION_HLSL_BIT, + + SPVC_COMPILER_OPTION_MSL_VERSION = 17 | SPVC_COMPILER_OPTION_MSL_BIT, + SPVC_COMPILER_OPTION_MSL_TEXEL_BUFFER_TEXTURE_WIDTH = 18 | SPVC_COMPILER_OPTION_MSL_BIT, + + /* Obsolete, use SWIZZLE_BUFFER_INDEX instead. 
*/ + SPVC_COMPILER_OPTION_MSL_AUX_BUFFER_INDEX = 19 | SPVC_COMPILER_OPTION_MSL_BIT, + SPVC_COMPILER_OPTION_MSL_SWIZZLE_BUFFER_INDEX = 19 | SPVC_COMPILER_OPTION_MSL_BIT, + + SPVC_COMPILER_OPTION_MSL_INDIRECT_PARAMS_BUFFER_INDEX = 20 | SPVC_COMPILER_OPTION_MSL_BIT, + SPVC_COMPILER_OPTION_MSL_SHADER_OUTPUT_BUFFER_INDEX = 21 | SPVC_COMPILER_OPTION_MSL_BIT, + SPVC_COMPILER_OPTION_MSL_SHADER_PATCH_OUTPUT_BUFFER_INDEX = 22 | SPVC_COMPILER_OPTION_MSL_BIT, + SPVC_COMPILER_OPTION_MSL_SHADER_TESS_FACTOR_OUTPUT_BUFFER_INDEX = 23 | SPVC_COMPILER_OPTION_MSL_BIT, + SPVC_COMPILER_OPTION_MSL_SHADER_INPUT_WORKGROUP_INDEX = 24 | SPVC_COMPILER_OPTION_MSL_BIT, + SPVC_COMPILER_OPTION_MSL_ENABLE_POINT_SIZE_BUILTIN = 25 | SPVC_COMPILER_OPTION_MSL_BIT, + SPVC_COMPILER_OPTION_MSL_DISABLE_RASTERIZATION = 26 | SPVC_COMPILER_OPTION_MSL_BIT, + SPVC_COMPILER_OPTION_MSL_CAPTURE_OUTPUT_TO_BUFFER = 27 | SPVC_COMPILER_OPTION_MSL_BIT, + SPVC_COMPILER_OPTION_MSL_SWIZZLE_TEXTURE_SAMPLES = 28 | SPVC_COMPILER_OPTION_MSL_BIT, + SPVC_COMPILER_OPTION_MSL_PAD_FRAGMENT_OUTPUT_COMPONENTS = 29 | SPVC_COMPILER_OPTION_MSL_BIT, + SPVC_COMPILER_OPTION_MSL_TESS_DOMAIN_ORIGIN_LOWER_LEFT = 30 | SPVC_COMPILER_OPTION_MSL_BIT, + SPVC_COMPILER_OPTION_MSL_PLATFORM = 31 | SPVC_COMPILER_OPTION_MSL_BIT, + SPVC_COMPILER_OPTION_MSL_ARGUMENT_BUFFERS = 32 | SPVC_COMPILER_OPTION_MSL_BIT, + + SPVC_COMPILER_OPTION_GLSL_EMIT_PUSH_CONSTANT_AS_UNIFORM_BUFFER = 33 | SPVC_COMPILER_OPTION_GLSL_BIT, + + SPVC_COMPILER_OPTION_MSL_TEXTURE_BUFFER_NATIVE = 34 | SPVC_COMPILER_OPTION_MSL_BIT, + + SPVC_COMPILER_OPTION_GLSL_EMIT_UNIFORM_BUFFER_AS_PLAIN_UNIFORMS = 35 | SPVC_COMPILER_OPTION_GLSL_BIT, + + SPVC_COMPILER_OPTION_MSL_BUFFER_SIZE_BUFFER_INDEX = 36 | SPVC_COMPILER_OPTION_MSL_BIT, + + SPVC_COMPILER_OPTION_EMIT_LINE_DIRECTIVES = 37 | SPVC_COMPILER_OPTION_COMMON_BIT, + + SPVC_COMPILER_OPTION_MSL_MULTIVIEW = 38 | SPVC_COMPILER_OPTION_MSL_BIT, + SPVC_COMPILER_OPTION_MSL_VIEW_MASK_BUFFER_INDEX = 39 | SPVC_COMPILER_OPTION_MSL_BIT, + 
SPVC_COMPILER_OPTION_MSL_DEVICE_INDEX = 40 | SPVC_COMPILER_OPTION_MSL_BIT, + SPVC_COMPILER_OPTION_MSL_VIEW_INDEX_FROM_DEVICE_INDEX = 41 | SPVC_COMPILER_OPTION_MSL_BIT, + SPVC_COMPILER_OPTION_MSL_DISPATCH_BASE = 42 | SPVC_COMPILER_OPTION_MSL_BIT, + SPVC_COMPILER_OPTION_MSL_DYNAMIC_OFFSETS_BUFFER_INDEX = 43 | SPVC_COMPILER_OPTION_MSL_BIT, + SPVC_COMPILER_OPTION_MSL_TEXTURE_1D_AS_2D = 44 | SPVC_COMPILER_OPTION_MSL_BIT, + SPVC_COMPILER_OPTION_MSL_ENABLE_BASE_INDEX_ZERO = 45 | SPVC_COMPILER_OPTION_MSL_BIT, + + /* Obsolete. Use MSL_FRAMEBUFFER_FETCH_SUBPASS instead. */ + SPVC_COMPILER_OPTION_MSL_IOS_FRAMEBUFFER_FETCH_SUBPASS = 46 | SPVC_COMPILER_OPTION_MSL_BIT, + SPVC_COMPILER_OPTION_MSL_FRAMEBUFFER_FETCH_SUBPASS = 46 | SPVC_COMPILER_OPTION_MSL_BIT, + + SPVC_COMPILER_OPTION_MSL_INVARIANT_FP_MATH = 47 | SPVC_COMPILER_OPTION_MSL_BIT, + SPVC_COMPILER_OPTION_MSL_EMULATE_CUBEMAP_ARRAY = 48 | SPVC_COMPILER_OPTION_MSL_BIT, + SPVC_COMPILER_OPTION_MSL_ENABLE_DECORATION_BINDING = 49 | SPVC_COMPILER_OPTION_MSL_BIT, + SPVC_COMPILER_OPTION_MSL_FORCE_ACTIVE_ARGUMENT_BUFFER_RESOURCES = 50 | SPVC_COMPILER_OPTION_MSL_BIT, + SPVC_COMPILER_OPTION_MSL_FORCE_NATIVE_ARRAYS = 51 | SPVC_COMPILER_OPTION_MSL_BIT, + + SPVC_COMPILER_OPTION_ENABLE_STORAGE_IMAGE_QUALIFIER_DEDUCTION = 52 | SPVC_COMPILER_OPTION_COMMON_BIT, + + SPVC_COMPILER_OPTION_HLSL_FORCE_STORAGE_BUFFER_AS_UAV = 53 | SPVC_COMPILER_OPTION_HLSL_BIT, + + SPVC_COMPILER_OPTION_FORCE_ZERO_INITIALIZED_VARIABLES = 54 | SPVC_COMPILER_OPTION_COMMON_BIT, + + SPVC_COMPILER_OPTION_HLSL_NONWRITABLE_UAV_TEXTURE_AS_SRV = 55 | SPVC_COMPILER_OPTION_HLSL_BIT, + + SPVC_COMPILER_OPTION_MSL_ENABLE_FRAG_OUTPUT_MASK = 56 | SPVC_COMPILER_OPTION_MSL_BIT, + SPVC_COMPILER_OPTION_MSL_ENABLE_FRAG_DEPTH_BUILTIN = 57 | SPVC_COMPILER_OPTION_MSL_BIT, + SPVC_COMPILER_OPTION_MSL_ENABLE_FRAG_STENCIL_REF_BUILTIN = 58 | SPVC_COMPILER_OPTION_MSL_BIT, + SPVC_COMPILER_OPTION_MSL_ENABLE_CLIP_DISTANCE_USER_VARYING = 59 | SPVC_COMPILER_OPTION_MSL_BIT, + + 
SPVC_COMPILER_OPTION_HLSL_ENABLE_16BIT_TYPES = 60 | SPVC_COMPILER_OPTION_HLSL_BIT, + + SPVC_COMPILER_OPTION_MSL_MULTI_PATCH_WORKGROUP = 61 | SPVC_COMPILER_OPTION_MSL_BIT, + SPVC_COMPILER_OPTION_MSL_SHADER_INPUT_BUFFER_INDEX = 62 | SPVC_COMPILER_OPTION_MSL_BIT, + SPVC_COMPILER_OPTION_MSL_SHADER_INDEX_BUFFER_INDEX = 63 | SPVC_COMPILER_OPTION_MSL_BIT, + SPVC_COMPILER_OPTION_MSL_VERTEX_FOR_TESSELLATION = 64 | SPVC_COMPILER_OPTION_MSL_BIT, + SPVC_COMPILER_OPTION_MSL_VERTEX_INDEX_TYPE = 65 | SPVC_COMPILER_OPTION_MSL_BIT, + + SPVC_COMPILER_OPTION_GLSL_FORCE_FLATTENED_IO_BLOCKS = 66 | SPVC_COMPILER_OPTION_GLSL_BIT, + + SPVC_COMPILER_OPTION_MSL_MULTIVIEW_LAYERED_RENDERING = 67 | SPVC_COMPILER_OPTION_MSL_BIT, + SPVC_COMPILER_OPTION_MSL_ARRAYED_SUBPASS_INPUT = 68 | SPVC_COMPILER_OPTION_MSL_BIT, + SPVC_COMPILER_OPTION_MSL_R32UI_LINEAR_TEXTURE_ALIGNMENT = 69 | SPVC_COMPILER_OPTION_MSL_BIT, + SPVC_COMPILER_OPTION_MSL_R32UI_ALIGNMENT_CONSTANT_ID = 70 | SPVC_COMPILER_OPTION_MSL_BIT, + + SPVC_COMPILER_OPTION_HLSL_FLATTEN_MATRIX_VERTEX_INPUT_SEMANTICS = 71 | SPVC_COMPILER_OPTION_HLSL_BIT, + + SPVC_COMPILER_OPTION_MSL_IOS_USE_SIMDGROUP_FUNCTIONS = 72 | SPVC_COMPILER_OPTION_MSL_BIT, + SPVC_COMPILER_OPTION_MSL_EMULATE_SUBGROUPS = 73 | SPVC_COMPILER_OPTION_MSL_BIT, + SPVC_COMPILER_OPTION_MSL_FIXED_SUBGROUP_SIZE = 74 | SPVC_COMPILER_OPTION_MSL_BIT, + SPVC_COMPILER_OPTION_MSL_FORCE_SAMPLE_RATE_SHADING = 75 | SPVC_COMPILER_OPTION_MSL_BIT, + + SPVC_COMPILER_OPTION_INT_MAX = 0x7fffffff +} spvc_compiler_option; + +/* + * Context is the highest-level API construct. + * The context owns all memory allocations made by its child object hierarchy, including various non-opaque structs and strings. + * This means that the API user only has to care about one "destroy" call ever when using the C API. + * All pointers handed out by the APIs are only valid as long as the context + * is alive and spvc_context_release_allocations has not been called. 
+ */ +SPVC_PUBLIC_API spvc_result spvc_context_create(spvc_context *context); + +/* Frees all memory allocations and objects associated with the context and its child objects. */ +SPVC_PUBLIC_API void spvc_context_destroy(spvc_context context); + +/* Frees all memory allocations and objects associated with the context and its child objects, but keeps the context alive. */ +SPVC_PUBLIC_API void spvc_context_release_allocations(spvc_context context); + +/* Get the string for the last error which was logged. */ +SPVC_PUBLIC_API const char *spvc_context_get_last_error_string(spvc_context context); + +/* Get notified in a callback when an error triggers. Useful for debugging. */ +typedef void (*spvc_error_callback)(void *userdata, const char *error); +SPVC_PUBLIC_API void spvc_context_set_error_callback(spvc_context context, spvc_error_callback cb, void *userdata); + +/* SPIR-V parsing interface. Maps to Parser which then creates a ParsedIR, and that IR is extracted into the handle. */ +SPVC_PUBLIC_API spvc_result spvc_context_parse_spirv(spvc_context context, const SpvId *spirv, size_t word_count, + spvc_parsed_ir *parsed_ir); + +/* + * Create a compiler backend. Capture mode controls if we construct by copy or move semantics. + * It is always recommended to use SPVC_CAPTURE_MODE_TAKE_OWNERSHIP if you only intend to cross-compile the IR once. + */ +SPVC_PUBLIC_API spvc_result spvc_context_create_compiler(spvc_context context, spvc_backend backend, + spvc_parsed_ir parsed_ir, spvc_capture_mode mode, + spvc_compiler *compiler); + +/* Maps directly to C++ API. */ +SPVC_PUBLIC_API unsigned spvc_compiler_get_current_id_bound(spvc_compiler compiler); + +/* Create compiler options, which will initialize defaults. */ +SPVC_PUBLIC_API spvc_result spvc_compiler_create_compiler_options(spvc_compiler compiler, + spvc_compiler_options *options); +/* Override options. Will return error if e.g. MSL options are used for the HLSL backend, etc. 
*/ +SPVC_PUBLIC_API spvc_result spvc_compiler_options_set_bool(spvc_compiler_options options, + spvc_compiler_option option, spvc_bool value); +SPVC_PUBLIC_API spvc_result spvc_compiler_options_set_uint(spvc_compiler_options options, + spvc_compiler_option option, unsigned value); +/* Set compiler options. */ +SPVC_PUBLIC_API spvc_result spvc_compiler_install_compiler_options(spvc_compiler compiler, + spvc_compiler_options options); + +/* Compile IR into a string. *source is owned by the context, and caller must not free it themselves. */ +SPVC_PUBLIC_API spvc_result spvc_compiler_compile(spvc_compiler compiler, const char **source); + +/* Maps to C++ API. */ +SPVC_PUBLIC_API spvc_result spvc_compiler_add_header_line(spvc_compiler compiler, const char *line); +SPVC_PUBLIC_API spvc_result spvc_compiler_require_extension(spvc_compiler compiler, const char *ext); +SPVC_PUBLIC_API spvc_result spvc_compiler_flatten_buffer_block(spvc_compiler compiler, spvc_variable_id id); + +SPVC_PUBLIC_API spvc_bool spvc_compiler_variable_is_depth_or_compare(spvc_compiler compiler, spvc_variable_id id); + +/* + * HLSL specifics. + * Maps to C++ API. 
+ */ +SPVC_PUBLIC_API spvc_result spvc_compiler_hlsl_set_root_constants_layout(spvc_compiler compiler, + const spvc_hlsl_root_constants *constant_info, + size_t count); +SPVC_PUBLIC_API spvc_result spvc_compiler_hlsl_add_vertex_attribute_remap(spvc_compiler compiler, + const spvc_hlsl_vertex_attribute_remap *remap, + size_t remaps); +SPVC_PUBLIC_API spvc_variable_id spvc_compiler_hlsl_remap_num_workgroups_builtin(spvc_compiler compiler); + +SPVC_PUBLIC_API spvc_result spvc_compiler_hlsl_set_resource_binding_flags(spvc_compiler compiler, + spvc_hlsl_binding_flags flags); + +SPVC_PUBLIC_API spvc_result spvc_compiler_hlsl_add_resource_binding(spvc_compiler compiler, + const spvc_hlsl_resource_binding *binding); +SPVC_PUBLIC_API spvc_bool spvc_compiler_hlsl_is_resource_used(spvc_compiler compiler, + SpvExecutionModel model, + unsigned set, + unsigned binding); + +/* + * MSL specifics. + * Maps to C++ API. + */ +SPVC_PUBLIC_API spvc_bool spvc_compiler_msl_is_rasterization_disabled(spvc_compiler compiler); + +/* Obsolete. Renamed to needs_swizzle_buffer. 
 */
+SPVC_PUBLIC_API spvc_bool spvc_compiler_msl_needs_aux_buffer(spvc_compiler compiler);
+SPVC_PUBLIC_API spvc_bool spvc_compiler_msl_needs_swizzle_buffer(spvc_compiler compiler);
+SPVC_PUBLIC_API spvc_bool spvc_compiler_msl_needs_buffer_size_buffer(spvc_compiler compiler);
+
+SPVC_PUBLIC_API spvc_bool spvc_compiler_msl_needs_output_buffer(spvc_compiler compiler);
+SPVC_PUBLIC_API spvc_bool spvc_compiler_msl_needs_patch_output_buffer(spvc_compiler compiler);
+SPVC_PUBLIC_API spvc_bool spvc_compiler_msl_needs_input_threadgroup_mem(spvc_compiler compiler);
+/* The add_* calls below each take a pointer to a caller-provided description struct and return spvc_result. */
+SPVC_PUBLIC_API spvc_result spvc_compiler_msl_add_vertex_attribute(spvc_compiler compiler,
+        const spvc_msl_vertex_attribute *attrs);
+SPVC_PUBLIC_API spvc_result spvc_compiler_msl_add_resource_binding(spvc_compiler compiler,
+        const spvc_msl_resource_binding *binding);
+SPVC_PUBLIC_API spvc_result spvc_compiler_msl_add_shader_input(spvc_compiler compiler,
+        const spvc_msl_shader_input *input);
+SPVC_PUBLIC_API spvc_result spvc_compiler_msl_add_discrete_descriptor_set(spvc_compiler compiler, unsigned desc_set);
+SPVC_PUBLIC_API spvc_result spvc_compiler_msl_set_argument_buffer_device_address_space(spvc_compiler compiler, unsigned desc_set, spvc_bool device_address);
+
+/* Obsolete, use is_shader_input_used. */
+SPVC_PUBLIC_API spvc_bool spvc_compiler_msl_is_vertex_attribute_used(spvc_compiler compiler, unsigned location);
+SPVC_PUBLIC_API spvc_bool spvc_compiler_msl_is_shader_input_used(spvc_compiler compiler, unsigned location);
+
+SPVC_PUBLIC_API spvc_bool spvc_compiler_msl_is_resource_used(spvc_compiler compiler,
+        SpvExecutionModel model,
+        unsigned set,
+        unsigned binding);
+/* Sampler remapping comes in four flavours: addressed by variable id or by (desc_set, binding), with or without a Y'CbCr conversion struct. */
+SPVC_PUBLIC_API spvc_result spvc_compiler_msl_remap_constexpr_sampler(spvc_compiler compiler, spvc_variable_id id, const spvc_msl_constexpr_sampler *sampler);
+SPVC_PUBLIC_API spvc_result spvc_compiler_msl_remap_constexpr_sampler_by_binding(spvc_compiler compiler, unsigned desc_set, unsigned binding, const spvc_msl_constexpr_sampler *sampler);
+SPVC_PUBLIC_API spvc_result spvc_compiler_msl_remap_constexpr_sampler_ycbcr(spvc_compiler compiler, spvc_variable_id id, const spvc_msl_constexpr_sampler *sampler, const spvc_msl_sampler_ycbcr_conversion *conv);
+SPVC_PUBLIC_API spvc_result spvc_compiler_msl_remap_constexpr_sampler_by_binding_ycbcr(spvc_compiler compiler, unsigned desc_set, unsigned binding, const spvc_msl_constexpr_sampler *sampler, const spvc_msl_sampler_ycbcr_conversion *conv);
+SPVC_PUBLIC_API spvc_result spvc_compiler_msl_set_fragment_output_components(spvc_compiler compiler, unsigned location, unsigned components);
+
+SPVC_PUBLIC_API unsigned spvc_compiler_msl_get_automatic_resource_binding(spvc_compiler compiler, spvc_variable_id id);
+SPVC_PUBLIC_API unsigned spvc_compiler_msl_get_automatic_resource_binding_secondary(spvc_compiler compiler, spvc_variable_id id);
+
+SPVC_PUBLIC_API spvc_result spvc_compiler_msl_add_dynamic_buffer(spvc_compiler compiler, unsigned desc_set, unsigned binding, unsigned index);
+
+SPVC_PUBLIC_API spvc_result spvc_compiler_msl_add_inline_uniform_block(spvc_compiler compiler, unsigned desc_set, unsigned binding);
+
+SPVC_PUBLIC_API spvc_result spvc_compiler_msl_set_combined_sampler_suffix(spvc_compiler compiler, const char *suffix);
+SPVC_PUBLIC_API const char *spvc_compiler_msl_get_combined_sampler_suffix(spvc_compiler compiler);
+
+/*
+ * Reflect resources.
+ * Maps almost 1:1 to C++ API.
+ */
+SPVC_PUBLIC_API spvc_result spvc_compiler_get_active_interface_variables(spvc_compiler compiler, spvc_set *set);
+SPVC_PUBLIC_API spvc_result spvc_compiler_set_enabled_interface_variables(spvc_compiler compiler, spvc_set set);
+SPVC_PUBLIC_API spvc_result spvc_compiler_create_shader_resources(spvc_compiler compiler, spvc_resources *resources);
+SPVC_PUBLIC_API spvc_result spvc_compiler_create_shader_resources_for_active_variables(spvc_compiler compiler,
+        spvc_resources *resources,
+        spvc_set active);
+/* NOTE(review): resource_list / resource_size look like out-parameters (list pointer and element count) - confirm against the implementation. */
+SPVC_PUBLIC_API spvc_result spvc_resources_get_resource_list_for_type(spvc_resources resources, spvc_resource_type type,
+        const spvc_reflected_resource **resource_list,
+        size_t *resource_size);
+
+/*
+ * Decorations.
+ * Maps to C++ API.
+ */
+SPVC_PUBLIC_API void spvc_compiler_set_decoration(spvc_compiler compiler, SpvId id, SpvDecoration decoration,
+        unsigned argument);
+SPVC_PUBLIC_API void spvc_compiler_set_decoration_string(spvc_compiler compiler, SpvId id, SpvDecoration decoration,
+        const char *argument);
+SPVC_PUBLIC_API void spvc_compiler_set_name(spvc_compiler compiler, SpvId id, const char *argument);
+SPVC_PUBLIC_API void spvc_compiler_set_member_decoration(spvc_compiler compiler, spvc_type_id id, unsigned member_index,
+        SpvDecoration decoration, unsigned argument);
+SPVC_PUBLIC_API void spvc_compiler_set_member_decoration_string(spvc_compiler compiler, spvc_type_id id,
+        unsigned member_index, SpvDecoration decoration,
+        const char *argument);
+SPVC_PUBLIC_API void spvc_compiler_set_member_name(spvc_compiler compiler, spvc_type_id id, unsigned member_index,
+        const char *argument);
+SPVC_PUBLIC_API void spvc_compiler_unset_decoration(spvc_compiler compiler, SpvId id, SpvDecoration decoration);
+SPVC_PUBLIC_API void spvc_compiler_unset_member_decoration(spvc_compiler compiler, spvc_type_id id,
+        unsigned member_index, SpvDecoration decoration);
+
+SPVC_PUBLIC_API spvc_bool spvc_compiler_has_decoration(spvc_compiler compiler, SpvId id, SpvDecoration decoration);
+SPVC_PUBLIC_API spvc_bool spvc_compiler_has_member_decoration(spvc_compiler compiler, spvc_type_id id,
+        unsigned member_index, SpvDecoration decoration);
+SPVC_PUBLIC_API const char *spvc_compiler_get_name(spvc_compiler compiler, SpvId id);
+SPVC_PUBLIC_API unsigned spvc_compiler_get_decoration(spvc_compiler compiler, SpvId id, SpvDecoration decoration);
+SPVC_PUBLIC_API const char *spvc_compiler_get_decoration_string(spvc_compiler compiler, SpvId id,
+        SpvDecoration decoration);
+SPVC_PUBLIC_API unsigned spvc_compiler_get_member_decoration(spvc_compiler compiler, spvc_type_id id,
+        unsigned member_index, SpvDecoration decoration);
+SPVC_PUBLIC_API const char *spvc_compiler_get_member_decoration_string(spvc_compiler compiler, spvc_type_id id,
+        unsigned member_index, SpvDecoration decoration);
+SPVC_PUBLIC_API const char *spvc_compiler_get_member_name(spvc_compiler compiler, spvc_type_id id, unsigned member_index);
+
+/*
+ * Entry points.
+ * Maps to C++ API.
+ */
+SPVC_PUBLIC_API spvc_result spvc_compiler_get_entry_points(spvc_compiler compiler,
+        const spvc_entry_point **entry_points,
+        size_t *num_entry_points);
+SPVC_PUBLIC_API spvc_result spvc_compiler_set_entry_point(spvc_compiler compiler, const char *name,
+        SpvExecutionModel model);
+SPVC_PUBLIC_API spvc_result spvc_compiler_rename_entry_point(spvc_compiler compiler, const char *old_name,
+        const char *new_name, SpvExecutionModel model);
+SPVC_PUBLIC_API const char *spvc_compiler_get_cleansed_entry_point_name(spvc_compiler compiler, const char *name,
+        SpvExecutionModel model);
+SPVC_PUBLIC_API void spvc_compiler_set_execution_mode(spvc_compiler compiler, SpvExecutionMode mode);
+SPVC_PUBLIC_API void spvc_compiler_unset_execution_mode(spvc_compiler compiler, SpvExecutionMode mode);
+SPVC_PUBLIC_API void spvc_compiler_set_execution_mode_with_arguments(spvc_compiler compiler, SpvExecutionMode mode,
+        unsigned arg0, unsigned arg1, unsigned arg2);
+SPVC_PUBLIC_API spvc_result spvc_compiler_get_execution_modes(spvc_compiler compiler, const SpvExecutionMode **modes,
+        size_t *num_modes);
+SPVC_PUBLIC_API unsigned spvc_compiler_get_execution_mode_argument(spvc_compiler compiler, SpvExecutionMode mode);
+SPVC_PUBLIC_API unsigned spvc_compiler_get_execution_mode_argument_by_index(spvc_compiler compiler,
+        SpvExecutionMode mode, unsigned index);
+SPVC_PUBLIC_API SpvExecutionModel spvc_compiler_get_execution_model(spvc_compiler compiler);
+
+/*
+ * Type query interface.
+ * Maps to C++ API, except it's read-only.
+ */
+SPVC_PUBLIC_API spvc_type spvc_compiler_get_type_handle(spvc_compiler compiler, spvc_type_id id);
+
+/* Pulls out SPIRType::self. This effectively gives the type ID without array or pointer qualifiers.
+ * This is necessary when reflecting decoration/name information on members of a struct,
+ * which are placed in the base type, not the qualified type.
+ * This is similar to spvc_reflected_resource::base_type_id. */
+SPVC_PUBLIC_API spvc_type_id spvc_type_get_base_type_id(spvc_type type);
+
+SPVC_PUBLIC_API spvc_basetype spvc_type_get_basetype(spvc_type type);
+SPVC_PUBLIC_API unsigned spvc_type_get_bit_width(spvc_type type);
+SPVC_PUBLIC_API unsigned spvc_type_get_vector_size(spvc_type type);
+SPVC_PUBLIC_API unsigned spvc_type_get_columns(spvc_type type);
+SPVC_PUBLIC_API unsigned spvc_type_get_num_array_dimensions(spvc_type type);
+SPVC_PUBLIC_API spvc_bool spvc_type_array_dimension_is_literal(spvc_type type, unsigned dimension);
+SPVC_PUBLIC_API SpvId spvc_type_get_array_dimension(spvc_type type, unsigned dimension);
+SPVC_PUBLIC_API unsigned spvc_type_get_num_member_types(spvc_type type);
+SPVC_PUBLIC_API spvc_type_id spvc_type_get_member_type(spvc_type type, unsigned index);
+SPVC_PUBLIC_API SpvStorageClass spvc_type_get_storage_class(spvc_type type);
+
+/* Image type query. */
+SPVC_PUBLIC_API spvc_type_id spvc_type_get_image_sampled_type(spvc_type type);
+SPVC_PUBLIC_API SpvDim spvc_type_get_image_dimension(spvc_type type);
+SPVC_PUBLIC_API spvc_bool spvc_type_get_image_is_depth(spvc_type type);
+SPVC_PUBLIC_API spvc_bool spvc_type_get_image_arrayed(spvc_type type);
+SPVC_PUBLIC_API spvc_bool spvc_type_get_image_multisampled(spvc_type type);
+SPVC_PUBLIC_API spvc_bool spvc_type_get_image_is_storage(spvc_type type);
+SPVC_PUBLIC_API SpvImageFormat spvc_type_get_image_storage_format(spvc_type type);
+SPVC_PUBLIC_API SpvAccessQualifier spvc_type_get_image_access_qualifier(spvc_type type);
+
+/*
+ * Buffer layout query.
+ * Maps to C++ API.
+ */
+SPVC_PUBLIC_API spvc_result spvc_compiler_get_declared_struct_size(spvc_compiler compiler, spvc_type struct_type, size_t *size);
+SPVC_PUBLIC_API spvc_result spvc_compiler_get_declared_struct_size_runtime_array(spvc_compiler compiler,
+        spvc_type struct_type, size_t array_size, size_t *size);
+SPVC_PUBLIC_API spvc_result spvc_compiler_get_declared_struct_member_size(spvc_compiler compiler, spvc_type type, unsigned index, size_t *size);
+
+SPVC_PUBLIC_API spvc_result spvc_compiler_type_struct_member_offset(spvc_compiler compiler,
+        spvc_type type, unsigned index, unsigned *offset);
+SPVC_PUBLIC_API spvc_result spvc_compiler_type_struct_member_array_stride(spvc_compiler compiler,
+        spvc_type type, unsigned index, unsigned *stride);
+SPVC_PUBLIC_API spvc_result spvc_compiler_type_struct_member_matrix_stride(spvc_compiler compiler,
+        spvc_type type, unsigned index, unsigned *stride);
+
+/*
+ * Workaround helper functions.
+ * Maps to C++ API.
+ */
+SPVC_PUBLIC_API spvc_result spvc_compiler_build_dummy_sampler_for_combined_images(spvc_compiler compiler, spvc_variable_id *id);
+SPVC_PUBLIC_API spvc_result spvc_compiler_build_combined_image_samplers(spvc_compiler compiler);
+SPVC_PUBLIC_API spvc_result spvc_compiler_get_combined_image_samplers(spvc_compiler compiler,
+        const spvc_combined_image_sampler **samplers,
+        size_t *num_samplers);
+
+/*
+ * Constants
+ * Maps to C++ API.
+ */
+SPVC_PUBLIC_API spvc_result spvc_compiler_get_specialization_constants(spvc_compiler compiler,
+        const spvc_specialization_constant **constants,
+        size_t *num_constants);
+SPVC_PUBLIC_API spvc_constant spvc_compiler_get_constant_handle(spvc_compiler compiler,
+        spvc_constant_id id);
+
+/* Takes one spvc_specialization_constant pointer per axis (x, y, z) and returns a spvc_constant_id. */
+SPVC_PUBLIC_API spvc_constant_id spvc_compiler_get_work_group_size_specialization_constants(spvc_compiler compiler,
+        spvc_specialization_constant *x,
+        spvc_specialization_constant *y,
+        spvc_specialization_constant *z);
+
+/*
+ * Buffer ranges
+ * Maps to C++ API.
+ */
+SPVC_PUBLIC_API spvc_result spvc_compiler_get_active_buffer_ranges(spvc_compiler compiler,
+        spvc_variable_id id,
+        const spvc_buffer_range **ranges,
+        size_t *num_ranges);
+
+/*
+ * No stdint.h until C99, sigh :(
+ * For smaller types, the result is sign or zero-extended as appropriate.
+ * Maps to C++ API.
+ * TODO: The SPIRConstant query interface and modification interface is not quite complete.
+ */
+SPVC_PUBLIC_API float spvc_constant_get_scalar_fp16(spvc_constant constant, unsigned column, unsigned row);
+SPVC_PUBLIC_API float spvc_constant_get_scalar_fp32(spvc_constant constant, unsigned column, unsigned row);
+SPVC_PUBLIC_API double spvc_constant_get_scalar_fp64(spvc_constant constant, unsigned column, unsigned row);
+SPVC_PUBLIC_API unsigned spvc_constant_get_scalar_u32(spvc_constant constant, unsigned column, unsigned row);
+SPVC_PUBLIC_API int spvc_constant_get_scalar_i32(spvc_constant constant, unsigned column, unsigned row);
+SPVC_PUBLIC_API unsigned spvc_constant_get_scalar_u16(spvc_constant constant, unsigned column, unsigned row);
+SPVC_PUBLIC_API int spvc_constant_get_scalar_i16(spvc_constant constant, unsigned column, unsigned row);
+SPVC_PUBLIC_API unsigned spvc_constant_get_scalar_u8(spvc_constant constant, unsigned column, unsigned row);
+SPVC_PUBLIC_API int spvc_constant_get_scalar_i8(spvc_constant constant, unsigned column, unsigned row);
+SPVC_PUBLIC_API void spvc_constant_get_subconstants(spvc_constant constant, const spvc_constant_id **constituents, size_t *count);
+SPVC_PUBLIC_API spvc_type_id spvc_constant_get_type(spvc_constant constant);
+
+/*
+ * Misc reflection
+ * Maps to C++ API.
+ */
+SPVC_PUBLIC_API spvc_bool spvc_compiler_get_binary_offset_for_decoration(spvc_compiler compiler,
+        spvc_variable_id id,
+        SpvDecoration decoration,
+        unsigned *word_offset);
+
+SPVC_PUBLIC_API spvc_bool spvc_compiler_buffer_is_hlsl_counter_buffer(spvc_compiler compiler, spvc_variable_id id);
+SPVC_PUBLIC_API spvc_bool spvc_compiler_buffer_get_hlsl_counter_buffer(spvc_compiler compiler, spvc_variable_id id,
+        spvc_variable_id *counter_id);
+
+SPVC_PUBLIC_API spvc_result spvc_compiler_get_declared_capabilities(spvc_compiler compiler,
+        const SpvCapability **capabilities,
+        size_t *num_capabilities);
+SPVC_PUBLIC_API spvc_result spvc_compiler_get_declared_extensions(spvc_compiler compiler, const char ***extensions,
+        size_t *num_extensions);
+
+SPVC_PUBLIC_API const char *spvc_compiler_get_remapped_declared_block_name(spvc_compiler compiler, spvc_variable_id id);
+SPVC_PUBLIC_API spvc_result spvc_compiler_get_buffer_block_decorations(spvc_compiler compiler, spvc_variable_id id,
+        const SpvDecoration **decorations,
+        size_t *num_decorations);
+
+/* NOTE(review): presumably closes an extern "C" block opened earlier in this header - the opening is outside this excerpt. */
+#ifdef __cplusplus
+}
+#endif
+#endif
diff --git a/dep/spirv-cross/spirv_cross_containers.hpp b/dep/spirv-cross/spirv_cross_containers.hpp
new file mode 100644
index 000000000..c68f670b1
--- /dev/null
+++ b/dep/spirv-cross/spirv_cross_containers.hpp
@@ -0,0 +1,747 @@
+/*
+ * Copyright 2019-2020 Hans-Kristian Arntzen
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */ + +/* + * At your option, you may choose to accept this material under either: + * 1. The Apache License, Version 2.0, found at , or + * 2. The MIT License, found at . + * SPDX-License-Identifier: Apache-2.0 OR MIT. + */ + +#ifndef SPIRV_CROSS_CONTAINERS_HPP +#define SPIRV_CROSS_CONTAINERS_HPP + +#include "spirv_cross_error_handling.hpp" +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#ifdef SPIRV_CROSS_NAMESPACE_OVERRIDE +#define SPIRV_CROSS_NAMESPACE SPIRV_CROSS_NAMESPACE_OVERRIDE +#else +#define SPIRV_CROSS_NAMESPACE spirv_cross +#endif + +namespace SPIRV_CROSS_NAMESPACE +{ +#ifndef SPIRV_CROSS_FORCE_STL_TYPES +// std::aligned_storage does not support size == 0, so roll our own. +template +class AlignedBuffer +{ +public: + T *data() + { +#if defined(_MSC_VER) && _MSC_VER < 1900 + // MSVC 2013 workarounds, sigh ... + // Only use this workaround on MSVC 2013 due to some confusion around default initialized unions. + // Spec seems to suggest the memory will be zero-initialized, which is *not* what we want. + return reinterpret_cast(u.aligned_char); +#else + return reinterpret_cast(aligned_char); +#endif + } + +private: +#if defined(_MSC_VER) && _MSC_VER < 1900 + // MSVC 2013 workarounds, sigh ... + union + { + char aligned_char[sizeof(T) * N]; + double dummy_aligner; + } u; +#else + alignas(T) char aligned_char[sizeof(T) * N]; +#endif +}; + +template +class AlignedBuffer +{ +public: + T *data() + { + return nullptr; + } +}; + +// An immutable version of SmallVector which erases type information about storage. 
+template +class VectorView +{ +public: + T &operator[](size_t i) SPIRV_CROSS_NOEXCEPT + { + return ptr[i]; + } + + const T &operator[](size_t i) const SPIRV_CROSS_NOEXCEPT + { + return ptr[i]; + } + + bool empty() const SPIRV_CROSS_NOEXCEPT + { + return buffer_size == 0; + } + + size_t size() const SPIRV_CROSS_NOEXCEPT + { + return buffer_size; + } + + T *data() SPIRV_CROSS_NOEXCEPT + { + return ptr; + } + + const T *data() const SPIRV_CROSS_NOEXCEPT + { + return ptr; + } + + T *begin() SPIRV_CROSS_NOEXCEPT + { + return ptr; + } + + T *end() SPIRV_CROSS_NOEXCEPT + { + return ptr + buffer_size; + } + + const T *begin() const SPIRV_CROSS_NOEXCEPT + { + return ptr; + } + + const T *end() const SPIRV_CROSS_NOEXCEPT + { + return ptr + buffer_size; + } + + T &front() SPIRV_CROSS_NOEXCEPT + { + return ptr[0]; + } + + const T &front() const SPIRV_CROSS_NOEXCEPT + { + return ptr[0]; + } + + T &back() SPIRV_CROSS_NOEXCEPT + { + return ptr[buffer_size - 1]; + } + + const T &back() const SPIRV_CROSS_NOEXCEPT + { + return ptr[buffer_size - 1]; + } + + // Makes it easier to consume SmallVector. +#if defined(_MSC_VER) && _MSC_VER < 1900 + explicit operator std::vector() const + { + // Another MSVC 2013 workaround. It does not understand lvalue/rvalue qualified operations. + return std::vector(ptr, ptr + buffer_size); + } +#else + // Makes it easier to consume SmallVector. + explicit operator std::vector() const & + { + return std::vector(ptr, ptr + buffer_size); + } + + // If we are converting as an r-value, we can pilfer our elements. + explicit operator std::vector() && + { + return std::vector(std::make_move_iterator(ptr), std::make_move_iterator(ptr + buffer_size)); + } +#endif + + // Avoid sliced copies. Base class should only be read as a reference. 
+ VectorView(const VectorView &) = delete; + void operator=(const VectorView &) = delete; + +protected: + VectorView() = default; + T *ptr = nullptr; + size_t buffer_size = 0; +}; + +// Simple vector which supports up to N elements inline, without malloc/free. +// We use a lot of throwaway vectors all over the place which triggers allocations. +// This class only implements the subset of std::vector we need in SPIRV-Cross. +// It is *NOT* a drop-in replacement in general projects. +template +class SmallVector : public VectorView +{ +public: + SmallVector() SPIRV_CROSS_NOEXCEPT + { + this->ptr = stack_storage.data(); + buffer_capacity = N; + } + + SmallVector(const T *arg_list_begin, const T *arg_list_end) SPIRV_CROSS_NOEXCEPT : SmallVector() + { + auto count = size_t(arg_list_end - arg_list_begin); + reserve(count); + for (size_t i = 0; i < count; i++, arg_list_begin++) + new (&this->ptr[i]) T(*arg_list_begin); + this->buffer_size = count; + } + + SmallVector(std::initializer_list init) SPIRV_CROSS_NOEXCEPT : SmallVector(init.begin(), init.end()) + { + } + + SmallVector(SmallVector &&other) SPIRV_CROSS_NOEXCEPT : SmallVector() + { + *this = std::move(other); + } + + SmallVector &operator=(SmallVector &&other) SPIRV_CROSS_NOEXCEPT + { + clear(); + if (other.ptr != other.stack_storage.data()) + { + // Pilfer allocated pointer. + if (this->ptr != stack_storage.data()) + free(this->ptr); + this->ptr = other.ptr; + this->buffer_size = other.buffer_size; + buffer_capacity = other.buffer_capacity; + other.ptr = nullptr; + other.buffer_size = 0; + other.buffer_capacity = 0; + } + else + { + // Need to move the stack contents individually. 
+ reserve(other.buffer_size); + for (size_t i = 0; i < other.buffer_size; i++) + { + new (&this->ptr[i]) T(std::move(other.ptr[i])); + other.ptr[i].~T(); + } + this->buffer_size = other.buffer_size; + other.buffer_size = 0; + } + return *this; + } + + SmallVector(const SmallVector &other) SPIRV_CROSS_NOEXCEPT : SmallVector() + { + *this = other; + } + + SmallVector &operator=(const SmallVector &other) SPIRV_CROSS_NOEXCEPT + { + if (this == &other) + return *this; + + clear(); + reserve(other.buffer_size); + for (size_t i = 0; i < other.buffer_size; i++) + new (&this->ptr[i]) T(other.ptr[i]); + this->buffer_size = other.buffer_size; + return *this; + } + + explicit SmallVector(size_t count) SPIRV_CROSS_NOEXCEPT : SmallVector() + { + resize(count); + } + + ~SmallVector() + { + clear(); + if (this->ptr != stack_storage.data()) + free(this->ptr); + } + + void clear() SPIRV_CROSS_NOEXCEPT + { + for (size_t i = 0; i < this->buffer_size; i++) + this->ptr[i].~T(); + this->buffer_size = 0; + } + + void push_back(const T &t) SPIRV_CROSS_NOEXCEPT + { + reserve(this->buffer_size + 1); + new (&this->ptr[this->buffer_size]) T(t); + this->buffer_size++; + } + + void push_back(T &&t) SPIRV_CROSS_NOEXCEPT + { + reserve(this->buffer_size + 1); + new (&this->ptr[this->buffer_size]) T(std::move(t)); + this->buffer_size++; + } + + void pop_back() SPIRV_CROSS_NOEXCEPT + { + // Work around false positive warning on GCC 8.3. + // Calling pop_back on empty vector is undefined. + if (!this->empty()) + resize(this->buffer_size - 1); + } + + template + void emplace_back(Ts &&... ts) SPIRV_CROSS_NOEXCEPT + { + reserve(this->buffer_size + 1); + new (&this->ptr[this->buffer_size]) T(std::forward(ts)...); + this->buffer_size++; + } + + void reserve(size_t count) SPIRV_CROSS_NOEXCEPT + { + if ((count > std::numeric_limits::max() / sizeof(T)) || + (count > std::numeric_limits::max() / 2)) + { + // Only way this should ever happen is with garbage input, terminate. 
+ std::terminate(); + } + + if (count > buffer_capacity) + { + size_t target_capacity = buffer_capacity; + if (target_capacity == 0) + target_capacity = 1; + + // Weird parens works around macro issues on Windows if NOMINMAX is not used. + target_capacity = (std::max)(target_capacity, N); + + // Need to ensure there is a POT value of target capacity which is larger than count, + // otherwise this will overflow. + while (target_capacity < count) + target_capacity <<= 1u; + + T *new_buffer = + target_capacity > N ? static_cast(malloc(target_capacity * sizeof(T))) : stack_storage.data(); + + // If we actually fail this malloc, we are hosed anyways, there is no reason to attempt recovery. + if (!new_buffer) + std::terminate(); + + // In case for some reason two allocations both come from same stack. + if (new_buffer != this->ptr) + { + // We don't deal with types which can throw in move constructor. + for (size_t i = 0; i < this->buffer_size; i++) + { + new (&new_buffer[i]) T(std::move(this->ptr[i])); + this->ptr[i].~T(); + } + } + + if (this->ptr != stack_storage.data()) + free(this->ptr); + this->ptr = new_buffer; + buffer_capacity = target_capacity; + } + } + + void insert(T *itr, const T *insert_begin, const T *insert_end) SPIRV_CROSS_NOEXCEPT + { + auto count = size_t(insert_end - insert_begin); + if (itr == this->end()) + { + reserve(this->buffer_size + count); + for (size_t i = 0; i < count; i++, insert_begin++) + new (&this->ptr[this->buffer_size + i]) T(*insert_begin); + this->buffer_size += count; + } + else + { + if (this->buffer_size + count > buffer_capacity) + { + auto target_capacity = this->buffer_size + count; + if (target_capacity == 0) + target_capacity = 1; + if (target_capacity < N) + target_capacity = N; + + while (target_capacity < count) + target_capacity <<= 1u; + + // Need to allocate new buffer. Move everything to a new buffer. + T *new_buffer = + target_capacity > N ? 
static_cast(malloc(target_capacity * sizeof(T))) : stack_storage.data(); + + // If we actually fail this malloc, we are hosed anyways, there is no reason to attempt recovery. + if (!new_buffer) + std::terminate(); + + // First, move elements from source buffer to new buffer. + // We don't deal with types which can throw in move constructor. + auto *target_itr = new_buffer; + auto *original_source_itr = this->begin(); + + if (new_buffer != this->ptr) + { + while (original_source_itr != itr) + { + new (target_itr) T(std::move(*original_source_itr)); + original_source_itr->~T(); + ++original_source_itr; + ++target_itr; + } + } + + // Copy-construct new elements. + for (auto *source_itr = insert_begin; source_itr != insert_end; ++source_itr, ++target_itr) + new (target_itr) T(*source_itr); + + // Move over the other half. + if (new_buffer != this->ptr || insert_begin != insert_end) + { + while (original_source_itr != this->end()) + { + new (target_itr) T(std::move(*original_source_itr)); + original_source_itr->~T(); + ++original_source_itr; + ++target_itr; + } + } + + if (this->ptr != stack_storage.data()) + free(this->ptr); + this->ptr = new_buffer; + buffer_capacity = target_capacity; + } + else + { + // Move in place, need to be a bit careful about which elements are constructed and which are not. + // Move the end and construct the new elements. + auto *target_itr = this->end() + count; + auto *source_itr = this->end(); + while (target_itr != this->end() && source_itr != itr) + { + --target_itr; + --source_itr; + new (target_itr) T(std::move(*source_itr)); + } + + // For already constructed elements we can move-assign. + std::move_backward(itr, source_itr, target_itr); + + // For the inserts which go to already constructed elements, we can do a plain copy. + while (itr != this->end() && insert_begin != insert_end) + *itr++ = *insert_begin++; + + // For inserts into newly allocated memory, we must copy-construct instead. 
+ while (insert_begin != insert_end) + { + new (itr) T(*insert_begin); + ++itr; + ++insert_begin; + } + } + + this->buffer_size += count; + } + } + + void insert(T *itr, const T &value) SPIRV_CROSS_NOEXCEPT + { + insert(itr, &value, &value + 1); + } + + T *erase(T *itr) SPIRV_CROSS_NOEXCEPT + { + std::move(itr + 1, this->end(), itr); + this->ptr[--this->buffer_size].~T(); + return itr; + } + + void erase(T *start_erase, T *end_erase) SPIRV_CROSS_NOEXCEPT + { + if (end_erase == this->end()) + { + resize(size_t(start_erase - this->begin())); + } + else + { + auto new_size = this->buffer_size - (end_erase - start_erase); + std::move(end_erase, this->end(), start_erase); + resize(new_size); + } + } + + void resize(size_t new_size) SPIRV_CROSS_NOEXCEPT + { + if (new_size < this->buffer_size) + { + for (size_t i = new_size; i < this->buffer_size; i++) + this->ptr[i].~T(); + } + else if (new_size > this->buffer_size) + { + reserve(new_size); + for (size_t i = this->buffer_size; i < new_size; i++) + new (&this->ptr[i]) T(); + } + + this->buffer_size = new_size; + } + +private: + size_t buffer_capacity = 0; + AlignedBuffer stack_storage; +}; + +// A vector without stack storage. +// Could also be a typedef-ed to std::vector, +// but might as well use the one we have. +template +using Vector = SmallVector; + +#else // SPIRV_CROSS_FORCE_STL_TYPES + +template +using SmallVector = std::vector; +template +using Vector = std::vector; +template +using VectorView = std::vector; + +#endif // SPIRV_CROSS_FORCE_STL_TYPES + +// An object pool which we use for allocating IVariant-derived objects. +// We know we are going to allocate a bunch of objects of each type, +// so amortize the mallocs. 
+class ObjectPoolBase +{ +public: + virtual ~ObjectPoolBase() = default; + virtual void free_opaque(void *ptr) = 0; +}; + +template +class ObjectPool : public ObjectPoolBase +{ +public: + explicit ObjectPool(unsigned start_object_count_ = 16) + : start_object_count(start_object_count_) + { + } + + template + T *allocate(P &&... p) + { + if (vacants.empty()) + { + unsigned num_objects = start_object_count << memory.size(); + T *ptr = static_cast(malloc(num_objects * sizeof(T))); + if (!ptr) + return nullptr; + + for (unsigned i = 0; i < num_objects; i++) + vacants.push_back(&ptr[i]); + + memory.emplace_back(ptr); + } + + T *ptr = vacants.back(); + vacants.pop_back(); + new (ptr) T(std::forward

(p)...); + return ptr; + } + + void free(T *ptr) + { + ptr->~T(); + vacants.push_back(ptr); + } + + void free_opaque(void *ptr) override + { + free(static_cast(ptr)); + } + + void clear() + { + vacants.clear(); + memory.clear(); + } + +protected: + Vector vacants; + + struct MallocDeleter + { + void operator()(T *ptr) + { + ::free(ptr); + } + }; + + SmallVector> memory; + unsigned start_object_count; +}; + +template +class StringStream +{ +public: + StringStream() + { + reset(); + } + + ~StringStream() + { + reset(); + } + + // Disable copies and moves. Makes it easier to implement, and we don't need it. + StringStream(const StringStream &) = delete; + void operator=(const StringStream &) = delete; + + template ::value, int>::type = 0> + StringStream &operator<<(const T &t) + { + auto s = std::to_string(t); + append(s.data(), s.size()); + return *this; + } + + // Only overload this to make float/double conversions ambiguous. + StringStream &operator<<(uint32_t v) + { + auto s = std::to_string(v); + append(s.data(), s.size()); + return *this; + } + + StringStream &operator<<(char c) + { + append(&c, 1); + return *this; + } + + StringStream &operator<<(const std::string &s) + { + append(s.data(), s.size()); + return *this; + } + + StringStream &operator<<(const char *s) + { + append(s, strlen(s)); + return *this; + } + + template + StringStream &operator<<(const char (&s)[N]) + { + append(s, strlen(s)); + return *this; + } + + std::string str() const + { + std::string ret; + size_t target_size = 0; + for (auto &saved : saved_buffers) + target_size += saved.offset; + target_size += current_buffer.offset; + ret.reserve(target_size); + + for (auto &saved : saved_buffers) + ret.insert(ret.end(), saved.buffer, saved.buffer + saved.offset); + ret.insert(ret.end(), current_buffer.buffer, current_buffer.buffer + current_buffer.offset); + return ret; + } + + void reset() + { + for (auto &saved : saved_buffers) + if (saved.buffer != stack_buffer) + free(saved.buffer); + if 
(current_buffer.buffer != stack_buffer) + free(current_buffer.buffer); + + saved_buffers.clear(); + current_buffer.buffer = stack_buffer; + current_buffer.offset = 0; + current_buffer.size = sizeof(stack_buffer); + } + +private: + struct Buffer + { + char *buffer = nullptr; + size_t offset = 0; + size_t size = 0; + }; + Buffer current_buffer; + char stack_buffer[StackSize]; + SmallVector saved_buffers; + + void append(const char *s, size_t len) + { + size_t avail = current_buffer.size - current_buffer.offset; + if (avail < len) + { + if (avail > 0) + { + memcpy(current_buffer.buffer + current_buffer.offset, s, avail); + s += avail; + len -= avail; + current_buffer.offset += avail; + } + + saved_buffers.push_back(current_buffer); + size_t target_size = len > BlockSize ? len : BlockSize; + current_buffer.buffer = static_cast(malloc(target_size)); + if (!current_buffer.buffer) + SPIRV_CROSS_THROW("Out of memory."); + + memcpy(current_buffer.buffer, s, len); + current_buffer.offset = len; + current_buffer.size = target_size; + } + else + { + memcpy(current_buffer.buffer + current_buffer.offset, s, len); + current_buffer.offset += len; + } + } +}; + +} // namespace SPIRV_CROSS_NAMESPACE + +#endif diff --git a/dep/spirv-cross/spirv_cross_error_handling.hpp b/dep/spirv-cross/spirv_cross_error_handling.hpp new file mode 100644 index 000000000..c0927e459 --- /dev/null +++ b/dep/spirv-cross/spirv_cross_error_handling.hpp @@ -0,0 +1,94 @@ +/* + * Copyright 2015-2020 Arm Limited + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * At your option, you may choose to accept this material under either: + * 1. The Apache License, Version 2.0, found at , or + * 2. The MIT License, found at . + * SPDX-License-Identifier: Apache-2.0 OR MIT. + */ + +#ifndef SPIRV_CROSS_ERROR_HANDLING +#define SPIRV_CROSS_ERROR_HANDLING + +#include +#include +#include +#ifndef SPIRV_CROSS_EXCEPTIONS_TO_ASSERTIONS +#include +#endif + +#ifdef SPIRV_CROSS_NAMESPACE_OVERRIDE +#define SPIRV_CROSS_NAMESPACE SPIRV_CROSS_NAMESPACE_OVERRIDE +#else +#define SPIRV_CROSS_NAMESPACE spirv_cross +#endif + +namespace SPIRV_CROSS_NAMESPACE +{ +#ifdef SPIRV_CROSS_EXCEPTIONS_TO_ASSERTIONS +#if !defined(_MSC_VER) || defined(__clang__) +[[noreturn]] +#elif defined(_MSC_VER) +__declspec(noreturn) +#endif +inline void +report_and_abort(const std::string &msg) +{ +#ifdef NDEBUG + (void)msg; +#else + fprintf(stderr, "There was a compiler error: %s\n", msg.c_str()); +#endif + fflush(stderr); + abort(); +} + +#define SPIRV_CROSS_THROW(x) report_and_abort(x) +#else +class CompilerError : public std::runtime_error +{ +public: + explicit CompilerError(const std::string &str) + : std::runtime_error(str) + { + } +}; + +#define SPIRV_CROSS_THROW(x) throw CompilerError(x) +#endif + +// MSVC 2013 does not have noexcept. We need this for Variant to get move constructor to work correctly +// instead of copy constructor. +// MSVC 2013 ignores that move constructors cannot throw in std::vector, so just don't define it. 
+#if defined(_MSC_VER) && _MSC_VER < 1900 +#define SPIRV_CROSS_NOEXCEPT +#else +#define SPIRV_CROSS_NOEXCEPT noexcept +#endif + +#if __cplusplus >= 201402l +#define SPIRV_CROSS_DEPRECATED(reason) [[deprecated(reason)]] +#elif defined(__GNUC__) +#define SPIRV_CROSS_DEPRECATED(reason) __attribute__((deprecated)) +#elif defined(_MSC_VER) +#define SPIRV_CROSS_DEPRECATED(reason) __declspec(deprecated(reason)) +#else +#define SPIRV_CROSS_DEPRECATED(reason) +#endif +} // namespace SPIRV_CROSS_NAMESPACE + +#endif diff --git a/dep/spirv-cross/spirv_cross_parsed_ir.cpp b/dep/spirv-cross/spirv_cross_parsed_ir.cpp new file mode 100644 index 000000000..ff1b63fbc --- /dev/null +++ b/dep/spirv-cross/spirv_cross_parsed_ir.cpp @@ -0,0 +1,1059 @@ +/* + * Copyright 2018-2020 Arm Limited + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * At your option, you may choose to accept this material under either: + * 1. The Apache License, Version 2.0, found at , or + * 2. The MIT License, found at . + * SPDX-License-Identifier: Apache-2.0 OR MIT. + */ + +#include "spirv_cross_parsed_ir.hpp" +#include +#include + +using namespace std; +using namespace spv; + +namespace SPIRV_CROSS_NAMESPACE +{ +ParsedIR::ParsedIR() +{ + // If we move ParsedIR, we need to make sure the pointer stays fixed since the child Variant objects consume a pointer to this group, + // so need an extra pointer here. 
+ pool_group.reset(new ObjectPoolGroup); + + pool_group->pools[TypeType].reset(new ObjectPool); + pool_group->pools[TypeVariable].reset(new ObjectPool); + pool_group->pools[TypeConstant].reset(new ObjectPool); + pool_group->pools[TypeFunction].reset(new ObjectPool); + pool_group->pools[TypeFunctionPrototype].reset(new ObjectPool); + pool_group->pools[TypeBlock].reset(new ObjectPool); + pool_group->pools[TypeExtension].reset(new ObjectPool); + pool_group->pools[TypeExpression].reset(new ObjectPool); + pool_group->pools[TypeConstantOp].reset(new ObjectPool); + pool_group->pools[TypeCombinedImageSampler].reset(new ObjectPool); + pool_group->pools[TypeAccessChain].reset(new ObjectPool); + pool_group->pools[TypeUndef].reset(new ObjectPool); + pool_group->pools[TypeString].reset(new ObjectPool); +} + +// Should have been default-implemented, but need this on MSVC 2013. +ParsedIR::ParsedIR(ParsedIR &&other) SPIRV_CROSS_NOEXCEPT +{ + *this = move(other); +} + +ParsedIR &ParsedIR::operator=(ParsedIR &&other) SPIRV_CROSS_NOEXCEPT +{ + if (this != &other) + { + pool_group = move(other.pool_group); + spirv = move(other.spirv); + meta = move(other.meta); + for (int i = 0; i < TypeCount; i++) + ids_for_type[i] = move(other.ids_for_type[i]); + ids_for_constant_or_type = move(other.ids_for_constant_or_type); + ids_for_constant_or_variable = move(other.ids_for_constant_or_variable); + declared_capabilities = move(other.declared_capabilities); + declared_extensions = move(other.declared_extensions); + block_meta = move(other.block_meta); + continue_block_to_loop_header = move(other.continue_block_to_loop_header); + entry_points = move(other.entry_points); + ids = move(other.ids); + addressing_model = other.addressing_model; + memory_model = other.memory_model; + + default_entry_point = other.default_entry_point; + source = other.source; + loop_iteration_depth_hard = other.loop_iteration_depth_hard; + loop_iteration_depth_soft = other.loop_iteration_depth_soft; + + 
meta_needing_name_fixup = std::move(other.meta_needing_name_fixup);
+	}
+	return *this;
+}
+
+// Copy construction: build a fresh pool group first (delegating constructor),
+// then copy the contents through the copy-assignment operator.
+ParsedIR::ParsedIR(const ParsedIR &other)
+    : ParsedIR()
+{
+	*this = other;
+}
+
+// Copy assignment. pool_group is intentionally NOT copied: the copied Variants
+// are re-created against this object's own pool group (see the loop below).
+ParsedIR &ParsedIR::operator=(const ParsedIR &other)
+{
+	if (this != &other)
+	{
+		spirv = other.spirv;
+		meta = other.meta;
+		for (int i = 0; i < TypeCount; i++)
+			ids_for_type[i] = other.ids_for_type[i];
+		ids_for_constant_or_type = other.ids_for_constant_or_type;
+		ids_for_constant_or_variable = other.ids_for_constant_or_variable;
+		declared_capabilities = other.declared_capabilities;
+		declared_extensions = other.declared_extensions;
+		block_meta = other.block_meta;
+		continue_block_to_loop_header = other.continue_block_to_loop_header;
+		entry_points = other.entry_points;
+		default_entry_point = other.default_entry_point;
+		source = other.source;
+		loop_iteration_depth_hard = other.loop_iteration_depth_hard;
+		loop_iteration_depth_soft = other.loop_iteration_depth_soft;
+		addressing_model = other.addressing_model;
+		memory_model = other.memory_model;
+
+		meta_needing_name_fixup = other.meta_needing_name_fixup;
+
+		// Very deliberate copying of IDs. There is no default copy constructor, nor a simple default constructor.
+		// Construct object first so we have the correct allocator set-up, then we can copy object into our new pool group.
+		ids.clear();
+		ids.reserve(other.ids.size());
+		for (size_t i = 0; i < other.ids.size(); i++)
+		{
+			ids.emplace_back(pool_group.get());
+			ids.back() = other.ids[i];
+		}
+	}
+	return *this;
+}
+
+// Grows `ids` to at least `bounds` entries (never shrinks it) and resizes
+// `block_meta` to exactly `bounds`.
+void ParsedIR::set_id_bounds(uint32_t bounds)
+{
+	ids.reserve(bounds);
+	while (ids.size() < bounds)
+		ids.emplace_back(pool_group.get());
+
+	block_meta.resize(bounds);
+}
+
+// Roll our own versions of these functions to avoid potential locale shenanigans.
+// True for ASCII letters only.
+static bool is_alpha(char c)
+{
+	return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z');
+}
+
+// True for ASCII decimal digits only.
+static bool is_numeric(char c)
+{
+	return c >= '0' && c <= '9';
+}
+
+static bool is_alphanumeric(char c)
+{
+	return is_alpha(c) || is_numeric(c);
+}
+
+// A name is valid when it is empty, or when it does not start with a digit,
+// contains only [A-Za-z0-9_], and has no two consecutive underscores.
+static bool is_valid_identifier(const string &name)
+{
+	if (name.empty())
+		return true;
+
+	if (is_numeric(name[0]))
+		return false;
+
+	for (auto c : name)
+		if (!is_alphanumeric(c) && c != '_')
+			return false;
+
+	bool saw_underscore = false;
+	// Two underscores in a row is not a valid identifier either.
+	// Technically reserved, but it's easier to treat it as invalid.
+	for (auto c : name)
+	{
+		bool is_underscore = c == '_';
+		if (is_underscore && saw_underscore)
+			return false;
+		saw_underscore = is_underscore;
+	}
+
+	return true;
+}
+
+// True when the name begins with "gl_" or "spv".
+static bool is_reserved_prefix(const string &name)
+{
+	// Generic reserved identifiers used by the implementation.
+	return name.compare(0, 3, "gl_", 3) == 0 ||
+	       // Ignore this case for now, might rewrite internal code to always use spv prefix.
+	       //name.compare(0, 11, "SPIRV_Cross", 11) == 0 ||
+	       name.compare(0, 3, "spv", 3) == 0;
+}
+
+// Checks whether `name` collides with an identifier pattern generated internally.
+// `member` selects the struct-member pattern instead of the global one;
+// reserved prefixes are only rejected when `allow_reserved_prefixes` is false.
+static bool is_reserved_identifier(const string &name, bool member, bool allow_reserved_prefixes)
+{
+	if (!allow_reserved_prefixes && is_reserved_prefix(name))
+		return true;
+
+	if (member)
+	{
+		// Reserved member identifiers come in one form:
+		// _m[0-9]+$.
+		if (name.size() < 3)
+			return false;
+
+		if (name.compare(0, 2, "_m", 2) != 0)
+			return false;
+
+		size_t index = 2;
+		while (index < name.size() && is_numeric(name[index]))
+			index++;
+
+		return index == name.size();
+	}
+	else
+	{
+		// Reserved non-member identifiers come in two forms:
+		// _[0-9]+$, used for temporaries which map directly to a SPIR-V ID.
+		// _[0-9]+_, used for auxiliary temporaries which are derived from a SPIR-V ID.
+		if (name.size() < 2)
+			return false;
+
+		if (name[0] != '_' || !is_numeric(name[1]))
+			return false;
+
+		size_t index = 2;
+		while (index < name.size() && is_numeric(name[index]))
+			index++;
+
+		return index == name.size() || (index < name.size() && name[index] == '_');
+	}
+}
+
+// Public wrapper: checks `str` against the non-member (global) reserved patterns.
+bool ParsedIR::is_globally_reserved_identifier(std::string &str, bool allow_reserved_prefixes)
+{
+	return is_reserved_identifier(str, false, allow_reserved_prefixes);
+}
+
+// Prepends a fixed marker so the name no longer matches a reserved pattern.
+// A separating underscore is added only for reserved-prefix names ("gl_", "spv");
+// presumably the other reserved forms already start with '_' and a second one
+// would create a double underscore - confirm against is_reserved_identifier.
+static string make_unreserved_identifier(const string &name)
+{
+	if (is_reserved_prefix(name))
+		return "_RESERVED_IDENTIFIER_FIXUP_" + name;
+	else
+		return "_RESERVED_IDENTIFIER_FIXUP" + name;
+}
+
+// Collapses every run of consecutive underscores in `str` to a single one, in place.
+void ParsedIR::sanitize_underscores(std::string &str)
+{
+	// Compact adjacent underscores to make it valid.
+	auto dst = str.begin();
+	auto src = dst;
+	bool saw_underscore = false;
+	while (src != str.end())
+	{
+		bool is_underscore = *src == '_';
+		if (saw_underscore && is_underscore)
+		{
+			// Second or later underscore of a run: skip it without writing.
+			src++;
+		}
+		else
+		{
+			if (dst != src)
+				*dst = *src;
+			dst++;
+			src++;
+			saw_underscore = is_underscore;
+		}
+	}
+	str.erase(dst, str.end());
+}
+
+static string ensure_valid_identifier(const string &name)
+{
+	// Functions in glslangValidator are mangled with name(<mangled> stuff.
+	// Normally, we would never see '(' in any legal identifiers, so just strip them out.
+	auto str = name.substr(0, name.find('('));
+
+	if (str.empty())
+		return str;
+
+	// A leading digit is replaced rather than prefixed, so the length is preserved.
+	if (is_numeric(str[0]))
+		str[0] = '_';
+
+	for (auto &c : str)
+		if (!is_alphanumeric(c) && c != '_')
+			c = '_';
+
+	ParsedIR::sanitize_underscores(str);
+	return str;
+}
+
+// Returns the alias stored for `id`, or an empty string when `id` has no meta entry.
+const string &ParsedIR::get_name(ID id) const
+{
+	auto *m = find_meta(id);
+	if (m)
+		return m->decoration.alias;
+	else
+		return empty_string;
+}
+
+// Returns the alias of member `index` of type `id`; empty when the type has no
+// meta entry or the index is out of range.
+const string &ParsedIR::get_member_name(TypeID id, uint32_t index) const
+{
+	auto *m = find_meta(id);
+	if (m)
+	{
+		if (index >= m->members.size())
+			return empty_string;
+		return m->members[index].alias;
+	}
+	else
+		return empty_string;
+}
+
+// Rewrites `name` in place: first made syntactically valid, then moved out of the
+// reserved namespace if it still collides with a reserved pattern.
+void ParsedIR::sanitize_identifier(std::string &name, bool member, bool allow_reserved_prefixes)
+{
+	if (!is_valid_identifier(name))
+		name = ensure_valid_identifier(name);
+	if (is_reserved_identifier(name, member, allow_reserved_prefixes))
+		name = make_unreserved_identifier(name);
+}
+
+// Sanitizes every name recorded by set_name()/set_member_name() as needing a fixup,
+// then clears the pending set.
+void ParsedIR::fixup_reserved_names()
+{
+	for (uint32_t id : meta_needing_name_fixup)
+	{
+		auto &m = meta[id];
+		sanitize_identifier(m.decoration.alias, false, false);
+		for (auto &memb : m.members)
+			sanitize_identifier(memb.alias, true, false);
+	}
+	meta_needing_name_fixup.clear();
+}
+
+// Stores the name verbatim; invalid or reserved names are only queued for
+// fixup_reserved_names(), not rewritten here.
+void ParsedIR::set_name(ID id, const string &name)
+{
+	auto &m = meta[id];
+	m.decoration.alias = name;
+	if (!is_valid_identifier(name) || is_reserved_identifier(name, false, false))
+		meta_needing_name_fixup.insert(id);
+}
+
+// Same as set_name() for a struct member; grows the member list so `index` is valid.
+void ParsedIR::set_member_name(TypeID id, uint32_t index, const string &name)
+{
+	auto &m = meta[id];
+	m.members.resize(max(meta[id].members.size(), size_t(index) + 1));
+	m.members[index].alias = name;
+	if (!is_valid_identifier(name) || is_reserved_identifier(name, true, false))
+		meta_needing_name_fixup.insert(id);
+}
+
+// Sets the decoration flag; the string payload is only stored for
+// DecorationHlslSemanticGOOGLE.
+void ParsedIR::set_decoration_string(ID id, Decoration decoration, const string &argument)
+{
+	auto &dec = meta[id].decoration;
+	dec.decoration_flags.set(decoration);
+
+	switch (decoration)
+	{
+	
case DecorationHlslSemanticGOOGLE: + dec.hlsl_semantic = argument; + break; + + default: + break; + } +} + +void ParsedIR::set_decoration(ID id, Decoration decoration, uint32_t argument) +{ + auto &dec = meta[id].decoration; + dec.decoration_flags.set(decoration); + + switch (decoration) + { + case DecorationBuiltIn: + dec.builtin = true; + dec.builtin_type = static_cast(argument); + break; + + case DecorationLocation: + dec.location = argument; + break; + + case DecorationComponent: + dec.component = argument; + break; + + case DecorationOffset: + dec.offset = argument; + break; + + case DecorationXfbBuffer: + dec.xfb_buffer = argument; + break; + + case DecorationXfbStride: + dec.xfb_stride = argument; + break; + + case DecorationStream: + dec.stream = argument; + break; + + case DecorationArrayStride: + dec.array_stride = argument; + break; + + case DecorationMatrixStride: + dec.matrix_stride = argument; + break; + + case DecorationBinding: + dec.binding = argument; + break; + + case DecorationDescriptorSet: + dec.set = argument; + break; + + case DecorationInputAttachmentIndex: + dec.input_attachment = argument; + break; + + case DecorationSpecId: + dec.spec_id = argument; + break; + + case DecorationIndex: + dec.index = argument; + break; + + case DecorationHlslCounterBufferGOOGLE: + meta[id].hlsl_magic_counter_buffer = argument; + meta[argument].hlsl_is_magic_counter_buffer = true; + break; + + case DecorationFPRoundingMode: + dec.fp_rounding_mode = static_cast(argument); + break; + + default: + break; + } +} + +void ParsedIR::set_member_decoration(TypeID id, uint32_t index, Decoration decoration, uint32_t argument) +{ + meta[id].members.resize(max(meta[id].members.size(), size_t(index) + 1)); + auto &dec = meta[id].members[index]; + dec.decoration_flags.set(decoration); + + switch (decoration) + { + case DecorationBuiltIn: + dec.builtin = true; + dec.builtin_type = static_cast(argument); + break; + + case DecorationLocation: + dec.location = argument; + 
break; + + case DecorationComponent: + dec.component = argument; + break; + + case DecorationBinding: + dec.binding = argument; + break; + + case DecorationOffset: + dec.offset = argument; + break; + + case DecorationXfbBuffer: + dec.xfb_buffer = argument; + break; + + case DecorationXfbStride: + dec.xfb_stride = argument; + break; + + case DecorationStream: + dec.stream = argument; + break; + + case DecorationSpecId: + dec.spec_id = argument; + break; + + case DecorationMatrixStride: + dec.matrix_stride = argument; + break; + + case DecorationIndex: + dec.index = argument; + break; + + default: + break; + } +} + +// Recursively marks any constants referenced by the specified constant instruction as being used +// as an array length. The id must be a constant instruction (SPIRConstant or SPIRConstantOp). +void ParsedIR::mark_used_as_array_length(ID id) +{ + switch (ids[id].get_type()) + { + case TypeConstant: + get(id).is_used_as_array_length = true; + break; + + case TypeConstantOp: + { + auto &cop = get(id); + if (cop.opcode == OpCompositeExtract) + mark_used_as_array_length(cop.arguments[0]); + else if (cop.opcode == OpCompositeInsert) + { + mark_used_as_array_length(cop.arguments[0]); + mark_used_as_array_length(cop.arguments[1]); + } + else + for (uint32_t arg_id : cop.arguments) + mark_used_as_array_length(arg_id); + break; + } + + case TypeUndef: + break; + + default: + assert(0); + } +} + +Bitset ParsedIR::get_buffer_block_type_flags(const SPIRType &type) const +{ + if (type.member_types.empty()) + return {}; + + Bitset all_members_flags = get_member_decoration_bitset(type.self, 0); + for (uint32_t i = 1; i < uint32_t(type.member_types.size()); i++) + all_members_flags.merge_and(get_member_decoration_bitset(type.self, i)); + return all_members_flags; +} + +Bitset ParsedIR::get_buffer_block_flags(const SPIRVariable &var) const +{ + auto &type = get(var.basetype); + assert(type.basetype == SPIRType::Struct); + + // Some flags like non-writable, non-readable 
are actually found
+	// as member decorations. If all members have a decoration set, propagate
+	// the decoration up as a regular variable decoration.
+	Bitset base_flags;
+	auto *m = find_meta(var.self);
+	if (m)
+		base_flags = m->decoration.decoration_flags;
+
+	if (type.member_types.empty())
+		return base_flags;
+
+	auto all_members_flags = get_buffer_block_type_flags(type);
+	base_flags.merge_or(all_members_flags);
+	return base_flags;
+}
+
+// Returns the decoration flags of member `index`, or an empty bitset when the
+// type has no meta entry or the index is out of range.
+const Bitset &ParsedIR::get_member_decoration_bitset(TypeID id, uint32_t index) const
+{
+	auto *m = find_meta(id);
+	if (m)
+	{
+		if (index >= m->members.size())
+			return cleared_bitset;
+		return m->members[index].decoration_flags;
+	}
+	else
+		return cleared_bitset;
+}
+
+bool ParsedIR::has_decoration(ID id, Decoration decoration) const
+{
+	return get_decoration_bitset(id).get(decoration);
+}
+
+// Returns the stored argument of `decoration` on `id`.
+// Returns 0 when the ID has no meta entry or the decoration is not set, and 1 when
+// the decoration is set but has no dedicated field in the switch below.
+uint32_t ParsedIR::get_decoration(ID id, Decoration decoration) const
+{
+	auto *m = find_meta(id);
+	if (!m)
+		return 0;
+
+	auto &dec = m->decoration;
+	if (!dec.decoration_flags.get(decoration))
+		return 0;
+
+	switch (decoration)
+	{
+	case DecorationBuiltIn:
+		return dec.builtin_type;
+	case DecorationLocation:
+		return dec.location;
+	case DecorationComponent:
+		return dec.component;
+	case DecorationOffset:
+		return dec.offset;
+	case DecorationXfbBuffer:
+		return dec.xfb_buffer;
+	case DecorationXfbStride:
+		return dec.xfb_stride;
+	case DecorationStream:
+		return dec.stream;
+	case DecorationBinding:
+		return dec.binding;
+	case DecorationDescriptorSet:
+		return dec.set;
+	case DecorationInputAttachmentIndex:
+		return dec.input_attachment;
+	case DecorationSpecId:
+		return dec.spec_id;
+	case DecorationArrayStride:
+		return dec.array_stride;
+	case DecorationMatrixStride:
+		return dec.matrix_stride;
+	case DecorationIndex:
+		return dec.index;
+	case DecorationFPRoundingMode:
+		return dec.fp_rounding_mode;
+	default:
+		return 1;
+	}
+}
+
+// Returns the string payload of `decoration` on `id`; empty when unset or when
+// the decoration carries no string (only DecorationHlslSemanticGOOGLE does).
+const string &ParsedIR::get_decoration_string(ID id, Decoration decoration) const
+{
+	auto *m = find_meta(id);
+	if (!m)
+		return empty_string;
+
+	auto &dec = m->decoration;
+
+	if (!dec.decoration_flags.get(decoration))
+		return empty_string;
+
+	switch (decoration)
+	{
+	case DecorationHlslSemanticGOOGLE:
+		return dec.hlsl_semantic;
+
+	default:
+		return empty_string;
+	}
+}
+
+// Clears the decoration flag on `id` and resets the associated stored value for
+// the decorations listed in the switch. Note that meta[id] creates an entry if
+// none existed.
+void ParsedIR::unset_decoration(ID id, Decoration decoration)
+{
+	auto &dec = meta[id].decoration;
+	dec.decoration_flags.clear(decoration);
+	switch (decoration)
+	{
+	case DecorationBuiltIn:
+		dec.builtin = false;
+		break;
+
+	case DecorationLocation:
+		dec.location = 0;
+		break;
+
+	case DecorationComponent:
+		dec.component = 0;
+		break;
+
+	case DecorationOffset:
+		dec.offset = 0;
+		break;
+
+	case DecorationXfbBuffer:
+		dec.xfb_buffer = 0;
+		break;
+
+	case DecorationXfbStride:
+		dec.xfb_stride = 0;
+		break;
+
+	case DecorationStream:
+		dec.stream = 0;
+		break;
+
+	case DecorationBinding:
+		dec.binding = 0;
+		break;
+
+	case DecorationDescriptorSet:
+		dec.set = 0;
+		break;
+
+	case DecorationInputAttachmentIndex:
+		dec.input_attachment = 0;
+		break;
+
+	case DecorationSpecId:
+		dec.spec_id = 0;
+		break;
+
+	case DecorationHlslSemanticGOOGLE:
+		dec.hlsl_semantic.clear();
+		break;
+
+	case DecorationFPRoundingMode:
+		dec.fp_rounding_mode = FPRoundingModeMax;
+		break;
+
+	case DecorationHlslCounterBufferGOOGLE:
+	{
+		// Undo both directions of the link established by set_decoration().
+		auto &counter = meta[id].hlsl_magic_counter_buffer;
+		if (counter)
+		{
+			meta[counter].hlsl_is_magic_counter_buffer = false;
+			counter = 0;
+		}
+		break;
+	}
+
+	default:
+		break;
+	}
+}
+
+bool ParsedIR::has_member_decoration(TypeID id, uint32_t index, Decoration decoration) const
+{
+	return get_member_decoration_bitset(id, index).get(decoration);
+}
+
+// Member variant of get_decoration(): 0 when there is no meta entry, the index is
+// out of range or the decoration is not set on that member.
+uint32_t ParsedIR::get_member_decoration(TypeID id, uint32_t index, Decoration decoration) const
+{
+	auto *m = find_meta(id);
+	if (!m)
+		return 0;
+
+	if (index >= m->members.size())
+		return 0;
+
+	auto &dec = m->members[index];
+	if (!dec.decoration_flags.get(decoration))
+		return 0;
+
+	switch
(decoration)
+	{
+	case DecorationBuiltIn:
+		return dec.builtin_type;
+	case DecorationLocation:
+		return dec.location;
+	case DecorationComponent:
+		return dec.component;
+	case DecorationBinding:
+		return dec.binding;
+	case DecorationOffset:
+		return dec.offset;
+	case DecorationXfbBuffer:
+		return dec.xfb_buffer;
+	case DecorationXfbStride:
+		return dec.xfb_stride;
+	case DecorationStream:
+		return dec.stream;
+	case DecorationSpecId:
+		return dec.spec_id;
+	case DecorationMatrixStride:
+		// set_member_decoration() stores the stride in matrix_stride; without this
+		// case the getter fell through to the flag-only default and returned 1.
+		return dec.matrix_stride;
+	case DecorationIndex:
+		return dec.index;
+	default:
+		// Decoration is set but has no stored argument.
+		return 1;
+	}
+}
+
+// Returns the decoration flags of `id`, or an empty bitset when it has no meta entry.
+const Bitset &ParsedIR::get_decoration_bitset(ID id) const
+{
+	auto *m = find_meta(id);
+	if (m)
+	{
+		auto &dec = m->decoration;
+		return dec.decoration_flags;
+	}
+	else
+		return cleared_bitset;
+}
+
+// Member variant of set_decoration_string(); grows the member list so `index` is valid.
+void ParsedIR::set_member_decoration_string(TypeID id, uint32_t index, Decoration decoration, const string &argument)
+{
+	meta[id].members.resize(max(meta[id].members.size(), size_t(index) + 1));
+	auto &dec = meta[id].members[index];
+	dec.decoration_flags.set(decoration);
+
+	switch (decoration)
+	{
+	case DecorationHlslSemanticGOOGLE:
+		dec.hlsl_semantic = argument;
+		break;
+
+	default:
+		break;
+	}
+}
+
+// Returns the string payload of a member decoration; empty when unset.
+const string &ParsedIR::get_member_decoration_string(TypeID id, uint32_t index, Decoration decoration) const
+{
+	auto *m = find_meta(id);
+	if (m)
+	{
+		// has_member_decoration() also rejects an out-of-range index, so the
+		// members[index] access below is in bounds.
+		if (!has_member_decoration(id, index, decoration))
+			return empty_string;
+
+		auto &dec = m->members[index];
+
+		switch (decoration)
+		{
+		case DecorationHlslSemanticGOOGLE:
+			return dec.hlsl_semantic;
+
+		default:
+			return empty_string;
+		}
+	}
+	else
+		return empty_string;
+}
+
+// Clears a member decoration flag and resets its stored value; a no-op when
+// `index` is out of range.
+void ParsedIR::unset_member_decoration(TypeID id, uint32_t index, Decoration decoration)
+{
+	auto &m = meta[id];
+	if (index >= m.members.size())
+		return;
+
+	auto &dec = m.members[index];
+
+	dec.decoration_flags.clear(decoration);
+	switch (decoration)
+	{
+	case DecorationBuiltIn:
+		dec.builtin = false;
+		break;
+
+	case DecorationLocation:
+		dec.location = 0;
+		break;
+
+	case
DecorationComponent:
+		dec.component = 0;
+		break;
+
+	case DecorationOffset:
+		dec.offset = 0;
+		break;
+
+	case DecorationXfbBuffer:
+		dec.xfb_buffer = 0;
+		break;
+
+	case DecorationXfbStride:
+		dec.xfb_stride = 0;
+		break;
+
+	case DecorationStream:
+		dec.stream = 0;
+		break;
+
+	case DecorationSpecId:
+		dec.spec_id = 0;
+		break;
+
+	case DecorationHlslSemanticGOOGLE:
+		dec.hlsl_semantic.clear();
+		break;
+
+	default:
+		break;
+	}
+}
+
+// Appends `incr_amount` empty IDs and returns the first newly created ID
+// (i.e. the previous bound). block_meta is grown to match.
+uint32_t ParsedIR::increase_bound_by(uint32_t incr_amount)
+{
+	auto curr_bound = ids.size();
+	auto new_bound = curr_bound + incr_amount;
+
+	ids.reserve(ids.size() + incr_amount);
+	for (uint32_t i = 0; i < incr_amount; i++)
+		ids.emplace_back(pool_group.get());
+
+	block_meta.resize(new_bound);
+	return uint32_t(curr_bound);
+}
+
+// Removes every occurrence of `id` from the per-type ID list of `type`.
+void ParsedIR::remove_typed_id(Types type, ID id)
+{
+	auto &type_ids = ids_for_type[type];
+	type_ids.erase(remove(begin(type_ids), end(type_ids), id), end(type_ids));
+}
+
+// Resets every ID that currently holds an object of `type` and empties the
+// per-type list. IDs in the list that now hold another type are left alone.
+void ParsedIR::reset_all_of_type(Types type)
+{
+	for (auto &id : ids_for_type[type])
+		if (ids[id].get_type() == type)
+			ids[id].reset();
+
+	ids_for_type[type].clear();
+}
+
+// Registers `id` in the per-type list for `type` (and in the combined
+// constant/type/variable lists). Throws under a hard loop lock; under a soft lock
+// the registration is skipped, and overriding a non-empty ID throws.
+void ParsedIR::add_typed_id(Types type, ID id)
+{
+	if (loop_iteration_depth_hard != 0)
+		SPIRV_CROSS_THROW("Cannot add typed ID while looping over it.");
+
+	if (loop_iteration_depth_soft != 0)
+	{
+		if (!ids[id].empty())
+			SPIRV_CROSS_THROW("Cannot override IDs when loop is soft locked.");
+		return;
+	}
+
+	// Only touch the combined lists when the ID is new or changes type.
+	if (ids[id].empty() || ids[id].get_type() != type)
+	{
+		switch (type)
+		{
+		case TypeConstant:
+			ids_for_constant_or_variable.push_back(id);
+			ids_for_constant_or_type.push_back(id);
+			break;
+
+		case TypeVariable:
+			ids_for_constant_or_variable.push_back(id);
+			break;
+
+		case TypeType:
+		case TypeConstantOp:
+			ids_for_constant_or_type.push_back(id);
+			break;
+
+		default:
+			break;
+		}
+	}
+
+	if (ids[id].empty())
+	{
+		ids_for_type[type].push_back(id);
+	}
+	else if (ids[id].get_type() != type)
+	{
+		// The ID is being re-typed: drop it from its old per-type list first.
+		remove_typed_id(ids[id].get_type(), id);
+		
ids_for_type[type].push_back(id);
+	}
+}
+
+// Returns the meta entry for `id`, or nullptr if none exists. Unlike meta[id],
+// this never creates an entry.
+const Meta *ParsedIR::find_meta(ID id) const
+{
+	auto itr = meta.find(id);
+	if (itr != end(meta))
+		return &itr->second;
+	else
+		return nullptr;
+}
+
+Meta *ParsedIR::find_meta(ID id)
+{
+	auto itr = meta.find(id);
+	if (itr != end(meta))
+		return &itr->second;
+	else
+		return nullptr;
+}
+
+ParsedIR::LoopLock ParsedIR::create_loop_hard_lock() const
+{
+	return ParsedIR::LoopLock(&loop_iteration_depth_hard);
+}
+
+ParsedIR::LoopLock ParsedIR::create_loop_soft_lock() const
+{
+	return ParsedIR::LoopLock(&loop_iteration_depth_soft);
+}
+
+// Releases the lock by decrementing the counter, unless ownership was moved away.
+ParsedIR::LoopLock::~LoopLock()
+{
+	if (lock)
+		(*lock)--;
+}
+
+// Acquires the lock by incrementing the counter that `lock_` points to.
+ParsedIR::LoopLock::LoopLock(uint32_t *lock_)
+    : lock(lock_)
+{
+	if (lock)
+		(*lock)++;
+}
+
+// Takes over ownership of other's counter. The member is initialized directly:
+// delegating to move-assignment here would make operator= read (and possibly
+// decrement through) an uninitialized `lock` pointer.
+ParsedIR::LoopLock::LoopLock(LoopLock &&other) SPIRV_CROSS_NOEXCEPT
+    : lock(other.lock)
+{
+	other.lock = nullptr;
+}
+
+// Releases the currently held counter (if any) and takes over other's.
+// Self-move is a no-op; without the guard it would drop the lock early.
+ParsedIR::LoopLock &ParsedIR::LoopLock::operator=(LoopLock &&other) SPIRV_CROSS_NOEXCEPT
+{
+	if (this != &other)
+	{
+		if (lock)
+			(*lock)--;
+		lock = other.lock;
+		other.lock = nullptr;
+	}
+	return *this;
+}
+
+// Builds a null constant of type `type` in slot `id`. Arrays and structs are
+// built recursively from freshly allocated element IDs.
+void ParsedIR::make_constant_null(uint32_t id, uint32_t type, bool add_to_typed_id_set)
+{
+	auto &constant_type = get<SPIRType>(type);
+
+	if (constant_type.pointer)
+	{
+		if (add_to_typed_id_set)
+			add_typed_id(TypeConstant, id);
+		auto &constant = variant_set<SPIRConstant>(ids[id], type);
+		constant.self = id;
+		constant.make_null(constant_type);
+	}
+	else if (!constant_type.array.empty())
+	{
+		assert(constant_type.parent_type);
+		uint32_t parent_id = increase_bound_by(1);
+		make_constant_null(parent_id, constant_type.parent_type, add_to_typed_id_set);
+
+		if (!constant_type.array_size_literal.back())
+			SPIRV_CROSS_THROW("Array size of OpConstantNull must be a literal.");
+
+		// Every array element refers to the same null element constant.
+		SmallVector<uint32_t> elements(constant_type.array.back());
+		for (uint32_t i = 0; i < constant_type.array.back(); i++)
+			elements[i] = parent_id;
+
+		if (add_to_typed_id_set)
+			add_typed_id(TypeConstant, id);
+		variant_set<SPIRConstant>(ids[id], type, elements.data(), uint32_t(elements.size()), false).self = id;
+	}
+	
else if (!constant_type.member_types.empty()) + { + uint32_t member_ids = increase_bound_by(uint32_t(constant_type.member_types.size())); + SmallVector elements(constant_type.member_types.size()); + for (uint32_t i = 0; i < constant_type.member_types.size(); i++) + { + make_constant_null(member_ids + i, constant_type.member_types[i], add_to_typed_id_set); + elements[i] = member_ids + i; + } + + if (add_to_typed_id_set) + add_typed_id(TypeConstant, id); + variant_set(ids[id], type, elements.data(), uint32_t(elements.size()), false).self = id; + } + else + { + if (add_to_typed_id_set) + add_typed_id(TypeConstant, id); + auto &constant = variant_set(ids[id], type); + constant.self = id; + constant.make_null(constant_type); + } +} + +} // namespace SPIRV_CROSS_NAMESPACE diff --git a/dep/spirv-cross/spirv_cross_parsed_ir.hpp b/dep/spirv-cross/spirv_cross_parsed_ir.hpp new file mode 100644 index 000000000..ee202d2fa --- /dev/null +++ b/dep/spirv-cross/spirv_cross_parsed_ir.hpp @@ -0,0 +1,247 @@ +/* + * Copyright 2018-2020 Arm Limited + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * At your option, you may choose to accept this material under either: + * 1. The Apache License, Version 2.0, found at , or + * 2. The MIT License, found at . + * SPDX-License-Identifier: Apache-2.0 OR MIT. 
+ */ + +#ifndef SPIRV_CROSS_PARSED_IR_HPP +#define SPIRV_CROSS_PARSED_IR_HPP + +#include "spirv_common.hpp" +#include +#include + +namespace SPIRV_CROSS_NAMESPACE +{ + +// This data structure holds all information needed to perform cross-compilation and reflection. +// It is the output of the Parser, but any implementation could create this structure. +// It is intentionally very "open" and struct-like with some helper functions to deal with decorations. +// Parser is the reference implementation of how this data structure should be filled in. + +class ParsedIR +{ +private: + // This must be destroyed after the "ids" vector. + std::unique_ptr pool_group; + +public: + ParsedIR(); + + // Due to custom allocations from object pools, we cannot use a default copy constructor. + ParsedIR(const ParsedIR &other); + ParsedIR &operator=(const ParsedIR &other); + + // Moves are unproblematic, but we need to implement it anyways, since MSVC 2013 does not understand + // how to default-implement these. + ParsedIR(ParsedIR &&other) SPIRV_CROSS_NOEXCEPT; + ParsedIR &operator=(ParsedIR &&other) SPIRV_CROSS_NOEXCEPT; + + // Resizes ids, meta and block_meta. + void set_id_bounds(uint32_t bounds); + + // The raw SPIR-V, instructions and opcodes refer to this by offset + count. + std::vector spirv; + + // Holds various data structures which inherit from IVariant. + SmallVector ids; + + // Various meta data for IDs, decorations, names, etc. + std::unordered_map meta; + + // Holds all IDs which have a certain type. + // This is needed so we can iterate through a specific kind of resource quickly, + // and in-order of module declaration. + SmallVector ids_for_type[TypeCount]; + + // Special purpose lists which contain a union of types. + // This is needed so we can declare specialization constants and structs in an interleaved fashion, + // among other things. + // Constants can be of struct type, and struct array sizes can use specialization constants. 
+	SmallVector<ID> ids_for_constant_or_type;
+	SmallVector<ID> ids_for_constant_or_variable;
+
+	// Declared capabilities and extensions in the SPIR-V module.
+	// Not really used except for reflection at the moment.
+	SmallVector<spv::Capability> declared_capabilities;
+	SmallVector<std::string> declared_extensions;
+
+	// Meta data about blocks. The cross-compiler needs to query if a block is either of these types.
+	// It is a bitset as there can be more than one tag per block.
+	enum BlockMetaFlagBits
+	{
+		BLOCK_META_LOOP_HEADER_BIT = 1 << 0,
+		BLOCK_META_CONTINUE_BIT = 1 << 1,
+		BLOCK_META_LOOP_MERGE_BIT = 1 << 2,
+		BLOCK_META_SELECTION_MERGE_BIT = 1 << 3,
+		BLOCK_META_MULTISELECT_MERGE_BIT = 1 << 4
+	};
+	// All five flag bits fit in 8 bits.
+	using BlockMetaFlags = uint8_t;
+	SmallVector<BlockMetaFlags> block_meta;
+	std::unordered_map<BlockID, BlockID> continue_block_to_loop_header;
+
+	// Normally, we'd stick SPIREntryPoint in ids array, but it conflicts with SPIRFunction.
+	// Entry points can therefore be seen as some sort of meta structure.
+	std::unordered_map<FunctionID, SPIREntryPoint> entry_points;
+	FunctionID default_entry_point = 0;
+
+	// Information about the language the module was compiled from.
+	struct Source
+	{
+		uint32_t version = 0;
+		bool es = false;
+		bool known = false;
+		bool hlsl = false;
+
+		Source() = default;
+	};
+
+	Source source;
+
+	// The *Max enumerants are the initial values, until the copy/move code in
+	// the implementation file overwrites them.
+	spv::AddressingModel addressing_model = spv::AddressingModelMax;
+	spv::MemoryModel memory_model = spv::MemoryModelMax;
+
+	// Decoration handling methods.
+	// Can be useful for simple "raw" reflection.
+	// However, most members are here because the Parser needs most of these,
+	// and might as well just have the whole suite of decoration/name handling in one place.
+	// Getters return an empty string / 0 / empty bitset when nothing is recorded for the ID.
+	void set_name(ID id, const std::string &name);
+	const std::string &get_name(ID id) const;
+	void set_decoration(ID id, spv::Decoration decoration, uint32_t argument = 0);
+	void set_decoration_string(ID id, spv::Decoration decoration, const std::string &argument);
+	bool has_decoration(ID id, spv::Decoration decoration) const;
+	uint32_t get_decoration(ID id, spv::Decoration decoration) const;
+	const std::string &get_decoration_string(ID id, spv::Decoration decoration) const;
+	const Bitset &get_decoration_bitset(ID id) const;
+	void unset_decoration(ID id, spv::Decoration decoration);
+
+	// Decoration handling methods (for members of a struct).
+	void set_member_name(TypeID id, uint32_t index, const std::string &name);
+	const std::string &get_member_name(TypeID id, uint32_t index) const;
+	void set_member_decoration(TypeID id, uint32_t index, spv::Decoration decoration, uint32_t argument = 0);
+	void set_member_decoration_string(TypeID id, uint32_t index, spv::Decoration decoration,
+	                                  const std::string &argument);
+	uint32_t get_member_decoration(TypeID id, uint32_t index, spv::Decoration decoration) const;
+	const std::string &get_member_decoration_string(TypeID id, uint32_t index, spv::Decoration decoration) const;
+	bool has_member_decoration(TypeID id, uint32_t index, spv::Decoration decoration) const;
+	const Bitset &get_member_decoration_bitset(TypeID id, uint32_t index) const;
+	void unset_member_decoration(TypeID id, uint32_t index, spv::Decoration decoration);
+
+	void mark_used_as_array_length(ID id);
+	// Appends `count` empty IDs and returns the first new ID.
+	uint32_t increase_bound_by(uint32_t count);
+	Bitset get_buffer_block_flags(const SPIRVariable &var) const;
+	Bitset get_buffer_block_type_flags(const SPIRType &type) const;
+
+	void add_typed_id(Types type, ID id);
+	void remove_typed_id(Types type, ID id);
+
+	// Scoped guard over one of the loop-iteration depth counters: the constructor
+	// increments the counter and the destructor decrements it. Move-only.
+	class LoopLock
+	{
+	public:
+		explicit LoopLock(uint32_t *counter);
+		LoopLock(const LoopLock &) = delete;
+		void operator=(const LoopLock &) = delete;
+		LoopLock(LoopLock &&other)
SPIRV_CROSS_NOEXCEPT; + LoopLock &operator=(LoopLock &&other) SPIRV_CROSS_NOEXCEPT; + ~LoopLock(); + + private: + uint32_t *lock; + }; + + // This must be held while iterating over a type ID array. + // It is undefined if someone calls set<>() while we're iterating over a data structure, so we must + // make sure that this case is avoided. + + // If we have a hard lock, it is an error to call set<>(), and an exception is thrown. + // If we have a soft lock, we silently ignore any additions to the typed arrays. + // This should only be used for physical ID remapping where we need to create an ID, but we will never + // care about iterating over them. + LoopLock create_loop_hard_lock() const; + LoopLock create_loop_soft_lock() const; + + template + void for_each_typed_id(const Op &op) + { + auto loop_lock = create_loop_hard_lock(); + for (auto &id : ids_for_type[T::type]) + { + if (ids[id].get_type() == static_cast(T::type)) + op(id, get(id)); + } + } + + template + void for_each_typed_id(const Op &op) const + { + auto loop_lock = create_loop_hard_lock(); + for (auto &id : ids_for_type[T::type]) + { + if (ids[id].get_type() == static_cast(T::type)) + op(id, get(id)); + } + } + + template + void reset_all_of_type() + { + reset_all_of_type(static_cast(T::type)); + } + + void reset_all_of_type(Types type); + + Meta *find_meta(ID id); + const Meta *find_meta(ID id) const; + + const std::string &get_empty_string() const + { + return empty_string; + } + + void make_constant_null(uint32_t id, uint32_t type, bool add_to_typed_id_set); + + void fixup_reserved_names(); + + static void sanitize_underscores(std::string &str); + static void sanitize_identifier(std::string &str, bool member, bool allow_reserved_prefixes); + static bool is_globally_reserved_identifier(std::string &str, bool allow_reserved_prefixes); + +private: + template + T &get(uint32_t id) + { + return variant_get(ids[id]); + } + + template + const T &get(uint32_t id) const + { + return variant_get(ids[id]); + 
} + + mutable uint32_t loop_iteration_depth_hard = 0; + mutable uint32_t loop_iteration_depth_soft = 0; + std::string empty_string; + Bitset cleared_bitset; + + std::unordered_set meta_needing_name_fixup; +}; +} // namespace SPIRV_CROSS_NAMESPACE + +#endif diff --git a/dep/spirv-cross/spirv_cross_util.cpp b/dep/spirv-cross/spirv_cross_util.cpp new file mode 100644 index 000000000..f9b5c5f9a --- /dev/null +++ b/dep/spirv-cross/spirv_cross_util.cpp @@ -0,0 +1,77 @@ +/* + * Copyright 2015-2020 Arm Limited + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * At your option, you may choose to accept this material under either: + * 1. The Apache License, Version 2.0, found at , or + * 2. The MIT License, found at . + * SPDX-License-Identifier: Apache-2.0 OR MIT. + */ + +#include "spirv_cross_util.hpp" +#include "spirv_common.hpp" + +using namespace spv; +using namespace SPIRV_CROSS_NAMESPACE; + +namespace spirv_cross_util +{ +void rename_interface_variable(Compiler &compiler, const SmallVector &resources, uint32_t location, + const std::string &name) +{ + for (auto &v : resources) + { + if (!compiler.has_decoration(v.id, spv::DecorationLocation)) + continue; + + auto loc = compiler.get_decoration(v.id, spv::DecorationLocation); + if (loc != location) + continue; + + auto &type = compiler.get_type(v.base_type_id); + + // This is more of a friendly variant. 
If we need to rename interface variables, we might have to rename + // structs as well and make sure all the names match up. + if (type.basetype == SPIRType::Struct) + { + compiler.set_name(v.base_type_id, join("SPIRV_Cross_Interface_Location", location)); + for (uint32_t i = 0; i < uint32_t(type.member_types.size()); i++) + compiler.set_member_name(v.base_type_id, i, join("InterfaceMember", i)); + } + + compiler.set_name(v.id, name); + } +} + +void inherit_combined_sampler_bindings(Compiler &compiler) +{ + auto &samplers = compiler.get_combined_image_samplers(); + for (auto &s : samplers) + { + if (compiler.has_decoration(s.image_id, spv::DecorationDescriptorSet)) + { + uint32_t set = compiler.get_decoration(s.image_id, spv::DecorationDescriptorSet); + compiler.set_decoration(s.combined_id, spv::DecorationDescriptorSet, set); + } + + if (compiler.has_decoration(s.image_id, spv::DecorationBinding)) + { + uint32_t binding = compiler.get_decoration(s.image_id, spv::DecorationBinding); + compiler.set_decoration(s.combined_id, spv::DecorationBinding, binding); + } + } +} +} // namespace spirv_cross_util diff --git a/dep/spirv-cross/spirv_cross_util.hpp b/dep/spirv-cross/spirv_cross_util.hpp new file mode 100644 index 000000000..b0501f33b --- /dev/null +++ b/dep/spirv-cross/spirv_cross_util.hpp @@ -0,0 +1,37 @@ +/* + * Copyright 2015-2020 Arm Limited + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +/* + * At your option, you may choose to accept this material under either: + * 1. The Apache License, Version 2.0, found at , or + * 2. The MIT License, found at . + * SPDX-License-Identifier: Apache-2.0 OR MIT. + */ + +#ifndef SPIRV_CROSS_UTIL_HPP +#define SPIRV_CROSS_UTIL_HPP + +#include "spirv_cross.hpp" + +namespace spirv_cross_util +{ +void rename_interface_variable(SPIRV_CROSS_NAMESPACE::Compiler &compiler, + const SPIRV_CROSS_NAMESPACE::SmallVector &resources, + uint32_t location, const std::string &name); +void inherit_combined_sampler_bindings(SPIRV_CROSS_NAMESPACE::Compiler &compiler); +} // namespace spirv_cross_util + +#endif diff --git a/dep/spirv-cross/spirv_glsl.cpp b/dep/spirv-cross/spirv_glsl.cpp new file mode 100644 index 000000000..ecc5fb305 --- /dev/null +++ b/dep/spirv-cross/spirv_glsl.cpp @@ -0,0 +1,15350 @@ +/* + * Copyright 2015-2020 Arm Limited + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * At your option, you may choose to accept this material under either: + * 1. The Apache License, Version 2.0, found at , or + * 2. The MIT License, found at . + * SPDX-License-Identifier: Apache-2.0 OR MIT. 
+ */ + +#include "spirv_glsl.hpp" +#include "GLSL.std.450.h" +#include "spirv_common.hpp" +#include +#include +#include +#include +#include +#include + +#ifndef _WIN32 +#include +#endif +#include + +using namespace spv; +using namespace SPIRV_CROSS_NAMESPACE; +using namespace std; + +static bool is_unsigned_opcode(Op op) +{ + // Don't have to be exhaustive, only relevant for legacy target checking ... + switch (op) + { + case OpShiftRightLogical: + case OpUGreaterThan: + case OpUGreaterThanEqual: + case OpULessThan: + case OpULessThanEqual: + case OpUConvert: + case OpUDiv: + case OpUMod: + case OpUMulExtended: + case OpConvertUToF: + case OpConvertFToU: + return true; + + default: + return false; + } +} + +static bool is_unsigned_glsl_opcode(GLSLstd450 op) +{ + // Don't have to be exhaustive, only relevant for legacy target checking ... + switch (op) + { + case GLSLstd450UClamp: + case GLSLstd450UMin: + case GLSLstd450UMax: + case GLSLstd450FindUMsb: + return true; + + default: + return false; + } +} + +static bool packing_is_vec4_padded(BufferPackingStandard packing) +{ + switch (packing) + { + case BufferPackingHLSLCbuffer: + case BufferPackingHLSLCbufferPackOffset: + case BufferPackingStd140: + case BufferPackingStd140EnhancedLayout: + return true; + + default: + return false; + } +} + +static bool packing_is_hlsl(BufferPackingStandard packing) +{ + switch (packing) + { + case BufferPackingHLSLCbuffer: + case BufferPackingHLSLCbufferPackOffset: + return true; + + default: + return false; + } +} + +static bool packing_has_flexible_offset(BufferPackingStandard packing) +{ + switch (packing) + { + case BufferPackingStd140: + case BufferPackingStd430: + case BufferPackingScalar: + case BufferPackingHLSLCbuffer: + return false; + + default: + return true; + } +} + +static bool packing_is_scalar(BufferPackingStandard packing) +{ + switch (packing) + { + case BufferPackingScalar: + case BufferPackingScalarEnhancedLayout: + return true; + + default: + return false; + } 
+} + +static BufferPackingStandard packing_to_substruct_packing(BufferPackingStandard packing) +{ + switch (packing) + { + case BufferPackingStd140EnhancedLayout: + return BufferPackingStd140; + case BufferPackingStd430EnhancedLayout: + return BufferPackingStd430; + case BufferPackingHLSLCbufferPackOffset: + return BufferPackingHLSLCbuffer; + case BufferPackingScalarEnhancedLayout: + return BufferPackingScalar; + default: + return packing; + } +} + +void CompilerGLSL::init() +{ + if (ir.source.known) + { + options.es = ir.source.es; + options.version = ir.source.version; + } + + // Query the locale to see what the decimal point is. + // We'll rely on fixing it up ourselves in the rare case we have a comma-as-decimal locale + // rather than setting locales ourselves. Setting locales in a safe and isolated way is rather + // tricky. +#ifdef _WIN32 + // On Windows, localeconv uses thread-local storage, so it should be fine. + const struct lconv *conv = localeconv(); + if (conv && conv->decimal_point) + current_locale_radix_character = *conv->decimal_point; +#elif defined(__ANDROID__) && __ANDROID_API__ < 26 + // nl_langinfo is not supported on this platform, fall back to the worse alternative. + const struct lconv *conv = localeconv(); + if (conv && conv->decimal_point) + current_locale_radix_character = *conv->decimal_point; +#else + // localeconv, the portable function is not MT safe ...
+ const char *decimal_point = nl_langinfo(RADIXCHAR); + if (decimal_point && *decimal_point != '\0') + current_locale_radix_character = *decimal_point; +#endif +} + +// Returns the GLSL layout qualifier for a pixel local storage format. +// Every known format yields the qualifier followed by one separating space; unknown formats yield "". +static const char *to_pls_layout(PlsFormat format) +{ + switch (format) + { + case PlsR11FG11FB10F: + return "layout(r11f_g11f_b10f) "; + case PlsR32F: + return "layout(r32f) "; + case PlsRG16F: + return "layout(rg16f) "; + case PlsRGB10A2: + return "layout(rgb10_a2) "; + case PlsRGBA8: + return "layout(rgba8) "; + case PlsRG16: + return "layout(rg16) "; + case PlsRGBA8I: + return "layout(rgba8i) "; + case PlsRG16I: + return "layout(rg16i) "; + case PlsRGB10A2UI: + return "layout(rgb10_a2ui) "; + case PlsRGBA8UI: + return "layout(rgba8ui) "; + case PlsRG16UI: + return "layout(rg16ui) "; + case PlsR32UI: + return "layout(r32ui) "; + default: + return ""; + } +} + +static SPIRType::BaseType pls_format_to_basetype(PlsFormat format) +{ + switch (format) + { + default: + case PlsR11FG11FB10F: + case PlsR32F: + case PlsRG16F: + case PlsRGB10A2: + case PlsRGBA8: + case PlsRG16: + return SPIRType::Float; + + case PlsRGBA8I: + case PlsRG16I: + return SPIRType::Int; + + case PlsRGB10A2UI: + case PlsRGBA8UI: + case PlsRG16UI: + case PlsR32UI: + return SPIRType::UInt; + } +} + +static uint32_t pls_format_to_components(PlsFormat format) +{ + switch (format) + { + default: + case PlsR32F: + case PlsR32UI: + return 1; + + case PlsRG16F: + case PlsRG16: + case PlsRG16UI: + case PlsRG16I: + return 2; + + case PlsR11FG11FB10F: + return 3; + + case PlsRGB10A2: + case PlsRGBA8: + case PlsRGBA8I: + case PlsRGB10A2UI: + case PlsRGBA8UI: + return 4; + } +} + +const char *CompilerGLSL::vector_swizzle(int vecsize, int index) +{ + static const char *const swizzle[4][4] = { + { ".x", ".y", ".z", ".w" }, + { ".xy", ".yz", ".zw", nullptr }, + { ".xyz", ".yzw", nullptr, nullptr }, +#if defined(__GNUC__) && (__GNUC__ == 9) + // This works around a GCC 9 bug, see details in https://gcc.gnu.org/bugzilla/show_bug.cgi?id=90947.
+ // This array ends up being compiled as all nullptrs, tripping the assertions below. + { "", nullptr, nullptr, "$" }, +#else + { "", nullptr, nullptr, nullptr }, +#endif + }; + + assert(vecsize >= 1 && vecsize <= 4); + assert(index >= 0 && index < 4); + assert(swizzle[vecsize - 1][index]); + + return swizzle[vecsize - 1][index]; +} + +void CompilerGLSL::reset() +{ + // We do some speculative optimizations which should pretty much always work out, + // but just in case the SPIR-V is rather weird, recompile until it's happy. + // This typically only means one extra pass. + clear_force_recompile(); + + // Clear invalid expression tracking. + invalid_expressions.clear(); + current_function = nullptr; + + // Clear temporary usage tracking. + expression_usage_counts.clear(); + forwarded_temporaries.clear(); + suppressed_usage_tracking.clear(); + + // Ensure that we declare phi-variable copies even if the original declaration isn't deferred + flushed_phi_variables.clear(); + + reset_name_caches(); + + ir.for_each_typed_id([&](uint32_t, SPIRFunction &func) { + func.active = false; + func.flush_undeclared = true; + }); + + ir.for_each_typed_id([&](uint32_t, SPIRVariable &var) { var.dependees.clear(); }); + + ir.reset_all_of_type(); + ir.reset_all_of_type(); + + statement_count = 0; + indent = 0; + current_loop_level = 0; +} + +void CompilerGLSL::remap_pls_variables() +{ + for (auto &input : pls_inputs) + { + auto &var = get(input.id); + + bool input_is_target = false; + if (var.storage == StorageClassUniformConstant) + { + auto &type = get(var.basetype); + input_is_target = type.image.dim == DimSubpassData; + } + + if (var.storage != StorageClassInput && !input_is_target) + SPIRV_CROSS_THROW("Can only use in and target variables for PLS inputs."); + var.remapped_variable = true; + } + + for (auto &output : pls_outputs) + { + auto &var = get(output.id); + if (var.storage != StorageClassOutput) + SPIRV_CROSS_THROW("Can only use out variables for PLS outputs."); + 
var.remapped_variable = true; + } +} + +void CompilerGLSL::remap_ext_framebuffer_fetch(uint32_t input_attachment_index, uint32_t color_location) +{ + subpass_to_framebuffer_fetch_attachment.push_back({ input_attachment_index, color_location }); + inout_color_attachments.insert(color_location); +} + +void CompilerGLSL::find_static_extensions() +{ + ir.for_each_typed_id([&](uint32_t, const SPIRType &type) { + if (type.basetype == SPIRType::Double) + { + if (options.es) + SPIRV_CROSS_THROW("FP64 not supported in ES profile."); + if (!options.es && options.version < 400) + require_extension_internal("GL_ARB_gpu_shader_fp64"); + } + else if (type.basetype == SPIRType::Int64 || type.basetype == SPIRType::UInt64) + { + if (options.es) + SPIRV_CROSS_THROW("64-bit integers not supported in ES profile."); + if (!options.es) + require_extension_internal("GL_ARB_gpu_shader_int64"); + } + else if (type.basetype == SPIRType::Half) + { + require_extension_internal("GL_EXT_shader_explicit_arithmetic_types_float16"); + if (options.vulkan_semantics) + require_extension_internal("GL_EXT_shader_16bit_storage"); + } + else if (type.basetype == SPIRType::SByte || type.basetype == SPIRType::UByte) + { + require_extension_internal("GL_EXT_shader_explicit_arithmetic_types_int8"); + if (options.vulkan_semantics) + require_extension_internal("GL_EXT_shader_8bit_storage"); + } + else if (type.basetype == SPIRType::Short || type.basetype == SPIRType::UShort) + { + require_extension_internal("GL_EXT_shader_explicit_arithmetic_types_int16"); + if (options.vulkan_semantics) + require_extension_internal("GL_EXT_shader_16bit_storage"); + } + }); + + auto &execution = get_entry_point(); + switch (execution.model) + { + case ExecutionModelGLCompute: + if (!options.es && options.version < 430) + require_extension_internal("GL_ARB_compute_shader"); + if (options.es && options.version < 310) + SPIRV_CROSS_THROW("At least ESSL 3.10 required for compute shaders."); + break; + + case 
ExecutionModelGeometry: + if (options.es && options.version < 320) + require_extension_internal("GL_EXT_geometry_shader"); + if (!options.es && options.version < 150) + require_extension_internal("GL_ARB_geometry_shader4"); + + if (execution.flags.get(ExecutionModeInvocations) && execution.invocations != 1) + { + // Instanced GS is part of 400 core or this extension. + if (!options.es && options.version < 400) + require_extension_internal("GL_ARB_gpu_shader5"); + } + break; + + case ExecutionModelTessellationEvaluation: + case ExecutionModelTessellationControl: + if (options.es && options.version < 320) + require_extension_internal("GL_EXT_tessellation_shader"); + if (!options.es && options.version < 400) + require_extension_internal("GL_ARB_tessellation_shader"); + break; + + case ExecutionModelRayGenerationNV: + case ExecutionModelIntersectionNV: + case ExecutionModelAnyHitNV: + case ExecutionModelClosestHitNV: + case ExecutionModelMissNV: + case ExecutionModelCallableNV: + if (options.es || options.version < 460) + SPIRV_CROSS_THROW("Ray tracing shaders require non-es profile with version 460 or above."); + require_extension_internal("GL_NV_ray_tracing"); + break; + + default: + break; + } + + if (!pls_inputs.empty() || !pls_outputs.empty()) + { + if (execution.model != ExecutionModelFragment) + SPIRV_CROSS_THROW("Can only use GL_EXT_shader_pixel_local_storage in fragment shaders."); + require_extension_internal("GL_EXT_shader_pixel_local_storage"); + } + + if (!inout_color_attachments.empty()) + { + if (execution.model != ExecutionModelFragment) + SPIRV_CROSS_THROW("Can only use GL_EXT_shader_framebuffer_fetch in fragment shaders."); + if (options.vulkan_semantics) + SPIRV_CROSS_THROW("Cannot use EXT_shader_framebuffer_fetch in Vulkan GLSL."); + require_extension_internal("GL_EXT_shader_framebuffer_fetch"); + } + + if (options.separate_shader_objects && !options.es && options.version < 410) + require_extension_internal("GL_ARB_separate_shader_objects"); + + if 
(ir.addressing_model == AddressingModelPhysicalStorageBuffer64EXT) + { + if (!options.vulkan_semantics) + SPIRV_CROSS_THROW("GL_EXT_buffer_reference is only supported in Vulkan GLSL."); + if (options.es && options.version < 320) + SPIRV_CROSS_THROW("GL_EXT_buffer_reference requires ESSL 320."); + else if (!options.es && options.version < 450) + SPIRV_CROSS_THROW("GL_EXT_buffer_reference requires GLSL 450."); + require_extension_internal("GL_EXT_buffer_reference"); + } + else if (ir.addressing_model != AddressingModelLogical) + { + SPIRV_CROSS_THROW("Only Logical and PhysicalStorageBuffer64EXT addressing models are supported."); + } + + // Check for nonuniform qualifier and passthrough. + // Instead of looping over all decorations to find this, just look at capabilities. + for (auto &cap : ir.declared_capabilities) + { + switch (cap) + { + case CapabilityShaderNonUniformEXT: + if (!options.vulkan_semantics) + require_extension_internal("GL_NV_gpu_shader5"); + else + require_extension_internal("GL_EXT_nonuniform_qualifier"); + break; + case CapabilityRuntimeDescriptorArrayEXT: + if (!options.vulkan_semantics) + SPIRV_CROSS_THROW("GL_EXT_nonuniform_qualifier is only supported in Vulkan GLSL."); + require_extension_internal("GL_EXT_nonuniform_qualifier"); + break; + + case CapabilityGeometryShaderPassthroughNV: + if (execution.model == ExecutionModelGeometry) + { + require_extension_internal("GL_NV_geometry_shader_passthrough"); + execution.geometry_passthrough = true; + } + break; + + case CapabilityVariablePointers: + case CapabilityVariablePointersStorageBuffer: + SPIRV_CROSS_THROW("VariablePointers capability is not supported in GLSL."); + + default: + break; + } + } +} + +string CompilerGLSL::compile() +{ + ir.fixup_reserved_names(); + + if (options.vulkan_semantics) + backend.allow_precision_qualifiers = true; + else + { + // only NV_gpu_shader5 supports divergent indexing on OpenGL, and it does so without extra qualifiers + backend.nonuniform_qualifier = ""; + 
backend.needs_row_major_load_workaround = true; + } + backend.force_gl_in_out_block = true; + backend.supports_extensions = true; + backend.use_array_constructor = true; + + if (is_legacy_es()) + backend.support_case_fallthrough = false; + + // Scan the SPIR-V to find trivial uses of extensions. + fixup_type_alias(); + reorder_type_alias(); + build_function_control_flow_graphs_and_analyze(); + find_static_extensions(); + fixup_image_load_store_access(); + update_active_builtins(); + analyze_image_and_sampler_usage(); + analyze_interlocked_resource_usage(); + if (!inout_color_attachments.empty()) + emit_inout_fragment_outputs_copy_to_subpass_inputs(); + + // Shaders might cast unrelated data to pointers of non-block types. + // Find all such instances and make sure we can cast the pointers to a synthesized block type. + if (ir.addressing_model == AddressingModelPhysicalStorageBuffer64EXT) + analyze_non_block_pointer_types(); + + uint32_t pass_count = 0; + do + { + if (pass_count >= 3) + SPIRV_CROSS_THROW("Over 3 compilation loops detected. Must be a bug!"); + + reset(); + + buffer.reset(); + + emit_header(); + emit_resources(); + emit_extension_workarounds(get_execution_model()); + + emit_function(get(ir.default_entry_point), Bitset()); + + pass_count++; + } while (is_forcing_recompilation()); + + // Implement the interlocked wrapper function at the end. + // The body was implemented in lieu of main(). + if (interlocked_is_complex) + { + statement("void main()"); + begin_scope(); + statement("// Interlocks were used in a way not compatible with GLSL, this is very slow."); + if (options.es) + statement("beginInvocationInterlockNV();"); + else + statement("beginInvocationInterlockARB();"); + statement("spvMainInterlockedBody();"); + if (options.es) + statement("endInvocationInterlockNV();"); + else + statement("endInvocationInterlockARB();"); + end_scope(); + } + + // Entry point in GLSL is always main(). 
+ get_entry_point().name = "main"; + + return buffer.str(); +} + +std::string CompilerGLSL::get_partial_source() +{ + return buffer.str(); +} + +void CompilerGLSL::build_workgroup_size(SmallVector &arguments, const SpecializationConstant &wg_x, + const SpecializationConstant &wg_y, const SpecializationConstant &wg_z) +{ + auto &execution = get_entry_point(); + + if (wg_x.id) + { + if (options.vulkan_semantics) + arguments.push_back(join("local_size_x_id = ", wg_x.constant_id)); + else + arguments.push_back(join("local_size_x = ", get(wg_x.id).specialization_constant_macro_name)); + } + else + arguments.push_back(join("local_size_x = ", execution.workgroup_size.x)); + + if (wg_y.id) + { + if (options.vulkan_semantics) + arguments.push_back(join("local_size_y_id = ", wg_y.constant_id)); + else + arguments.push_back(join("local_size_y = ", get(wg_y.id).specialization_constant_macro_name)); + } + else + arguments.push_back(join("local_size_y = ", execution.workgroup_size.y)); + + if (wg_z.id) + { + if (options.vulkan_semantics) + arguments.push_back(join("local_size_z_id = ", wg_z.constant_id)); + else + arguments.push_back(join("local_size_z = ", get(wg_z.id).specialization_constant_macro_name)); + } + else + arguments.push_back(join("local_size_z = ", execution.workgroup_size.z)); +} + +void CompilerGLSL::request_subgroup_feature(ShaderSubgroupSupportHelper::Feature feature) +{ + if (options.vulkan_semantics) + { + auto khr_extension = ShaderSubgroupSupportHelper::get_KHR_extension_for_feature(feature); + require_extension_internal(ShaderSubgroupSupportHelper::get_extension_name(khr_extension)); + } + else + { + if (!shader_subgroup_supporter.is_feature_requested(feature)) + force_recompile(); + shader_subgroup_supporter.request_feature(feature); + } +} + +void CompilerGLSL::emit_header() +{ + auto &execution = get_entry_point(); + statement("#version ", options.version, options.es && options.version > 100 ? 
" es" : ""); + + if (!options.es && options.version < 420) + { + // Needed for binding = # on UBOs, etc. + if (options.enable_420pack_extension) + { + statement("#ifdef GL_ARB_shading_language_420pack"); + statement("#extension GL_ARB_shading_language_420pack : require"); + statement("#endif"); + } + // Needed for: layout(early_fragment_tests) in; + if (execution.flags.get(ExecutionModeEarlyFragmentTests)) + require_extension_internal("GL_ARB_shader_image_load_store"); + } + + // Needed for: layout(post_depth_coverage) in; + if (execution.flags.get(ExecutionModePostDepthCoverage)) + require_extension_internal("GL_ARB_post_depth_coverage"); + + // Needed for: layout({pixel,sample}_interlock_[un]ordered) in; + if (execution.flags.get(ExecutionModePixelInterlockOrderedEXT) || + execution.flags.get(ExecutionModePixelInterlockUnorderedEXT) || + execution.flags.get(ExecutionModeSampleInterlockOrderedEXT) || + execution.flags.get(ExecutionModeSampleInterlockUnorderedEXT)) + { + if (options.es) + { + if (options.version < 310) + SPIRV_CROSS_THROW("At least ESSL 3.10 required for fragment shader interlock."); + require_extension_internal("GL_NV_fragment_shader_interlock"); + } + else + { + if (options.version < 420) + require_extension_internal("GL_ARB_shader_image_load_store"); + require_extension_internal("GL_ARB_fragment_shader_interlock"); + } + } + + for (auto &ext : forced_extensions) + { + if (ext == "GL_EXT_shader_explicit_arithmetic_types_float16") + { + // Special case, this extension has a potential fallback to another vendor extension in normal GLSL. + // GL_AMD_gpu_shader_half_float is a superset, so try that first. 
+ statement("#if defined(GL_AMD_gpu_shader_half_float)"); + statement("#extension GL_AMD_gpu_shader_half_float : require"); + if (!options.vulkan_semantics) + { + statement("#elif defined(GL_NV_gpu_shader5)"); + statement("#extension GL_NV_gpu_shader5 : require"); + } + else + { + statement("#elif defined(GL_EXT_shader_explicit_arithmetic_types_float16)"); + statement("#extension GL_EXT_shader_explicit_arithmetic_types_float16 : require"); + } + statement("#else"); + statement("#error No extension available for FP16."); + statement("#endif"); + } + else if (ext == "GL_EXT_shader_explicit_arithmetic_types_int16") + { + if (options.vulkan_semantics) + statement("#extension GL_EXT_shader_explicit_arithmetic_types_int16 : require"); + else + { + statement("#if defined(GL_AMD_gpu_shader_int16)"); + statement("#extension GL_AMD_gpu_shader_int16 : require"); + statement("#else"); + statement("#error No extension available for Int16."); + statement("#endif"); + } + } + else if (ext == "GL_ARB_post_depth_coverage") + { + if (options.es) + statement("#extension GL_EXT_post_depth_coverage : require"); + else + { + statement("#if defined(GL_ARB_post_depth_coverge)"); + statement("#extension GL_ARB_post_depth_coverage : require"); + statement("#else"); + statement("#extension GL_EXT_post_depth_coverage : require"); + statement("#endif"); + } + } + else if (!options.vulkan_semantics && ext == "GL_ARB_shader_draw_parameters") + { + // Soft-enable this extension on plain GLSL. 
+ statement("#ifdef ", ext); + statement("#extension ", ext, " : enable"); + statement("#endif"); + } + else + statement("#extension ", ext, " : require"); + } + + if (!options.vulkan_semantics) + { + using Supp = ShaderSubgroupSupportHelper; + auto result = shader_subgroup_supporter.resolve(); + + for (uint32_t feature_index = 0; feature_index < Supp::FeatureCount; feature_index++) + { + auto feature = static_cast(feature_index); + if (!shader_subgroup_supporter.is_feature_requested(feature)) + continue; + + auto exts = Supp::get_candidates_for_feature(feature, result); + if (exts.empty()) + continue; + + statement(""); + + for (auto &ext : exts) + { + const char *name = Supp::get_extension_name(ext); + const char *extra_predicate = Supp::get_extra_required_extension_predicate(ext); + auto extra_names = Supp::get_extra_required_extension_names(ext); + statement(&ext != &exts.front() ? "#elif" : "#if", " defined(", name, ")", + (*extra_predicate != '\0' ? " && " : ""), extra_predicate); + for (const auto &e : extra_names) + statement("#extension ", e, " : enable"); + statement("#extension ", name, " : require"); + } + + if (!Supp::can_feature_be_implemented_without_extensions(feature)) + { + statement("#else"); + statement("#error No extensions available to emulate requested subgroup feature."); + } + + statement("#endif"); + } + } + + for (auto &header : header_lines) + statement(header); + + SmallVector inputs; + SmallVector outputs; + + switch (execution.model) + { + case ExecutionModelGeometry: + if ((execution.flags.get(ExecutionModeInvocations)) && execution.invocations != 1) + inputs.push_back(join("invocations = ", execution.invocations)); + if (execution.flags.get(ExecutionModeInputPoints)) + inputs.push_back("points"); + if (execution.flags.get(ExecutionModeInputLines)) + inputs.push_back("lines"); + if (execution.flags.get(ExecutionModeInputLinesAdjacency)) + inputs.push_back("lines_adjacency"); + if (execution.flags.get(ExecutionModeTriangles)) + 
inputs.push_back("triangles"); + if (execution.flags.get(ExecutionModeInputTrianglesAdjacency)) + inputs.push_back("triangles_adjacency"); + + if (!execution.geometry_passthrough) + { + // For passthrough, these are implied and cannot be declared in shader. + outputs.push_back(join("max_vertices = ", execution.output_vertices)); + if (execution.flags.get(ExecutionModeOutputTriangleStrip)) + outputs.push_back("triangle_strip"); + if (execution.flags.get(ExecutionModeOutputPoints)) + outputs.push_back("points"); + if (execution.flags.get(ExecutionModeOutputLineStrip)) + outputs.push_back("line_strip"); + } + break; + + case ExecutionModelTessellationControl: + if (execution.flags.get(ExecutionModeOutputVertices)) + outputs.push_back(join("vertices = ", execution.output_vertices)); + break; + + case ExecutionModelTessellationEvaluation: + if (execution.flags.get(ExecutionModeQuads)) + inputs.push_back("quads"); + if (execution.flags.get(ExecutionModeTriangles)) + inputs.push_back("triangles"); + if (execution.flags.get(ExecutionModeIsolines)) + inputs.push_back("isolines"); + if (execution.flags.get(ExecutionModePointMode)) + inputs.push_back("point_mode"); + + if (!execution.flags.get(ExecutionModeIsolines)) + { + if (execution.flags.get(ExecutionModeVertexOrderCw)) + inputs.push_back("cw"); + if (execution.flags.get(ExecutionModeVertexOrderCcw)) + inputs.push_back("ccw"); + } + + if (execution.flags.get(ExecutionModeSpacingFractionalEven)) + inputs.push_back("fractional_even_spacing"); + if (execution.flags.get(ExecutionModeSpacingFractionalOdd)) + inputs.push_back("fractional_odd_spacing"); + if (execution.flags.get(ExecutionModeSpacingEqual)) + inputs.push_back("equal_spacing"); + break; + + case ExecutionModelGLCompute: + { + if (execution.workgroup_size.constant != 0) + { + SpecializationConstant wg_x, wg_y, wg_z; + get_work_group_size_specialization_constants(wg_x, wg_y, wg_z); + + // If there are any spec constants on legacy GLSL, defer declaration, we need to
set up macro + // declarations before we can emit the work group size. + if (options.vulkan_semantics || + ((wg_x.id == ConstantID(0)) && (wg_y.id == ConstantID(0)) && (wg_z.id == ConstantID(0)))) + build_workgroup_size(inputs, wg_x, wg_y, wg_z); + } + else + { + inputs.push_back(join("local_size_x = ", execution.workgroup_size.x)); + inputs.push_back(join("local_size_y = ", execution.workgroup_size.y)); + inputs.push_back(join("local_size_z = ", execution.workgroup_size.z)); + } + break; + } + + case ExecutionModelFragment: + if (options.es) + { + switch (options.fragment.default_float_precision) + { + case Options::Lowp: + statement("precision lowp float;"); + break; + + case Options::Mediump: + statement("precision mediump float;"); + break; + + case Options::Highp: + statement("precision highp float;"); + break; + + default: + break; + } + + switch (options.fragment.default_int_precision) + { + case Options::Lowp: + statement("precision lowp int;"); + break; + + case Options::Mediump: + statement("precision mediump int;"); + break; + + case Options::Highp: + statement("precision highp int;"); + break; + + default: + break; + } + } + + if (execution.flags.get(ExecutionModeEarlyFragmentTests)) + inputs.push_back("early_fragment_tests"); + if (execution.flags.get(ExecutionModePostDepthCoverage)) + inputs.push_back("post_depth_coverage"); + + if (execution.flags.get(ExecutionModePixelInterlockOrderedEXT)) + inputs.push_back("pixel_interlock_ordered"); + else if (execution.flags.get(ExecutionModePixelInterlockUnorderedEXT)) + inputs.push_back("pixel_interlock_unordered"); + else if (execution.flags.get(ExecutionModeSampleInterlockOrderedEXT)) + inputs.push_back("sample_interlock_ordered"); + else if (execution.flags.get(ExecutionModeSampleInterlockUnorderedEXT)) + inputs.push_back("sample_interlock_unordered"); + + if (!options.es && execution.flags.get(ExecutionModeDepthGreater)) + statement("layout(depth_greater) out float gl_FragDepth;"); + else if (!options.es 
&& execution.flags.get(ExecutionModeDepthLess)) + statement("layout(depth_less) out float gl_FragDepth;"); + + break; + + default: + break; + } + + if (!inputs.empty()) + statement("layout(", merge(inputs), ") in;"); + if (!outputs.empty()) + statement("layout(", merge(outputs), ") out;"); + + statement(""); +} + +bool CompilerGLSL::type_is_empty(const SPIRType &type) +{ + return type.basetype == SPIRType::Struct && type.member_types.empty(); +} + +void CompilerGLSL::emit_struct(SPIRType &type) +{ + // Struct types can be stamped out multiple times + // with just different offsets, matrix layouts, etc ... + // Type-punning with these types is legal, which complicates things + // when we are storing struct and array types in an SSBO for example. + // If the type master is packed however, we can no longer assume that the struct declaration will be redundant. + if (type.type_alias != TypeID(0) && + !has_extended_decoration(type.type_alias, SPIRVCrossDecorationBufferBlockRepacked)) + return; + + add_resource_name(type.self); + auto name = type_to_glsl(type); + + statement(!backend.explicit_struct_type ? "struct " : "", name); + begin_scope(); + + type.member_name_cache.clear(); + + uint32_t i = 0; + bool emitted = false; + for (auto &member : type.member_types) + { + add_member_name(type, i); + emit_struct_member(type, member, i); + i++; + emitted = true; + } + + // Don't declare empty structs in GLSL, this is not allowed. 
+ if (type_is_empty(type) && !backend.supports_empty_struct) + { + statement("int empty_struct_member;"); + emitted = true; + } + + if (has_extended_decoration(type.self, SPIRVCrossDecorationPaddingTarget)) + emit_struct_padding_target(type); + + end_scope_decl(); + + if (emitted) + statement(""); +} + +string CompilerGLSL::to_interpolation_qualifiers(const Bitset &flags) +{ + string res; + //if (flags & (1ull << DecorationSmooth)) + // res += "smooth "; + if (flags.get(DecorationFlat)) + res += "flat "; + if (flags.get(DecorationNoPerspective)) + res += "noperspective "; + if (flags.get(DecorationCentroid)) + res += "centroid "; + if (flags.get(DecorationPatch)) + res += "patch "; + if (flags.get(DecorationSample)) + res += "sample "; + if (flags.get(DecorationInvariant)) + res += "invariant "; + if (flags.get(DecorationExplicitInterpAMD)) + res += "__explicitInterpAMD "; + + return res; +} + +string CompilerGLSL::layout_for_member(const SPIRType &type, uint32_t index) +{ + if (is_legacy()) + return ""; + + bool is_block = ir.meta[type.self].decoration.decoration_flags.get(DecorationBlock) || + ir.meta[type.self].decoration.decoration_flags.get(DecorationBufferBlock); + if (!is_block) + return ""; + + auto &memb = ir.meta[type.self].members; + if (index >= memb.size()) + return ""; + auto &dec = memb[index]; + + SmallVector attr; + + if (has_member_decoration(type.self, index, DecorationPassthroughNV)) + attr.push_back("passthrough"); + + // We can only apply layouts on members in block interfaces. + // This is a bit problematic because in SPIR-V decorations are applied on the struct types directly. + // This is not supported on GLSL, so we have to make the assumption that if a struct within our buffer block struct + // has a decoration, it was originally caused by a top-level layout() qualifier in GLSL. 
+ // + // We would like to go from (SPIR-V style): + // + // struct Foo { layout(row_major) mat4 matrix; }; + // buffer UBO { Foo foo; }; + // + // to + // + // struct Foo { mat4 matrix; }; // GLSL doesn't support any layout shenanigans in raw struct declarations. + // buffer UBO { layout(row_major) Foo foo; }; // Apply the layout on top-level. + auto flags = combined_decoration_for_member(type, index); + + if (flags.get(DecorationRowMajor)) + attr.push_back("row_major"); + // We don't emit any global layouts, so column_major is default. + //if (flags & (1ull << DecorationColMajor)) + // attr.push_back("column_major"); + + if (dec.decoration_flags.get(DecorationLocation) && can_use_io_location(type.storage, true)) + attr.push_back(join("location = ", dec.location)); + + // Can only declare component if we can declare location. + if (dec.decoration_flags.get(DecorationComponent) && can_use_io_location(type.storage, true)) + { + if (!options.es) + { + if (options.version < 440 && options.version >= 140) + require_extension_internal("GL_ARB_enhanced_layouts"); + else if (options.version < 140) + SPIRV_CROSS_THROW("Component decoration is not supported in targets below GLSL 1.40."); + attr.push_back(join("component = ", dec.component)); + } + else + SPIRV_CROSS_THROW("Component decoration is not supported in ES targets."); + } + + // SPIRVCrossDecorationExplicitOffset is set by buffer_to_packing_standard (called from layout_for_variable) earlier to mark that we need to emit offset qualifiers. + // This is only done selectively in GLSL as needed. 
+ if (has_extended_decoration(type.self, SPIRVCrossDecorationExplicitOffset) && + dec.decoration_flags.get(DecorationOffset)) + attr.push_back(join("offset = ", dec.offset)); + else if (type.storage == StorageClassOutput && dec.decoration_flags.get(DecorationOffset)) + attr.push_back(join("xfb_offset = ", dec.offset)); + + if (attr.empty()) + return ""; + + string res = "layout("; + res += merge(attr); + res += ") "; + return res; +} + +const char *CompilerGLSL::format_to_glsl(spv::ImageFormat format) +{ + if (options.es && is_desktop_only_format(format)) + SPIRV_CROSS_THROW("Attempting to use image format not supported in ES profile."); + + switch (format) + { + case ImageFormatRgba32f: + return "rgba32f"; + case ImageFormatRgba16f: + return "rgba16f"; + case ImageFormatR32f: + return "r32f"; + case ImageFormatRgba8: + return "rgba8"; + case ImageFormatRgba8Snorm: + return "rgba8_snorm"; + case ImageFormatRg32f: + return "rg32f"; + case ImageFormatRg16f: + return "rg16f"; + case ImageFormatRgba32i: + return "rgba32i"; + case ImageFormatRgba16i: + return "rgba16i"; + case ImageFormatR32i: + return "r32i"; + case ImageFormatRgba8i: + return "rgba8i"; + case ImageFormatRg32i: + return "rg32i"; + case ImageFormatRg16i: + return "rg16i"; + case ImageFormatRgba32ui: + return "rgba32ui"; + case ImageFormatRgba16ui: + return "rgba16ui"; + case ImageFormatR32ui: + return "r32ui"; + case ImageFormatRgba8ui: + return "rgba8ui"; + case ImageFormatRg32ui: + return "rg32ui"; + case ImageFormatRg16ui: + return "rg16ui"; + case ImageFormatR11fG11fB10f: + return "r11f_g11f_b10f"; + case ImageFormatR16f: + return "r16f"; + case ImageFormatRgb10A2: + return "rgb10_a2"; + case ImageFormatR8: + return "r8"; + case ImageFormatRg8: + return "rg8"; + case ImageFormatR16: + return "r16"; + case ImageFormatRg16: + return "rg16"; + case ImageFormatRgba16: + return "rgba16"; + case ImageFormatR16Snorm: + return "r16_snorm"; + case ImageFormatRg16Snorm: + return "rg16_snorm"; + case 
ImageFormatRgba16Snorm: + return "rgba16_snorm"; + case ImageFormatR8Snorm: + return "r8_snorm"; + case ImageFormatRg8Snorm: + return "rg8_snorm"; + case ImageFormatR8ui: + return "r8ui"; + case ImageFormatRg8ui: + return "rg8ui"; + case ImageFormatR16ui: + return "r16ui"; + case ImageFormatRgb10a2ui: + return "rgb10_a2ui"; + case ImageFormatR8i: + return "r8i"; + case ImageFormatRg8i: + return "rg8i"; + case ImageFormatR16i: + return "r16i"; + default: + case ImageFormatUnknown: + return nullptr; + } +} + +uint32_t CompilerGLSL::type_to_packed_base_size(const SPIRType &type, BufferPackingStandard) +{ + switch (type.basetype) + { + case SPIRType::Double: + case SPIRType::Int64: + case SPIRType::UInt64: + return 8; + case SPIRType::Float: + case SPIRType::Int: + case SPIRType::UInt: + return 4; + case SPIRType::Half: + case SPIRType::Short: + case SPIRType::UShort: + return 2; + case SPIRType::SByte: + case SPIRType::UByte: + return 1; + + default: + SPIRV_CROSS_THROW("Unrecognized type in type_to_packed_base_size."); + } +} + +uint32_t CompilerGLSL::type_to_packed_alignment(const SPIRType &type, const Bitset &flags, + BufferPackingStandard packing) +{ + // If using PhysicalStorageBufferEXT storage class, this is a pointer, + // and is 64-bit. 
+ if (type.storage == StorageClassPhysicalStorageBufferEXT) + { + if (!type.pointer) + SPIRV_CROSS_THROW("Types in PhysicalStorageBufferEXT must be pointers."); + + if (ir.addressing_model == AddressingModelPhysicalStorageBuffer64EXT) + { + if (packing_is_vec4_padded(packing) && type_is_array_of_pointers(type)) + return 16; + else + return 8; + } + else + SPIRV_CROSS_THROW("AddressingModelPhysicalStorageBuffer64EXT must be used for PhysicalStorageBufferEXT."); + } + + if (!type.array.empty()) + { + uint32_t minimum_alignment = 1; + if (packing_is_vec4_padded(packing)) + minimum_alignment = 16; + + auto *tmp = &get(type.parent_type); + while (!tmp->array.empty()) + tmp = &get(tmp->parent_type); + + // Get the alignment of the base type, then maybe round up. + return max(minimum_alignment, type_to_packed_alignment(*tmp, flags, packing)); + } + + if (type.basetype == SPIRType::Struct) + { + // Rule 9. Structs alignments are maximum alignment of its members. + uint32_t alignment = 1; + for (uint32_t i = 0; i < type.member_types.size(); i++) + { + auto member_flags = ir.meta[type.self].members[i].decoration_flags; + alignment = + max(alignment, type_to_packed_alignment(get(type.member_types[i]), member_flags, packing)); + } + + // In std140, struct alignment is rounded up to 16. + if (packing_is_vec4_padded(packing)) + alignment = max(alignment, 16u); + + return alignment; + } + else + { + const uint32_t base_alignment = type_to_packed_base_size(type, packing); + + // Alignment requirement for scalar block layout is always the alignment for the most basic component. + if (packing_is_scalar(packing)) + return base_alignment; + + // Vectors are *not* aligned in HLSL, but there's an extra rule where vectors cannot straddle + // a vec4, this is handled outside since that part knows our current offset. + if (type.columns == 1 && packing_is_hlsl(packing)) + return base_alignment; + + // From 7.6.2.2 in GL 4.5 core spec. 
+ // Rule 1 + if (type.vecsize == 1 && type.columns == 1) + return base_alignment; + + // Rule 2 + if ((type.vecsize == 2 || type.vecsize == 4) && type.columns == 1) + return type.vecsize * base_alignment; + + // Rule 3 + if (type.vecsize == 3 && type.columns == 1) + return 4 * base_alignment; + + // Rule 4 implied. Alignment does not change in std430. + + // Rule 5. Column-major matrices are stored as arrays of + // vectors. + if (flags.get(DecorationColMajor) && type.columns > 1) + { + if (packing_is_vec4_padded(packing)) + return 4 * base_alignment; + else if (type.vecsize == 3) + return 4 * base_alignment; + else + return type.vecsize * base_alignment; + } + + // Rule 6 implied. + + // Rule 7. + if (flags.get(DecorationRowMajor) && type.vecsize > 1) + { + if (packing_is_vec4_padded(packing)) + return 4 * base_alignment; + else if (type.columns == 3) + return 4 * base_alignment; + else + return type.columns * base_alignment; + } + + // Rule 8 implied. + } + + SPIRV_CROSS_THROW("Did not find suitable rule for type. Bogus decorations?"); +} + +uint32_t CompilerGLSL::type_to_packed_array_stride(const SPIRType &type, const Bitset &flags, + BufferPackingStandard packing) +{ + // Array stride is equal to aligned size of the underlying type. + uint32_t parent = type.parent_type; + assert(parent); + + auto &tmp = get(parent); + + uint32_t size = type_to_packed_size(tmp, flags, packing); + uint32_t alignment = type_to_packed_alignment(type, flags, packing); + return (size + alignment - 1) & ~(alignment - 1); +} + +uint32_t CompilerGLSL::type_to_packed_size(const SPIRType &type, const Bitset &flags, BufferPackingStandard packing) +{ + if (!type.array.empty()) + { + uint32_t packed_size = to_array_size_literal(type) * type_to_packed_array_stride(type, flags, packing); + + // For arrays of vectors and matrices in HLSL, the last element has a size which depends on its vector size, + // so that it is possible to pack other vectors into the last element. 
+ if (packing_is_hlsl(packing) && type.basetype != SPIRType::Struct) + packed_size -= (4 - type.vecsize) * (type.width / 8); + + return packed_size; + } + + // If using PhysicalStorageBufferEXT storage class, this is a pointer, + // and is 64-bit. + if (type.storage == StorageClassPhysicalStorageBufferEXT) + { + if (!type.pointer) + SPIRV_CROSS_THROW("Types in PhysicalStorageBufferEXT must be pointers."); + + if (ir.addressing_model == AddressingModelPhysicalStorageBuffer64EXT) + return 8; + else + SPIRV_CROSS_THROW("AddressingModelPhysicalStorageBuffer64EXT must be used for PhysicalStorageBufferEXT."); + } + + uint32_t size = 0; + + if (type.basetype == SPIRType::Struct) + { + uint32_t pad_alignment = 1; + + for (uint32_t i = 0; i < type.member_types.size(); i++) + { + auto member_flags = ir.meta[type.self].members[i].decoration_flags; + auto &member_type = get(type.member_types[i]); + + uint32_t packed_alignment = type_to_packed_alignment(member_type, member_flags, packing); + uint32_t alignment = max(packed_alignment, pad_alignment); + + // The next member following a struct member is aligned to the base alignment of the struct that came before. + // GL 4.5 spec, 7.6.2.2. 
+ if (member_type.basetype == SPIRType::Struct) + pad_alignment = packed_alignment; + else + pad_alignment = 1; + + size = (size + alignment - 1) & ~(alignment - 1); + size += type_to_packed_size(member_type, member_flags, packing); + } + } + else + { + const uint32_t base_alignment = type_to_packed_base_size(type, packing); + + if (packing_is_scalar(packing)) + { + size = type.vecsize * type.columns * base_alignment; + } + else + { + if (type.columns == 1) + size = type.vecsize * base_alignment; + + if (flags.get(DecorationColMajor) && type.columns > 1) + { + if (packing_is_vec4_padded(packing)) + size = type.columns * 4 * base_alignment; + else if (type.vecsize == 3) + size = type.columns * 4 * base_alignment; + else + size = type.columns * type.vecsize * base_alignment; + } + + if (flags.get(DecorationRowMajor) && type.vecsize > 1) + { + if (packing_is_vec4_padded(packing)) + size = type.vecsize * 4 * base_alignment; + else if (type.columns == 3) + size = type.vecsize * 4 * base_alignment; + else + size = type.vecsize * type.columns * base_alignment; + } + + // For matrices in HLSL, the last element has a size which depends on its vector size, + // so that it is possible to pack other vectors into the last element. + if (packing_is_hlsl(packing) && type.columns > 1) + size -= (4 - type.vecsize) * (type.width / 8); + } + } + + return size; +} + +bool CompilerGLSL::buffer_is_packing_standard(const SPIRType &type, BufferPackingStandard packing, + uint32_t *failed_validation_index, uint32_t start_offset, + uint32_t end_offset) +{ + // This is very tricky and error prone, but try to be exhaustive and correct here. + // SPIR-V doesn't directly say if we're using std430 or std140. + // SPIR-V communicates this using Offset and ArrayStride decorations (which is what really matters), + // so we have to try to infer whether or not the original GLSL source was std140 or std430 based on this information. 
+ // We do not have to consider shared or packed since these layouts are not allowed in Vulkan SPIR-V (they are useless anyways, and custom offsets would do the same thing). + // + // It is almost certain that we're using std430, but it gets tricky with arrays in particular. + // We will assume std430, but infer std140 if we can prove the struct is not compliant with std430. + // + // The only two differences between std140 and std430 are related to padding alignment/array stride + // in arrays and structs. In std140 they take minimum vec4 alignment. + // std430 only removes the vec4 requirement. + + uint32_t offset = 0; + uint32_t pad_alignment = 1; + + bool is_top_level_block = + has_decoration(type.self, DecorationBlock) || has_decoration(type.self, DecorationBufferBlock); + + for (uint32_t i = 0; i < type.member_types.size(); i++) + { + auto &memb_type = get(type.member_types[i]); + auto member_flags = ir.meta[type.self].members[i].decoration_flags; + + // Verify alignment rules. + uint32_t packed_alignment = type_to_packed_alignment(memb_type, member_flags, packing); + + // This is a rather dirty workaround to deal with some cases of OpSpecConstantOp used as array size, e.g: + // layout(constant_id = 0) const int s = 10; + // const int S = s + 5; // SpecConstantOp + // buffer Foo { int data[S]; }; // <-- Very hard for us to deduce a fixed value here, + // we would need full implementation of compile-time constant folding. :( + // If we are the last member of a struct, there might be cases where the actual size of that member is irrelevant + // for our analysis (e.g. unsized arrays). + // This lets us simply ignore that there are spec constant op sized arrays in our buffers. + // Querying size of this member will fail, so just don't call it unless we have to. + // + // This is likely "best effort" we can support without going into unacceptably complicated workarounds. 
+ bool member_can_be_unsized = + is_top_level_block && size_t(i + 1) == type.member_types.size() && !memb_type.array.empty(); + + uint32_t packed_size = 0; + if (!member_can_be_unsized || packing_is_hlsl(packing)) + packed_size = type_to_packed_size(memb_type, member_flags, packing); + + // We only need to care about this if we have non-array types which can straddle the vec4 boundary. + if (packing_is_hlsl(packing)) + { + // If a member straddles across a vec4 boundary, alignment is actually vec4. + uint32_t begin_word = offset / 16; + uint32_t end_word = (offset + packed_size - 1) / 16; + if (begin_word != end_word) + packed_alignment = max(packed_alignment, 16u); + } + + uint32_t actual_offset = type_struct_member_offset(type, i); + // Field is not in the specified range anymore and we can ignore any further fields. + if (actual_offset >= end_offset) + break; + + uint32_t alignment = max(packed_alignment, pad_alignment); + offset = (offset + alignment - 1) & ~(alignment - 1); + + // The next member following a struct member is aligned to the base alignment of the struct that came before. + // GL 4.5 spec, 7.6.2.2. + if (memb_type.basetype == SPIRType::Struct && !memb_type.pointer) + pad_alignment = packed_alignment; + else + pad_alignment = 1; + + // Only care about packing if we are in the given range + if (actual_offset >= start_offset) + { + // We only care about offsets in std140, std430, etc ... + // For EnhancedLayout variants, we have the flexibility to choose our own offsets. + if (!packing_has_flexible_offset(packing)) + { + if (actual_offset != offset) // This cannot be the packing we're looking for. + { + if (failed_validation_index) + *failed_validation_index = i; + return false; + } + } + else if ((actual_offset & (alignment - 1)) != 0) + { + // We still need to verify that alignment rules are observed, even if we have explicit offset. 
+ if (failed_validation_index) + *failed_validation_index = i; + return false; + } + + // Verify array stride rules. + if (!memb_type.array.empty() && type_to_packed_array_stride(memb_type, member_flags, packing) != + type_struct_member_array_stride(type, i)) + { + if (failed_validation_index) + *failed_validation_index = i; + return false; + } + + // Verify that sub-structs also follow packing rules. + // We cannot use enhanced layouts on substructs, so they better be up to spec. + auto substruct_packing = packing_to_substruct_packing(packing); + + if (!memb_type.pointer && !memb_type.member_types.empty() && + !buffer_is_packing_standard(memb_type, substruct_packing)) + { + if (failed_validation_index) + *failed_validation_index = i; + return false; + } + } + + // Bump size. + offset = actual_offset + packed_size; + } + + return true; +} + +bool CompilerGLSL::can_use_io_location(StorageClass storage, bool block) +{ + // Location specifiers are a must-have in SPIR-V, but they aren't really supported in earlier versions of GLSL. + // Be very explicit here about how to solve the issue. + if ((get_execution_model() != ExecutionModelVertex && storage == StorageClassInput) || + (get_execution_model() != ExecutionModelFragment && storage == StorageClassOutput)) + { + uint32_t minimum_desktop_version = block ? 440 : 410; + // ARB_enhanced_layouts vs ARB_separate_shader_objects ... 
+ + if (!options.es && options.version < minimum_desktop_version && !options.separate_shader_objects) + return false; + else if (options.es && options.version < 310) + return false; + } + + if ((get_execution_model() == ExecutionModelVertex && storage == StorageClassInput) || + (get_execution_model() == ExecutionModelFragment && storage == StorageClassOutput)) + { + if (options.es && options.version < 300) + return false; + else if (!options.es && options.version < 330) + return false; + } + + if (storage == StorageClassUniform || storage == StorageClassUniformConstant || storage == StorageClassPushConstant) + { + if (options.es && options.version < 310) + return false; + else if (!options.es && options.version < 430) + return false; + } + + return true; +} + +string CompilerGLSL::layout_for_variable(const SPIRVariable &var) +{ + // FIXME: Come up with a better solution for when to disable layouts. + // Having layouts depend on extensions as well as which types + // of layouts are used. For now, the simple solution is to just disable + // layouts for legacy versions. 
+ if (is_legacy()) + return ""; + + if (subpass_input_is_framebuffer_fetch(var.self)) + return ""; + + SmallVector attr; + + auto &type = get(var.basetype); + auto &flags = get_decoration_bitset(var.self); + auto &typeflags = get_decoration_bitset(type.self); + + if (flags.get(DecorationPassthroughNV)) + attr.push_back("passthrough"); + + if (options.vulkan_semantics && var.storage == StorageClassPushConstant) + attr.push_back("push_constant"); + else if (var.storage == StorageClassShaderRecordBufferNV) + attr.push_back("shaderRecordNV"); + + if (flags.get(DecorationRowMajor)) + attr.push_back("row_major"); + if (flags.get(DecorationColMajor)) + attr.push_back("column_major"); + + if (options.vulkan_semantics) + { + if (flags.get(DecorationInputAttachmentIndex)) + attr.push_back(join("input_attachment_index = ", get_decoration(var.self, DecorationInputAttachmentIndex))); + } + + bool is_block = has_decoration(type.self, DecorationBlock); + if (flags.get(DecorationLocation) && can_use_io_location(var.storage, is_block)) + { + Bitset combined_decoration; + for (uint32_t i = 0; i < ir.meta[type.self].members.size(); i++) + combined_decoration.merge_or(combined_decoration_for_member(type, i)); + + // If our members have location decorations, we don't need to + // emit location decorations at the top as well (looks weird). + if (!combined_decoration.get(DecorationLocation)) + attr.push_back(join("location = ", get_decoration(var.self, DecorationLocation))); + } + + // Transform feedback + bool uses_enhanced_layouts = false; + if (is_block && var.storage == StorageClassOutput) + { + // For blocks, there is a restriction where xfb_stride/xfb_buffer must only be declared on the block itself, + // since all members must match the same xfb_buffer. The only thing we will declare for members of the block + // is the xfb_offset. 
+ uint32_t member_count = uint32_t(type.member_types.size()); + bool have_xfb_buffer_stride = false; + bool have_any_xfb_offset = false; + bool have_geom_stream = false; + uint32_t xfb_stride = 0, xfb_buffer = 0, geom_stream = 0; + + if (flags.get(DecorationXfbBuffer) && flags.get(DecorationXfbStride)) + { + have_xfb_buffer_stride = true; + xfb_buffer = get_decoration(var.self, DecorationXfbBuffer); + xfb_stride = get_decoration(var.self, DecorationXfbStride); + } + + if (flags.get(DecorationStream)) + { + have_geom_stream = true; + geom_stream = get_decoration(var.self, DecorationStream); + } + + // Verify that none of the members violate our assumption. + for (uint32_t i = 0; i < member_count; i++) + { + if (has_member_decoration(type.self, i, DecorationStream)) + { + uint32_t member_geom_stream = get_member_decoration(type.self, i, DecorationStream); + if (have_geom_stream && member_geom_stream != geom_stream) + SPIRV_CROSS_THROW("IO block member Stream mismatch."); + have_geom_stream = true; + geom_stream = member_geom_stream; + } + + // Only members with an Offset decoration participate in XFB. 
+ if (!has_member_decoration(type.self, i, DecorationOffset)) + continue; + have_any_xfb_offset = true; + + if (has_member_decoration(type.self, i, DecorationXfbBuffer)) + { + uint32_t buffer_index = get_member_decoration(type.self, i, DecorationXfbBuffer); + if (have_xfb_buffer_stride && buffer_index != xfb_buffer) + SPIRV_CROSS_THROW("IO block member XfbBuffer mismatch."); + have_xfb_buffer_stride = true; + xfb_buffer = buffer_index; + } + + if (has_member_decoration(type.self, i, DecorationXfbStride)) + { + uint32_t stride = get_member_decoration(type.self, i, DecorationXfbStride); + if (have_xfb_buffer_stride && stride != xfb_stride) + SPIRV_CROSS_THROW("IO block member XfbStride mismatch."); + have_xfb_buffer_stride = true; + xfb_stride = stride; + } + } + + if (have_xfb_buffer_stride && have_any_xfb_offset) + { + attr.push_back(join("xfb_buffer = ", xfb_buffer)); + attr.push_back(join("xfb_stride = ", xfb_stride)); + uses_enhanced_layouts = true; + } + + if (have_geom_stream) + { + if (get_execution_model() != ExecutionModelGeometry) + SPIRV_CROSS_THROW("Geometry streams can only be used in geometry shaders."); + if (options.es) + SPIRV_CROSS_THROW("Multiple geometry streams not supported in ESSL."); + if (options.version < 400) + require_extension_internal("GL_ARB_transform_feedback3"); + attr.push_back(join("stream = ", get_decoration(var.self, DecorationStream))); + } + } + else if (var.storage == StorageClassOutput) + { + if (flags.get(DecorationXfbBuffer) && flags.get(DecorationXfbStride) && flags.get(DecorationOffset)) + { + // XFB for standalone variables, we can emit all decorations. 
+ attr.push_back(join("xfb_buffer = ", get_decoration(var.self, DecorationXfbBuffer))); + attr.push_back(join("xfb_stride = ", get_decoration(var.self, DecorationXfbStride))); + attr.push_back(join("xfb_offset = ", get_decoration(var.self, DecorationOffset))); + uses_enhanced_layouts = true; + } + + if (flags.get(DecorationStream)) + { + if (get_execution_model() != ExecutionModelGeometry) + SPIRV_CROSS_THROW("Geometry streams can only be used in geometry shaders."); + if (options.es) + SPIRV_CROSS_THROW("Multiple geometry streams not supported in ESSL."); + if (options.version < 400) + require_extension_internal("GL_ARB_transform_feedback3"); + attr.push_back(join("stream = ", get_decoration(var.self, DecorationStream))); + } + } + + // Can only declare Component if we can declare location. + if (flags.get(DecorationComponent) && can_use_io_location(var.storage, is_block)) + { + uses_enhanced_layouts = true; + attr.push_back(join("component = ", get_decoration(var.self, DecorationComponent))); + } + + if (uses_enhanced_layouts) + { + if (!options.es) + { + if (options.version < 440 && options.version >= 140) + require_extension_internal("GL_ARB_enhanced_layouts"); + else if (options.version < 140) + SPIRV_CROSS_THROW("GL_ARB_enhanced_layouts is not supported in targets below GLSL 1.40."); + if (!options.es && options.version < 440) + require_extension_internal("GL_ARB_enhanced_layouts"); + } + else if (options.es) + SPIRV_CROSS_THROW("GL_ARB_enhanced_layouts is not supported in ESSL."); + } + + if (flags.get(DecorationIndex)) + attr.push_back(join("index = ", get_decoration(var.self, DecorationIndex))); + + // Do not emit set = decoration in regular GLSL output, but + // we need to preserve it in Vulkan GLSL mode. 
+ if (var.storage != StorageClassPushConstant && var.storage != StorageClassShaderRecordBufferNV) + { + if (flags.get(DecorationDescriptorSet) && options.vulkan_semantics) + attr.push_back(join("set = ", get_decoration(var.self, DecorationDescriptorSet))); + } + + bool push_constant_block = options.vulkan_semantics && var.storage == StorageClassPushConstant; + bool ssbo_block = var.storage == StorageClassStorageBuffer || var.storage == StorageClassShaderRecordBufferNV || + (var.storage == StorageClassUniform && typeflags.get(DecorationBufferBlock)); + bool emulated_ubo = var.storage == StorageClassPushConstant && options.emit_push_constant_as_uniform_buffer; + bool ubo_block = var.storage == StorageClassUniform && typeflags.get(DecorationBlock); + + // GL 3.0/GLSL 1.30 is not considered legacy, but it doesn't have UBOs ... + bool can_use_buffer_blocks = (options.es && options.version >= 300) || (!options.es && options.version >= 140); + + // pretend no UBOs when options say so + if (ubo_block && options.emit_uniform_buffer_as_plain_uniforms) + can_use_buffer_blocks = false; + + bool can_use_binding; + if (options.es) + can_use_binding = options.version >= 310; + else + can_use_binding = options.enable_420pack_extension || (options.version >= 420); + + // Make sure we don't emit binding layout for a classic uniform on GLSL 1.30. + if (!can_use_buffer_blocks && var.storage == StorageClassUniform) + can_use_binding = false; + + if (var.storage == StorageClassShaderRecordBufferNV) + can_use_binding = false; + + if (can_use_binding && flags.get(DecorationBinding)) + attr.push_back(join("binding = ", get_decoration(var.self, DecorationBinding))); + + if (var.storage != StorageClassOutput && flags.get(DecorationOffset)) + attr.push_back(join("offset = ", get_decoration(var.self, DecorationOffset))); + + // Instead of adding explicit offsets for every element here, just assume we're using std140 or std430. 
+ // If SPIR-V does not comply with either layout, we cannot really work around it. + if (can_use_buffer_blocks && (ubo_block || emulated_ubo)) + { + attr.push_back(buffer_to_packing_standard(type, false)); + } + else if (can_use_buffer_blocks && (push_constant_block || ssbo_block)) + { + attr.push_back(buffer_to_packing_standard(type, true)); + } + + // For images, the type itself adds a layout qualifier. + // Only emit the format for storage images. + if (type.basetype == SPIRType::Image && type.image.sampled == 2) + { + const char *fmt = format_to_glsl(type.image.format); + if (fmt) + attr.push_back(fmt); + } + + if (attr.empty()) + return ""; + + string res = "layout("; + res += merge(attr); + res += ") "; + return res; +} + +string CompilerGLSL::buffer_to_packing_standard(const SPIRType &type, bool support_std430_without_scalar_layout) +{ + if (support_std430_without_scalar_layout && buffer_is_packing_standard(type, BufferPackingStd430)) + return "std430"; + else if (buffer_is_packing_standard(type, BufferPackingStd140)) + return "std140"; + else if (options.vulkan_semantics && buffer_is_packing_standard(type, BufferPackingScalar)) + { + require_extension_internal("GL_EXT_scalar_block_layout"); + return "scalar"; + } + else if (support_std430_without_scalar_layout && + buffer_is_packing_standard(type, BufferPackingStd430EnhancedLayout)) + { + if (options.es && !options.vulkan_semantics) + SPIRV_CROSS_THROW("Push constant block cannot be expressed as neither std430 nor std140. ES-targets do " + "not support GL_ARB_enhanced_layouts."); + if (!options.es && !options.vulkan_semantics && options.version < 440) + require_extension_internal("GL_ARB_enhanced_layouts"); + + set_extended_decoration(type.self, SPIRVCrossDecorationExplicitOffset); + return "std430"; + } + else if (buffer_is_packing_standard(type, BufferPackingStd140EnhancedLayout)) + { + // Fallback time. 
We might be able to use the ARB_enhanced_layouts to deal with this difference, + // however, we can only use layout(offset) on the block itself, not any substructs, so the substructs better be the appropriate layout. + // Enhanced layouts seem to always work in Vulkan GLSL, so no need for extensions there. + if (options.es && !options.vulkan_semantics) + SPIRV_CROSS_THROW("Push constant block cannot be expressed as neither std430 nor std140. ES-targets do " + "not support GL_ARB_enhanced_layouts."); + if (!options.es && !options.vulkan_semantics && options.version < 440) + require_extension_internal("GL_ARB_enhanced_layouts"); + + set_extended_decoration(type.self, SPIRVCrossDecorationExplicitOffset); + return "std140"; + } + else if (options.vulkan_semantics && buffer_is_packing_standard(type, BufferPackingScalarEnhancedLayout)) + { + set_extended_decoration(type.self, SPIRVCrossDecorationExplicitOffset); + require_extension_internal("GL_EXT_scalar_block_layout"); + return "scalar"; + } + else if (!support_std430_without_scalar_layout && options.vulkan_semantics && + buffer_is_packing_standard(type, BufferPackingStd430)) + { + // UBOs can support std430 with GL_EXT_scalar_block_layout. + require_extension_internal("GL_EXT_scalar_block_layout"); + return "std430"; + } + else if (!support_std430_without_scalar_layout && options.vulkan_semantics && + buffer_is_packing_standard(type, BufferPackingStd430EnhancedLayout)) + { + // UBOs can support std430 with GL_EXT_scalar_block_layout. + set_extended_decoration(type.self, SPIRVCrossDecorationExplicitOffset); + require_extension_internal("GL_EXT_scalar_block_layout"); + return "std430"; + } + else + { + SPIRV_CROSS_THROW("Buffer block cannot be expressed as any of std430, std140, scalar, even with enhanced " + "layouts. 
You can try flattening this block to support a more flexible layout."); + } +} + +void CompilerGLSL::emit_push_constant_block(const SPIRVariable &var) +{ + if (flattened_buffer_blocks.count(var.self)) + emit_buffer_block_flattened(var); + else if (options.vulkan_semantics) + emit_push_constant_block_vulkan(var); + else if (options.emit_push_constant_as_uniform_buffer) + emit_buffer_block_native(var); + else + emit_push_constant_block_glsl(var); +} + +void CompilerGLSL::emit_push_constant_block_vulkan(const SPIRVariable &var) +{ + emit_buffer_block(var); +} + +void CompilerGLSL::emit_push_constant_block_glsl(const SPIRVariable &var) +{ + // OpenGL has no concept of push constant blocks, implement it as a uniform struct. + auto &type = get(var.basetype); + + auto &flags = ir.meta[var.self].decoration.decoration_flags; + flags.clear(DecorationBinding); + flags.clear(DecorationDescriptorSet); + +#if 0 + if (flags & ((1ull << DecorationBinding) | (1ull << DecorationDescriptorSet))) + SPIRV_CROSS_THROW("Push constant blocks cannot be compiled to GLSL with Binding or Set syntax. " + "Remap to location with reflection API first or disable these decorations."); +#endif + + // We're emitting the push constant block as a regular struct, so disable the block qualifier temporarily. + // Otherwise, we will end up emitting layout() qualifiers on naked structs which is not allowed. 
+ auto &block_flags = ir.meta[type.self].decoration.decoration_flags; + bool block_flag = block_flags.get(DecorationBlock); + block_flags.clear(DecorationBlock); + + emit_struct(type); + + if (block_flag) + block_flags.set(DecorationBlock); + + emit_uniform(var); + statement(""); +} + +/* Emits the buffer block for var: the flattened form when var.self is in flattened_buffer_blocks; the legacy form when is_legacy() holds, or the target is non-ES with options.version == 130, or var is a Uniform-storage Block and options.emit_uniform_buffer_as_plain_uniforms is set; otherwise the native block form. */ +void CompilerGLSL::emit_buffer_block(const SPIRVariable &var) +{ + auto &type = get(var.basetype); + bool ubo_block = var.storage == StorageClassUniform && has_decoration(type.self, DecorationBlock); + + if (flattened_buffer_blocks.count(var.self)) + emit_buffer_block_flattened(var); + else if (is_legacy() || (!options.es && options.version == 130) || + (ubo_block && options.emit_uniform_buffer_as_plain_uniforms)) + emit_buffer_block_legacy(var); + else + emit_buffer_block_native(var); +} + +/* Legacy form: declares the block type as a plain struct followed by a uniform of that type. Throws when var has StorageBuffer storage or its type carries the BufferBlock decoration. */ +void CompilerGLSL::emit_buffer_block_legacy(const SPIRVariable &var) +{ + auto &type = get(var.basetype); + bool ssbo = var.storage == StorageClassStorageBuffer || + ir.meta[type.self].decoration.decoration_flags.get(DecorationBufferBlock); + if (ssbo) + SPIRV_CROSS_THROW("SSBOs not supported in legacy targets."); + + // We're emitting the buffer block as a regular struct, so disable the block qualifier temporarily. + // Otherwise, we will end up emitting layout() qualifiers on naked structs which is not allowed. + auto &block_flags = ir.meta[type.self].decoration.decoration_flags; + bool block_flag = block_flags.get(DecorationBlock); + block_flags.clear(DecorationBlock); + emit_struct(type); + if (block_flag) + block_flags.set(DecorationBlock); + emit_uniform(var); + statement(""); +} + +/* Emits a buffer_reference block for type: only the forward declaration when forward_declaration is true, the full declaration otherwise. */ +void CompilerGLSL::emit_buffer_reference_block(SPIRType &type, bool forward_declaration) +{ + string buffer_name; + + if (forward_declaration) + { + // Block names should never alias, but from HLSL input they kind of can because block types are reused for UAVs ... + // Allow aliased name since we might be declaring the block twice. Once with buffer reference (forward declared) and one proper declaration.
+ // The names must match up. + buffer_name = to_name(type.self, false); + + // Shaders never use the block by interface name, so we don't + // have to track this other than updating name caches. + // If we have a collision for any reason, just fallback immediately. + if (ir.meta[type.self].decoration.alias.empty() || + block_ssbo_names.find(buffer_name) != end(block_ssbo_names) || + resource_names.find(buffer_name) != end(resource_names)) + { + buffer_name = join("_", type.self); + } + + // Make sure we get something unique for both global name scope and block name scope. + // See GLSL 4.5 spec: section 4.3.9 for details. + add_variable(block_ssbo_names, resource_names, buffer_name); + + // If for some reason buffer_name is an illegal name, make a final fallback to a workaround name. + // This cannot conflict with anything else, so we're safe now. + // We cannot reuse this fallback name in neither global scope (blocked by block_names) nor block name scope. + if (buffer_name.empty()) + buffer_name = join("_", type.self); + + block_names.insert(buffer_name); + block_ssbo_names.insert(buffer_name); + + // Ensure we emit the correct name when emitting non-forward pointer type. 
+ ir.meta[type.self].decoration.alias = buffer_name; + } + else if (type.basetype != SPIRType::Struct) + buffer_name = type_to_glsl(type); + else + buffer_name = to_name(type.self, false); + + if (!forward_declaration) + { + if (type.basetype == SPIRType::Struct) + { + auto flags = ir.get_buffer_block_type_flags(type); + string decorations; + if (flags.get(DecorationRestrict)) + decorations += " restrict"; + if (flags.get(DecorationCoherent)) + decorations += " coherent"; + if (flags.get(DecorationNonReadable)) + decorations += " writeonly"; + if (flags.get(DecorationNonWritable)) + decorations += " readonly"; + statement("layout(buffer_reference, ", buffer_to_packing_standard(type, true), + ")", decorations, " buffer ", buffer_name); + } + else + statement("layout(buffer_reference) buffer ", buffer_name); + + begin_scope(); + + if (type.basetype == SPIRType::Struct) + { + type.member_name_cache.clear(); + + uint32_t i = 0; + for (auto &member : type.member_types) + { + add_member_name(type, i); + emit_struct_member(type, member, i); + i++; + } + } + else + { + auto &pointee_type = get_pointee_type(type); + statement(type_to_glsl(pointee_type), " value", type_to_array_glsl(pointee_type), ";"); + } + + end_scope_decl(); + statement(""); + } + else + { + statement("layout(buffer_reference) buffer ", buffer_name, ";"); + } +} + +void CompilerGLSL::emit_buffer_block_native(const SPIRVariable &var) +{ + auto &type = get(var.basetype); + + Bitset flags = ir.get_buffer_block_flags(var); + bool ssbo = var.storage == StorageClassStorageBuffer || var.storage == StorageClassShaderRecordBufferNV || + ir.meta[type.self].decoration.decoration_flags.get(DecorationBufferBlock); + bool is_restrict = ssbo && flags.get(DecorationRestrict); + bool is_writeonly = ssbo && flags.get(DecorationNonReadable); + bool is_readonly = ssbo && flags.get(DecorationNonWritable); + bool is_coherent = ssbo && flags.get(DecorationCoherent); + + // Block names should never alias, but from HLSL input 
they kind of can because block types are reused for UAVs ... + auto buffer_name = to_name(type.self, false); + + auto &block_namespace = ssbo ? block_ssbo_names : block_ubo_names; + + // Shaders never use the block by interface name, so we don't + // have to track this other than updating name caches. + // If we have a collision for any reason, just fallback immediately. + if (ir.meta[type.self].decoration.alias.empty() || block_namespace.find(buffer_name) != end(block_namespace) || + resource_names.find(buffer_name) != end(resource_names)) + { + buffer_name = get_block_fallback_name(var.self); + } + + // Make sure we get something unique for both global name scope and block name scope. + // See GLSL 4.5 spec: section 4.3.9 for details. + add_variable(block_namespace, resource_names, buffer_name); + + // If for some reason buffer_name is an illegal name, make a final fallback to a workaround name. + // This cannot conflict with anything else, so we're safe now. + // We cannot reuse this fallback name in neither global scope (blocked by block_names) nor block name scope. + if (buffer_name.empty()) + buffer_name = join("_", get(var.basetype).self, "_", var.self); + + block_names.insert(buffer_name); + block_namespace.insert(buffer_name); + + // Save for post-reflection later. + declared_block_names[var.self] = buffer_name; + + statement(layout_for_variable(var), is_coherent ? "coherent " : "", is_restrict ? "restrict " : "", + is_writeonly ? "writeonly " : "", is_readonly ? "readonly " : "", ssbo ? "buffer " : "uniform ", + buffer_name); + + begin_scope(); + + type.member_name_cache.clear(); + + uint32_t i = 0; + for (auto &member : type.member_types) + { + add_member_name(type, i); + emit_struct_member(type, member, i); + i++; + } + + // var.self can be used as a backup name for the block name, + // so we need to make sure we don't disturb the name here on a recompile. + // It will need to be reset if we have to recompile. 
+ preserve_alias_on_reset(var.self); + add_resource_name(var.self); + end_scope_decl(to_name(var.self) + type_to_array_glsl(type)); + statement(""); +} + +/* Emits a flattened buffer block as one uniform array of 4-component vectors named after the block type. The element count is get_declared_struct_size(type) rounded up to the next multiple of 16 and divided by 16. Throws unless get_common_basic_type() finds one basic type for the block and that type is float, int or uint. */ +void CompilerGLSL::emit_buffer_block_flattened(const SPIRVariable &var) +{ + auto &type = get(var.basetype); + + // Block names should never alias. + auto buffer_name = to_name(type.self, false); + size_t buffer_size = (get_declared_struct_size(type) + 15) / 16; + + SPIRType::BaseType basic_type; + if (get_common_basic_type(type, basic_type)) + { + SPIRType tmp; + tmp.basetype = basic_type; + tmp.vecsize = 4; + if (basic_type != SPIRType::Float && basic_type != SPIRType::Int && basic_type != SPIRType::UInt) + SPIRV_CROSS_THROW("Basic types in a flattened UBO must be float, int or uint."); + + auto flags = ir.get_buffer_block_flags(var); + statement("uniform ", flags_to_qualifiers_glsl(tmp, flags), type_to_glsl(tmp), " ", buffer_name, "[", + buffer_size, "];"); + } + else + SPIRV_CROSS_THROW("All basic types in a flattened block must be the same."); +} + +/* Returns the GLSL storage qualifier for var as a string literal with a trailing space (for example "in " or "uniform "), or "" when no qualifier applies. */ +const char *CompilerGLSL::to_storage_qualifiers_glsl(const SPIRVariable &var) +{ + auto &execution = get_entry_point(); + + if (subpass_input_is_framebuffer_fetch(var.self)) + return ""; + + if (var.storage == StorageClassInput || var.storage == StorageClassOutput) + { + if (is_legacy() && execution.model == ExecutionModelVertex) + return var.storage == StorageClassInput ? "attribute " : "varying "; + else if (is_legacy() && execution.model == ExecutionModelFragment) + return "varying "; // Fragment outputs are renamed so they never hit this case. + else if (execution.model == ExecutionModelFragment && var.storage == StorageClassOutput) + { + if (inout_color_attachments.count(get_decoration(var.self, DecorationLocation)) != 0) + return "inout "; + else + return "out "; + } + else + return var.storage == StorageClassInput ?
"in " : "out "; + } + else if (var.storage == StorageClassUniformConstant || var.storage == StorageClassUniform || + var.storage == StorageClassPushConstant) + { + return "uniform "; + } + else if (var.storage == StorageClassRayPayloadNV) + { + return "rayPayloadNV "; + } + else if (var.storage == StorageClassIncomingRayPayloadNV) + { + return "rayPayloadInNV "; + } + else if (var.storage == StorageClassHitAttributeNV) + { + return "hitAttributeNV "; + } + else if (var.storage == StorageClassCallableDataNV) + { + return "callableDataNV "; + } + else if (var.storage == StorageClassIncomingCallableDataNV) + { + return "callableDataInNV "; + } + + return ""; +} + +void CompilerGLSL::emit_flattened_io_block_member(const std::string &basename, const SPIRType &type, const char *qual, + const SmallVector &indices) +{ + uint32_t member_type_id = type.self; + const SPIRType *member_type = &type; + const SPIRType *parent_type = nullptr; + auto flattened_name = basename; + for (auto &index : indices) + { + flattened_name += "_"; + flattened_name += to_member_name(*member_type, index); + parent_type = member_type; + member_type_id = member_type->member_types[index]; + member_type = &get(member_type_id); + } + + assert(member_type->basetype != SPIRType::Struct); + + // We're overriding struct member names, so ensure we do so on the primary type. + if (parent_type->type_alias) + parent_type = &get(parent_type->type_alias); + + // Sanitize underscores because joining the two identifiers might create more than 1 underscore in a row, + // which is not allowed. + ParsedIR::sanitize_underscores(flattened_name); + + uint32_t last_index = indices.back(); + + // Pass in the varying qualifier here so it will appear in the correct declaration order. + // Replace member name while emitting it so it encodes both struct name and member name. 
+ auto backup_name = get_member_name(parent_type->self, last_index); + auto member_name = to_member_name(*parent_type, last_index); + set_member_name(parent_type->self, last_index, flattened_name); + emit_struct_member(*parent_type, member_type_id, last_index, qual); + // Restore member name. + /* NOTE(review): backup_name is never read; the name written back below is member_name (from to_member_name), not backup_name. Confirm that this is intended. */ + set_member_name(parent_type->self, last_index, member_name); +} + +/* Flattens the struct member reached from type by following indices. Each member of that struct is handled in turn: nested structs recurse into this function, everything else goes to emit_flattened_io_block_member(). Throws when the struct member is an array. */ +void CompilerGLSL::emit_flattened_io_block_struct(const std::string &basename, const SPIRType &type, const char *qual, + const SmallVector &indices) +{ + auto sub_indices = indices; + sub_indices.push_back(0); + + const SPIRType *member_type = &type; + for (auto &index : indices) + member_type = &get(member_type->member_types[index]); + + assert(member_type->basetype == SPIRType::Struct); + + if (!member_type->array.empty()) + SPIRV_CROSS_THROW("Cannot flatten array of structs in I/O blocks."); + + for (uint32_t i = 0; i < uint32_t(member_type->member_types.size()); i++) + { + sub_indices.back() = i; + if (get(member_type->member_types[i]).basetype == SPIRType::Struct) + emit_flattened_io_block_struct(basename, type, qual, sub_indices); + else + emit_flattened_io_block_member(basename, type, qual, sub_indices); + } +} + +/* Flattens the struct-typed I/O variable var into one declaration per non-struct leaf member, each emitted with qualifier qual, and records var.self in flattened_structs. Throws when the variable type is an array. */ +void CompilerGLSL::emit_flattened_io_block(const SPIRVariable &var, const char *qual) +{ + auto &var_type = get(var.basetype); + if (!var_type.array.empty()) + SPIRV_CROSS_THROW("Array of varying structs cannot be flattened to legacy-compatible varyings."); + + // Emit flattened types based on the type alias. Normally, we are never supposed to emit + // struct declarations for aliased types. + auto &type = var_type.type_alias ? get(var_type.type_alias) : var_type; + + auto old_flags = ir.meta[type.self].decoration.decoration_flags; + // Emit the members as if they are part of a block to get all qualifiers.
+ ir.meta[type.self].decoration.decoration_flags.set(DecorationBlock); + + type.member_name_cache.clear(); + + SmallVector member_indices; + member_indices.push_back(0); + auto basename = to_name(var.self); + + uint32_t i = 0; + for (auto &member : type.member_types) + { + add_member_name(type, i); + auto &membertype = get(member); + + member_indices.back() = i; + if (membertype.basetype == SPIRType::Struct) + emit_flattened_io_block_struct(basename, type, qual, member_indices); + else + emit_flattened_io_block_member(basename, type, qual, member_indices); + i++; + } + + ir.meta[type.self].decoration.decoration_flags = old_flags; + + // Treat this variable as fully flattened from now on. + flattened_structs[var.self] = true; +} + +void CompilerGLSL::emit_interface_block(const SPIRVariable &var) +{ + auto &type = get(var.basetype); + + if (var.storage == StorageClassInput && type.basetype == SPIRType::Double && + !options.es && options.version < 410) + { + require_extension_internal("GL_ARB_vertex_attrib_64bit"); + } + + // Either make it plain in/out or in/out blocks depending on what shader is doing ... + bool block = ir.meta[type.self].decoration.decoration_flags.get(DecorationBlock); + const char *qual = to_storage_qualifiers_glsl(var); + + if (block) + { + // ESSL earlier than 310 and GLSL earlier than 150 did not support + // I/O variables which are struct types. + // To support this, flatten the struct into separate varyings instead. + if (options.force_flattened_io_blocks || (options.es && options.version < 310) || + (!options.es && options.version < 150)) + { + // I/O blocks on ES require version 310 with Android Extension Pack extensions, or core version 320. + // On desktop, I/O blocks were introduced with geometry shaders in GL 3.2 (GLSL 150). + emit_flattened_io_block(var, qual); + } + else + { + if (options.es && options.version < 320) + { + // Geometry and tessellation extensions imply this extension. 
+ if (!has_extension("GL_EXT_geometry_shader") && !has_extension("GL_EXT_tessellation_shader")) + require_extension_internal("GL_EXT_shader_io_blocks"); + } + + // Block names should never alias. + auto block_name = to_name(type.self, false); + + // The namespace for I/O blocks is separate from other variables in GLSL. + auto &block_namespace = type.storage == StorageClassInput ? block_input_names : block_output_names; + + // Shaders never use the block by interface name, so we don't + // have to track this other than updating name caches. + if (block_name.empty() || block_namespace.find(block_name) != end(block_namespace)) + block_name = get_fallback_name(type.self); + else + block_namespace.insert(block_name); + + // If for some reason block_name is an illegal name, make a final fallback to a workaround name. + // This cannot conflict with anything else, so we're safe now. + if (block_name.empty()) + block_name = join("_", get(var.basetype).self, "_", var.self); + + // Instance names cannot alias block names. + resource_names.insert(block_name); + + statement(layout_for_variable(var), qual, block_name); + begin_scope(); + + type.member_name_cache.clear(); + + uint32_t i = 0; + for (auto &member : type.member_types) + { + add_member_name(type, i); + emit_struct_member(type, member, i); + i++; + } + + add_resource_name(var.self); + end_scope_decl(join(to_name(var.self), type_to_array_glsl(type))); + statement(""); + } + } + else + { + // ESSL earlier than 310 and GLSL earlier than 150 did not support + // I/O variables which are struct types. + // To support this, flatten the struct into separate varyings instead.
+ if (type.basetype == SPIRType::Struct && + (options.force_flattened_io_blocks || (options.es && options.version < 310) || + (!options.es && options.version < 150))) + { + emit_flattened_io_block(var, qual); + } + else + { + add_resource_name(var.self); + + // Tessellation control and evaluation shaders must have either gl_MaxPatchVertices or unsized arrays for input arrays. + // Opt for unsized as it's the more "correct" variant to use. + bool control_point_input_array = type.storage == StorageClassInput && !type.array.empty() && + !has_decoration(var.self, DecorationPatch) && + (get_entry_point().model == ExecutionModelTessellationControl || + get_entry_point().model == ExecutionModelTessellationEvaluation); + + uint32_t old_array_size = 0; + bool old_array_size_literal = true; + + if (control_point_input_array) + { + swap(type.array.back(), old_array_size); + swap(type.array_size_literal.back(), old_array_size_literal); + } + + statement(layout_for_variable(var), to_qualifiers_glsl(var.self), + variable_decl(type, to_name(var.self), var.self), ";"); + + if (control_point_input_array) + { + swap(type.array.back(), old_array_size); + swap(type.array_size_literal.back(), old_array_size_literal); + } + + // If a StorageClassOutput variable has an initializer, we need to initialize it in main(). 
+ if (var.storage == StorageClassOutput && var.initializer) + { + /* NOTE(review): the hook below captures var by reference; this assumes the SPIRVariable outlives entry_func.fixup_hooks_in. Confirm. */ + auto &entry_func = this->get(ir.default_entry_point); + entry_func.fixup_hooks_in.push_back( + [&]() { statement(to_name(var.self), " = ", to_expression(var.initializer), ";"); }); + } + } + } +} + +/* Declares var as a uniform. For storage images (Image type with image.sampled == 2 and a dimension other than DimSubpassData) it first requires GL_ARB_shader_image_load_store on non-ES targets below version 420, and throws on ES targets below version 310. */ +void CompilerGLSL::emit_uniform(const SPIRVariable &var) +{ + auto &type = get(var.basetype); + if (type.basetype == SPIRType::Image && type.image.sampled == 2 && type.image.dim != DimSubpassData) + { + if (!options.es && options.version < 420) + require_extension_internal("GL_ARB_shader_image_load_store"); + else if (options.es && options.version < 310) + SPIRV_CROSS_THROW("At least ESSL 3.10 required for shader image load store."); + } + + add_resource_name(var.self); + statement(layout_for_variable(var), variable_decl(var), ";"); +} + +/* Returns the macro name for a constant: "SPIRV_CROSS_CONSTANT_ID_" followed by id. */ +string CompilerGLSL::constant_value_macro_name(uint32_t id) +{ + return join("SPIRV_CROSS_CONSTANT_ID_", id); +} + +/* Emits a const declaration for constant, initialised with constant_op_expression(constant). */ +void CompilerGLSL::emit_specialization_constant_op(const SPIRConstantOp &constant) +{ + auto &type = get(constant.basetype); + auto name = to_name(constant.self); + statement("const ", variable_decl(type, name), " = ", constant_op_expression(constant), ";"); +} + +/* Emits the declaration for constant. Work group size constants are skipped or reduced to macros. Constants with a SpecId decoration become layout(constant_id = ...) declarations under Vulkan semantics and #ifndef/#define macros otherwise. */ +void CompilerGLSL::emit_constant(const SPIRConstant &constant) +{ + auto &type = get(constant.constant_type); + auto name = to_name(constant.self); + + SpecializationConstant wg_x, wg_y, wg_z; + ID workgroup_size_id = get_work_group_size_specialization_constants(wg_x, wg_y, wg_z); + + // This specialization constant is implicitly declared by emitting layout() in; + if (constant.self == workgroup_size_id) + return; + + // These specialization constants are implicitly declared by emitting layout() in; + // In legacy GLSL, we will still need to emit macros for these, so a layout() in; declaration + // later can use macro overrides for work group size.
+ bool is_workgroup_size_constant = ConstantID(constant.self) == wg_x.id || ConstantID(constant.self) == wg_y.id || + ConstantID(constant.self) == wg_z.id; + + if (options.vulkan_semantics && is_workgroup_size_constant) + { + // Vulkan GLSL does not need to declare workgroup spec constants explicitly, it is handled in layout(). + return; + } + else if (!options.vulkan_semantics && is_workgroup_size_constant && + !has_decoration(constant.self, DecorationSpecId)) + { + // Only bother declaring a workgroup size if it is actually a specialization constant, because we need macros. + return; + } + + // Only scalars have constant IDs. + if (has_decoration(constant.self, DecorationSpecId)) + { + if (options.vulkan_semantics) + { + statement("layout(constant_id = ", get_decoration(constant.self, DecorationSpecId), ") const ", + variable_decl(type, name), " = ", constant_expression(constant), ";"); + } + else + { + const string &macro_name = constant.specialization_constant_macro_name; + statement("#ifndef ", macro_name); + statement("#define ", macro_name, " ", constant_expression(constant)); + statement("#endif"); + + // For workgroup size constants, only emit the macros.
+ if (!is_workgroup_size_constant) + statement("const ", variable_decl(type, name), " = ", macro_name, ";"); + } + } + else + { + statement("const ", variable_decl(type, name), " = ", constant_expression(constant), ";"); + } +} + +/* Empty in this class. NOTE(review): presumably an override point for other backends; confirm. */ +void CompilerGLSL::emit_entry_point_declarations() +{ +} + +/* Prefixes "_" to every alias that starts with "gl_" or is listed in keywords. This covers the aliases of non-hidden variables, of types, and of type members. IDs without metadata are skipped. */ +void CompilerGLSL::replace_illegal_names(const unordered_set &keywords) +{ + ir.for_each_typed_id([&](uint32_t, const SPIRVariable &var) { + if (is_hidden_variable(var)) + return; + + auto *meta = ir.find_meta(var.self); + if (!meta) + return; + + auto &m = meta->decoration; + if (m.alias.compare(0, 3, "gl_") == 0 || keywords.find(m.alias) != end(keywords)) + m.alias = join("_", m.alias); + }); + + ir.for_each_typed_id([&](uint32_t, const SPIRType &type) { + auto *meta = ir.find_meta(type.self); + if (!meta) + return; + + auto &m = meta->decoration; + if (m.alias.compare(0, 3, "gl_") == 0 || keywords.find(m.alias) != end(keywords)) + m.alias = join("_", m.alias); + + for (auto &memb : meta->members) + if (memb.alias.compare(0, 3, "gl_") == 0 || keywords.find(memb.alias) != end(keywords)) + memb.alias = join("_", memb.alias); + }); +} + +/* Runs replace_illegal_names(keywords) with the static table of GLSL function names and reserved words listed below. */ +void CompilerGLSL::replace_illegal_names() +{ + // clang-format off + static const unordered_set keywords = { + "abs", "acos", "acosh", "all", "any", "asin", "asinh", "atan", "atanh", + "atomicAdd", "atomicCompSwap", "atomicCounter", "atomicCounterDecrement", "atomicCounterIncrement", + "atomicExchange", "atomicMax", "atomicMin", "atomicOr", "atomicXor", + "bitCount", "bitfieldExtract", "bitfieldInsert", "bitfieldReverse", + "ceil", "cos", "cosh", "cross", "degrees", + "dFdx", "dFdxCoarse", "dFdxFine", + "dFdy", "dFdyCoarse", "dFdyFine", + "distance", "dot", "EmitStreamVertex", "EmitVertex", "EndPrimitive", "EndStreamPrimitive", "equal", "exp", "exp2", + "faceforward", "findLSB", "findMSB", "float16BitsToInt16", "float16BitsToUint16", "floatBitsToInt", "floatBitsToUint", "floor", "fma", "fract", + "frexp", "fwidth", "fwidthCoarse", "fwidthFine", +
"greaterThan", "greaterThanEqual", "groupMemoryBarrier", + "imageAtomicAdd", "imageAtomicAnd", "imageAtomicCompSwap", "imageAtomicExchange", "imageAtomicMax", "imageAtomicMin", "imageAtomicOr", "imageAtomicXor", + "imageLoad", "imageSamples", "imageSize", "imageStore", "imulExtended", "int16BitsToFloat16", "intBitsToFloat", "interpolateAtOffset", "interpolateAtCentroid", "interpolateAtSample", + "inverse", "inversesqrt", "isinf", "isnan", "ldexp", "length", "lessThan", "lessThanEqual", "log", "log2", + "matrixCompMult", "max", "memoryBarrier", "memoryBarrierAtomicCounter", "memoryBarrierBuffer", "memoryBarrierImage", "memoryBarrierShared", + "min", "mix", "mod", "modf", "noise", "noise1", "noise2", "noise3", "noise4", "normalize", "not", "notEqual", + "outerProduct", "packDouble2x32", "packHalf2x16", "packInt2x16", "packInt4x16", "packSnorm2x16", "packSnorm4x8", + "packUint2x16", "packUint4x16", "packUnorm2x16", "packUnorm4x8", "pow", + "radians", "reflect", "refract", "round", "roundEven", "sign", "sin", "sinh", "smoothstep", "sqrt", "step", + "tan", "tanh", "texelFetch", "texelFetchOffset", "texture", "textureGather", "textureGatherOffset", "textureGatherOffsets", + "textureGrad", "textureGradOffset", "textureLod", "textureLodOffset", "textureOffset", "textureProj", "textureProjGrad", + "textureProjGradOffset", "textureProjLod", "textureProjLodOffset", "textureProjOffset", "textureQueryLevels", "textureQueryLod", "textureSamples", "textureSize", + "transpose", "trunc", "uaddCarry", "uint16BitsToFloat16", "uintBitsToFloat", "umulExtended", "unpackDouble2x32", "unpackHalf2x16", "unpackInt2x16", "unpackInt4x16", + "unpackSnorm2x16", "unpackSnorm4x8", "unpackUint2x16", "unpackUint4x16", "unpackUnorm2x16", "unpackUnorm4x8", "usubBorrow", + + "active", "asm", "atomic_uint", "attribute", "bool", "break", "buffer", + "bvec2", "bvec3", "bvec4", "case", "cast", "centroid", "class", "coherent", "common", "const", "continue", "default", "discard", + "dmat2", "dmat2x2", 
"dmat2x3", "dmat2x4", "dmat3", "dmat3x2", "dmat3x3", "dmat3x4", "dmat4", "dmat4x2", "dmat4x3", "dmat4x4", + "do", "double", "dvec2", "dvec3", "dvec4", "else", "enum", "extern", "external", "false", "filter", "fixed", "flat", "float", + "for", "fvec2", "fvec3", "fvec4", "goto", "half", "highp", "hvec2", "hvec3", "hvec4", "if", "iimage1D", "iimage1DArray", + "iimage2D", "iimage2DArray", "iimage2DMS", "iimage2DMSArray", "iimage2DRect", "iimage3D", "iimageBuffer", "iimageCube", + "iimageCubeArray", "image1D", "image1DArray", "image2D", "image2DArray", "image2DMS", "image2DMSArray", "image2DRect", + "image3D", "imageBuffer", "imageCube", "imageCubeArray", "in", "inline", "inout", "input", "int", "interface", "invariant", + "isampler1D", "isampler1DArray", "isampler2D", "isampler2DArray", "isampler2DMS", "isampler2DMSArray", "isampler2DRect", + "isampler3D", "isamplerBuffer", "isamplerCube", "isamplerCubeArray", "ivec2", "ivec3", "ivec4", "layout", "long", "lowp", + "mat2", "mat2x2", "mat2x3", "mat2x4", "mat3", "mat3x2", "mat3x3", "mat3x4", "mat4", "mat4x2", "mat4x3", "mat4x4", "mediump", + "namespace", "noinline", "noperspective", "out", "output", "packed", "partition", "patch", "precise", "precision", "public", "readonly", + "resource", "restrict", "return", "sample", "sampler1D", "sampler1DArray", "sampler1DArrayShadow", + "sampler1DShadow", "sampler2D", "sampler2DArray", "sampler2DArrayShadow", "sampler2DMS", "sampler2DMSArray", + "sampler2DRect", "sampler2DRectShadow", "sampler2DShadow", "sampler3D", "sampler3DRect", "samplerBuffer", + "samplerCube", "samplerCubeArray", "samplerCubeArrayShadow", "samplerCubeShadow", "shared", "short", "sizeof", "smooth", "static", + "struct", "subroutine", "superp", "switch", "template", "this", "true", "typedef", "uimage1D", "uimage1DArray", "uimage2D", + "uimage2DArray", "uimage2DMS", "uimage2DMSArray", "uimage2DRect", "uimage3D", "uimageBuffer", "uimageCube", + "uimageCubeArray", "uint", "uniform", "union", "unsigned", 
"usampler1D", "usampler1DArray", "usampler2D", "usampler2DArray", + "usampler2DMS", "usampler2DMSArray", "usampler2DRect", "usampler3D", "usamplerBuffer", "usamplerCube", + "usamplerCubeArray", "using", "uvec2", "uvec3", "uvec4", "varying", "vec2", "vec3", "vec4", "void", "volatile", + "while", "writeonly", + }; + // clang-format on + + replace_illegal_names(keywords); +} + +void CompilerGLSL::replace_fragment_output(SPIRVariable &var) +{ + auto &m = ir.meta[var.self].decoration; + uint32_t location = 0; + if (m.decoration_flags.get(DecorationLocation)) + location = m.location; + + // If our variable is arrayed, we must not emit the array part of this as the SPIR-V will + // do the access chain part of this for us. + auto &type = get(var.basetype); + + if (type.array.empty()) + { + // Redirect the write to a specific render target in legacy GLSL. + m.alias = join("gl_FragData[", location, "]"); + + if (is_legacy_es() && location != 0) + require_extension_internal("GL_EXT_draw_buffers"); + } + else if (type.array.size() == 1) + { + // If location is non-zero, we probably have to add an offset. + // This gets really tricky since we'd have to inject an offset in the access chain. + // FIXME: This seems like an extremely odd-ball case, so it's probably fine to leave it like this for now. + m.alias = "gl_FragData"; + if (location != 0) + SPIRV_CROSS_THROW("Arrayed output variable used, but location is not 0. " + "This is unimplemented in SPIRV-Cross."); + + if (is_legacy_es()) + require_extension_internal("GL_EXT_draw_buffers"); + } + else + SPIRV_CROSS_THROW("Array-of-array output variable used. This cannot be implemented in legacy GLSL."); + + var.compat_builtin = true; // We don't want to declare this variable, but use the name as-is. 
+} + +/* Applies replace_fragment_output() to every Output-storage variable that is not a builtin, is not remapped and whose type is a pointer. */ +void CompilerGLSL::replace_fragment_outputs() +{ + ir.for_each_typed_id([&](uint32_t, SPIRVariable &var) { + auto &type = this->get(var.basetype); + + if (!is_builtin_variable(var) && !var.remapped_variable && type.pointer && var.storage == StorageClassOutput) + replace_fragment_output(var); + }); +} + +/* Returns expr adapted from input_components components to out_type.vecsize components. The result is expr unchanged when the sizes match; a constructor call of the output type around expr for a scalar input when the backend cannot swizzle scalars; otherwise a swizzle whose indices are clamped to the last input component. */ +string CompilerGLSL::remap_swizzle(const SPIRType &out_type, uint32_t input_components, const string &expr) +{ + if (out_type.vecsize == input_components) + return expr; + else if (input_components == 1 && !backend.can_swizzle_scalar) + return join(type_to_glsl(out_type), "(", expr, ")"); + else + { + // FIXME: This will not work with packed expressions. + auto e = enclose_expression(expr) + "."; + // Just clamp the swizzle index if we have more outputs than inputs. + for (uint32_t c = 0; c < out_type.vecsize; c++) + e += index_to_swizzle(min(c, input_components - 1)); + if (backend.swizzle_is_function && out_type.vecsize > 1) + e += "()"; + + remove_duplicate_swizzle(e); + return e; + } +} + +/* Emits the pixel local storage blocks _PLSIn and _PLSOut for the entries of pls_inputs and pls_outputs; an empty list emits nothing. Throws unless the entry point is a fragment shader, options.es is set and options.version is at least 300. */ +void CompilerGLSL::emit_pls() +{ + auto &execution = get_entry_point(); + if (execution.model != ExecutionModelFragment) + SPIRV_CROSS_THROW("Pixel local storage only supported in fragment shaders."); + + if (!options.es) + SPIRV_CROSS_THROW("Pixel local storage only supported in OpenGL ES."); + + if (options.version < 300) + SPIRV_CROSS_THROW("Pixel local storage only supported in ESSL 3.0 and above."); + + if (!pls_inputs.empty()) + { + statement("__pixel_local_inEXT _PLSIn"); + begin_scope(); + for (auto &input : pls_inputs) + statement(pls_decl(input), ";"); + end_scope_decl(); + statement(""); + } + + if (!pls_outputs.empty()) + { + statement("__pixel_local_outEXT _PLSOut"); + begin_scope(); + for (auto &output : pls_outputs) + statement(pls_decl(output), ";"); + end_scope_decl(); + statement(""); + } +} + +/* When options.enable_storage_image_qualifier_deduction is set, marks every storage image variable that has neither NonWritable nor NonReadable set with both flags. */ +void CompilerGLSL::fixup_image_load_store_access() +{ + if (!options.enable_storage_image_qualifier_deduction) + return; + + ir.for_each_typed_id([&](uint32_t
var, const SPIRVariable &) { + auto &vartype = expression_type(var); + if (vartype.basetype == SPIRType::Image && vartype.image.sampled == 2) + { + // Very old glslangValidator and HLSL compilers do not emit required qualifiers here. + // Solve this by making the image access as restricted as possible and loosen up if we need to. + // If any no-read/no-write flags are actually set, assume that the compiler knows what it's doing. + + auto &flags = ir.meta[var].decoration.decoration_flags; + if (!flags.get(DecorationNonWritable) && !flags.get(DecorationNonReadable)) + { + flags.set(DecorationNonWritable); + flags.set(DecorationNonReadable); + } + } + }); +} + +/* True for the four builtins Position, PointSize, ClipDistance and CullDistance. */ +static bool is_block_builtin(BuiltIn builtin) +{ + return builtin == BuiltInPosition || builtin == BuiltInPointSize || builtin == BuiltInClipDistance || + builtin == BuiltInCullDistance; +} + +/* Returns true when storage is Output and a builtin variable, or a member of a builtin block, that is one of the is_block_builtin() builtins carries an Offset decoration. Returns false for any other storage class. */ +bool CompilerGLSL::should_force_emit_builtin_block(StorageClass storage) +{ + // If the builtin block uses XFB, we need to force explicit redeclaration of the builtin block.
+ + if (storage != StorageClassOutput) + return false; + bool should_force = false; + + ir.for_each_typed_id([&](uint32_t, SPIRVariable &var) { + if (should_force) + return; + + auto &type = this->get(var.basetype); + bool block = has_decoration(type.self, DecorationBlock); + if (var.storage == storage && block && is_builtin_variable(var)) + { + uint32_t member_count = uint32_t(type.member_types.size()); + for (uint32_t i = 0; i < member_count; i++) + { + if (has_member_decoration(type.self, i, DecorationBuiltIn) && + is_block_builtin(BuiltIn(get_member_decoration(type.self, i, DecorationBuiltIn))) && + has_member_decoration(type.self, i, DecorationOffset)) + { + should_force = true; + } + } + } + else if (var.storage == storage && !block && is_builtin_variable(var)) + { + /* NOTE(review): the BuiltIn decoration is read from type.self here while Offset is read from var.self. Confirm that type.self is intended. */ + if (is_block_builtin(BuiltIn(get_decoration(type.self, DecorationBuiltIn))) && + has_decoration(var.self, DecorationOffset)) + { + should_force = true; + } + } + }); + + return should_force; +} + +/* Renames every builtin Block variable: "gl_out" for Output storage, "gl_in" for Input storage. */ +void CompilerGLSL::fixup_implicit_builtin_block_names() +{ + ir.for_each_typed_id([&](uint32_t, SPIRVariable &var) { + auto &type = this->get(var.basetype); + bool block = has_decoration(type.self, DecorationBlock); + if ((var.storage == StorageClassOutput || var.storage == StorageClassInput) && block && + is_builtin_variable(var)) + { + // Make sure the array has a supported name in the code. + if (var.storage == StorageClassOutput) + set_name(var.self, "gl_out"); + else if (var.storage == StorageClassInput) + set_name(var.self, "gl_in"); + } + }); +} + +void CompilerGLSL::emit_declared_builtin_block(StorageClass storage, ExecutionModel model) +{ + Bitset emitted_builtins; + Bitset global_builtins; + const SPIRVariable *block_var = nullptr; + bool emitted_block = false; + bool builtin_array = false; + + // Need to use declared size in the type. + // These variables might have been declared, but not statically used, so we haven't deduced their size yet.
+ uint32_t cull_distance_size = 0; + uint32_t clip_distance_size = 0; + + bool have_xfb_buffer_stride = false; + bool have_geom_stream = false; + bool have_any_xfb_offset = false; + uint32_t xfb_stride = 0, xfb_buffer = 0, geom_stream = 0; + std::unordered_map builtin_xfb_offsets; + + ir.for_each_typed_id([&](uint32_t, SPIRVariable &var) { + auto &type = this->get(var.basetype); + bool block = has_decoration(type.self, DecorationBlock); + Bitset builtins; + + if (var.storage == storage && block && is_builtin_variable(var)) + { + uint32_t index = 0; + for (auto &m : ir.meta[type.self].members) + { + if (m.builtin) + { + builtins.set(m.builtin_type); + if (m.builtin_type == BuiltInCullDistance) + cull_distance_size = to_array_size_literal(this->get(type.member_types[index])); + else if (m.builtin_type == BuiltInClipDistance) + clip_distance_size = to_array_size_literal(this->get(type.member_types[index])); + + if (is_block_builtin(m.builtin_type) && m.decoration_flags.get(DecorationOffset)) + { + have_any_xfb_offset = true; + builtin_xfb_offsets[m.builtin_type] = m.offset; + } + + if (is_block_builtin(m.builtin_type) && m.decoration_flags.get(DecorationStream)) + { + uint32_t stream = m.stream; + if (have_geom_stream && geom_stream != stream) + SPIRV_CROSS_THROW("IO block member Stream mismatch."); + have_geom_stream = true; + geom_stream = stream; + } + } + index++; + } + + if (storage == StorageClassOutput && has_decoration(var.self, DecorationXfbBuffer) && + has_decoration(var.self, DecorationXfbStride)) + { + uint32_t buffer_index = get_decoration(var.self, DecorationXfbBuffer); + uint32_t stride = get_decoration(var.self, DecorationXfbStride); + if (have_xfb_buffer_stride && buffer_index != xfb_buffer) + SPIRV_CROSS_THROW("IO block member XfbBuffer mismatch."); + if (have_xfb_buffer_stride && stride != xfb_stride) + SPIRV_CROSS_THROW("IO block member XfbBuffer mismatch."); + have_xfb_buffer_stride = true; + xfb_buffer = buffer_index; + xfb_stride = stride; + } + 
+ if (storage == StorageClassOutput && has_decoration(var.self, DecorationStream)) + { + uint32_t stream = get_decoration(var.self, DecorationStream); + if (have_geom_stream && geom_stream != stream) + SPIRV_CROSS_THROW("IO block member Stream mismatch."); + have_geom_stream = true; + geom_stream = stream; + } + } + else if (var.storage == storage && !block && is_builtin_variable(var)) + { + // While we're at it, collect all declared global builtins (HLSL mostly ...). + auto &m = ir.meta[var.self].decoration; + if (m.builtin) + { + global_builtins.set(m.builtin_type); + if (m.builtin_type == BuiltInCullDistance) + cull_distance_size = to_array_size_literal(type); + else if (m.builtin_type == BuiltInClipDistance) + clip_distance_size = to_array_size_literal(type); + + if (is_block_builtin(m.builtin_type) && m.decoration_flags.get(DecorationXfbStride) && + m.decoration_flags.get(DecorationXfbBuffer) && m.decoration_flags.get(DecorationOffset)) + { + have_any_xfb_offset = true; + builtin_xfb_offsets[m.builtin_type] = m.offset; + uint32_t buffer_index = m.xfb_buffer; + uint32_t stride = m.xfb_stride; + if (have_xfb_buffer_stride && buffer_index != xfb_buffer) + SPIRV_CROSS_THROW("IO block member XfbBuffer mismatch."); + if (have_xfb_buffer_stride && stride != xfb_stride) + SPIRV_CROSS_THROW("IO block member XfbBuffer mismatch."); + have_xfb_buffer_stride = true; + xfb_buffer = buffer_index; + xfb_stride = stride; + } + + if (is_block_builtin(m.builtin_type) && m.decoration_flags.get(DecorationStream)) + { + uint32_t stream = get_decoration(var.self, DecorationStream); + if (have_geom_stream && geom_stream != stream) + SPIRV_CROSS_THROW("IO block member Stream mismatch."); + have_geom_stream = true; + geom_stream = stream; + } + } + } + + if (builtins.empty()) + return; + + if (emitted_block) + SPIRV_CROSS_THROW("Cannot use more than one builtin I/O block."); + + emitted_builtins = builtins; + emitted_block = true; + builtin_array = !type.array.empty(); + block_var = 
&var; + }); + + global_builtins = + Bitset(global_builtins.get_lower() & ((1ull << BuiltInPosition) | (1ull << BuiltInPointSize) | + (1ull << BuiltInClipDistance) | (1ull << BuiltInCullDistance))); + + // Try to collect all other declared builtins. + if (!emitted_block) + emitted_builtins = global_builtins; + + // Can't declare an empty interface block. + if (emitted_builtins.empty()) + return; + + if (storage == StorageClassOutput) + { + SmallVector attr; + if (have_xfb_buffer_stride && have_any_xfb_offset) + { + if (!options.es) + { + if (options.version < 440 && options.version >= 140) + require_extension_internal("GL_ARB_enhanced_layouts"); + else if (options.version < 140) + SPIRV_CROSS_THROW("Component decoration is not supported in targets below GLSL 1.40."); + if (!options.es && options.version < 440) + require_extension_internal("GL_ARB_enhanced_layouts"); + } + else if (options.es) + SPIRV_CROSS_THROW("Need GL_ARB_enhanced_layouts for xfb_stride or xfb_buffer."); + attr.push_back(join("xfb_buffer = ", xfb_buffer, ", xfb_stride = ", xfb_stride)); + } + + if (have_geom_stream) + { + if (get_execution_model() != ExecutionModelGeometry) + SPIRV_CROSS_THROW("Geometry streams can only be used in geometry shaders."); + if (options.es) + SPIRV_CROSS_THROW("Multiple geometry streams not supported in ESSL."); + if (options.version < 400) + require_extension_internal("GL_ARB_transform_feedback3"); + attr.push_back(join("stream = ", geom_stream)); + } + + if (!attr.empty()) + statement("layout(", merge(attr), ") out gl_PerVertex"); + else + statement("out gl_PerVertex"); + } + else + { + // If we have passthrough, there is no way PerVertex cannot be passthrough. 
+ if (get_entry_point().geometry_passthrough) + statement("layout(passthrough) in gl_PerVertex"); + else + statement("in gl_PerVertex"); + } + + begin_scope(); + if (emitted_builtins.get(BuiltInPosition)) + { + auto itr = builtin_xfb_offsets.find(BuiltInPosition); + if (itr != end(builtin_xfb_offsets)) + statement("layout(xfb_offset = ", itr->second, ") vec4 gl_Position;"); + else + statement("vec4 gl_Position;"); + } + + if (emitted_builtins.get(BuiltInPointSize)) + { + auto itr = builtin_xfb_offsets.find(BuiltInPointSize); + if (itr != end(builtin_xfb_offsets)) + statement("layout(xfb_offset = ", itr->second, ") float gl_PointSize;"); + else + statement("float gl_PointSize;"); + } + + if (emitted_builtins.get(BuiltInClipDistance)) + { + auto itr = builtin_xfb_offsets.find(BuiltInClipDistance); + if (itr != end(builtin_xfb_offsets)) + statement("layout(xfb_offset = ", itr->second, ") float gl_ClipDistance[", clip_distance_size, "];"); + else + statement("float gl_ClipDistance[", clip_distance_size, "];"); + } + + if (emitted_builtins.get(BuiltInCullDistance)) + { + auto itr = builtin_xfb_offsets.find(BuiltInCullDistance); + if (itr != end(builtin_xfb_offsets)) + statement("layout(xfb_offset = ", itr->second, ") float gl_CullDistance[", cull_distance_size, "];"); + else + statement("float gl_CullDistance[", cull_distance_size, "];"); + } + + if (builtin_array) + { + if (model == ExecutionModelTessellationControl && storage == StorageClassOutput) + end_scope_decl(join(to_name(block_var->self), "[", get_entry_point().output_vertices, "]")); + else + end_scope_decl(join(to_name(block_var->self), "[]")); + } + else + end_scope_decl(); + statement(""); +} + +void CompilerGLSL::declare_undefined_values() +{ + bool emitted = false; + ir.for_each_typed_id([&](uint32_t, const SPIRUndef &undef) { + auto &type = this->get(undef.basetype); + // OpUndef can be void for some reason ... 
+ if (type.basetype == SPIRType::Void) + return; + + string initializer; + if (options.force_zero_initialized_variables && type_can_zero_initialize(type)) + initializer = join(" = ", to_zero_initialized_expression(undef.basetype)); + + statement(variable_decl(type, to_name(undef.self), undef.self), initializer, ";"); + emitted = true; + }); + + if (emitted) + statement(""); +} + +bool CompilerGLSL::variable_is_lut(const SPIRVariable &var) const +{ + bool statically_assigned = var.statically_assigned && var.static_expression != ID(0) && var.remapped_variable; + + if (statically_assigned) + { + auto *constant = maybe_get(var.static_expression); + if (constant && constant->is_used_as_lut) + return true; + } + + return false; +} + +void CompilerGLSL::emit_resources() +{ + auto &execution = get_entry_point(); + + replace_illegal_names(); + + // Legacy GL uses gl_FragData[], redeclare all fragment outputs + // with builtins. + if (execution.model == ExecutionModelFragment && is_legacy()) + replace_fragment_outputs(); + + // Emit PLS blocks if we have such variables. + if (!pls_inputs.empty() || !pls_outputs.empty()) + emit_pls(); + + switch (execution.model) + { + case ExecutionModelGeometry: + case ExecutionModelTessellationControl: + case ExecutionModelTessellationEvaluation: + fixup_implicit_builtin_block_names(); + break; + + default: + break; + } + + // Emit custom gl_PerVertex for SSO compatibility. 
+ if (options.separate_shader_objects && !options.es && execution.model != ExecutionModelFragment) + { + switch (execution.model) + { + case ExecutionModelGeometry: + case ExecutionModelTessellationControl: + case ExecutionModelTessellationEvaluation: + emit_declared_builtin_block(StorageClassInput, execution.model); + emit_declared_builtin_block(StorageClassOutput, execution.model); + break; + + case ExecutionModelVertex: + emit_declared_builtin_block(StorageClassOutput, execution.model); + break; + + default: + break; + } + } + else if (should_force_emit_builtin_block(StorageClassOutput)) + { + emit_declared_builtin_block(StorageClassOutput, execution.model); + } + else if (execution.geometry_passthrough) + { + // Need to declare gl_in with Passthrough. + // If we're doing passthrough, we cannot emit an output block, so the output block test above will never pass. + emit_declared_builtin_block(StorageClassInput, execution.model); + } + else + { + // Need to redeclare clip/cull distance with explicit size to use them. + // SPIR-V mandates these builtins have a size declared. + const char *storage = execution.model == ExecutionModelFragment ? "in" : "out"; + if (clip_distance_count != 0) + statement(storage, " float gl_ClipDistance[", clip_distance_count, "];"); + if (cull_distance_count != 0) + statement(storage, " float gl_CullDistance[", cull_distance_count, "];"); + if (clip_distance_count != 0 || cull_distance_count != 0) + statement(""); + } + + if (position_invariant) + { + statement("invariant gl_Position;"); + statement(""); + } + + bool emitted = false; + + // If emitted Vulkan GLSL, + // emit specialization constants as actual floats, + // spec op expressions will redirect to the constant name. 
+ // + { + auto loop_lock = ir.create_loop_hard_lock(); + for (auto &id_ : ir.ids_for_constant_or_type) + { + auto &id = ir.ids[id_]; + + if (id.get_type() == TypeConstant) + { + auto &c = id.get(); + + bool needs_declaration = c.specialization || c.is_used_as_lut; + + if (needs_declaration) + { + if (!options.vulkan_semantics && c.specialization) + { + c.specialization_constant_macro_name = + constant_value_macro_name(get_decoration(c.self, DecorationSpecId)); + } + emit_constant(c); + emitted = true; + } + } + else if (id.get_type() == TypeConstantOp) + { + emit_specialization_constant_op(id.get()); + emitted = true; + } + else if (id.get_type() == TypeType) + { + auto *type = &id.get(); + + bool is_natural_struct = type->basetype == SPIRType::Struct && type->array.empty() && !type->pointer && + (!has_decoration(type->self, DecorationBlock) && + !has_decoration(type->self, DecorationBufferBlock)); + + // Special case, ray payload and hit attribute blocks are not really blocks, just regular structs. + if (type->basetype == SPIRType::Struct && type->pointer && + has_decoration(type->self, DecorationBlock) && + (type->storage == StorageClassRayPayloadNV || type->storage == StorageClassIncomingRayPayloadNV || + type->storage == StorageClassHitAttributeNV)) + { + type = &get(type->parent_type); + is_natural_struct = true; + } + + if (is_natural_struct) + { + if (emitted) + statement(""); + emitted = false; + + emit_struct(*type); + } + } + } + } + + if (emitted) + statement(""); + + // If we needed to declare work group size late, check here. + // If the work group size depends on a specialization constant, we need to declare the layout() block + // after constants (and their macros) have been declared. 
+ if (execution.model == ExecutionModelGLCompute && !options.vulkan_semantics && + execution.workgroup_size.constant != 0) + { + SpecializationConstant wg_x, wg_y, wg_z; + get_work_group_size_specialization_constants(wg_x, wg_y, wg_z); + + if ((wg_x.id != ConstantID(0)) || (wg_y.id != ConstantID(0)) || (wg_z.id != ConstantID(0))) + { + SmallVector inputs; + build_workgroup_size(inputs, wg_x, wg_y, wg_z); + statement("layout(", merge(inputs), ") in;"); + statement(""); + } + } + + emitted = false; + + if (ir.addressing_model == AddressingModelPhysicalStorageBuffer64EXT) + { + for (auto type : physical_storage_non_block_pointer_types) + { + emit_buffer_reference_block(get(type), false); + } + + // Output buffer reference blocks. + // Do this in two stages, one with forward declaration, + // and one without. Buffer reference blocks can reference themselves + // to support things like linked lists. + ir.for_each_typed_id([&](uint32_t, SPIRType &type) { + bool has_block_flags = has_decoration(type.self, DecorationBlock); + if (has_block_flags && type.pointer && type.pointer_depth == 1 && !type_is_array_of_pointers(type) && + type.storage == StorageClassPhysicalStorageBufferEXT) + { + emit_buffer_reference_block(type, true); + } + }); + + ir.for_each_typed_id([&](uint32_t, SPIRType &type) { + bool has_block_flags = has_decoration(type.self, DecorationBlock); + if (has_block_flags && type.pointer && type.pointer_depth == 1 && !type_is_array_of_pointers(type) && + type.storage == StorageClassPhysicalStorageBufferEXT) + { + emit_buffer_reference_block(type, false); + } + }); + } + + // Output UBOs and SSBOs + ir.for_each_typed_id([&](uint32_t, SPIRVariable &var) { + auto &type = this->get(var.basetype); + + bool is_block_storage = type.storage == StorageClassStorageBuffer || type.storage == StorageClassUniform || + type.storage == StorageClassShaderRecordBufferNV; + bool has_block_flags = ir.meta[type.self].decoration.decoration_flags.get(DecorationBlock) || + 
ir.meta[type.self].decoration.decoration_flags.get(DecorationBufferBlock); + + if (var.storage != StorageClassFunction && type.pointer && is_block_storage && !is_hidden_variable(var) && + has_block_flags) + { + emit_buffer_block(var); + } + }); + + // Output push constant blocks + ir.for_each_typed_id([&](uint32_t, SPIRVariable &var) { + auto &type = this->get(var.basetype); + if (var.storage != StorageClassFunction && type.pointer && type.storage == StorageClassPushConstant && + !is_hidden_variable(var)) + { + emit_push_constant_block(var); + } + }); + + bool skip_separate_image_sampler = !combined_image_samplers.empty() || !options.vulkan_semantics; + + // Output Uniform Constants (values, samplers, images, etc). + ir.for_each_typed_id([&](uint32_t, SPIRVariable &var) { + auto &type = this->get(var.basetype); + + // If we're remapping separate samplers and images, only emit the combined samplers. + if (skip_separate_image_sampler) + { + // Sampler buffers are always used without a sampler, and they will also work in regular GL. + bool sampler_buffer = type.basetype == SPIRType::Image && type.image.dim == DimBuffer; + bool separate_image = type.basetype == SPIRType::Image && type.image.sampled == 1; + bool separate_sampler = type.basetype == SPIRType::Sampler; + if (!sampler_buffer && (separate_image || separate_sampler)) + return; + } + + if (var.storage != StorageClassFunction && type.pointer && + (type.storage == StorageClassUniformConstant || type.storage == StorageClassAtomicCounter || + type.storage == StorageClassRayPayloadNV || type.storage == StorageClassIncomingRayPayloadNV || + type.storage == StorageClassCallableDataNV || type.storage == StorageClassIncomingCallableDataNV || + type.storage == StorageClassHitAttributeNV) && + !is_hidden_variable(var)) + { + emit_uniform(var); + emitted = true; + } + }); + + if (emitted) + statement(""); + emitted = false; + + bool emitted_base_instance = false; + + // Output in/out interfaces. 
+ ir.for_each_typed_id([&](uint32_t, SPIRVariable &var) { + auto &type = this->get(var.basetype); + + bool is_hidden = is_hidden_variable(var); + + // Unused output I/O variables might still be required to implement framebuffer fetch. + if (var.storage == StorageClassOutput && !is_legacy() && + inout_color_attachments.count(get_decoration(var.self, DecorationLocation)) != 0) + { + is_hidden = false; + } + + if (var.storage != StorageClassFunction && type.pointer && + (var.storage == StorageClassInput || var.storage == StorageClassOutput) && + interface_variable_exists_in_entry_point(var.self) && !is_hidden) + { + emit_interface_block(var); + emitted = true; + } + else if (is_builtin_variable(var)) + { + auto builtin = BuiltIn(get_decoration(var.self, DecorationBuiltIn)); + // For gl_InstanceIndex emulation on GLES, the API user needs to + // supply this uniform. + + // The draw parameter extension is soft-enabled on GL with some fallbacks. + if (!options.vulkan_semantics) + { + if (!emitted_base_instance && + ((options.vertex.support_nonzero_base_instance && builtin == BuiltInInstanceIndex) || + (builtin == BuiltInBaseInstance))) + { + statement("#ifdef GL_ARB_shader_draw_parameters"); + statement("#define SPIRV_Cross_BaseInstance gl_BaseInstanceARB"); + statement("#else"); + // A crude, but simple workaround which should be good enough for non-indirect draws. + statement("uniform int SPIRV_Cross_BaseInstance;"); + statement("#endif"); + emitted = true; + emitted_base_instance = true; + } + else if (builtin == BuiltInBaseVertex) + { + statement("#ifdef GL_ARB_shader_draw_parameters"); + statement("#define SPIRV_Cross_BaseVertex gl_BaseVertexARB"); + statement("#else"); + // A crude, but simple workaround which should be good enough for non-indirect draws. + statement("uniform int SPIRV_Cross_BaseVertex;"); + statement("#endif"); + } + else if (builtin == BuiltInDrawIndex) + { + statement("#ifndef GL_ARB_shader_draw_parameters"); + // Cannot really be worked around. 
+ statement("#error GL_ARB_shader_draw_parameters is not supported."); + statement("#endif"); + } + } + } + }); + + // Global variables. + for (auto global : global_variables) + { + auto &var = get(global); + if (var.storage != StorageClassOutput) + { + if (!variable_is_lut(var)) + { + add_resource_name(var.self); + + string initializer; + if (options.force_zero_initialized_variables && var.storage == StorageClassPrivate && + !var.initializer && !var.static_expression && type_can_zero_initialize(get_variable_data_type(var))) + { + initializer = join(" = ", to_zero_initialized_expression(get_variable_data_type_id(var))); + } + + statement(variable_decl(var), initializer, ";"); + emitted = true; + } + } + } + + if (emitted) + statement(""); + + declare_undefined_values(); +} + +void CompilerGLSL::emit_extension_workarounds(spv::ExecutionModel model) +{ + static const char *workaround_types[] = { "int", "ivec2", "ivec3", "ivec4", "uint", "uvec2", "uvec3", "uvec4", + "float", "vec2", "vec3", "vec4", "double", "dvec2", "dvec3", "dvec4" }; + + if (!options.vulkan_semantics) + { + using Supp = ShaderSubgroupSupportHelper; + auto result = shader_subgroup_supporter.resolve(); + + if (shader_subgroup_supporter.is_feature_requested(Supp::SubgroupMask)) + { + auto exts = Supp::get_candidates_for_feature(Supp::SubgroupMask, result); + + for (auto &e : exts) + { + const char *name = Supp::get_extension_name(e); + statement(&e == &exts.front() ? 
"#if" : "#elif", " defined(", name, ")"); + + switch (e) + { + case Supp::NV_shader_thread_group: + statement("#define gl_SubgroupEqMask uvec4(gl_ThreadEqMaskNV, 0u, 0u, 0u)"); + statement("#define gl_SubgroupGeMask uvec4(gl_ThreadGeMaskNV, 0u, 0u, 0u)"); + statement("#define gl_SubgroupGtMask uvec4(gl_ThreadGtMaskNV, 0u, 0u, 0u)"); + statement("#define gl_SubgroupLeMask uvec4(gl_ThreadLeMaskNV, 0u, 0u, 0u)"); + statement("#define gl_SubgroupLtMask uvec4(gl_ThreadLtMaskNV, 0u, 0u, 0u)"); + break; + case Supp::ARB_shader_ballot: + statement("#define gl_SubgroupEqMask uvec4(unpackUint2x32(gl_SubGroupEqMaskARB), 0u, 0u)"); + statement("#define gl_SubgroupGeMask uvec4(unpackUint2x32(gl_SubGroupGeMaskARB), 0u, 0u)"); + statement("#define gl_SubgroupGtMask uvec4(unpackUint2x32(gl_SubGroupGtMaskARB), 0u, 0u)"); + statement("#define gl_SubgroupLeMask uvec4(unpackUint2x32(gl_SubGroupLeMaskARB), 0u, 0u)"); + statement("#define gl_SubgroupLtMask uvec4(unpackUint2x32(gl_SubGroupLtMaskARB), 0u, 0u)"); + break; + default: + break; + } + } + statement("#endif"); + statement(""); + } + + if (shader_subgroup_supporter.is_feature_requested(Supp::SubgroupSize)) + { + auto exts = Supp::get_candidates_for_feature(Supp::SubgroupSize, result); + + for (auto &e : exts) + { + const char *name = Supp::get_extension_name(e); + statement(&e == &exts.front() ? 
"#if" : "#elif", " defined(", name, ")"); + + switch (e) + { + case Supp::NV_shader_thread_group: + statement("#define gl_SubgroupSize gl_WarpSizeNV"); + break; + case Supp::ARB_shader_ballot: + statement("#define gl_SubgroupSize gl_SubGroupSizeARB"); + break; + case Supp::AMD_gcn_shader: + statement("#define gl_SubgroupSize uint(gl_SIMDGroupSizeAMD)"); + break; + default: + break; + } + } + statement("#endif"); + statement(""); + } + + if (shader_subgroup_supporter.is_feature_requested(Supp::SubgroupInvocationID)) + { + auto exts = Supp::get_candidates_for_feature(Supp::SubgroupInvocationID, result); + + for (auto &e : exts) + { + const char *name = Supp::get_extension_name(e); + statement(&e == &exts.front() ? "#if" : "#elif", " defined(", name, ")"); + + switch (e) + { + case Supp::NV_shader_thread_group: + statement("#define gl_SubgroupInvocationID gl_ThreadInWarpNV"); + break; + case Supp::ARB_shader_ballot: + statement("#define gl_SubgroupInvocationID gl_SubGroupInvocationARB"); + break; + default: + break; + } + } + statement("#endif"); + statement(""); + } + + if (shader_subgroup_supporter.is_feature_requested(Supp::SubgroupID)) + { + auto exts = Supp::get_candidates_for_feature(Supp::SubgroupID, result); + + for (auto &e : exts) + { + const char *name = Supp::get_extension_name(e); + statement(&e == &exts.front() ? "#if" : "#elif", " defined(", name, ")"); + + switch (e) + { + case Supp::NV_shader_thread_group: + statement("#define gl_SubgroupID gl_WarpIDNV"); + break; + default: + break; + } + } + statement("#endif"); + statement(""); + } + + if (shader_subgroup_supporter.is_feature_requested(Supp::NumSubgroups)) + { + auto exts = Supp::get_candidates_for_feature(Supp::NumSubgroups, result); + + for (auto &e : exts) + { + const char *name = Supp::get_extension_name(e); + statement(&e == &exts.front() ? 
"#if" : "#elif", " defined(", name, ")"); + + switch (e) + { + case Supp::NV_shader_thread_group: + statement("#define gl_NumSubgroups gl_WarpsPerSMNV"); + break; + default: + break; + } + } + statement("#endif"); + statement(""); + } + + if (shader_subgroup_supporter.is_feature_requested(Supp::SubgroupBrodcast_First)) + { + auto exts = Supp::get_candidates_for_feature(Supp::SubgroupBrodcast_First, result); + + for (auto &e : exts) + { + const char *name = Supp::get_extension_name(e); + statement(&e == &exts.front() ? "#if" : "#elif", " defined(", name, ")"); + + switch (e) + { + case Supp::NV_shader_thread_shuffle: + for (const char *t : workaround_types) + { + statement(t, " subgroupBroadcastFirst(", t, + " value) { return shuffleNV(value, findLSB(ballotThreadNV(true)), gl_WarpSizeNV); }"); + } + for (const char *t : workaround_types) + { + statement(t, " subgroupBroadcast(", t, + " value, uint id) { return shuffleNV(value, id, gl_WarpSizeNV); }"); + } + break; + case Supp::ARB_shader_ballot: + for (const char *t : workaround_types) + { + statement(t, " subgroupBroadcastFirst(", t, + " value) { return readFirstInvocationARB(value); }"); + } + for (const char *t : workaround_types) + { + statement(t, " subgroupBroadcast(", t, + " value, uint id) { return readInvocationARB(value, id); }"); + } + break; + default: + break; + } + } + statement("#endif"); + statement(""); + } + + if (shader_subgroup_supporter.is_feature_requested(Supp::SubgroupBallotFindLSB_MSB)) + { + auto exts = Supp::get_candidates_for_feature(Supp::SubgroupBallotFindLSB_MSB, result); + + for (auto &e : exts) + { + const char *name = Supp::get_extension_name(e); + statement(&e == &exts.front() ? 
"#if" : "#elif", " defined(", name, ")"); + + switch (e) + { + case Supp::NV_shader_thread_group: + statement("uint subgroupBallotFindLSB(uvec4 value) { return findLSB(value.x); }"); + statement("uint subgroupBallotFindMSB(uvec4 value) { return findMSB(value.x); }"); + break; + default: + break; + } + } + statement("#else"); + statement("uint subgroupBallotFindLSB(uvec4 value)"); + begin_scope(); + statement("int firstLive = findLSB(value.x);"); + statement("return uint(firstLive != -1 ? firstLive : (findLSB(value.y) + 32));"); + end_scope(); + statement("uint subgroupBallotFindMSB(uvec4 value)"); + begin_scope(); + statement("int firstLive = findMSB(value.y);"); + statement("return uint(firstLive != -1 ? (firstLive + 32) : findMSB(value.x));"); + end_scope(); + statement("#endif"); + statement(""); + } + + if (shader_subgroup_supporter.is_feature_requested(Supp::SubgroupAll_Any_AllEqualBool)) + { + auto exts = Supp::get_candidates_for_feature(Supp::SubgroupAll_Any_AllEqualBool, result); + + for (auto &e : exts) + { + const char *name = Supp::get_extension_name(e); + statement(&e == &exts.front() ? 
"#if" : "#elif", " defined(", name, ")"); + + switch (e) + { + case Supp::NV_gpu_shader_5: + statement("bool subgroupAll(bool value) { return allThreadsNV(value); }"); + statement("bool subgroupAny(bool value) { return anyThreadNV(value); }"); + statement("bool subgroupAllEqual(bool value) { return allThreadsEqualNV(value); }"); + break; + case Supp::ARB_shader_group_vote: + statement("bool subgroupAll(bool v) { return allInvocationsARB(v); }"); + statement("bool subgroupAny(bool v) { return anyInvocationARB(v); }"); + statement("bool subgroupAllEqual(bool v) { return allInvocationsEqualARB(v); }"); + break; + case Supp::AMD_gcn_shader: + statement("bool subgroupAll(bool value) { return ballotAMD(value) == ballotAMD(true); }"); + statement("bool subgroupAny(bool value) { return ballotAMD(value) != 0ull; }"); + statement("bool subgroupAllEqual(bool value) { uint64_t b = ballotAMD(value); return b == 0ull || " + "b == ballotAMD(true); }"); + break; + default: + break; + } + } + statement("#endif"); + statement(""); + } + + if (shader_subgroup_supporter.is_feature_requested(Supp::SubgroupAllEqualT)) + { + statement("#ifndef GL_KHR_shader_subgroup_vote"); + statement( + "#define _SPIRV_CROSS_SUBGROUP_ALL_EQUAL_WORKAROUND(type) bool subgroupAllEqual(type value) { return " + "subgroupAllEqual(subgroupBroadcastFirst(value) == value); }"); + for (const char *t : workaround_types) + statement("_SPIRV_CROSS_SUBGROUP_ALL_EQUAL_WORKAROUND(", t, ")"); + statement("#undef _SPIRV_CROSS_SUBGROUP_ALL_EQUAL_WORKAROUND"); + statement("#endif"); + statement(""); + } + + if (shader_subgroup_supporter.is_feature_requested(Supp::SubgroupBallot)) + { + auto exts = Supp::get_candidates_for_feature(Supp::SubgroupBallot, result); + + for (auto &e : exts) + { + const char *name = Supp::get_extension_name(e); + statement(&e == &exts.front() ? 
"#if" : "#elif", " defined(", name, ")"); + + switch (e) + { + case Supp::NV_shader_thread_group: + statement("uvec4 subgroupBallot(bool v) { return uvec4(ballotThreadNV(v), 0u, 0u, 0u); }"); + break; + case Supp::ARB_shader_ballot: + statement("uvec4 subgroupBallot(bool v) { return uvec4(unpackUint2x32(ballotARB(v)), 0u, 0u); }"); + break; + default: + break; + } + } + statement("#endif"); + statement(""); + } + + if (shader_subgroup_supporter.is_feature_requested(Supp::SubgroupElect)) + { + statement("#ifndef GL_KHR_shader_subgroup_basic"); + statement("bool subgroupElect()"); + begin_scope(); + statement("uvec4 activeMask = subgroupBallot(true);"); + statement("uint firstLive = subgroupBallotFindLSB(activeMask);"); + statement("return gl_SubgroupInvocationID == firstLive;"); + end_scope(); + statement("#endif"); + statement(""); + } + + if (shader_subgroup_supporter.is_feature_requested(Supp::SubgroupBarrier)) + { + // Extensions we're using in place of GL_KHR_shader_subgroup_basic state + // that subgroup execute in lockstep so this barrier is implicit. + // However the GL 4.6 spec also states that `barrier` implies a shared memory barrier, + // and a specific test of optimizing scans by leveraging lock-step invocation execution, + // has shown that a `memoryBarrierShared` is needed in place of a `subgroupBarrier`. 
+ // https://github.com/buildaworldnet/IrrlichtBAW/commit/d8536857991b89a30a6b65d29441e51b64c2c7ad#diff-9f898d27be1ea6fc79b03d9b361e299334c1a347b6e4dc344ee66110c6aa596aR19 + statement("#ifndef GL_KHR_shader_subgroup_basic"); + statement("void subgroupBarrier() { memoryBarrierShared(); }"); + statement("#endif"); + statement(""); + } + + if (shader_subgroup_supporter.is_feature_requested(Supp::SubgroupMemBarrier)) + { + if (model == spv::ExecutionModelGLCompute) + { + statement("#ifndef GL_KHR_shader_subgroup_basic"); + statement("void subgroupMemoryBarrier() { groupMemoryBarrier(); }"); + statement("void subgroupMemoryBarrierBuffer() { groupMemoryBarrier(); }"); + statement("void subgroupMemoryBarrierShared() { memoryBarrierShared(); }"); + statement("void subgroupMemoryBarrierImage() { groupMemoryBarrier(); }"); + statement("#endif"); + } + else + { + statement("#ifndef GL_KHR_shader_subgroup_basic"); + statement("void subgroupMemoryBarrier() { memoryBarrier(); }"); + statement("void subgroupMemoryBarrierBuffer() { memoryBarrierBuffer(); }"); + statement("void subgroupMemoryBarrierImage() { memoryBarrierImage(); }"); + statement("#endif"); + } + statement(""); + } + + if (shader_subgroup_supporter.is_feature_requested(Supp::SubgroupInverseBallot_InclBitCount_ExclBitCout)) + { + statement("#ifndef GL_KHR_shader_subgroup_ballot"); + statement("bool subgroupInverseBallot(uvec4 value)"); + begin_scope(); + statement("return any(notEqual(value.xy & gl_SubgroupEqMask.xy, uvec2(0u)));"); + end_scope(); + + statement("uint subgroupBallotInclusiveBitCount(uvec4 value)"); + begin_scope(); + statement("uvec2 v = value.xy & gl_SubgroupLeMask.xy;"); + statement("ivec2 c = bitCount(v);"); + statement_no_indent("#ifdef GL_NV_shader_thread_group"); + statement("return uint(c.x);"); + statement_no_indent("#else"); + statement("return uint(c.x + c.y);"); + statement_no_indent("#endif"); + end_scope(); + + statement("uint subgroupBallotExclusiveBitCount(uvec4 value)"); + 
begin_scope(); + statement("uvec2 v = value.xy & gl_SubgroupLtMask.xy;"); + statement("ivec2 c = bitCount(v);"); + statement_no_indent("#ifdef GL_NV_shader_thread_group"); + statement("return uint(c.x);"); + statement_no_indent("#else"); + statement("return uint(c.x + c.y);"); + statement_no_indent("#endif"); + end_scope(); + statement("#endif"); + statement(""); + } + + if (shader_subgroup_supporter.is_feature_requested(Supp::SubgroupBallotBitCount)) + { + statement("#ifndef GL_KHR_shader_subgroup_ballot"); + statement("uint subgroupBallotBitCount(uvec4 value)"); + begin_scope(); + statement("ivec2 c = bitCount(value.xy);"); + statement_no_indent("#ifdef GL_NV_shader_thread_group"); + statement("return uint(c.x);"); + statement_no_indent("#else"); + statement("return uint(c.x + c.y);"); + statement_no_indent("#endif"); + end_scope(); + statement("#endif"); + statement(""); + } + + if (shader_subgroup_supporter.is_feature_requested(Supp::SubgroupBallotBitExtract)) + { + statement("#ifndef GL_KHR_shader_subgroup_ballot"); + statement("bool subgroupBallotBitExtract(uvec4 value, uint index)"); + begin_scope(); + statement_no_indent("#ifdef GL_NV_shader_thread_group"); + statement("uint shifted = value.x >> index;"); + statement_no_indent("#else"); + statement("uint shifted = value[index >> 5u] >> (index & 0x1fu);"); + statement_no_indent("#endif"); + statement("return (shifted & 1u) != 0u;"); + end_scope(); + statement("#endif"); + statement(""); + } + } + + if (!workaround_ubo_load_overload_types.empty()) + { + for (auto &type_id : workaround_ubo_load_overload_types) + { + auto &type = get(type_id); + statement(type_to_glsl(type), " spvWorkaroundRowMajor(", type_to_glsl(type), + " wrap) { return wrap; }"); + } + statement(""); + } + + if (requires_transpose_2x2) + { + statement("mat2 spvTranspose(mat2 m)"); + begin_scope(); + statement("return mat2(m[0][0], m[1][0], m[0][1], m[1][1]);"); + end_scope(); + statement(""); + } + + if (requires_transpose_3x3) + { + 
statement("mat3 spvTranspose(mat3 m)"); + begin_scope(); + statement("return mat3(m[0][0], m[1][0], m[2][0], m[0][1], m[1][1], m[2][1], m[0][2], m[1][2], m[2][2]);"); + end_scope(); + statement(""); + } + + if (requires_transpose_4x4) + { + statement("mat4 spvTranspose(mat4 m)"); + begin_scope(); + statement("return mat4(m[0][0], m[1][0], m[2][0], m[3][0], m[0][1], m[1][1], m[2][1], m[3][1], m[0][2], " + "m[1][2], m[2][2], m[3][2], m[0][3], m[1][3], m[2][3], m[3][3]);"); + end_scope(); + statement(""); + } +} + +// Returns a string representation of the ID, usable as a function arg. +// Default is to simply return the expression representation fo the arg ID. +// Subclasses may override to modify the return value. +string CompilerGLSL::to_func_call_arg(const SPIRFunction::Parameter &, uint32_t id) +{ + // Make sure that we use the name of the original variable, and not the parameter alias. + uint32_t name_id = id; + auto *var = maybe_get(id); + if (var && var->basevariable) + name_id = var->basevariable; + return to_expression(name_id); +} + +void CompilerGLSL::handle_invalid_expression(uint32_t id) +{ + // We tried to read an invalidated expression. + // This means we need another pass at compilation, but next time, force temporary variables so that they cannot be invalidated. + forced_temporaries.insert(id); + force_recompile(); +} + +// Converts the format of the current expression from packed to unpacked, +// by wrapping the expression in a constructor of the appropriate type. +// GLSL does not support packed formats, so simply return the expression. +// Subclasses that do will override. +string CompilerGLSL::unpack_expression_type(string expr_str, const SPIRType &, uint32_t, bool, bool) +{ + return expr_str; +} + +// Sometimes we proactively enclosed an expression where it turns out we might have not needed it after all. 
+void CompilerGLSL::strip_enclosed_expression(string &expr)
+{
+	// Only strings of the form "(...)" are candidates; anything else is left untouched.
+	if (expr.size() < 2 || expr.front() != '(' || expr.back() != ')')
+		return;
+
+	// Have to make sure that our first and last parens actually enclose everything inside it.
+	uint32_t paren_count = 0;
+	for (auto &c : expr)
+	{
+		if (c == '(')
+			paren_count++;
+		else if (c == ')')
+		{
+			paren_count--;
+
+			// If we hit 0 and this is not the final char, our first and final parens actually don't
+			// enclose the expression, and we cannot strip, e.g.: (a + b) * (c + d).
+			if (paren_count == 0 && &c != &expr.back())
+				return;
+		}
+	}
+
+	// Remove the outermost pair of parentheses in place.
+	expr.erase(expr.size() - 1, 1);
+	expr.erase(begin(expr));
+}
+
+// Returns expr wrapped in parentheses when it either starts with one of the
+// unary operator characters (- + ! ~ & *) or contains a space that is not nested
+// inside () or []. Otherwise expr is returned unchanged.
+string CompilerGLSL::enclose_expression(const string &expr)
+{
+	bool need_parens = false;
+
+	// If the expression starts with a unary we need to enclose to deal with cases where we have back-to-back
+	// unary expressions.
+	if (!expr.empty())
+	{
+		auto c = expr.front();
+		if (c == '-' || c == '+' || c == '!' || c == '~' || c == '&' || c == '*')
+			need_parens = true;
+	}
+
+	if (!need_parens)
+	{
+		// Track () and [] nesting depth so only top-level spaces trigger enclosing.
+		uint32_t paren_count = 0;
+		for (auto c : expr)
+		{
+			if (c == '(' || c == '[')
+				paren_count++;
+			else if (c == ')' || c == ']')
+			{
+				assert(paren_count);
+				paren_count--;
+			}
+			else if (c == ' ' && paren_count == 0)
+			{
+				need_parens = true;
+				break;
+			}
+		}
+		assert(paren_count == 0);
+	}
+
+	// If this expression contains any spaces which are not enclosed by parentheses,
+	// we need to enclose it so we can treat the whole string as an expression.
+	// This happens when two expressions have been part of a binary op earlier.
+	if (need_parens)
+		return join('(', expr, ')');
+	else
+		return expr;
+}
+
+// Builds the string for dereferencing expr:
+//  - a leading '&' is stripped,
+//  - otherwise '*' is prepended when backend.native_pointers is set,
+//  - otherwise ".value" is appended (to the enclosed expression) for a non-struct
+//    PhysicalStorageBufferEXT type with pointer_depth == 1,
+//  - otherwise expr is returned unchanged.
+string CompilerGLSL::dereference_expression(const SPIRType &expr_type, const std::string &expr)
+{
+	// If this expression starts with an address-of operator ('&'), then
+	// just return the part after the operator.
+	// TODO: Strip parens if unnecessary?
+	// std::string::front() requires a non-empty string, so check for emptiness first.
+	if (!expr.empty() && expr.front() == '&')
+		return expr.substr(1);
+	else if (backend.native_pointers)
+		return join('*', expr);
+	else if (expr_type.storage == StorageClassPhysicalStorageBufferEXT && expr_type.basetype != SPIRType::Struct &&
+	         expr_type.pointer_depth == 1)
+	{
+		return join(enclose_expression(expr), ".value");
+	}
+	else
+		return expr;
+}
+
+// Builds the string for taking the address of expr:
+//  - "(*foo)" becomes foo (enclosed again if needed),
+//  - "*foo" becomes "foo",
+//  - anything else becomes '&' followed by the enclosed expression.
+string CompilerGLSL::address_of_expression(const std::string &expr)
+{
+	if (expr.size() > 3 && expr[0] == '(' && expr[1] == '*' && expr.back() == ')')
+	{
+		// If we have an expression which looks like (*foo), taking the address of it is the same as stripping
+		// the first two and last characters. We might have to enclose the expression.
+		// This doesn't work for cases like (*foo + 10),
+		// but this is an r-value expression which we cannot take the address of anyways.
+		return enclose_expression(expr.substr(2, expr.size() - 3));
+	}
+	else if (!expr.empty() && expr.front() == '*')
+	{
+		// If this expression starts with a dereference operator ('*'), then
+		// just return the part after the operator.
+		// The emptiness check above guards front(), which requires a non-empty string.
+		return expr.substr(1);
+	}
+	else
+		return join('&', enclose_expression(expr));
+}
+
+// Just like to_expression except that we enclose the expression inside parentheses if needed.
+string CompilerGLSL::to_enclosed_expression(uint32_t id, bool register_expression_read)
+{
+	return enclose_expression(to_expression(id, register_expression_read));
+}
+
+// Used explicitly when we want to read a row-major expression, but without any transpose shenanigans.
+// need_transpose must be forced to false.
+string CompilerGLSL::to_unpacked_row_major_matrix_expression(uint32_t id) +{ + return unpack_expression_type(to_expression(id), expression_type(id), + get_extended_decoration(id, SPIRVCrossDecorationPhysicalTypeID), + has_extended_decoration(id, SPIRVCrossDecorationPhysicalTypePacked), true); +} + +string CompilerGLSL::to_unpacked_expression(uint32_t id, bool register_expression_read) +{ + // If we need to transpose, it will also take care of unpacking rules. + auto *e = maybe_get(id); + bool need_transpose = e && e->need_transpose; + bool is_remapped = has_extended_decoration(id, SPIRVCrossDecorationPhysicalTypeID); + bool is_packed = has_extended_decoration(id, SPIRVCrossDecorationPhysicalTypePacked); + + if (!need_transpose && (is_remapped || is_packed)) + { + return unpack_expression_type(to_expression(id, register_expression_read), + get_pointee_type(expression_type_id(id)), + get_extended_decoration(id, SPIRVCrossDecorationPhysicalTypeID), + has_extended_decoration(id, SPIRVCrossDecorationPhysicalTypePacked), false); + } + else + return to_expression(id, register_expression_read); +} + +string CompilerGLSL::to_enclosed_unpacked_expression(uint32_t id, bool register_expression_read) +{ + // If we need to transpose, it will also take care of unpacking rules. 
+ auto *e = maybe_get(id); + bool need_transpose = e && e->need_transpose; + bool is_remapped = has_extended_decoration(id, SPIRVCrossDecorationPhysicalTypeID); + bool is_packed = has_extended_decoration(id, SPIRVCrossDecorationPhysicalTypePacked); + if (!need_transpose && (is_remapped || is_packed)) + { + return unpack_expression_type(to_expression(id, register_expression_read), expression_type(id), + get_extended_decoration(id, SPIRVCrossDecorationPhysicalTypeID), + has_extended_decoration(id, SPIRVCrossDecorationPhysicalTypePacked), false); + } + else + return to_enclosed_expression(id, register_expression_read); +} + +string CompilerGLSL::to_dereferenced_expression(uint32_t id, bool register_expression_read) +{ + auto &type = expression_type(id); + if (type.pointer && should_dereference(id)) + return dereference_expression(type, to_enclosed_expression(id, register_expression_read)); + else + return to_expression(id, register_expression_read); +} + +string CompilerGLSL::to_pointer_expression(uint32_t id, bool register_expression_read) +{ + auto &type = expression_type(id); + if (type.pointer && expression_is_lvalue(id) && !should_dereference(id)) + return address_of_expression(to_enclosed_expression(id, register_expression_read)); + else + return to_unpacked_expression(id, register_expression_read); +} + +string CompilerGLSL::to_enclosed_pointer_expression(uint32_t id, bool register_expression_read) +{ + auto &type = expression_type(id); + if (type.pointer && expression_is_lvalue(id) && !should_dereference(id)) + return address_of_expression(to_enclosed_expression(id, register_expression_read)); + else + return to_enclosed_unpacked_expression(id, register_expression_read); +} + +string CompilerGLSL::to_extract_component_expression(uint32_t id, uint32_t index) +{ + auto expr = to_enclosed_expression(id); + if (has_extended_decoration(id, SPIRVCrossDecorationPhysicalTypePacked)) + return join(expr, "[", index, "]"); + else + return join(expr, ".", 
index_to_swizzle(index)); +} + +string CompilerGLSL::to_rerolled_array_expression(const string &base_expr, const SPIRType &type) +{ + uint32_t size = to_array_size_literal(type); + auto &parent = get(type.parent_type); + string expr = "{ "; + + for (uint32_t i = 0; i < size; i++) + { + auto subexpr = join(base_expr, "[", convert_to_string(i), "]"); + if (parent.array.empty()) + expr += subexpr; + else + expr += to_rerolled_array_expression(subexpr, parent); + + if (i + 1 < size) + expr += ", "; + } + + expr += " }"; + return expr; +} + +string CompilerGLSL::to_composite_constructor_expression(uint32_t id, bool uses_buffer_offset) +{ + auto &type = expression_type(id); + + bool reroll_array = !type.array.empty() && (!backend.array_is_value_type || + (uses_buffer_offset && !backend.buffer_offset_array_is_value_type)); + + if (reroll_array) + { + // For this case, we need to "re-roll" an array initializer from a temporary. + // We cannot simply pass the array directly, since it decays to a pointer and it cannot + // participate in a struct initializer. E.g. + // float arr[2] = { 1.0, 2.0 }; + // Foo foo = { arr }; must be transformed to + // Foo foo = { { arr[0], arr[1] } }; + // The array sizes cannot be deduced from specialization constants since we cannot use any loops. + + // We're only triggering one read of the array expression, but this is fine since arrays have to be declared + // as temporaries anyways. + return to_rerolled_array_expression(to_enclosed_expression(id), type); + } + else + return to_unpacked_expression(id); +} + +string CompilerGLSL::to_expression(uint32_t id, bool register_expression_read) +{ + auto itr = invalid_expressions.find(id); + if (itr != end(invalid_expressions)) + handle_invalid_expression(id); + + if (ir.ids[id].get_type() == TypeExpression) + { + // We might have a more complex chain of dependencies. + // A possible scenario is that we + // + // %1 = OpLoad + // %2 = OpDoSomething %1 %1. here %2 will have a dependency on %1. 
+ // %3 = OpDoSomethingAgain %2 %2. Here %3 will lose the link to %1 since we don't propagate the dependencies like that. + // OpStore %1 %foo // Here we can invalidate %1, and hence all expressions which depend on %1. Only %2 will know since it's part of invalid_expressions. + // %4 = OpDoSomethingAnotherTime %3 %3 // If we forward all expressions we will see %1 expression after store, not before. + // + // However, we can propagate up a list of depended expressions when we used %2, so we can check if %2 is invalid when reading %3 after the store, + // and see that we should not forward reads of the original variable. + auto &expr = get(id); + for (uint32_t dep : expr.expression_dependencies) + if (invalid_expressions.find(dep) != end(invalid_expressions)) + handle_invalid_expression(dep); + } + + if (register_expression_read) + track_expression_read(id); + + switch (ir.ids[id].get_type()) + { + case TypeExpression: + { + auto &e = get(id); + if (e.base_expression) + return to_enclosed_expression(e.base_expression) + e.expression; + else if (e.need_transpose) + { + // This should not be reached for access chains, since we always deal explicitly with transpose state + // when consuming an access chain expression. + uint32_t physical_type_id = get_extended_decoration(id, SPIRVCrossDecorationPhysicalTypeID); + bool is_packed = has_extended_decoration(id, SPIRVCrossDecorationPhysicalTypePacked); + return convert_row_major_matrix(e.expression, get(e.expression_type), physical_type_id, + is_packed); + } + else if (flattened_structs.count(id)) + { + return load_flattened_struct(e.expression, get(e.expression_type)); + } + else + { + if (is_forcing_recompilation()) + { + // During first compilation phase, certain expression patterns can trigger exponential growth of memory. + // Avoid this by returning dummy expressions during this phase. + // Do not use empty expressions here, because those are sentinels for other cases. 
+ return "_"; + } + else + return e.expression; + } + } + + case TypeConstant: + { + auto &c = get(id); + auto &type = get(c.constant_type); + + // WorkGroupSize may be a constant. + auto &dec = ir.meta[c.self].decoration; + if (dec.builtin) + return builtin_to_glsl(dec.builtin_type, StorageClassGeneric); + else if (c.specialization) + return to_name(id); + else if (c.is_used_as_lut) + return to_name(id); + else if (type.basetype == SPIRType::Struct && !backend.can_declare_struct_inline) + return to_name(id); + else if (!type.array.empty() && !backend.can_declare_arrays_inline) + return to_name(id); + else + return constant_expression(c); + } + + case TypeConstantOp: + return to_name(id); + + case TypeVariable: + { + auto &var = get(id); + // If we try to use a loop variable before the loop header, we have to redirect it to the static expression, + // the variable has not been declared yet. + if (var.statically_assigned || (var.loop_variable && !var.loop_variable_enable)) + return to_expression(var.static_expression); + else if (var.deferred_declaration) + { + var.deferred_declaration = false; + return variable_decl(var); + } + else if (flattened_structs.count(id)) + { + return load_flattened_struct(to_name(id), get(var.basetype)); + } + else + { + auto &dec = ir.meta[var.self].decoration; + if (dec.builtin) + return builtin_to_glsl(dec.builtin_type, var.storage); + else + return to_name(id); + } + } + + case TypeCombinedImageSampler: + // This type should never be taken the expression of directly. + // The intention is that texture sampling functions will extract the image and samplers + // separately and take their expressions as needed. + // GLSL does not use this type because OpSampledImage immediately creates a combined image sampler + // expression ala sampler2D(texture, sampler). + SPIRV_CROSS_THROW("Combined image samplers have no default expression representation."); + + case TypeAccessChain: + // We cannot express this type. 
They only have meaning in other OpAccessChains, OpStore or OpLoad. + SPIRV_CROSS_THROW("Access chains have no default expression representation."); + + default: + return to_name(id); + } +} + +string CompilerGLSL::constant_op_expression(const SPIRConstantOp &cop) +{ + auto &type = get(cop.basetype); + bool binary = false; + bool unary = false; + string op; + + if (is_legacy() && is_unsigned_opcode(cop.opcode)) + SPIRV_CROSS_THROW("Unsigned integers are not supported on legacy targets."); + + // TODO: Find a clean way to reuse emit_instruction. + switch (cop.opcode) + { + case OpSConvert: + case OpUConvert: + case OpFConvert: + op = type_to_glsl_constructor(type); + break; + +#define GLSL_BOP(opname, x) \ + case Op##opname: \ + binary = true; \ + op = x; \ + break + +#define GLSL_UOP(opname, x) \ + case Op##opname: \ + unary = true; \ + op = x; \ + break + + GLSL_UOP(SNegate, "-"); + GLSL_UOP(Not, "~"); + GLSL_BOP(IAdd, "+"); + GLSL_BOP(ISub, "-"); + GLSL_BOP(IMul, "*"); + GLSL_BOP(SDiv, "/"); + GLSL_BOP(UDiv, "/"); + GLSL_BOP(UMod, "%"); + GLSL_BOP(SMod, "%"); + GLSL_BOP(ShiftRightLogical, ">>"); + GLSL_BOP(ShiftRightArithmetic, ">>"); + GLSL_BOP(ShiftLeftLogical, "<<"); + GLSL_BOP(BitwiseOr, "|"); + GLSL_BOP(BitwiseXor, "^"); + GLSL_BOP(BitwiseAnd, "&"); + GLSL_BOP(LogicalOr, "||"); + GLSL_BOP(LogicalAnd, "&&"); + GLSL_UOP(LogicalNot, "!"); + GLSL_BOP(LogicalEqual, "=="); + GLSL_BOP(LogicalNotEqual, "!="); + GLSL_BOP(IEqual, "=="); + GLSL_BOP(INotEqual, "!="); + GLSL_BOP(ULessThan, "<"); + GLSL_BOP(SLessThan, "<"); + GLSL_BOP(ULessThanEqual, "<="); + GLSL_BOP(SLessThanEqual, "<="); + GLSL_BOP(UGreaterThan, ">"); + GLSL_BOP(SGreaterThan, ">"); + GLSL_BOP(UGreaterThanEqual, ">="); + GLSL_BOP(SGreaterThanEqual, ">="); + + case OpSelect: + { + if (cop.arguments.size() < 3) + SPIRV_CROSS_THROW("Not enough arguments to OpSpecConstantOp."); + + // This one is pretty annoying. It's triggered from + // uint(bool), int(bool) from spec constants. 
+ // In order to preserve its compile-time constness in Vulkan GLSL, + // we need to reduce the OpSelect expression back to this simplified model. + // If we cannot, fail. + if (to_trivial_mix_op(type, op, cop.arguments[2], cop.arguments[1], cop.arguments[0])) + { + // Implement as a simple cast down below. + } + else + { + // Implement a ternary and pray the compiler understands it :) + return to_ternary_expression(type, cop.arguments[0], cop.arguments[1], cop.arguments[2]); + } + break; + } + + case OpVectorShuffle: + { + string expr = type_to_glsl_constructor(type); + expr += "("; + + uint32_t left_components = expression_type(cop.arguments[0]).vecsize; + string left_arg = to_enclosed_expression(cop.arguments[0]); + string right_arg = to_enclosed_expression(cop.arguments[1]); + + for (uint32_t i = 2; i < uint32_t(cop.arguments.size()); i++) + { + uint32_t index = cop.arguments[i]; + if (index >= left_components) + expr += right_arg + "." + "xyzw"[index - left_components]; + else + expr += left_arg + "." + "xyzw"[index]; + + if (i + 1 < uint32_t(cop.arguments.size())) + expr += ", "; + } + + expr += ")"; + return expr; + } + + case OpCompositeExtract: + { + auto expr = access_chain_internal(cop.arguments[0], &cop.arguments[1], uint32_t(cop.arguments.size() - 1), + ACCESS_CHAIN_INDEX_IS_LITERAL_BIT, nullptr); + return expr; + } + + case OpCompositeInsert: + SPIRV_CROSS_THROW("OpCompositeInsert spec constant op is not supported."); + + default: + // Some opcodes are unimplemented here, these are currently not possible to test from glslang. 
+ SPIRV_CROSS_THROW("Unimplemented spec constant op."); + } + + uint32_t bit_width = 0; + if (unary || binary || cop.opcode == OpSConvert || cop.opcode == OpUConvert) + bit_width = expression_type(cop.arguments[0]).width; + + SPIRType::BaseType input_type; + bool skip_cast_if_equal_type = opcode_is_sign_invariant(cop.opcode); + + switch (cop.opcode) + { + case OpIEqual: + case OpINotEqual: + input_type = to_signed_basetype(bit_width); + break; + + case OpSLessThan: + case OpSLessThanEqual: + case OpSGreaterThan: + case OpSGreaterThanEqual: + case OpSMod: + case OpSDiv: + case OpShiftRightArithmetic: + case OpSConvert: + case OpSNegate: + input_type = to_signed_basetype(bit_width); + break; + + case OpULessThan: + case OpULessThanEqual: + case OpUGreaterThan: + case OpUGreaterThanEqual: + case OpUMod: + case OpUDiv: + case OpShiftRightLogical: + case OpUConvert: + input_type = to_unsigned_basetype(bit_width); + break; + + default: + input_type = type.basetype; + break; + } + +#undef GLSL_BOP +#undef GLSL_UOP + if (binary) + { + if (cop.arguments.size() < 2) + SPIRV_CROSS_THROW("Not enough arguments to OpSpecConstantOp."); + + string cast_op0; + string cast_op1; + auto expected_type = binary_op_bitcast_helper(cast_op0, cast_op1, input_type, cop.arguments[0], + cop.arguments[1], skip_cast_if_equal_type); + + if (type.basetype != input_type && type.basetype != SPIRType::Boolean) + { + expected_type.basetype = input_type; + auto expr = bitcast_glsl_op(type, expected_type); + expr += '('; + expr += join(cast_op0, " ", op, " ", cast_op1); + expr += ')'; + return expr; + } + else + return join("(", cast_op0, " ", op, " ", cast_op1, ")"); + } + else if (unary) + { + if (cop.arguments.size() < 1) + SPIRV_CROSS_THROW("Not enough arguments to OpSpecConstantOp."); + + // Auto-bitcast to result type as needed. + // Works around various casting scenarios in glslang as there is no OpBitcast for specialization constants. 
+ return join("(", op, bitcast_glsl(type, cop.arguments[0]), ")"); + } + else if (cop.opcode == OpSConvert || cop.opcode == OpUConvert) + { + if (cop.arguments.size() < 1) + SPIRV_CROSS_THROW("Not enough arguments to OpSpecConstantOp."); + + auto &arg_type = expression_type(cop.arguments[0]); + if (arg_type.width < type.width && input_type != arg_type.basetype) + { + auto expected = arg_type; + expected.basetype = input_type; + return join(op, "(", bitcast_glsl(expected, cop.arguments[0]), ")"); + } + else + return join(op, "(", to_expression(cop.arguments[0]), ")"); + } + else + { + if (cop.arguments.size() < 1) + SPIRV_CROSS_THROW("Not enough arguments to OpSpecConstantOp."); + return join(op, "(", to_expression(cop.arguments[0]), ")"); + } +} + +string CompilerGLSL::constant_expression(const SPIRConstant &c) +{ + auto &type = get(c.constant_type); + + if (type.pointer) + { + return backend.null_pointer_literal; + } + else if (!c.subconstants.empty()) + { + // Handles Arrays and structures. + string res; + + // Allow Metal to use the array template to make arrays a value type + bool needs_trailing_tracket = false; + if (backend.use_initializer_list && backend.use_typed_initializer_list && type.basetype == SPIRType::Struct && + type.array.empty()) + { + res = type_to_glsl_constructor(type) + "{ "; + } + else if (backend.use_initializer_list && backend.use_typed_initializer_list && backend.array_is_value_type && + !type.array.empty()) + { + res = type_to_glsl_constructor(type) + "({ "; + needs_trailing_tracket = true; + } + else if (backend.use_initializer_list) + { + res = "{ "; + } + else + { + res = type_to_glsl_constructor(type) + "("; + } + + for (auto &elem : c.subconstants) + { + auto &subc = get(elem); + if (subc.specialization) + res += to_name(elem); + else + res += constant_expression(subc); + + if (&elem != &c.subconstants.back()) + res += ", "; + } + + res += backend.use_initializer_list ? 
" }" : ")"; + if (needs_trailing_tracket) + res += ")"; + + return res; + } + else if (type.basetype == SPIRType::Struct && type.member_types.size() == 0) + { + // Metal tessellation likes empty structs which are then constant expressions. + if (backend.supports_empty_struct) + return "{ }"; + else if (backend.use_typed_initializer_list) + return join(type_to_glsl(get(c.constant_type)), "{ 0 }"); + else if (backend.use_initializer_list) + return "{ 0 }"; + else + return join(type_to_glsl(get(c.constant_type)), "(0)"); + } + else if (c.columns() == 1) + { + return constant_expression_vector(c, 0); + } + else + { + string res = type_to_glsl(get(c.constant_type)) + "("; + for (uint32_t col = 0; col < c.columns(); col++) + { + if (c.specialization_constant_id(col) != 0) + res += to_name(c.specialization_constant_id(col)); + else + res += constant_expression_vector(c, col); + + if (col + 1 < c.columns()) + res += ", "; + } + res += ")"; + return res; + } +} + +#ifdef _MSC_VER +// sprintf warning. +// We cannot rely on snprintf existing because, ..., MSVC. +#pragma warning(push) +#pragma warning(disable : 4996) +#endif + +string CompilerGLSL::convert_half_to_string(const SPIRConstant &c, uint32_t col, uint32_t row) +{ + string res; + float float_value = c.scalar_f16(col, row); + + // There is no literal "hf" in GL_NV_gpu_shader5, so to avoid lots + // of complicated workarounds, just value-cast to the half type always. 
+ if (std::isnan(float_value) || std::isinf(float_value)) + { + SPIRType type; + type.basetype = SPIRType::Half; + type.vecsize = 1; + type.columns = 1; + + if (float_value == numeric_limits::infinity()) + res = join(type_to_glsl(type), "(1.0 / 0.0)"); + else if (float_value == -numeric_limits::infinity()) + res = join(type_to_glsl(type), "(-1.0 / 0.0)"); + else if (std::isnan(float_value)) + res = join(type_to_glsl(type), "(0.0 / 0.0)"); + else + SPIRV_CROSS_THROW("Cannot represent non-finite floating point constant."); + } + else + { + SPIRType type; + type.basetype = SPIRType::Half; + type.vecsize = 1; + type.columns = 1; + res = join(type_to_glsl(type), "(", convert_to_string(float_value, current_locale_radix_character), ")"); + } + + return res; +} + +string CompilerGLSL::convert_float_to_string(const SPIRConstant &c, uint32_t col, uint32_t row) +{ + string res; + float float_value = c.scalar_f32(col, row); + + if (std::isnan(float_value) || std::isinf(float_value)) + { + // Use special representation. 
+ if (!is_legacy()) + { + SPIRType out_type; + SPIRType in_type; + out_type.basetype = SPIRType::Float; + in_type.basetype = SPIRType::UInt; + out_type.vecsize = 1; + in_type.vecsize = 1; + out_type.width = 32; + in_type.width = 32; + + char print_buffer[32]; + sprintf(print_buffer, "0x%xu", c.scalar(col, row)); + res = join(bitcast_glsl_op(out_type, in_type), "(", print_buffer, ")"); + } + else + { + if (float_value == numeric_limits::infinity()) + { + if (backend.float_literal_suffix) + res = "(1.0f / 0.0f)"; + else + res = "(1.0 / 0.0)"; + } + else if (float_value == -numeric_limits::infinity()) + { + if (backend.float_literal_suffix) + res = "(-1.0f / 0.0f)"; + else + res = "(-1.0 / 0.0)"; + } + else if (std::isnan(float_value)) + { + if (backend.float_literal_suffix) + res = "(0.0f / 0.0f)"; + else + res = "(0.0 / 0.0)"; + } + else + SPIRV_CROSS_THROW("Cannot represent non-finite floating point constant."); + } + } + else + { + res = convert_to_string(float_value, current_locale_radix_character); + if (backend.float_literal_suffix) + res += "f"; + } + + return res; +} + +std::string CompilerGLSL::convert_double_to_string(const SPIRConstant &c, uint32_t col, uint32_t row) +{ + string res; + double double_value = c.scalar_f64(col, row); + + if (std::isnan(double_value) || std::isinf(double_value)) + { + // Use special representation. + if (!is_legacy()) + { + SPIRType out_type; + SPIRType in_type; + out_type.basetype = SPIRType::Double; + in_type.basetype = SPIRType::UInt64; + out_type.vecsize = 1; + in_type.vecsize = 1; + out_type.width = 64; + in_type.width = 64; + + uint64_t u64_value = c.scalar_u64(col, row); + + if (options.es) + SPIRV_CROSS_THROW("64-bit integers/float not supported in ES profile."); + require_extension_internal("GL_ARB_gpu_shader_int64"); + + char print_buffer[64]; + sprintf(print_buffer, "0x%llx%s", static_cast(u64_value), + backend.long_long_literal_suffix ? 
"ull" : "ul"); + res = join(bitcast_glsl_op(out_type, in_type), "(", print_buffer, ")"); + } + else + { + if (options.es) + SPIRV_CROSS_THROW("FP64 not supported in ES profile."); + if (options.version < 400) + require_extension_internal("GL_ARB_gpu_shader_fp64"); + + if (double_value == numeric_limits::infinity()) + { + if (backend.double_literal_suffix) + res = "(1.0lf / 0.0lf)"; + else + res = "(1.0 / 0.0)"; + } + else if (double_value == -numeric_limits::infinity()) + { + if (backend.double_literal_suffix) + res = "(-1.0lf / 0.0lf)"; + else + res = "(-1.0 / 0.0)"; + } + else if (std::isnan(double_value)) + { + if (backend.double_literal_suffix) + res = "(0.0lf / 0.0lf)"; + else + res = "(0.0 / 0.0)"; + } + else + SPIRV_CROSS_THROW("Cannot represent non-finite floating point constant."); + } + } + else + { + res = convert_to_string(double_value, current_locale_radix_character); + if (backend.double_literal_suffix) + res += "lf"; + } + + return res; +} + +#ifdef _MSC_VER +#pragma warning(pop) +#endif + +string CompilerGLSL::constant_expression_vector(const SPIRConstant &c, uint32_t vector) +{ + auto type = get(c.constant_type); + type.columns = 1; + + auto scalar_type = type; + scalar_type.vecsize = 1; + + string res; + bool splat = backend.use_constructor_splatting && c.vector_size() > 1; + bool swizzle_splat = backend.can_swizzle_scalar && c.vector_size() > 1; + + if (!type_is_floating_point(type)) + { + // Cannot swizzle literal integers as a special case. + swizzle_splat = false; + } + + if (splat || swizzle_splat) + { + // Cannot use constant splatting if we have specialization constants somewhere in the vector. 
+ for (uint32_t i = 0; i < c.vector_size(); i++) + { + if (c.specialization_constant_id(vector, i) != 0) + { + splat = false; + swizzle_splat = false; + break; + } + } + } + + if (splat || swizzle_splat) + { + if (type.width == 64) + { + uint64_t ident = c.scalar_u64(vector, 0); + for (uint32_t i = 1; i < c.vector_size(); i++) + { + if (ident != c.scalar_u64(vector, i)) + { + splat = false; + swizzle_splat = false; + break; + } + } + } + else + { + uint32_t ident = c.scalar(vector, 0); + for (uint32_t i = 1; i < c.vector_size(); i++) + { + if (ident != c.scalar(vector, i)) + { + splat = false; + swizzle_splat = false; + } + } + } + } + + if (c.vector_size() > 1 && !swizzle_splat) + res += type_to_glsl(type) + "("; + + switch (type.basetype) + { + case SPIRType::Half: + if (splat || swizzle_splat) + { + res += convert_half_to_string(c, vector, 0); + if (swizzle_splat) + res = remap_swizzle(get(c.constant_type), 1, res); + } + else + { + for (uint32_t i = 0; i < c.vector_size(); i++) + { + if (c.vector_size() > 1 && c.specialization_constant_id(vector, i) != 0) + res += to_name(c.specialization_constant_id(vector, i)); + else + res += convert_half_to_string(c, vector, i); + + if (i + 1 < c.vector_size()) + res += ", "; + } + } + break; + + case SPIRType::Float: + if (splat || swizzle_splat) + { + res += convert_float_to_string(c, vector, 0); + if (swizzle_splat) + res = remap_swizzle(get(c.constant_type), 1, res); + } + else + { + for (uint32_t i = 0; i < c.vector_size(); i++) + { + if (c.vector_size() > 1 && c.specialization_constant_id(vector, i) != 0) + res += to_name(c.specialization_constant_id(vector, i)); + else + res += convert_float_to_string(c, vector, i); + + if (i + 1 < c.vector_size()) + res += ", "; + } + } + break; + + case SPIRType::Double: + if (splat || swizzle_splat) + { + res += convert_double_to_string(c, vector, 0); + if (swizzle_splat) + res = remap_swizzle(get(c.constant_type), 1, res); + } + else + { + for (uint32_t i = 0; i < 
c.vector_size(); i++) + { + if (c.vector_size() > 1 && c.specialization_constant_id(vector, i) != 0) + res += to_name(c.specialization_constant_id(vector, i)); + else + res += convert_double_to_string(c, vector, i); + + if (i + 1 < c.vector_size()) + res += ", "; + } + } + break; + + case SPIRType::Int64: + if (splat) + { + res += convert_to_string(c.scalar_i64(vector, 0)); + if (backend.long_long_literal_suffix) + res += "ll"; + else + res += "l"; + } + else + { + for (uint32_t i = 0; i < c.vector_size(); i++) + { + if (c.vector_size() > 1 && c.specialization_constant_id(vector, i) != 0) + res += to_name(c.specialization_constant_id(vector, i)); + else + { + res += convert_to_string(c.scalar_i64(vector, i)); + if (backend.long_long_literal_suffix) + res += "ll"; + else + res += "l"; + } + + if (i + 1 < c.vector_size()) + res += ", "; + } + } + break; + + case SPIRType::UInt64: + if (splat) + { + res += convert_to_string(c.scalar_u64(vector, 0)); + if (backend.long_long_literal_suffix) + res += "ull"; + else + res += "ul"; + } + else + { + for (uint32_t i = 0; i < c.vector_size(); i++) + { + if (c.vector_size() > 1 && c.specialization_constant_id(vector, i) != 0) + res += to_name(c.specialization_constant_id(vector, i)); + else + { + res += convert_to_string(c.scalar_u64(vector, i)); + if (backend.long_long_literal_suffix) + res += "ull"; + else + res += "ul"; + } + + if (i + 1 < c.vector_size()) + res += ", "; + } + } + break; + + case SPIRType::UInt: + if (splat) + { + res += convert_to_string(c.scalar(vector, 0)); + if (is_legacy()) + { + // Fake unsigned constant literals with signed ones if possible. + // Things like array sizes, etc, tend to be unsigned even though they could just as easily be signed. 
+ if (c.scalar_i32(vector, 0) < 0) + SPIRV_CROSS_THROW("Tried to convert uint literal into int, but this made the literal negative."); + } + else if (backend.uint32_t_literal_suffix) + res += "u"; + } + else + { + for (uint32_t i = 0; i < c.vector_size(); i++) + { + if (c.vector_size() > 1 && c.specialization_constant_id(vector, i) != 0) + res += to_name(c.specialization_constant_id(vector, i)); + else + { + res += convert_to_string(c.scalar(vector, i)); + if (is_legacy()) + { + // Fake unsigned constant literals with signed ones if possible. + // Things like array sizes, etc, tend to be unsigned even though they could just as easily be signed. + if (c.scalar_i32(vector, i) < 0) + SPIRV_CROSS_THROW("Tried to convert uint literal into int, but this made " + "the literal negative."); + } + else if (backend.uint32_t_literal_suffix) + res += "u"; + } + + if (i + 1 < c.vector_size()) + res += ", "; + } + } + break; + + case SPIRType::Int: + if (splat) + res += convert_to_string(c.scalar_i32(vector, 0)); + else + { + for (uint32_t i = 0; i < c.vector_size(); i++) + { + if (c.vector_size() > 1 && c.specialization_constant_id(vector, i) != 0) + res += to_name(c.specialization_constant_id(vector, i)); + else + res += convert_to_string(c.scalar_i32(vector, i)); + if (i + 1 < c.vector_size()) + res += ", "; + } + } + break; + + case SPIRType::UShort: + if (splat) + { + res += convert_to_string(c.scalar(vector, 0)); + } + else + { + for (uint32_t i = 0; i < c.vector_size(); i++) + { + if (c.vector_size() > 1 && c.specialization_constant_id(vector, i) != 0) + res += to_name(c.specialization_constant_id(vector, i)); + else + { + if (*backend.uint16_t_literal_suffix) + { + res += convert_to_string(c.scalar_u16(vector, i)); + res += backend.uint16_t_literal_suffix; + } + else + { + // If backend doesn't have a literal suffix, we need to value cast. 
+ res += type_to_glsl(scalar_type); + res += "("; + res += convert_to_string(c.scalar_u16(vector, i)); + res += ")"; + } + } + + if (i + 1 < c.vector_size()) + res += ", "; + } + } + break; + + case SPIRType::Short: + if (splat) + { + res += convert_to_string(c.scalar_i16(vector, 0)); + } + else + { + for (uint32_t i = 0; i < c.vector_size(); i++) + { + if (c.vector_size() > 1 && c.specialization_constant_id(vector, i) != 0) + res += to_name(c.specialization_constant_id(vector, i)); + else + { + if (*backend.int16_t_literal_suffix) + { + res += convert_to_string(c.scalar_i16(vector, i)); + res += backend.int16_t_literal_suffix; + } + else + { + // If backend doesn't have a literal suffix, we need to value cast. + res += type_to_glsl(scalar_type); + res += "("; + res += convert_to_string(c.scalar_i16(vector, i)); + res += ")"; + } + } + + if (i + 1 < c.vector_size()) + res += ", "; + } + } + break; + + case SPIRType::UByte: + if (splat) + { + res += convert_to_string(c.scalar_u8(vector, 0)); + } + else + { + for (uint32_t i = 0; i < c.vector_size(); i++) + { + if (c.vector_size() > 1 && c.specialization_constant_id(vector, i) != 0) + res += to_name(c.specialization_constant_id(vector, i)); + else + { + res += type_to_glsl(scalar_type); + res += "("; + res += convert_to_string(c.scalar_u8(vector, i)); + res += ")"; + } + + if (i + 1 < c.vector_size()) + res += ", "; + } + } + break; + + case SPIRType::SByte: + if (splat) + { + res += convert_to_string(c.scalar_i8(vector, 0)); + } + else + { + for (uint32_t i = 0; i < c.vector_size(); i++) + { + if (c.vector_size() > 1 && c.specialization_constant_id(vector, i) != 0) + res += to_name(c.specialization_constant_id(vector, i)); + else + { + res += type_to_glsl(scalar_type); + res += "("; + res += convert_to_string(c.scalar_i8(vector, i)); + res += ")"; + } + + if (i + 1 < c.vector_size()) + res += ", "; + } + } + break; + + case SPIRType::Boolean: + if (splat) + res += c.scalar(vector, 0) ? 
"true" : "false"; + else + { + for (uint32_t i = 0; i < c.vector_size(); i++) + { + if (c.vector_size() > 1 && c.specialization_constant_id(vector, i) != 0) + res += to_name(c.specialization_constant_id(vector, i)); + else + res += c.scalar(vector, i) ? "true" : "false"; + + if (i + 1 < c.vector_size()) + res += ", "; + } + } + break; + + default: + SPIRV_CROSS_THROW("Invalid constant expression basetype."); + } + + if (c.vector_size() > 1 && !swizzle_splat) + res += ")"; + + return res; +} + +SPIRExpression &CompilerGLSL::emit_uninitialized_temporary_expression(uint32_t type, uint32_t id) +{ + forced_temporaries.insert(id); + emit_uninitialized_temporary(type, id); + return set(id, to_name(id), type, true); +} + +void CompilerGLSL::emit_uninitialized_temporary(uint32_t result_type, uint32_t result_id) +{ + // If we're declaring temporaries inside continue blocks, + // we must declare the temporary in the loop header so that the continue block can avoid declaring new variables. + if (current_continue_block && !hoisted_temporaries.count(result_id)) + { + auto &header = get(current_continue_block->loop_dominator); + if (find_if(begin(header.declare_temporary), end(header.declare_temporary), + [result_type, result_id](const pair &tmp) { + return tmp.first == result_type && tmp.second == result_id; + }) == end(header.declare_temporary)) + { + header.declare_temporary.emplace_back(result_type, result_id); + hoisted_temporaries.insert(result_id); + force_recompile(); + } + } + else if (hoisted_temporaries.count(result_id) == 0) + { + auto &type = get(result_type); + auto &flags = ir.meta[result_id].decoration.decoration_flags; + + // The result_id has not been made into an expression yet, so use flags interface. 
+ add_local_variable_name(result_id); + + string initializer; + if (options.force_zero_initialized_variables && type_can_zero_initialize(type)) + initializer = join(" = ", to_zero_initialized_expression(result_type)); + + statement(flags_to_qualifiers_glsl(type, flags), variable_decl(type, to_name(result_id)), initializer, ";"); + } +} + +string CompilerGLSL::declare_temporary(uint32_t result_type, uint32_t result_id) +{ + auto &type = get(result_type); + auto &flags = ir.meta[result_id].decoration.decoration_flags; + + // If we're declaring temporaries inside continue blocks, + // we must declare the temporary in the loop header so that the continue block can avoid declaring new variables. + if (current_continue_block && !hoisted_temporaries.count(result_id)) + { + auto &header = get(current_continue_block->loop_dominator); + if (find_if(begin(header.declare_temporary), end(header.declare_temporary), + [result_type, result_id](const pair &tmp) { + return tmp.first == result_type && tmp.second == result_id; + }) == end(header.declare_temporary)) + { + header.declare_temporary.emplace_back(result_type, result_id); + hoisted_temporaries.insert(result_id); + force_recompile(); + } + + return join(to_name(result_id), " = "); + } + else if (hoisted_temporaries.count(result_id)) + { + // The temporary has already been declared earlier, so just "declare" the temporary by writing to it. + return join(to_name(result_id), " = "); + } + else + { + // The result_id has not been made into an expression yet, so use flags interface. 
+ add_local_variable_name(result_id); + return join(flags_to_qualifiers_glsl(type, flags), variable_decl(type, to_name(result_id)), " = "); + } +} + +bool CompilerGLSL::expression_is_forwarded(uint32_t id) const +{ + return forwarded_temporaries.count(id) != 0; +} + +bool CompilerGLSL::expression_suppresses_usage_tracking(uint32_t id) const +{ + return suppressed_usage_tracking.count(id) != 0; +} + +bool CompilerGLSL::expression_read_implies_multiple_reads(uint32_t id) const +{ + auto *expr = maybe_get(id); + if (!expr) + return false; + + // If we're emitting code at a deeper loop level than when we emitted the expression, + // we're probably reading the same expression over and over. + return current_loop_level > expr->emitted_loop_level; +} + +SPIRExpression &CompilerGLSL::emit_op(uint32_t result_type, uint32_t result_id, const string &rhs, bool forwarding, + bool suppress_usage_tracking) +{ + if (forwarding && (forced_temporaries.find(result_id) == end(forced_temporaries))) + { + // Just forward it without temporary. + // If the forward is trivial, we do not force flushing to temporary for this expression. + forwarded_temporaries.insert(result_id); + if (suppress_usage_tracking) + suppressed_usage_tracking.insert(result_id); + + return set(result_id, rhs, result_type, true); + } + else + { + // If expression isn't immutable, bind it to a temporary and make the new temporary immutable (they always are). 
+ statement(declare_temporary(result_type, result_id), rhs, ";"); + return set(result_id, to_name(result_id), result_type, true); + } +} + +void CompilerGLSL::emit_unary_op(uint32_t result_type, uint32_t result_id, uint32_t op0, const char *op) +{ + bool forward = should_forward(op0); + emit_op(result_type, result_id, join(op, to_enclosed_unpacked_expression(op0)), forward); + inherit_expression_dependencies(result_id, op0); +} + +void CompilerGLSL::emit_binary_op(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1, const char *op) +{ + bool forward = should_forward(op0) && should_forward(op1); + emit_op(result_type, result_id, + join(to_enclosed_unpacked_expression(op0), " ", op, " ", to_enclosed_unpacked_expression(op1)), forward); + + inherit_expression_dependencies(result_id, op0); + inherit_expression_dependencies(result_id, op1); +} + +void CompilerGLSL::emit_unrolled_unary_op(uint32_t result_type, uint32_t result_id, uint32_t operand, const char *op) +{ + auto &type = get(result_type); + auto expr = type_to_glsl_constructor(type); + expr += '('; + for (uint32_t i = 0; i < type.vecsize; i++) + { + // Make sure to call to_expression multiple times to ensure + // that these expressions are properly flushed to temporaries if needed. 
+ expr += op; + expr += to_extract_component_expression(operand, i); + + if (i + 1 < type.vecsize) + expr += ", "; + } + expr += ')'; + emit_op(result_type, result_id, expr, should_forward(operand)); + + inherit_expression_dependencies(result_id, operand); +} + +void CompilerGLSL::emit_unrolled_binary_op(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1, + const char *op, bool negate, SPIRType::BaseType expected_type) +{ + auto &type0 = expression_type(op0); + auto &type1 = expression_type(op1); + + SPIRType target_type0 = type0; + SPIRType target_type1 = type1; + target_type0.basetype = expected_type; + target_type1.basetype = expected_type; + target_type0.vecsize = 1; + target_type1.vecsize = 1; + + auto &type = get(result_type); + auto expr = type_to_glsl_constructor(type); + expr += '('; + for (uint32_t i = 0; i < type.vecsize; i++) + { + // Make sure to call to_expression multiple times to ensure + // that these expressions are properly flushed to temporaries if needed. 
+ if (negate) + expr += "!("; + + if (expected_type != SPIRType::Unknown && type0.basetype != expected_type) + expr += bitcast_expression(target_type0, type0.basetype, to_extract_component_expression(op0, i)); + else + expr += to_extract_component_expression(op0, i); + + expr += ' '; + expr += op; + expr += ' '; + + if (expected_type != SPIRType::Unknown && type1.basetype != expected_type) + expr += bitcast_expression(target_type1, type1.basetype, to_extract_component_expression(op1, i)); + else + expr += to_extract_component_expression(op1, i); + + if (negate) + expr += ")"; + + if (i + 1 < type.vecsize) + expr += ", "; + } + expr += ')'; + emit_op(result_type, result_id, expr, should_forward(op0) && should_forward(op1)); + + inherit_expression_dependencies(result_id, op0); + inherit_expression_dependencies(result_id, op1); +} + +SPIRType CompilerGLSL::binary_op_bitcast_helper(string &cast_op0, string &cast_op1, SPIRType::BaseType &input_type, + uint32_t op0, uint32_t op1, bool skip_cast_if_equal_type) +{ + auto &type0 = expression_type(op0); + auto &type1 = expression_type(op1); + + // We have to bitcast if our inputs are of different type, or if our types are not equal to expected inputs. + // For some functions like OpIEqual and INotEqual, we don't care if inputs are of different types than expected + // since equality test is exactly the same. + bool cast = (type0.basetype != type1.basetype) || (!skip_cast_if_equal_type && type0.basetype != input_type); + + // Create a fake type so we can bitcast to it. + // We only deal with regular arithmetic types here like int, uints and so on. 
+ SPIRType expected_type; + expected_type.basetype = input_type; + expected_type.vecsize = type0.vecsize; + expected_type.columns = type0.columns; + expected_type.width = type0.width; + + if (cast) + { + cast_op0 = bitcast_glsl(expected_type, op0); + cast_op1 = bitcast_glsl(expected_type, op1); + } + else + { + // If we don't cast, our actual input type is that of the first (or second) argument. + cast_op0 = to_enclosed_unpacked_expression(op0); + cast_op1 = to_enclosed_unpacked_expression(op1); + input_type = type0.basetype; + } + + return expected_type; +} + +bool CompilerGLSL::emit_complex_bitcast(uint32_t result_type, uint32_t id, uint32_t op0) +{ + // Some bitcasts may require complex casting sequences, and are implemented here. + // Otherwise a simply unary function will do with bitcast_glsl_op. + + auto &output_type = get(result_type); + auto &input_type = expression_type(op0); + string expr; + + if (output_type.basetype == SPIRType::Half && input_type.basetype == SPIRType::Float && input_type.vecsize == 1) + expr = join("unpackFloat2x16(floatBitsToUint(", to_unpacked_expression(op0), "))"); + else if (output_type.basetype == SPIRType::Float && input_type.basetype == SPIRType::Half && + input_type.vecsize == 2) + expr = join("uintBitsToFloat(packFloat2x16(", to_unpacked_expression(op0), "))"); + else + return false; + + emit_op(result_type, id, expr, should_forward(op0)); + return true; +} + +void CompilerGLSL::emit_binary_op_cast(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1, + const char *op, SPIRType::BaseType input_type, bool skip_cast_if_equal_type) +{ + string cast_op0, cast_op1; + auto expected_type = binary_op_bitcast_helper(cast_op0, cast_op1, input_type, op0, op1, skip_cast_if_equal_type); + auto &out_type = get(result_type); + + // We might have casted away from the result type, so bitcast again. + // For example, arithmetic right shift with uint inputs. 
+ // Special case boolean outputs since relational opcodes output booleans instead of int/uint. + string expr; + if (out_type.basetype != input_type && out_type.basetype != SPIRType::Boolean) + { + expected_type.basetype = input_type; + expr = bitcast_glsl_op(out_type, expected_type); + expr += '('; + expr += join(cast_op0, " ", op, " ", cast_op1); + expr += ')'; + } + else + expr += join(cast_op0, " ", op, " ", cast_op1); + + emit_op(result_type, result_id, expr, should_forward(op0) && should_forward(op1)); + inherit_expression_dependencies(result_id, op0); + inherit_expression_dependencies(result_id, op1); +} + +void CompilerGLSL::emit_unary_func_op(uint32_t result_type, uint32_t result_id, uint32_t op0, const char *op) +{ + bool forward = should_forward(op0); + emit_op(result_type, result_id, join(op, "(", to_unpacked_expression(op0), ")"), forward); + inherit_expression_dependencies(result_id, op0); +} + +void CompilerGLSL::emit_binary_func_op(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1, + const char *op) +{ + bool forward = should_forward(op0) && should_forward(op1); + emit_op(result_type, result_id, join(op, "(", to_unpacked_expression(op0), ", ", to_unpacked_expression(op1), ")"), + forward); + inherit_expression_dependencies(result_id, op0); + inherit_expression_dependencies(result_id, op1); +} + +void CompilerGLSL::emit_unary_func_op_cast(uint32_t result_type, uint32_t result_id, uint32_t op0, const char *op, + SPIRType::BaseType input_type, SPIRType::BaseType expected_result_type) +{ + auto &out_type = get(result_type); + auto &expr_type = expression_type(op0); + auto expected_type = out_type; + + // Bit-widths might be different in unary cases because we use it for SConvert/UConvert and friends. + expected_type.basetype = input_type; + expected_type.width = expr_type.width; + string cast_op = expr_type.basetype != input_type ? 
bitcast_glsl(expected_type, op0) : to_unpacked_expression(op0); + + string expr; + if (out_type.basetype != expected_result_type) + { + expected_type.basetype = expected_result_type; + expected_type.width = out_type.width; + expr = bitcast_glsl_op(out_type, expected_type); + expr += '('; + expr += join(op, "(", cast_op, ")"); + expr += ')'; + } + else + { + expr += join(op, "(", cast_op, ")"); + } + + emit_op(result_type, result_id, expr, should_forward(op0)); + inherit_expression_dependencies(result_id, op0); +} + +// Very special case. Handling bitfieldExtract requires us to deal with different bitcasts of different signs +// and different vector sizes all at once. Need a special purpose method here. +void CompilerGLSL::emit_trinary_func_op_bitextract(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1, + uint32_t op2, const char *op, + SPIRType::BaseType expected_result_type, + SPIRType::BaseType input_type0, SPIRType::BaseType input_type1, + SPIRType::BaseType input_type2) +{ + auto &out_type = get(result_type); + auto expected_type = out_type; + expected_type.basetype = input_type0; + + string cast_op0 = + expression_type(op0).basetype != input_type0 ? bitcast_glsl(expected_type, op0) : to_unpacked_expression(op0); + + auto op1_expr = to_unpacked_expression(op1); + auto op2_expr = to_unpacked_expression(op2); + + // Use value casts here instead. Input must be exactly int or uint, but SPIR-V might be 16-bit. + expected_type.basetype = input_type1; + expected_type.vecsize = 1; + string cast_op1 = expression_type(op1).basetype != input_type1 ? + join(type_to_glsl_constructor(expected_type), "(", op1_expr, ")") : + op1_expr; + + expected_type.basetype = input_type2; + expected_type.vecsize = 1; + string cast_op2 = expression_type(op2).basetype != input_type2 ? 
+ join(type_to_glsl_constructor(expected_type), "(", op2_expr, ")") : + op2_expr; + + string expr; + if (out_type.basetype != expected_result_type) + { + expected_type.vecsize = out_type.vecsize; + expected_type.basetype = expected_result_type; + expr = bitcast_glsl_op(out_type, expected_type); + expr += '('; + expr += join(op, "(", cast_op0, ", ", cast_op1, ", ", cast_op2, ")"); + expr += ')'; + } + else + { + expr += join(op, "(", cast_op0, ", ", cast_op1, ", ", cast_op2, ")"); + } + + emit_op(result_type, result_id, expr, should_forward(op0) && should_forward(op1) && should_forward(op2)); + inherit_expression_dependencies(result_id, op0); + inherit_expression_dependencies(result_id, op1); + inherit_expression_dependencies(result_id, op2); +} + +void CompilerGLSL::emit_trinary_func_op_cast(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1, + uint32_t op2, const char *op, SPIRType::BaseType input_type) +{ + auto &out_type = get(result_type); + auto expected_type = out_type; + expected_type.basetype = input_type; + string cast_op0 = + expression_type(op0).basetype != input_type ? bitcast_glsl(expected_type, op0) : to_unpacked_expression(op0); + string cast_op1 = + expression_type(op1).basetype != input_type ? bitcast_glsl(expected_type, op1) : to_unpacked_expression(op1); + string cast_op2 = + expression_type(op2).basetype != input_type ? 
bitcast_glsl(expected_type, op2) : to_unpacked_expression(op2); + + string expr; + if (out_type.basetype != input_type) + { + expr = bitcast_glsl_op(out_type, expected_type); + expr += '('; + expr += join(op, "(", cast_op0, ", ", cast_op1, ", ", cast_op2, ")"); + expr += ')'; + } + else + { + expr += join(op, "(", cast_op0, ", ", cast_op1, ", ", cast_op2, ")"); + } + + emit_op(result_type, result_id, expr, should_forward(op0) && should_forward(op1) && should_forward(op2)); + inherit_expression_dependencies(result_id, op0); + inherit_expression_dependencies(result_id, op1); + inherit_expression_dependencies(result_id, op2); +} + +void CompilerGLSL::emit_binary_func_op_cast_clustered(uint32_t result_type, uint32_t result_id, uint32_t op0, + uint32_t op1, const char *op, SPIRType::BaseType input_type) +{ + // Special purpose method for implementing clustered subgroup opcodes. + // Main difference is that op1 does not participate in any casting, it needs to be a literal. + auto &out_type = get(result_type); + auto expected_type = out_type; + expected_type.basetype = input_type; + string cast_op0 = + expression_type(op0).basetype != input_type ? 
bitcast_glsl(expected_type, op0) : to_unpacked_expression(op0); + + string expr; + if (out_type.basetype != input_type) + { + expr = bitcast_glsl_op(out_type, expected_type); + expr += '('; + expr += join(op, "(", cast_op0, ", ", to_expression(op1), ")"); + expr += ')'; + } + else + { + expr += join(op, "(", cast_op0, ", ", to_expression(op1), ")"); + } + + emit_op(result_type, result_id, expr, should_forward(op0)); + inherit_expression_dependencies(result_id, op0); +} + +void CompilerGLSL::emit_binary_func_op_cast(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1, + const char *op, SPIRType::BaseType input_type, bool skip_cast_if_equal_type) +{ + string cast_op0, cast_op1; + auto expected_type = binary_op_bitcast_helper(cast_op0, cast_op1, input_type, op0, op1, skip_cast_if_equal_type); + auto &out_type = get(result_type); + + // Special case boolean outputs since relational opcodes output booleans instead of int/uint. + string expr; + if (out_type.basetype != input_type && out_type.basetype != SPIRType::Boolean) + { + expected_type.basetype = input_type; + expr = bitcast_glsl_op(out_type, expected_type); + expr += '('; + expr += join(op, "(", cast_op0, ", ", cast_op1, ")"); + expr += ')'; + } + else + { + expr += join(op, "(", cast_op0, ", ", cast_op1, ")"); + } + + emit_op(result_type, result_id, expr, should_forward(op0) && should_forward(op1)); + inherit_expression_dependencies(result_id, op0); + inherit_expression_dependencies(result_id, op1); +} + +void CompilerGLSL::emit_trinary_func_op(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1, + uint32_t op2, const char *op) +{ + bool forward = should_forward(op0) && should_forward(op1) && should_forward(op2); + emit_op(result_type, result_id, + join(op, "(", to_unpacked_expression(op0), ", ", to_unpacked_expression(op1), ", ", + to_unpacked_expression(op2), ")"), + forward); + + inherit_expression_dependencies(result_id, op0); + inherit_expression_dependencies(result_id, 
op1); + inherit_expression_dependencies(result_id, op2); +} + +void CompilerGLSL::emit_quaternary_func_op(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1, + uint32_t op2, uint32_t op3, const char *op) +{ + bool forward = should_forward(op0) && should_forward(op1) && should_forward(op2) && should_forward(op3); + emit_op(result_type, result_id, + join(op, "(", to_unpacked_expression(op0), ", ", to_unpacked_expression(op1), ", ", + to_unpacked_expression(op2), ", ", to_unpacked_expression(op3), ")"), + forward); + + inherit_expression_dependencies(result_id, op0); + inherit_expression_dependencies(result_id, op1); + inherit_expression_dependencies(result_id, op2); + inherit_expression_dependencies(result_id, op3); +} + +void CompilerGLSL::emit_bitfield_insert_op(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1, + uint32_t op2, uint32_t op3, const char *op, + SPIRType::BaseType offset_count_type) +{ + // Only need to cast offset/count arguments. Types of base/insert must be same as result type, + // and bitfieldInsert is sign invariant. + bool forward = should_forward(op0) && should_forward(op1) && should_forward(op2) && should_forward(op3); + + auto op0_expr = to_unpacked_expression(op0); + auto op1_expr = to_unpacked_expression(op1); + auto op2_expr = to_unpacked_expression(op2); + auto op3_expr = to_unpacked_expression(op3); + + SPIRType target_type; + target_type.vecsize = 1; + target_type.basetype = offset_count_type; + + if (expression_type(op2).basetype != offset_count_type) + { + // Value-cast here. Input might be 16-bit. GLSL requires int. + op2_expr = join(type_to_glsl_constructor(target_type), "(", op2_expr, ")"); + } + + if (expression_type(op3).basetype != offset_count_type) + { + // Value-cast here. Input might be 16-bit. GLSL requires int. 
+ op3_expr = join(type_to_glsl_constructor(target_type), "(", op3_expr, ")"); + } + + emit_op(result_type, result_id, join(op, "(", op0_expr, ", ", op1_expr, ", ", op2_expr, ", ", op3_expr, ")"), + forward); + + inherit_expression_dependencies(result_id, op0); + inherit_expression_dependencies(result_id, op1); + inherit_expression_dependencies(result_id, op2); + inherit_expression_dependencies(result_id, op3); +} + +string CompilerGLSL::legacy_tex_op(const std::string &op, const SPIRType &imgtype, uint32_t tex) +{ + const char *type; + switch (imgtype.image.dim) + { + case spv::Dim1D: + type = (imgtype.image.arrayed && !options.es) ? "1DArray" : "1D"; + break; + case spv::Dim2D: + type = (imgtype.image.arrayed && !options.es) ? "2DArray" : "2D"; + break; + case spv::Dim3D: + type = "3D"; + break; + case spv::DimCube: + type = "Cube"; + break; + case spv::DimRect: + type = "2DRect"; + break; + case spv::DimBuffer: + type = "Buffer"; + break; + case spv::DimSubpassData: + type = "2D"; + break; + default: + type = ""; + break; + } + + // In legacy GLSL, an extension is required for textureLod in the fragment + // shader or textureGrad anywhere. + bool legacy_lod_ext = false; + auto &execution = get_entry_point(); + if (op == "textureGrad" || op == "textureProjGrad" || + ((op == "textureLod" || op == "textureProjLod") && execution.model != ExecutionModelVertex)) + { + if (is_legacy_es()) + { + legacy_lod_ext = true; + require_extension_internal("GL_EXT_shader_texture_lod"); + } + else if (is_legacy_desktop()) + require_extension_internal("GL_ARB_shader_texture_lod"); + } + + if (op == "textureLodOffset" || op == "textureProjLodOffset") + { + if (is_legacy_es()) + SPIRV_CROSS_THROW(join(op, " not allowed in legacy ES")); + + require_extension_internal("GL_EXT_gpu_shader4"); + } + + // GLES has very limited support for shadow samplers. 
+ // Basically shadow2D and shadow2DProj work through EXT_shadow_samplers, + // everything else can just throw + bool is_comparison = image_is_comparison(imgtype, tex); + if (is_comparison && is_legacy_es()) + { + if (op == "texture" || op == "textureProj") + require_extension_internal("GL_EXT_shadow_samplers"); + else + SPIRV_CROSS_THROW(join(op, " not allowed on depth samplers in legacy ES")); + } + + if (op == "textureSize") + { + if (is_legacy_es()) + SPIRV_CROSS_THROW("textureSize not supported in legacy ES"); + if (is_comparison) + SPIRV_CROSS_THROW("textureSize not supported on shadow sampler in legacy GLSL"); + require_extension_internal("GL_EXT_gpu_shader4"); + } + + if (op == "texelFetch" && is_legacy_es()) + SPIRV_CROSS_THROW("texelFetch not supported in legacy ES"); + + bool is_es_and_depth = is_legacy_es() && is_comparison; + std::string type_prefix = is_comparison ? "shadow" : "texture"; + + if (op == "texture") + return is_es_and_depth ? join(type_prefix, type, "EXT") : join(type_prefix, type); + else if (op == "textureLod") + return join(type_prefix, type, legacy_lod_ext ? "LodEXT" : "Lod"); + else if (op == "textureProj") + return join(type_prefix, type, is_es_and_depth ? "ProjEXT" : "Proj"); + else if (op == "textureGrad") + return join(type_prefix, type, is_legacy_es() ? "GradEXT" : is_legacy_desktop() ? "GradARB" : "Grad"); + else if (op == "textureProjLod") + return join(type_prefix, type, legacy_lod_ext ? "ProjLodEXT" : "ProjLod"); + else if (op == "textureLodOffset") + return join(type_prefix, type, "LodOffset"); + else if (op == "textureProjGrad") + return join(type_prefix, type, + is_legacy_es() ? "ProjGradEXT" : is_legacy_desktop() ? 
"ProjGradARB" : "ProjGrad"); + else if (op == "textureProjLodOffset") + return join(type_prefix, type, "ProjLodOffset"); + else if (op == "textureSize") + return join("textureSize", type); + else if (op == "texelFetch") + return join("texelFetch", type); + else + { + SPIRV_CROSS_THROW(join("Unsupported legacy texture op: ", op)); + } +} + +bool CompilerGLSL::to_trivial_mix_op(const SPIRType &type, string &op, uint32_t left, uint32_t right, uint32_t lerp) +{ + auto *cleft = maybe_get(left); + auto *cright = maybe_get(right); + auto &lerptype = expression_type(lerp); + + // If our targets aren't constants, we cannot use construction. + if (!cleft || !cright) + return false; + + // If our targets are spec constants, we cannot use construction. + if (cleft->specialization || cright->specialization) + return false; + + // We can only use trivial construction if we have a scalar + // (should be possible to do it for vectors as well, but that is overkill for now). + if (lerptype.basetype != SPIRType::Boolean || lerptype.vecsize > 1) + return false; + + // If our bool selects between 0 and 1, we can cast from bool instead, making our trivial constructor. 
+ bool ret = false; + switch (type.basetype) + { + case SPIRType::Short: + case SPIRType::UShort: + ret = cleft->scalar_u16() == 0 && cright->scalar_u16() == 1; + break; + + case SPIRType::Int: + case SPIRType::UInt: + ret = cleft->scalar() == 0 && cright->scalar() == 1; + break; + + case SPIRType::Half: + ret = cleft->scalar_f16() == 0.0f && cright->scalar_f16() == 1.0f; + break; + + case SPIRType::Float: + ret = cleft->scalar_f32() == 0.0f && cright->scalar_f32() == 1.0f; + break; + + case SPIRType::Double: + ret = cleft->scalar_f64() == 0.0 && cright->scalar_f64() == 1.0; + break; + + case SPIRType::Int64: + case SPIRType::UInt64: + ret = cleft->scalar_u64() == 0 && cright->scalar_u64() == 1; + break; + + default: + break; + } + + if (ret) + op = type_to_glsl_constructor(type); + return ret; +} + +string CompilerGLSL::to_ternary_expression(const SPIRType &restype, uint32_t select, uint32_t true_value, + uint32_t false_value) +{ + string expr; + auto &lerptype = expression_type(select); + + if (lerptype.vecsize == 1) + expr = join(to_enclosed_expression(select), " ? ", to_enclosed_pointer_expression(true_value), " : ", + to_enclosed_pointer_expression(false_value)); + else + { + auto swiz = [this](uint32_t expression, uint32_t i) { return to_extract_component_expression(expression, i); }; + + expr = type_to_glsl_constructor(restype); + expr += "("; + for (uint32_t i = 0; i < restype.vecsize; i++) + { + expr += swiz(select, i); + expr += " ? "; + expr += swiz(true_value, i); + expr += " : "; + expr += swiz(false_value, i); + if (i + 1 < restype.vecsize) + expr += ", "; + } + expr += ")"; + } + + return expr; +} + +void CompilerGLSL::emit_mix_op(uint32_t result_type, uint32_t id, uint32_t left, uint32_t right, uint32_t lerp) +{ + auto &lerptype = expression_type(lerp); + auto &restype = get(result_type); + + // If this results in a variable pointer, assume it may be written through. 
+ if (restype.pointer) + { + register_write(left); + register_write(right); + } + + string mix_op; + bool has_boolean_mix = *backend.boolean_mix_function && + ((options.es && options.version >= 310) || (!options.es && options.version >= 450)); + bool trivial_mix = to_trivial_mix_op(restype, mix_op, left, right, lerp); + + // Cannot use boolean mix when the lerp argument is just one boolean, + // fall back to regular trinary statements. + if (lerptype.vecsize == 1) + has_boolean_mix = false; + + // If we can reduce the mix to a simple cast, do so. + // This helps for cases like int(bool), uint(bool) which is implemented with + // OpSelect bool 1 0. + if (trivial_mix) + { + emit_unary_func_op(result_type, id, lerp, mix_op.c_str()); + } + else if (!has_boolean_mix && lerptype.basetype == SPIRType::Boolean) + { + // Boolean mix not supported on desktop without extension. + // Was added in OpenGL 4.5 with ES 3.1 compat. + // + // Could use GL_EXT_shader_integer_mix on desktop at least, + // but Apple doesn't support it. :( + // Just implement it as ternary expressions. + auto expr = to_ternary_expression(get(result_type), lerp, right, left); + emit_op(result_type, id, expr, should_forward(left) && should_forward(right) && should_forward(lerp)); + inherit_expression_dependencies(id, left); + inherit_expression_dependencies(id, right); + inherit_expression_dependencies(id, lerp); + } + else if (lerptype.basetype == SPIRType::Boolean) + emit_trinary_func_op(result_type, id, left, right, lerp, backend.boolean_mix_function); + else + emit_trinary_func_op(result_type, id, left, right, lerp, "mix"); +} + +string CompilerGLSL::to_combined_image_sampler(VariableID image_id, VariableID samp_id) +{ + // Keep track of the array indices we have used to load the image. + // We'll need to use the same array index into the combined image sampler array. 
+ auto image_expr = to_expression(image_id); + string array_expr; + auto array_index = image_expr.find_first_of('['); + if (array_index != string::npos) + array_expr = image_expr.substr(array_index, string::npos); + + auto &args = current_function->arguments; + + // For GLSL and ESSL targets, we must enumerate all possible combinations for sampler2D(texture2D, sampler) and redirect + // all possible combinations into new sampler2D uniforms. + auto *image = maybe_get_backing_variable(image_id); + auto *samp = maybe_get_backing_variable(samp_id); + if (image) + image_id = image->self; + if (samp) + samp_id = samp->self; + + auto image_itr = find_if(begin(args), end(args), + [image_id](const SPIRFunction::Parameter &param) { return image_id == param.id; }); + + auto sampler_itr = find_if(begin(args), end(args), + [samp_id](const SPIRFunction::Parameter &param) { return samp_id == param.id; }); + + if (image_itr != end(args) || sampler_itr != end(args)) + { + // If any parameter originates from a parameter, we will find it in our argument list. + bool global_image = image_itr == end(args); + bool global_sampler = sampler_itr == end(args); + VariableID iid = global_image ? image_id : VariableID(uint32_t(image_itr - begin(args))); + VariableID sid = global_sampler ? samp_id : VariableID(uint32_t(sampler_itr - begin(args))); + + auto &combined = current_function->combined_parameters; + auto itr = find_if(begin(combined), end(combined), [=](const SPIRFunction::CombinedImageSamplerParameter &p) { + return p.global_image == global_image && p.global_sampler == global_sampler && p.image_id == iid && + p.sampler_id == sid; + }); + + if (itr != end(combined)) + return to_expression(itr->id) + array_expr; + else + { + SPIRV_CROSS_THROW("Cannot find mapping for combined sampler parameter, was " + "build_combined_image_samplers() used " + "before compile() was called?"); + } + } + else + { + // For global sampler2D, look directly at the global remapping table. 
+ auto &mapping = combined_image_samplers; + auto itr = find_if(begin(mapping), end(mapping), [image_id, samp_id](const CombinedImageSampler &combined) { + return combined.image_id == image_id && combined.sampler_id == samp_id; + }); + + if (itr != end(combined_image_samplers)) + return to_expression(itr->combined_id) + array_expr; + else + { + SPIRV_CROSS_THROW("Cannot find mapping for combined sampler, was build_combined_image_samplers() used " + "before compile() was called?"); + } + } +} + +bool CompilerGLSL::is_supported_subgroup_op_in_opengl(spv::Op op) +{ + switch (op) + { + case OpGroupNonUniformElect: + case OpGroupNonUniformBallot: + case OpGroupNonUniformBallotFindLSB: + case OpGroupNonUniformBallotFindMSB: + case OpGroupNonUniformBroadcast: + case OpGroupNonUniformBroadcastFirst: + case OpGroupNonUniformAll: + case OpGroupNonUniformAny: + case OpGroupNonUniformAllEqual: + case OpControlBarrier: + case OpMemoryBarrier: + case OpGroupNonUniformBallotBitCount: + case OpGroupNonUniformBallotBitExtract: + case OpGroupNonUniformInverseBallot: + return true; + default: + return false; + } +} + +void CompilerGLSL::emit_sampled_image_op(uint32_t result_type, uint32_t result_id, uint32_t image_id, uint32_t samp_id) +{ + if (options.vulkan_semantics && combined_image_samplers.empty()) + { + emit_binary_func_op(result_type, result_id, image_id, samp_id, + type_to_glsl(get(result_type), result_id).c_str()); + } + else + { + // Make sure to suppress usage tracking. It is illegal to create temporaries of opaque types. + emit_op(result_type, result_id, to_combined_image_sampler(image_id, samp_id), true, true); + } + + // Make sure to suppress usage tracking and any expression invalidation. + // It is illegal to create temporaries of opaque types. 
+ forwarded_temporaries.erase(result_id); +} + +static inline bool image_opcode_is_sample_no_dref(Op op) +{ + switch (op) + { + case OpImageSampleExplicitLod: + case OpImageSampleImplicitLod: + case OpImageSampleProjExplicitLod: + case OpImageSampleProjImplicitLod: + case OpImageFetch: + case OpImageRead: + case OpImageSparseSampleExplicitLod: + case OpImageSparseSampleImplicitLod: + case OpImageSparseSampleProjExplicitLod: + case OpImageSparseSampleProjImplicitLod: + case OpImageSparseFetch: + case OpImageSparseRead: + return true; + + default: + return false; + } +} + +void CompilerGLSL::emit_sparse_feedback_temporaries(uint32_t result_type_id, uint32_t id, uint32_t &feedback_id, + uint32_t &texel_id) +{ + // Need to allocate two temporaries. + if (options.es) + SPIRV_CROSS_THROW("Sparse texture feedback is not supported on ESSL."); + require_extension_internal("GL_ARB_sparse_texture2"); + + auto &temps = extra_sub_expressions[id]; + if (temps == 0) + temps = ir.increase_bound_by(2); + + feedback_id = temps + 0; + texel_id = temps + 1; + + auto &return_type = get(result_type_id); + if (return_type.basetype != SPIRType::Struct || return_type.member_types.size() != 2) + SPIRV_CROSS_THROW("Invalid return type for sparse feedback."); + emit_uninitialized_temporary(return_type.member_types[0], feedback_id); + emit_uninitialized_temporary(return_type.member_types[1], texel_id); +} + +uint32_t CompilerGLSL::get_sparse_feedback_texel_id(uint32_t id) const +{ + auto itr = extra_sub_expressions.find(id); + if (itr == extra_sub_expressions.end()) + return 0; + else + return itr->second + 1; +} + +void CompilerGLSL::emit_texture_op(const Instruction &i, bool sparse) +{ + auto *ops = stream(i); + auto op = static_cast(i.op); + + SmallVector inherited_expressions; + + uint32_t result_type_id = ops[0]; + uint32_t id = ops[1]; + auto &return_type = get(result_type_id); + + uint32_t sparse_code_id = 0; + uint32_t sparse_texel_id = 0; + if (sparse) + 
emit_sparse_feedback_temporaries(result_type_id, id, sparse_code_id, sparse_texel_id); + + bool forward = false; + string expr = to_texture_op(i, sparse, &forward, inherited_expressions); + + if (sparse) + { + statement(to_expression(sparse_code_id), " = ", expr, ";"); + expr = join(type_to_glsl(return_type), "(", to_expression(sparse_code_id), ", ", to_expression(sparse_texel_id), + ")"); + forward = true; + inherited_expressions.clear(); + } + + emit_op(result_type_id, id, expr, forward); + for (auto &inherit : inherited_expressions) + inherit_expression_dependencies(id, inherit); + + // Do not register sparse ops as control dependent as they are always lowered to a temporary. + switch (op) + { + case OpImageSampleDrefImplicitLod: + case OpImageSampleImplicitLod: + case OpImageSampleProjImplicitLod: + case OpImageSampleProjDrefImplicitLod: + register_control_dependent_expression(id); + break; + + default: + break; + } +} + +std::string CompilerGLSL::to_texture_op(const Instruction &i, bool sparse, bool *forward, + SmallVector &inherited_expressions) +{ + auto *ops = stream(i); + auto op = static_cast(i.op); + uint32_t length = i.length; + + uint32_t result_type_id = ops[0]; + VariableID img = ops[2]; + uint32_t coord = ops[3]; + uint32_t dref = 0; + uint32_t comp = 0; + bool gather = false; + bool proj = false; + bool fetch = false; + const uint32_t *opt = nullptr; + + auto &result_type = get(result_type_id); + + inherited_expressions.push_back(coord); + + // Make sure non-uniform decoration is back-propagated to where it needs to be. 
+ if (has_decoration(img, DecorationNonUniformEXT)) + propagate_nonuniform_qualifier(img); + + switch (op) + { + case OpImageSampleDrefImplicitLod: + case OpImageSampleDrefExplicitLod: + case OpImageSparseSampleDrefImplicitLod: + case OpImageSparseSampleDrefExplicitLod: + dref = ops[4]; + opt = &ops[5]; + length -= 5; + break; + + case OpImageSampleProjDrefImplicitLod: + case OpImageSampleProjDrefExplicitLod: + case OpImageSparseSampleProjDrefImplicitLod: + case OpImageSparseSampleProjDrefExplicitLod: + dref = ops[4]; + opt = &ops[5]; + length -= 5; + proj = true; + break; + + case OpImageDrefGather: + case OpImageSparseDrefGather: + dref = ops[4]; + opt = &ops[5]; + length -= 5; + gather = true; + if (options.es && options.version < 310) + SPIRV_CROSS_THROW("textureGather requires ESSL 310."); + else if (!options.es && options.version < 400) + SPIRV_CROSS_THROW("textureGather with depth compare requires GLSL 400."); + break; + + case OpImageGather: + case OpImageSparseGather: + comp = ops[4]; + opt = &ops[5]; + length -= 5; + gather = true; + if (options.es && options.version < 310) + SPIRV_CROSS_THROW("textureGather requires ESSL 310."); + else if (!options.es && options.version < 400) + { + if (!expression_is_constant_null(comp)) + SPIRV_CROSS_THROW("textureGather with component requires GLSL 400."); + require_extension_internal("GL_ARB_texture_gather"); + } + break; + + case OpImageFetch: + case OpImageSparseFetch: + case OpImageRead: // Reads == fetches in Metal (other langs will not get here) + opt = &ops[4]; + length -= 4; + fetch = true; + break; + + case OpImageSampleProjImplicitLod: + case OpImageSampleProjExplicitLod: + case OpImageSparseSampleProjImplicitLod: + case OpImageSparseSampleProjExplicitLod: + opt = &ops[4]; + length -= 4; + proj = true; + break; + + default: + opt = &ops[4]; + length -= 4; + break; + } + + // Bypass pointers because we need the real image struct + auto &type = expression_type(img); + auto &imgtype = get(type.self); + + 
uint32_t coord_components = 0; + switch (imgtype.image.dim) + { + case spv::Dim1D: + coord_components = 1; + break; + case spv::Dim2D: + coord_components = 2; + break; + case spv::Dim3D: + coord_components = 3; + break; + case spv::DimCube: + coord_components = 3; + break; + case spv::DimBuffer: + coord_components = 1; + break; + default: + coord_components = 2; + break; + } + + if (dref) + inherited_expressions.push_back(dref); + + if (proj) + coord_components++; + if (imgtype.image.arrayed) + coord_components++; + + uint32_t bias = 0; + uint32_t lod = 0; + uint32_t grad_x = 0; + uint32_t grad_y = 0; + uint32_t coffset = 0; + uint32_t offset = 0; + uint32_t coffsets = 0; + uint32_t sample = 0; + uint32_t minlod = 0; + uint32_t flags = 0; + + if (length) + { + flags = *opt++; + length--; + } + + auto test = [&](uint32_t &v, uint32_t flag) { + if (length && (flags & flag)) + { + v = *opt++; + inherited_expressions.push_back(v); + length--; + } + }; + + test(bias, ImageOperandsBiasMask); + test(lod, ImageOperandsLodMask); + test(grad_x, ImageOperandsGradMask); + test(grad_y, ImageOperandsGradMask); + test(coffset, ImageOperandsConstOffsetMask); + test(offset, ImageOperandsOffsetMask); + test(coffsets, ImageOperandsConstOffsetsMask); + test(sample, ImageOperandsSampleMask); + test(minlod, ImageOperandsMinLodMask); + + TextureFunctionBaseArguments base_args = {}; + base_args.img = img; + base_args.imgtype = &imgtype; + base_args.is_fetch = fetch != 0; + base_args.is_gather = gather != 0; + base_args.is_proj = proj != 0; + + string expr; + TextureFunctionNameArguments name_args = {}; + + name_args.base = base_args; + name_args.has_array_offsets = coffsets != 0; + name_args.has_offset = coffset != 0 || offset != 0; + name_args.has_grad = grad_x != 0 || grad_y != 0; + name_args.has_dref = dref != 0; + name_args.is_sparse_feedback = sparse; + name_args.has_min_lod = minlod != 0; + name_args.lod = lod; + expr += to_function_name(name_args); + expr += "("; + + uint32_t 
sparse_texel_id = 0; + if (sparse) + sparse_texel_id = get_sparse_feedback_texel_id(ops[1]); + + TextureFunctionArguments args = {}; + args.base = base_args; + args.coord = coord; + args.coord_components = coord_components; + args.dref = dref; + args.grad_x = grad_x; + args.grad_y = grad_y; + args.lod = lod; + args.coffset = coffset; + args.offset = offset; + args.bias = bias; + args.component = comp; + args.sample = sample; + args.sparse_texel = sparse_texel_id; + args.min_lod = minlod; + expr += to_function_args(args, forward); + expr += ")"; + + // texture(samplerXShadow) returns float. shadowX() returns vec4. Swizzle here. + if (is_legacy() && image_is_comparison(imgtype, img)) + expr += ".r"; + + // Sampling from a texture which was deduced to be a depth image, might actually return 1 component here. + // Remap back to 4 components as sampling opcodes expect. + if (backend.comparison_image_samples_scalar && image_opcode_is_sample_no_dref(op)) + { + bool image_is_depth = false; + const auto *combined = maybe_get(img); + VariableID image_id = combined ? combined->image : img; + + if (combined && image_is_comparison(imgtype, combined->image)) + image_is_depth = true; + else if (image_is_comparison(imgtype, img)) + image_is_depth = true; + + // We must also check the backing variable for the image. + // We might have loaded an OpImage, and used that handle for two different purposes. + // Once with comparison, once without. + auto *image_variable = maybe_get_backing_variable(image_id); + if (image_variable && image_is_comparison(get(image_variable->basetype), image_variable->self)) + image_is_depth = true; + + if (image_is_depth) + expr = remap_swizzle(result_type, 1, expr); + } + + if (!sparse && !backend.support_small_type_sampling_result && result_type.width < 32) + { + // Just value cast (narrowing) to expected type since we cannot rely on narrowing to work automatically. 
+ // Hopefully compiler picks this up and converts the texturing instruction to the appropriate precision. + expr = join(type_to_glsl_constructor(result_type), "(", expr, ")"); + } + + // Deals with reads from MSL. We might need to downconvert to fewer components. + if (op == OpImageRead) + expr = remap_swizzle(result_type, 4, expr); + + return expr; +} + +bool CompilerGLSL::expression_is_constant_null(uint32_t id) const +{ + auto *c = maybe_get(id); + if (!c) + return false; + return c->constant_is_null(); +} + +bool CompilerGLSL::expression_is_non_value_type_array(uint32_t ptr) +{ + auto &type = expression_type(ptr); + if (type.array.empty()) + return false; + + if (!backend.array_is_value_type) + return true; + + auto *var = maybe_get_backing_variable(ptr); + if (!var) + return false; + + auto &backed_type = get(var->basetype); + return !backend.buffer_offset_array_is_value_type && backed_type.basetype == SPIRType::Struct && + has_member_decoration(backed_type.self, 0, DecorationOffset); +} + +// Returns the function name for a texture sampling function for the specified image and sampling characteristics. +// For some subclasses, the function is a method on the specified image. +string CompilerGLSL::to_function_name(const TextureFunctionNameArguments &args) +{ + if (args.has_min_lod) + { + if (options.es) + SPIRV_CROSS_THROW("Sparse residency is not supported in ESSL."); + require_extension_internal("GL_ARB_sparse_texture_clamp"); + } + + string fname; + auto &imgtype = *args.base.imgtype; + VariableID tex = args.base.img; + + // textureLod on sampler2DArrayShadow and samplerCubeShadow does not exist in GLSL for some reason. + // To emulate this, we will have to use textureGrad with a constant gradient of 0. + // The workaround will assert that the LOD is in fact constant 0, or we cannot emit correct code. + // This happens for HLSL SampleCmpLevelZero on Texture2DArray and TextureCube. 
+ bool workaround_lod_array_shadow_as_grad = false; + if (((imgtype.image.arrayed && imgtype.image.dim == Dim2D) || imgtype.image.dim == DimCube) && + image_is_comparison(imgtype, tex) && args.lod) + { + if (!expression_is_constant_null(args.lod)) + { + SPIRV_CROSS_THROW("textureLod on sampler2DArrayShadow is not constant 0.0. This cannot be " + "expressed in GLSL."); + } + workaround_lod_array_shadow_as_grad = true; + } + + if (args.is_sparse_feedback) + fname += "sparse"; + + if (args.base.is_fetch) + fname += args.is_sparse_feedback ? "TexelFetch" : "texelFetch"; + else + { + fname += args.is_sparse_feedback ? "Texture" : "texture"; + + if (args.base.is_gather) + fname += "Gather"; + if (args.has_array_offsets) + fname += "Offsets"; + if (args.base.is_proj) + fname += "Proj"; + if (args.has_grad || workaround_lod_array_shadow_as_grad) + fname += "Grad"; + if (args.lod != 0 && !workaround_lod_array_shadow_as_grad) + fname += "Lod"; + } + + if (args.has_offset) + fname += "Offset"; + + if (args.has_min_lod) + fname += "Clamp"; + + if (args.is_sparse_feedback || args.has_min_lod) + fname += "ARB"; + + return (is_legacy() && !args.base.is_gather) ? legacy_tex_op(fname, imgtype, tex) : fname; +} + +std::string CompilerGLSL::convert_separate_image_to_expression(uint32_t id) +{ + auto *var = maybe_get_backing_variable(id); + + // If we are fetching from a plain OpTypeImage, we must combine with a dummy sampler in GLSL. + // In Vulkan GLSL, we can make use of the newer GL_EXT_samplerless_texture_functions. + if (var) + { + auto &type = get(var->basetype); + if (type.basetype == SPIRType::Image && type.image.sampled == 1 && type.image.dim != DimBuffer) + { + if (options.vulkan_semantics) + { + if (dummy_sampler_id) + { + // Don't need to consider Shadow state since the dummy sampler is always non-shadow. 
+ auto sampled_type = type; + sampled_type.basetype = SPIRType::SampledImage; + return join(type_to_glsl(sampled_type), "(", to_expression(id), ", ", + to_expression(dummy_sampler_id), ")"); + } + else + { + // Newer glslang supports this extension to deal with texture2D as argument to texture functions. + require_extension_internal("GL_EXT_samplerless_texture_functions"); + } + } + else + { + if (!dummy_sampler_id) + SPIRV_CROSS_THROW("Cannot find dummy sampler ID. Was " + "build_dummy_sampler_for_combined_images() called?"); + + return to_combined_image_sampler(id, dummy_sampler_id); + } + } + } + + return to_expression(id); +} + +// Returns the function args for a texture sampling function for the specified image and sampling characteristics. +string CompilerGLSL::to_function_args(const TextureFunctionArguments &args, bool *p_forward) +{ + VariableID img = args.base.img; + auto &imgtype = *args.base.imgtype; + + string farg_str; + if (args.base.is_fetch) + farg_str = convert_separate_image_to_expression(img); + else + farg_str = to_expression(img); + + bool swizz_func = backend.swizzle_is_function; + auto swizzle = [swizz_func](uint32_t comps, uint32_t in_comps) -> const char * { + if (comps == in_comps) + return ""; + + switch (comps) + { + case 1: + return ".x"; + case 2: + return swizz_func ? ".xy()" : ".xy"; + case 3: + return swizz_func ? ".xyz()" : ".xyz"; + default: + return ""; + } + }; + + bool forward = should_forward(args.coord); + + // The IR can give us more components than we need, so chop them off as needed. + auto swizzle_expr = swizzle(args.coord_components, expression_type(args.coord).vecsize); + // Only enclose the UV expression if needed. + auto coord_expr = + (*swizzle_expr == '\0') ? to_expression(args.coord) : (to_enclosed_expression(args.coord) + swizzle_expr); + + // texelFetch only takes int, not uint. 
+ auto &coord_type = expression_type(args.coord); + if (coord_type.basetype == SPIRType::UInt) + { + auto expected_type = coord_type; + expected_type.vecsize = args.coord_components; + expected_type.basetype = SPIRType::Int; + coord_expr = bitcast_expression(expected_type, coord_type.basetype, coord_expr); + } + + // textureLod on sampler2DArrayShadow and samplerCubeShadow does not exist in GLSL for some reason. + // To emulate this, we will have to use textureGrad with a constant gradient of 0. + // The workaround will assert that the LOD is in fact constant 0, or we cannot emit correct code. + // This happens for HLSL SampleCmpLevelZero on Texture2DArray and TextureCube. + bool workaround_lod_array_shadow_as_grad = + ((imgtype.image.arrayed && imgtype.image.dim == Dim2D) || imgtype.image.dim == DimCube) && + image_is_comparison(imgtype, img) && args.lod != 0; + + if (args.dref) + { + forward = forward && should_forward(args.dref); + + // SPIR-V splits dref and coordinate. + if (args.base.is_gather || + args.coord_components == 4) // GLSL also splits the arguments in two. Same for textureGather. + { + farg_str += ", "; + farg_str += to_expression(args.coord); + farg_str += ", "; + farg_str += to_expression(args.dref); + } + else if (args.base.is_proj) + { + // Have to reshuffle so we get vec4(coord, dref, proj), special case. + // Other shading languages splits up the arguments for coord and compare value like SPIR-V. + // The coordinate type for textureProj shadow is always vec4 even for sampler1DShadow. + farg_str += ", vec4("; + + if (imgtype.image.dim == Dim1D) + { + // Could reuse coord_expr, but we will mess up the temporary usage checking. 
+ farg_str += to_enclosed_expression(args.coord) + ".x"; + farg_str += ", "; + farg_str += "0.0, "; + farg_str += to_expression(args.dref); + farg_str += ", "; + farg_str += to_enclosed_expression(args.coord) + ".y)"; + } + else if (imgtype.image.dim == Dim2D) + { + // Could reuse coord_expr, but we will mess up the temporary usage checking. + farg_str += to_enclosed_expression(args.coord) + (swizz_func ? ".xy()" : ".xy"); + farg_str += ", "; + farg_str += to_expression(args.dref); + farg_str += ", "; + farg_str += to_enclosed_expression(args.coord) + ".z)"; + } + else + SPIRV_CROSS_THROW("Invalid type for textureProj with shadow."); + } + else + { + // Create a composite which merges coord/dref into a single vector. + auto type = expression_type(args.coord); + type.vecsize = args.coord_components + 1; + farg_str += ", "; + farg_str += type_to_glsl_constructor(type); + farg_str += "("; + farg_str += coord_expr; + farg_str += ", "; + farg_str += to_expression(args.dref); + farg_str += ")"; + } + } + else + { + farg_str += ", "; + farg_str += coord_expr; + } + + if (args.grad_x || args.grad_y) + { + forward = forward && should_forward(args.grad_x); + forward = forward && should_forward(args.grad_y); + farg_str += ", "; + farg_str += to_expression(args.grad_x); + farg_str += ", "; + farg_str += to_expression(args.grad_y); + } + + if (args.lod) + { + if (workaround_lod_array_shadow_as_grad) + { + // Implement textureGrad() instead. LOD == 0.0 is implemented as gradient of 0.0. + // Implementing this as plain texture() is not safe on some implementations. + if (imgtype.image.dim == Dim2D) + farg_str += ", vec2(0.0), vec2(0.0)"; + else if (imgtype.image.dim == DimCube) + farg_str += ", vec3(0.0), vec3(0.0)"; + } + else + { + forward = forward && should_forward(args.lod); + farg_str += ", "; + + auto &lod_expr_type = expression_type(args.lod); + + // Lod expression for TexelFetch in GLSL must be int, and only int. 
+ if (args.base.is_fetch && imgtype.image.dim != DimBuffer && !imgtype.image.ms && + lod_expr_type.basetype != SPIRType::Int) + { + farg_str += join("int(", to_expression(args.lod), ")"); + } + else + { + farg_str += to_expression(args.lod); + } + } + } + else if (args.base.is_fetch && imgtype.image.dim != DimBuffer && !imgtype.image.ms) + { + // Lod argument is optional in OpImageFetch, but we require a LOD value, pick 0 as the default. + farg_str += ", 0"; + } + + if (args.coffset) + { + forward = forward && should_forward(args.coffset); + farg_str += ", "; + farg_str += to_expression(args.coffset); + } + else if (args.offset) + { + forward = forward && should_forward(args.offset); + farg_str += ", "; + farg_str += to_expression(args.offset); + } + + if (args.sample) + { + farg_str += ", "; + farg_str += to_expression(args.sample); + } + + if (args.min_lod) + { + farg_str += ", "; + farg_str += to_expression(args.min_lod); + } + + if (args.sparse_texel) + { + // Sparse texel output parameter comes after everything else, except it's before the optional, component/bias arguments. + farg_str += ", "; + farg_str += to_expression(args.sparse_texel); + } + + if (args.bias) + { + forward = forward && should_forward(args.bias); + farg_str += ", "; + farg_str += to_expression(args.bias); + } + + if (args.component && !expression_is_constant_null(args.component)) + { + forward = forward && should_forward(args.component); + farg_str += ", "; + farg_str += to_expression(args.component); + } + + *p_forward = forward; + + return farg_str; +} + +void CompilerGLSL::emit_glsl_op(uint32_t result_type, uint32_t id, uint32_t eop, const uint32_t *args, uint32_t length) +{ + auto op = static_cast(eop); + + if (is_legacy() && is_unsigned_glsl_opcode(op)) + SPIRV_CROSS_THROW("Unsigned integers are not supported on legacy GLSL targets."); + + // If we need to do implicit bitcasts, make sure we do it with the correct type. 
+ uint32_t integer_width = get_integer_width_for_glsl_instruction(op, args, length); + auto int_type = to_signed_basetype(integer_width); + auto uint_type = to_unsigned_basetype(integer_width); + + switch (op) + { + // FP fiddling + case GLSLstd450Round: + if (!is_legacy()) + emit_unary_func_op(result_type, id, args[0], "round"); + else + { + auto op0 = to_enclosed_expression(args[0]); + auto &op0_type = expression_type(args[0]); + auto expr = join("floor(", op0, " + ", type_to_glsl_constructor(op0_type), "(0.5))"); + bool forward = should_forward(args[0]); + emit_op(result_type, id, expr, forward); + inherit_expression_dependencies(id, args[0]); + } + break; + + case GLSLstd450RoundEven: + if (!is_legacy()) + emit_unary_func_op(result_type, id, args[0], "roundEven"); + else if (!options.es) + { + // This extension provides round() with round-to-even semantics. + require_extension_internal("GL_EXT_gpu_shader4"); + emit_unary_func_op(result_type, id, args[0], "round"); + } + else + SPIRV_CROSS_THROW("roundEven supported only in ESSL 300."); + break; + + case GLSLstd450Trunc: + emit_unary_func_op(result_type, id, args[0], "trunc"); + break; + case GLSLstd450SAbs: + emit_unary_func_op_cast(result_type, id, args[0], "abs", int_type, int_type); + break; + case GLSLstd450FAbs: + emit_unary_func_op(result_type, id, args[0], "abs"); + break; + case GLSLstd450SSign: + emit_unary_func_op_cast(result_type, id, args[0], "sign", int_type, int_type); + break; + case GLSLstd450FSign: + emit_unary_func_op(result_type, id, args[0], "sign"); + break; + case GLSLstd450Floor: + emit_unary_func_op(result_type, id, args[0], "floor"); + break; + case GLSLstd450Ceil: + emit_unary_func_op(result_type, id, args[0], "ceil"); + break; + case GLSLstd450Fract: + emit_unary_func_op(result_type, id, args[0], "fract"); + break; + case GLSLstd450Radians: + emit_unary_func_op(result_type, id, args[0], "radians"); + break; + case GLSLstd450Degrees: + emit_unary_func_op(result_type, id, args[0], 
"degrees"); + break; + case GLSLstd450Fma: + if ((!options.es && options.version < 400) || (options.es && options.version < 320)) + { + auto expr = join(to_enclosed_expression(args[0]), " * ", to_enclosed_expression(args[1]), " + ", + to_enclosed_expression(args[2])); + + emit_op(result_type, id, expr, + should_forward(args[0]) && should_forward(args[1]) && should_forward(args[2])); + for (uint32_t i = 0; i < 3; i++) + inherit_expression_dependencies(id, args[i]); + } + else + emit_trinary_func_op(result_type, id, args[0], args[1], args[2], "fma"); + break; + case GLSLstd450Modf: + register_call_out_argument(args[1]); + forced_temporaries.insert(id); + emit_binary_func_op(result_type, id, args[0], args[1], "modf"); + break; + + case GLSLstd450ModfStruct: + { + auto &type = get(result_type); + emit_uninitialized_temporary_expression(result_type, id); + statement(to_expression(id), ".", to_member_name(type, 0), " = ", "modf(", to_expression(args[0]), ", ", + to_expression(id), ".", to_member_name(type, 1), ");"); + break; + } + + // Minmax + case GLSLstd450UMin: + emit_binary_func_op_cast(result_type, id, args[0], args[1], "min", uint_type, false); + break; + + case GLSLstd450SMin: + emit_binary_func_op_cast(result_type, id, args[0], args[1], "min", int_type, false); + break; + + case GLSLstd450FMin: + emit_binary_func_op(result_type, id, args[0], args[1], "min"); + break; + + case GLSLstd450FMax: + emit_binary_func_op(result_type, id, args[0], args[1], "max"); + break; + + case GLSLstd450UMax: + emit_binary_func_op_cast(result_type, id, args[0], args[1], "max", uint_type, false); + break; + + case GLSLstd450SMax: + emit_binary_func_op_cast(result_type, id, args[0], args[1], "max", int_type, false); + break; + + case GLSLstd450FClamp: + emit_trinary_func_op(result_type, id, args[0], args[1], args[2], "clamp"); + break; + + case GLSLstd450UClamp: + emit_trinary_func_op_cast(result_type, id, args[0], args[1], args[2], "clamp", uint_type); + break; + + case 
GLSLstd450SClamp: + emit_trinary_func_op_cast(result_type, id, args[0], args[1], args[2], "clamp", int_type); + break; + + // Trig + case GLSLstd450Sin: + emit_unary_func_op(result_type, id, args[0], "sin"); + break; + case GLSLstd450Cos: + emit_unary_func_op(result_type, id, args[0], "cos"); + break; + case GLSLstd450Tan: + emit_unary_func_op(result_type, id, args[0], "tan"); + break; + case GLSLstd450Asin: + emit_unary_func_op(result_type, id, args[0], "asin"); + break; + case GLSLstd450Acos: + emit_unary_func_op(result_type, id, args[0], "acos"); + break; + case GLSLstd450Atan: + emit_unary_func_op(result_type, id, args[0], "atan"); + break; + case GLSLstd450Sinh: + emit_unary_func_op(result_type, id, args[0], "sinh"); + break; + case GLSLstd450Cosh: + emit_unary_func_op(result_type, id, args[0], "cosh"); + break; + case GLSLstd450Tanh: + emit_unary_func_op(result_type, id, args[0], "tanh"); + break; + case GLSLstd450Asinh: + emit_unary_func_op(result_type, id, args[0], "asinh"); + break; + case GLSLstd450Acosh: + emit_unary_func_op(result_type, id, args[0], "acosh"); + break; + case GLSLstd450Atanh: + emit_unary_func_op(result_type, id, args[0], "atanh"); + break; + case GLSLstd450Atan2: + emit_binary_func_op(result_type, id, args[0], args[1], "atan"); + break; + + // Exponentials + case GLSLstd450Pow: + emit_binary_func_op(result_type, id, args[0], args[1], "pow"); + break; + case GLSLstd450Exp: + emit_unary_func_op(result_type, id, args[0], "exp"); + break; + case GLSLstd450Log: + emit_unary_func_op(result_type, id, args[0], "log"); + break; + case GLSLstd450Exp2: + emit_unary_func_op(result_type, id, args[0], "exp2"); + break; + case GLSLstd450Log2: + emit_unary_func_op(result_type, id, args[0], "log2"); + break; + case GLSLstd450Sqrt: + emit_unary_func_op(result_type, id, args[0], "sqrt"); + break; + case GLSLstd450InverseSqrt: + emit_unary_func_op(result_type, id, args[0], "inversesqrt"); + break; + + // Matrix math + case GLSLstd450Determinant: + 
emit_unary_func_op(result_type, id, args[0], "determinant"); + break; + case GLSLstd450MatrixInverse: + emit_unary_func_op(result_type, id, args[0], "inverse"); + break; + + // Lerping + case GLSLstd450FMix: + case GLSLstd450IMix: + { + emit_mix_op(result_type, id, args[0], args[1], args[2]); + break; + } + case GLSLstd450Step: + emit_binary_func_op(result_type, id, args[0], args[1], "step"); + break; + case GLSLstd450SmoothStep: + emit_trinary_func_op(result_type, id, args[0], args[1], args[2], "smoothstep"); + break; + + // Packing + case GLSLstd450Frexp: + register_call_out_argument(args[1]); + forced_temporaries.insert(id); + emit_binary_func_op(result_type, id, args[0], args[1], "frexp"); + break; + + case GLSLstd450FrexpStruct: + { + auto &type = get(result_type); + emit_uninitialized_temporary_expression(result_type, id); + statement(to_expression(id), ".", to_member_name(type, 0), " = ", "frexp(", to_expression(args[0]), ", ", + to_expression(id), ".", to_member_name(type, 1), ");"); + break; + } + + case GLSLstd450Ldexp: + { + bool forward = should_forward(args[0]) && should_forward(args[1]); + + auto op0 = to_unpacked_expression(args[0]); + auto op1 = to_unpacked_expression(args[1]); + auto &op1_type = expression_type(args[1]); + if (op1_type.basetype != SPIRType::Int) + { + // Need a value cast here. 
+ auto target_type = op1_type; + target_type.basetype = SPIRType::Int; + op1 = join(type_to_glsl_constructor(target_type), "(", op1, ")"); + } + + auto expr = join("ldexp(", op0, ", ", op1, ")"); + + emit_op(result_type, id, expr, forward); + inherit_expression_dependencies(id, args[0]); + inherit_expression_dependencies(id, args[1]); + break; + } + + case GLSLstd450PackSnorm4x8: + emit_unary_func_op(result_type, id, args[0], "packSnorm4x8"); + break; + case GLSLstd450PackUnorm4x8: + emit_unary_func_op(result_type, id, args[0], "packUnorm4x8"); + break; + case GLSLstd450PackSnorm2x16: + emit_unary_func_op(result_type, id, args[0], "packSnorm2x16"); + break; + case GLSLstd450PackUnorm2x16: + emit_unary_func_op(result_type, id, args[0], "packUnorm2x16"); + break; + case GLSLstd450PackHalf2x16: + emit_unary_func_op(result_type, id, args[0], "packHalf2x16"); + break; + case GLSLstd450UnpackSnorm4x8: + emit_unary_func_op(result_type, id, args[0], "unpackSnorm4x8"); + break; + case GLSLstd450UnpackUnorm4x8: + emit_unary_func_op(result_type, id, args[0], "unpackUnorm4x8"); + break; + case GLSLstd450UnpackSnorm2x16: + emit_unary_func_op(result_type, id, args[0], "unpackSnorm2x16"); + break; + case GLSLstd450UnpackUnorm2x16: + emit_unary_func_op(result_type, id, args[0], "unpackUnorm2x16"); + break; + case GLSLstd450UnpackHalf2x16: + emit_unary_func_op(result_type, id, args[0], "unpackHalf2x16"); + break; + + case GLSLstd450PackDouble2x32: + emit_unary_func_op(result_type, id, args[0], "packDouble2x32"); + break; + case GLSLstd450UnpackDouble2x32: + emit_unary_func_op(result_type, id, args[0], "unpackDouble2x32"); + break; + + // Vector math + case GLSLstd450Length: + emit_unary_func_op(result_type, id, args[0], "length"); + break; + case GLSLstd450Distance: + emit_binary_func_op(result_type, id, args[0], args[1], "distance"); + break; + case GLSLstd450Cross: + emit_binary_func_op(result_type, id, args[0], args[1], "cross"); + break; + case GLSLstd450Normalize: + 
emit_unary_func_op(result_type, id, args[0], "normalize"); + break; + case GLSLstd450FaceForward: + emit_trinary_func_op(result_type, id, args[0], args[1], args[2], "faceforward"); + break; + case GLSLstd450Reflect: + emit_binary_func_op(result_type, id, args[0], args[1], "reflect"); + break; + case GLSLstd450Refract: + emit_trinary_func_op(result_type, id, args[0], args[1], args[2], "refract"); + break; + + // Bit-fiddling + case GLSLstd450FindILsb: + // findLSB always returns int. + emit_unary_func_op_cast(result_type, id, args[0], "findLSB", expression_type(args[0]).basetype, int_type); + break; + + case GLSLstd450FindSMsb: + emit_unary_func_op_cast(result_type, id, args[0], "findMSB", int_type, int_type); + break; + + case GLSLstd450FindUMsb: + emit_unary_func_op_cast(result_type, id, args[0], "findMSB", uint_type, + int_type); // findMSB always returns int. + break; + + // Multisampled varying + case GLSLstd450InterpolateAtCentroid: + emit_unary_func_op(result_type, id, args[0], "interpolateAtCentroid"); + break; + case GLSLstd450InterpolateAtSample: + emit_binary_func_op(result_type, id, args[0], args[1], "interpolateAtSample"); + break; + case GLSLstd450InterpolateAtOffset: + emit_binary_func_op(result_type, id, args[0], args[1], "interpolateAtOffset"); + break; + + case GLSLstd450NMin: + case GLSLstd450NMax: + { + emit_nminmax_op(result_type, id, args[0], args[1], op); + break; + } + + case GLSLstd450NClamp: + { + // Make sure we have a unique ID here to avoid aliasing the extra sub-expressions between clamp and NMin sub-op. + // IDs cannot exceed 24 bits, so we can make use of the higher bits for some unique flags. + uint32_t &max_id = extra_sub_expressions[id | 0x80000000u]; + if (!max_id) + max_id = ir.increase_bound_by(1); + + // Inherit precision qualifiers. 
+ ir.meta[max_id] = ir.meta[id]; + + emit_nminmax_op(result_type, max_id, args[0], args[1], GLSLstd450NMax); + emit_nminmax_op(result_type, id, max_id, args[2], GLSLstd450NMin); + break; + } + + default: + statement("// unimplemented GLSL op ", eop); + break; + } +} + +void CompilerGLSL::emit_nminmax_op(uint32_t result_type, uint32_t id, uint32_t op0, uint32_t op1, GLSLstd450 op) +{ + // Need to emulate this call. + uint32_t &ids = extra_sub_expressions[id]; + if (!ids) + { + ids = ir.increase_bound_by(5); + auto btype = get(result_type); + btype.basetype = SPIRType::Boolean; + set(ids, btype); + } + + uint32_t btype_id = ids + 0; + uint32_t left_nan_id = ids + 1; + uint32_t right_nan_id = ids + 2; + uint32_t tmp_id = ids + 3; + uint32_t mixed_first_id = ids + 4; + + // Inherit precision qualifiers. + ir.meta[tmp_id] = ir.meta[id]; + ir.meta[mixed_first_id] = ir.meta[id]; + + emit_unary_func_op(btype_id, left_nan_id, op0, "isnan"); + emit_unary_func_op(btype_id, right_nan_id, op1, "isnan"); + emit_binary_func_op(result_type, tmp_id, op0, op1, op == GLSLstd450NMin ? 
"min" : "max"); + emit_mix_op(result_type, mixed_first_id, tmp_id, op1, left_nan_id); + emit_mix_op(result_type, id, mixed_first_id, op0, right_nan_id); +} + +void CompilerGLSL::emit_spv_amd_shader_ballot_op(uint32_t result_type, uint32_t id, uint32_t eop, const uint32_t *args, + uint32_t) +{ + require_extension_internal("GL_AMD_shader_ballot"); + + enum AMDShaderBallot + { + SwizzleInvocationsAMD = 1, + SwizzleInvocationsMaskedAMD = 2, + WriteInvocationAMD = 3, + MbcntAMD = 4 + }; + + auto op = static_cast(eop); + + switch (op) + { + case SwizzleInvocationsAMD: + emit_binary_func_op(result_type, id, args[0], args[1], "swizzleInvocationsAMD"); + register_control_dependent_expression(id); + break; + + case SwizzleInvocationsMaskedAMD: + emit_binary_func_op(result_type, id, args[0], args[1], "swizzleInvocationsMaskedAMD"); + register_control_dependent_expression(id); + break; + + case WriteInvocationAMD: + emit_trinary_func_op(result_type, id, args[0], args[1], args[2], "writeInvocationAMD"); + register_control_dependent_expression(id); + break; + + case MbcntAMD: + emit_unary_func_op(result_type, id, args[0], "mbcntAMD"); + register_control_dependent_expression(id); + break; + + default: + statement("// unimplemented SPV AMD shader ballot op ", eop); + break; + } +} + +void CompilerGLSL::emit_spv_amd_shader_explicit_vertex_parameter_op(uint32_t result_type, uint32_t id, uint32_t eop, + const uint32_t *args, uint32_t) +{ + require_extension_internal("GL_AMD_shader_explicit_vertex_parameter"); + + enum AMDShaderExplicitVertexParameter + { + InterpolateAtVertexAMD = 1 + }; + + auto op = static_cast(eop); + + switch (op) + { + case InterpolateAtVertexAMD: + emit_binary_func_op(result_type, id, args[0], args[1], "interpolateAtVertexAMD"); + break; + + default: + statement("// unimplemented SPV AMD shader explicit vertex parameter op ", eop); + break; + } +} + +void CompilerGLSL::emit_spv_amd_shader_trinary_minmax_op(uint32_t result_type, uint32_t id, uint32_t eop, + 
const uint32_t *args, uint32_t) +{ + require_extension_internal("GL_AMD_shader_trinary_minmax"); + + enum AMDShaderTrinaryMinMax + { + FMin3AMD = 1, + UMin3AMD = 2, + SMin3AMD = 3, + FMax3AMD = 4, + UMax3AMD = 5, + SMax3AMD = 6, + FMid3AMD = 7, + UMid3AMD = 8, + SMid3AMD = 9 + }; + + auto op = static_cast(eop); + + switch (op) + { + case FMin3AMD: + case UMin3AMD: + case SMin3AMD: + emit_trinary_func_op(result_type, id, args[0], args[1], args[2], "min3"); + break; + + case FMax3AMD: + case UMax3AMD: + case SMax3AMD: + emit_trinary_func_op(result_type, id, args[0], args[1], args[2], "max3"); + break; + + case FMid3AMD: + case UMid3AMD: + case SMid3AMD: + emit_trinary_func_op(result_type, id, args[0], args[1], args[2], "mid3"); + break; + + default: + statement("// unimplemented SPV AMD shader trinary minmax op ", eop); + break; + } +} + +void CompilerGLSL::emit_spv_amd_gcn_shader_op(uint32_t result_type, uint32_t id, uint32_t eop, const uint32_t *args, + uint32_t) +{ + require_extension_internal("GL_AMD_gcn_shader"); + + enum AMDGCNShader + { + CubeFaceIndexAMD = 1, + CubeFaceCoordAMD = 2, + TimeAMD = 3 + }; + + auto op = static_cast(eop); + + switch (op) + { + case CubeFaceIndexAMD: + emit_unary_func_op(result_type, id, args[0], "cubeFaceIndexAMD"); + break; + case CubeFaceCoordAMD: + emit_unary_func_op(result_type, id, args[0], "cubeFaceCoordAMD"); + break; + case TimeAMD: + { + string expr = "timeAMD()"; + emit_op(result_type, id, expr, true); + register_control_dependent_expression(id); + break; + } + + default: + statement("// unimplemented SPV AMD gcn shader op ", eop); + break; + } +} + +void CompilerGLSL::emit_subgroup_op(const Instruction &i) +{ + const uint32_t *ops = stream(i); + auto op = static_cast(i.op); + + if (!options.vulkan_semantics && !is_supported_subgroup_op_in_opengl(op)) + SPIRV_CROSS_THROW("This subgroup operation is only supported in Vulkan semantics."); + + // If we need to do implicit bitcasts, make sure we do it with the correct type. 
+ uint32_t integer_width = get_integer_width_for_instruction(i); + auto int_type = to_signed_basetype(integer_width); + auto uint_type = to_unsigned_basetype(integer_width); + + switch (op) + { + case OpGroupNonUniformElect: + request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupElect); + break; + + case OpGroupNonUniformBallotBitCount: + { + const GroupOperation operation = static_cast(ops[3]); + if (operation == GroupOperationReduce) + request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupBallotBitCount); + else if (operation == GroupOperationInclusiveScan || operation == GroupOperationExclusiveScan) + request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupInverseBallot_InclBitCount_ExclBitCout); + } + break; + + case OpGroupNonUniformBallotBitExtract: + request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupBallotBitExtract); + break; + + case OpGroupNonUniformInverseBallot: + request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupInverseBallot_InclBitCount_ExclBitCout); + break; + + case OpGroupNonUniformBallot: + request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupBallot); + break; + + case OpGroupNonUniformBallotFindLSB: + case OpGroupNonUniformBallotFindMSB: + request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupBallotFindLSB_MSB); + break; + + case OpGroupNonUniformBroadcast: + case OpGroupNonUniformBroadcastFirst: + request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupBrodcast_First); + break; + + case OpGroupNonUniformShuffle: + case OpGroupNonUniformShuffleXor: + require_extension_internal("GL_KHR_shader_subgroup_shuffle"); + break; + + case OpGroupNonUniformShuffleUp: + case OpGroupNonUniformShuffleDown: + require_extension_internal("GL_KHR_shader_subgroup_shuffle_relative"); + break; + + case OpGroupNonUniformAll: + case OpGroupNonUniformAny: + case OpGroupNonUniformAllEqual: + { + const SPIRType &type = expression_type(ops[3]); + if (type.basetype == SPIRType::BaseType::Boolean && 
type.vecsize == 1u) + request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupAll_Any_AllEqualBool); + else + request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupAllEqualT); + } + break; + + case OpGroupNonUniformFAdd: + case OpGroupNonUniformFMul: + case OpGroupNonUniformFMin: + case OpGroupNonUniformFMax: + case OpGroupNonUniformIAdd: + case OpGroupNonUniformIMul: + case OpGroupNonUniformSMin: + case OpGroupNonUniformSMax: + case OpGroupNonUniformUMin: + case OpGroupNonUniformUMax: + case OpGroupNonUniformBitwiseAnd: + case OpGroupNonUniformBitwiseOr: + case OpGroupNonUniformBitwiseXor: + { + auto operation = static_cast(ops[3]); + if (operation == GroupOperationClusteredReduce) + { + require_extension_internal("GL_KHR_shader_subgroup_clustered"); + } + else if (operation == GroupOperationExclusiveScan || operation == GroupOperationInclusiveScan || + operation == GroupOperationReduce) + { + require_extension_internal("GL_KHR_shader_subgroup_arithmetic"); + } + else + SPIRV_CROSS_THROW("Invalid group operation."); + break; + } + + case OpGroupNonUniformQuadSwap: + case OpGroupNonUniformQuadBroadcast: + require_extension_internal("GL_KHR_shader_subgroup_quad"); + break; + + default: + SPIRV_CROSS_THROW("Invalid opcode for subgroup."); + } + + uint32_t result_type = ops[0]; + uint32_t id = ops[1]; + + auto scope = static_cast(evaluate_constant_u32(ops[2])); + if (scope != ScopeSubgroup) + SPIRV_CROSS_THROW("Only subgroup scope is supported."); + + switch (op) + { + case OpGroupNonUniformElect: + emit_op(result_type, id, "subgroupElect()", true); + break; + + case OpGroupNonUniformBroadcast: + emit_binary_func_op(result_type, id, ops[3], ops[4], "subgroupBroadcast"); + break; + + case OpGroupNonUniformBroadcastFirst: + emit_unary_func_op(result_type, id, ops[3], "subgroupBroadcastFirst"); + break; + + case OpGroupNonUniformBallot: + emit_unary_func_op(result_type, id, ops[3], "subgroupBallot"); + break; + + case OpGroupNonUniformInverseBallot: + 
emit_unary_func_op(result_type, id, ops[3], "subgroupInverseBallot"); + break; + + case OpGroupNonUniformBallotBitExtract: + emit_binary_func_op(result_type, id, ops[3], ops[4], "subgroupBallotBitExtract"); + break; + + case OpGroupNonUniformBallotFindLSB: + emit_unary_func_op(result_type, id, ops[3], "subgroupBallotFindLSB"); + break; + + case OpGroupNonUniformBallotFindMSB: + emit_unary_func_op(result_type, id, ops[3], "subgroupBallotFindMSB"); + break; + + case OpGroupNonUniformBallotBitCount: + { + auto operation = static_cast(ops[3]); + if (operation == GroupOperationReduce) + emit_unary_func_op(result_type, id, ops[4], "subgroupBallotBitCount"); + else if (operation == GroupOperationInclusiveScan) + emit_unary_func_op(result_type, id, ops[4], "subgroupBallotInclusiveBitCount"); + else if (operation == GroupOperationExclusiveScan) + emit_unary_func_op(result_type, id, ops[4], "subgroupBallotExclusiveBitCount"); + else + SPIRV_CROSS_THROW("Invalid BitCount operation."); + break; + } + + case OpGroupNonUniformShuffle: + emit_binary_func_op(result_type, id, ops[3], ops[4], "subgroupShuffle"); + break; + + case OpGroupNonUniformShuffleXor: + emit_binary_func_op(result_type, id, ops[3], ops[4], "subgroupShuffleXor"); + break; + + case OpGroupNonUniformShuffleUp: + emit_binary_func_op(result_type, id, ops[3], ops[4], "subgroupShuffleUp"); + break; + + case OpGroupNonUniformShuffleDown: + emit_binary_func_op(result_type, id, ops[3], ops[4], "subgroupShuffleDown"); + break; + + case OpGroupNonUniformAll: + emit_unary_func_op(result_type, id, ops[3], "subgroupAll"); + break; + + case OpGroupNonUniformAny: + emit_unary_func_op(result_type, id, ops[3], "subgroupAny"); + break; + + case OpGroupNonUniformAllEqual: + emit_unary_func_op(result_type, id, ops[3], "subgroupAllEqual"); + break; + + // clang-format off +#define GLSL_GROUP_OP(op, glsl_op) \ +case OpGroupNonUniform##op: \ + { \ + auto operation = static_cast(ops[3]); \ + if (operation == GroupOperationReduce) \ + 
emit_unary_func_op(result_type, id, ops[4], "subgroup" #glsl_op); \ + else if (operation == GroupOperationInclusiveScan) \ + emit_unary_func_op(result_type, id, ops[4], "subgroupInclusive" #glsl_op); \ + else if (operation == GroupOperationExclusiveScan) \ + emit_unary_func_op(result_type, id, ops[4], "subgroupExclusive" #glsl_op); \ + else if (operation == GroupOperationClusteredReduce) \ + emit_binary_func_op(result_type, id, ops[4], ops[5], "subgroupClustered" #glsl_op); \ + else \ + SPIRV_CROSS_THROW("Invalid group operation."); \ + break; \ + } + +#define GLSL_GROUP_OP_CAST(op, glsl_op, type) \ +case OpGroupNonUniform##op: \ + { \ + auto operation = static_cast(ops[3]); \ + if (operation == GroupOperationReduce) \ + emit_unary_func_op_cast(result_type, id, ops[4], "subgroup" #glsl_op, type, type); \ + else if (operation == GroupOperationInclusiveScan) \ + emit_unary_func_op_cast(result_type, id, ops[4], "subgroupInclusive" #glsl_op, type, type); \ + else if (operation == GroupOperationExclusiveScan) \ + emit_unary_func_op_cast(result_type, id, ops[4], "subgroupExclusive" #glsl_op, type, type); \ + else if (operation == GroupOperationClusteredReduce) \ + emit_binary_func_op_cast_clustered(result_type, id, ops[4], ops[5], "subgroupClustered" #glsl_op, type); \ + else \ + SPIRV_CROSS_THROW("Invalid group operation."); \ + break; \ + } + + GLSL_GROUP_OP(FAdd, Add) + GLSL_GROUP_OP(FMul, Mul) + GLSL_GROUP_OP(FMin, Min) + GLSL_GROUP_OP(FMax, Max) + GLSL_GROUP_OP(IAdd, Add) + GLSL_GROUP_OP(IMul, Mul) + GLSL_GROUP_OP_CAST(SMin, Min, int_type) + GLSL_GROUP_OP_CAST(SMax, Max, int_type) + GLSL_GROUP_OP_CAST(UMin, Min, uint_type) + GLSL_GROUP_OP_CAST(UMax, Max, uint_type) + GLSL_GROUP_OP(BitwiseAnd, And) + GLSL_GROUP_OP(BitwiseOr, Or) + GLSL_GROUP_OP(BitwiseXor, Xor) +#undef GLSL_GROUP_OP +#undef GLSL_GROUP_OP_CAST + // clang-format on + + case OpGroupNonUniformQuadSwap: + { + uint32_t direction = evaluate_constant_u32(ops[4]); + if (direction == 0) + 
emit_unary_func_op(result_type, id, ops[3], "subgroupQuadSwapHorizontal"); + else if (direction == 1) + emit_unary_func_op(result_type, id, ops[3], "subgroupQuadSwapVertical"); + else if (direction == 2) + emit_unary_func_op(result_type, id, ops[3], "subgroupQuadSwapDiagonal"); + else + SPIRV_CROSS_THROW("Invalid quad swap direction."); + break; + } + + case OpGroupNonUniformQuadBroadcast: + { + emit_binary_func_op(result_type, id, ops[3], ops[4], "subgroupQuadBroadcast"); + break; + } + + default: + SPIRV_CROSS_THROW("Invalid opcode for subgroup."); + } + + register_control_dependent_expression(id); +} + +string CompilerGLSL::bitcast_glsl_op(const SPIRType &out_type, const SPIRType &in_type) +{ + // OpBitcast can deal with pointers. + if (out_type.pointer || in_type.pointer) + return type_to_glsl(out_type); + + if (out_type.basetype == in_type.basetype) + return ""; + + assert(out_type.basetype != SPIRType::Boolean); + assert(in_type.basetype != SPIRType::Boolean); + + bool integral_cast = type_is_integral(out_type) && type_is_integral(in_type); + bool same_size_cast = out_type.width == in_type.width; + + // Trivial bitcast case, casts between integers. + if (integral_cast && same_size_cast) + return type_to_glsl(out_type); + + // Catch-all 8-bit arithmetic casts (GL_EXT_shader_explicit_arithmetic_types). + if (out_type.width == 8 && in_type.width >= 16 && integral_cast && in_type.vecsize == 1) + return "unpack8"; + else if (in_type.width == 8 && out_type.width == 16 && integral_cast && out_type.vecsize == 1) + return "pack16"; + else if (in_type.width == 8 && out_type.width == 32 && integral_cast && out_type.vecsize == 1) + return "pack32"; + + // Floating <-> Integer special casts. Just have to enumerate all cases. :( + // 16-bit, 32-bit and 64-bit floats. 
+ if (out_type.basetype == SPIRType::UInt && in_type.basetype == SPIRType::Float) + { + if (is_legacy_es()) + SPIRV_CROSS_THROW("Float -> Uint bitcast not supported on legacy ESSL."); + else if (!options.es && options.version < 330) + require_extension_internal("GL_ARB_shader_bit_encoding"); + return "floatBitsToUint"; + } + else if (out_type.basetype == SPIRType::Int && in_type.basetype == SPIRType::Float) + { + if (is_legacy_es()) + SPIRV_CROSS_THROW("Float -> Int bitcast not supported on legacy ESSL."); + else if (!options.es && options.version < 330) + require_extension_internal("GL_ARB_shader_bit_encoding"); + return "floatBitsToInt"; + } + else if (out_type.basetype == SPIRType::Float && in_type.basetype == SPIRType::UInt) + { + if (is_legacy_es()) + SPIRV_CROSS_THROW("Uint -> Float bitcast not supported on legacy ESSL."); + else if (!options.es && options.version < 330) + require_extension_internal("GL_ARB_shader_bit_encoding"); + return "uintBitsToFloat"; + } + else if (out_type.basetype == SPIRType::Float && in_type.basetype == SPIRType::Int) + { + if (is_legacy_es()) + SPIRV_CROSS_THROW("Int -> Float bitcast not supported on legacy ESSL."); + else if (!options.es && options.version < 330) + require_extension_internal("GL_ARB_shader_bit_encoding"); + return "intBitsToFloat"; + } + + else if (out_type.basetype == SPIRType::Int64 && in_type.basetype == SPIRType::Double) + return "doubleBitsToInt64"; + else if (out_type.basetype == SPIRType::UInt64 && in_type.basetype == SPIRType::Double) + return "doubleBitsToUint64"; + else if (out_type.basetype == SPIRType::Double && in_type.basetype == SPIRType::Int64) + return "int64BitsToDouble"; + else if (out_type.basetype == SPIRType::Double && in_type.basetype == SPIRType::UInt64) + return "uint64BitsToDouble"; + else if (out_type.basetype == SPIRType::Short && in_type.basetype == SPIRType::Half) + return "float16BitsToInt16"; + else if (out_type.basetype == SPIRType::UShort && in_type.basetype == SPIRType::Half) + 
return "float16BitsToUint16"; + else if (out_type.basetype == SPIRType::Half && in_type.basetype == SPIRType::Short) + return "int16BitsToFloat16"; + else if (out_type.basetype == SPIRType::Half && in_type.basetype == SPIRType::UShort) + return "uint16BitsToFloat16"; + + // And finally, some even more special purpose casts. + if (out_type.basetype == SPIRType::UInt64 && in_type.basetype == SPIRType::UInt && in_type.vecsize == 2) + return "packUint2x32"; + else if (out_type.basetype == SPIRType::UInt && in_type.basetype == SPIRType::UInt64 && out_type.vecsize == 2) + return "unpackUint2x32"; + else if (out_type.basetype == SPIRType::Half && in_type.basetype == SPIRType::UInt && in_type.vecsize == 1) + return "unpackFloat2x16"; + else if (out_type.basetype == SPIRType::UInt && in_type.basetype == SPIRType::Half && in_type.vecsize == 2) + return "packFloat2x16"; + else if (out_type.basetype == SPIRType::Int && in_type.basetype == SPIRType::Short && in_type.vecsize == 2) + return "packInt2x16"; + else if (out_type.basetype == SPIRType::Short && in_type.basetype == SPIRType::Int && in_type.vecsize == 1) + return "unpackInt2x16"; + else if (out_type.basetype == SPIRType::UInt && in_type.basetype == SPIRType::UShort && in_type.vecsize == 2) + return "packUint2x16"; + else if (out_type.basetype == SPIRType::UShort && in_type.basetype == SPIRType::UInt && in_type.vecsize == 1) + return "unpackUint2x16"; + else if (out_type.basetype == SPIRType::Int64 && in_type.basetype == SPIRType::Short && in_type.vecsize == 4) + return "packInt4x16"; + else if (out_type.basetype == SPIRType::Short && in_type.basetype == SPIRType::Int64 && in_type.vecsize == 1) + return "unpackInt4x16"; + else if (out_type.basetype == SPIRType::UInt64 && in_type.basetype == SPIRType::UShort && in_type.vecsize == 4) + return "packUint4x16"; + else if (out_type.basetype == SPIRType::UShort && in_type.basetype == SPIRType::UInt64 && in_type.vecsize == 1) + return "unpackUint4x16"; + + return ""; +} + +string 
CompilerGLSL::bitcast_glsl(const SPIRType &result_type, uint32_t argument) +{ + auto op = bitcast_glsl_op(result_type, expression_type(argument)); + if (op.empty()) + return to_enclosed_unpacked_expression(argument); + else + return join(op, "(", to_unpacked_expression(argument), ")"); +} + +std::string CompilerGLSL::bitcast_expression(SPIRType::BaseType target_type, uint32_t arg) +{ + auto expr = to_expression(arg); + auto &src_type = expression_type(arg); + if (src_type.basetype != target_type) + { + auto target = src_type; + target.basetype = target_type; + expr = join(bitcast_glsl_op(target, src_type), "(", expr, ")"); + } + + return expr; +} + +std::string CompilerGLSL::bitcast_expression(const SPIRType &target_type, SPIRType::BaseType expr_type, + const std::string &expr) +{ + if (target_type.basetype == expr_type) + return expr; + + auto src_type = target_type; + src_type.basetype = expr_type; + return join(bitcast_glsl_op(target_type, src_type), "(", expr, ")"); +} + +string CompilerGLSL::builtin_to_glsl(BuiltIn builtin, StorageClass storage) +{ + switch (builtin) + { + case BuiltInPosition: + return "gl_Position"; + case BuiltInPointSize: + return "gl_PointSize"; + case BuiltInClipDistance: + return "gl_ClipDistance"; + case BuiltInCullDistance: + return "gl_CullDistance"; + case BuiltInVertexId: + if (options.vulkan_semantics) + SPIRV_CROSS_THROW("Cannot implement gl_VertexID in Vulkan GLSL. This shader was created " + "with GL semantics."); + return "gl_VertexID"; + case BuiltInInstanceId: + if (options.vulkan_semantics) + { + auto model = get_entry_point().model; + switch (model) + { + case spv::ExecutionModelIntersectionKHR: + case spv::ExecutionModelAnyHitKHR: + case spv::ExecutionModelClosestHitKHR: + // gl_InstanceID is allowed in these shaders. + break; + + default: + SPIRV_CROSS_THROW("Cannot implement gl_InstanceID in Vulkan GLSL. 
This shader was " + "created with GL semantics."); + } + } + if (!options.es && options.version < 140) + { + require_extension_internal("GL_ARB_draw_instanced"); + } + return "gl_InstanceID"; + case BuiltInVertexIndex: + if (options.vulkan_semantics) + return "gl_VertexIndex"; + else + return "gl_VertexID"; // gl_VertexID already has the base offset applied. + case BuiltInInstanceIndex: + if (options.vulkan_semantics) + return "gl_InstanceIndex"; + + if (!options.es && options.version < 140) + { + require_extension_internal("GL_ARB_draw_instanced"); + } + + if (options.vertex.support_nonzero_base_instance) + { + if (!options.vulkan_semantics) + { + // This is a soft-enable. We will opt-in to using gl_BaseInstanceARB if supported. + require_extension_internal("GL_ARB_shader_draw_parameters"); + } + return "(gl_InstanceID + SPIRV_Cross_BaseInstance)"; // ... but not gl_InstanceID. + } + else + return "gl_InstanceID"; + case BuiltInPrimitiveId: + if (storage == StorageClassInput && get_entry_point().model == ExecutionModelGeometry) + return "gl_PrimitiveIDIn"; + else + return "gl_PrimitiveID"; + case BuiltInInvocationId: + return "gl_InvocationID"; + case BuiltInLayer: + return "gl_Layer"; + case BuiltInViewportIndex: + return "gl_ViewportIndex"; + case BuiltInTessLevelOuter: + return "gl_TessLevelOuter"; + case BuiltInTessLevelInner: + return "gl_TessLevelInner"; + case BuiltInTessCoord: + return "gl_TessCoord"; + case BuiltInFragCoord: + return "gl_FragCoord"; + case BuiltInPointCoord: + return "gl_PointCoord"; + case BuiltInFrontFacing: + return "gl_FrontFacing"; + case BuiltInFragDepth: + return "gl_FragDepth"; + case BuiltInNumWorkgroups: + return "gl_NumWorkGroups"; + case BuiltInWorkgroupSize: + return "gl_WorkGroupSize"; + case BuiltInWorkgroupId: + return "gl_WorkGroupID"; + case BuiltInLocalInvocationId: + return "gl_LocalInvocationID"; + case BuiltInGlobalInvocationId: + return "gl_GlobalInvocationID"; + case BuiltInLocalInvocationIndex: + return 
"gl_LocalInvocationIndex"; + case BuiltInHelperInvocation: + return "gl_HelperInvocation"; + + case BuiltInBaseVertex: + if (options.es) + SPIRV_CROSS_THROW("BaseVertex not supported in ES profile."); + + if (options.vulkan_semantics) + { + if (options.version < 460) + { + require_extension_internal("GL_ARB_shader_draw_parameters"); + return "gl_BaseVertexARB"; + } + return "gl_BaseVertex"; + } + else + { + // On regular GL, this is soft-enabled and we emit ifdefs in code. + require_extension_internal("GL_ARB_shader_draw_parameters"); + return "SPIRV_Cross_BaseVertex"; + } + break; + + case BuiltInBaseInstance: + if (options.es) + SPIRV_CROSS_THROW("BaseInstance not supported in ES profile."); + + if (options.vulkan_semantics) + { + if (options.version < 460) + { + require_extension_internal("GL_ARB_shader_draw_parameters"); + return "gl_BaseInstanceARB"; + } + return "gl_BaseInstance"; + } + else + { + // On regular GL, this is soft-enabled and we emit ifdefs in code. + require_extension_internal("GL_ARB_shader_draw_parameters"); + return "SPIRV_Cross_BaseInstance"; + } + break; + + case BuiltInDrawIndex: + if (options.es) + SPIRV_CROSS_THROW("DrawIndex not supported in ES profile."); + + if (options.vulkan_semantics) + { + if (options.version < 460) + { + require_extension_internal("GL_ARB_shader_draw_parameters"); + return "gl_DrawIDARB"; + } + return "gl_DrawID"; + } + else + { + // On regular GL, this is soft-enabled and we emit ifdefs in code. 
+ require_extension_internal("GL_ARB_shader_draw_parameters"); + return "gl_DrawIDARB"; + } + break; + + case BuiltInSampleId: + if (options.es && options.version < 320) + require_extension_internal("GL_OES_sample_variables"); + if (!options.es && options.version < 400) + SPIRV_CROSS_THROW("gl_SampleID not supported before GLSL 400."); + return "gl_SampleID"; + + case BuiltInSampleMask: + if (options.es && options.version < 320) + require_extension_internal("GL_OES_sample_variables"); + if (!options.es && options.version < 400) + SPIRV_CROSS_THROW("gl_SampleMask/gl_SampleMaskIn not supported before GLSL 400."); + + if (storage == StorageClassInput) + return "gl_SampleMaskIn"; + else + return "gl_SampleMask"; + + case BuiltInSamplePosition: + if (options.es && options.version < 320) + require_extension_internal("GL_OES_sample_variables"); + if (!options.es && options.version < 400) + SPIRV_CROSS_THROW("gl_SamplePosition not supported before GLSL 400."); + return "gl_SamplePosition"; + + case BuiltInViewIndex: + if (options.vulkan_semantics) + { + require_extension_internal("GL_EXT_multiview"); + return "gl_ViewIndex"; + } + else + { + require_extension_internal("GL_OVR_multiview2"); + return "gl_ViewID_OVR"; + } + + case BuiltInNumSubgroups: + request_subgroup_feature(ShaderSubgroupSupportHelper::NumSubgroups); + return "gl_NumSubgroups"; + + case BuiltInSubgroupId: + request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupID); + return "gl_SubgroupID"; + + case BuiltInSubgroupSize: + request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupSize); + return "gl_SubgroupSize"; + + case BuiltInSubgroupLocalInvocationId: + request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupInvocationID); + return "gl_SubgroupInvocationID"; + + case BuiltInSubgroupEqMask: + request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupMask); + return "gl_SubgroupEqMask"; + + case BuiltInSubgroupGeMask: + 
request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupMask); + return "gl_SubgroupGeMask"; + + case BuiltInSubgroupGtMask: + request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupMask); + return "gl_SubgroupGtMask"; + + case BuiltInSubgroupLeMask: + request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupMask); + return "gl_SubgroupLeMask"; + + case BuiltInSubgroupLtMask: + request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupMask); + return "gl_SubgroupLtMask"; + + case BuiltInLaunchIdNV: + return "gl_LaunchIDNV"; + case BuiltInLaunchSizeNV: + return "gl_LaunchSizeNV"; + case BuiltInWorldRayOriginNV: + return "gl_WorldRayOriginNV"; + case BuiltInWorldRayDirectionNV: + return "gl_WorldRayDirectionNV"; + case BuiltInObjectRayOriginNV: + return "gl_ObjectRayOriginNV"; + case BuiltInObjectRayDirectionNV: + return "gl_ObjectRayDirectionNV"; + case BuiltInRayTminNV: + return "gl_RayTminNV"; + case BuiltInRayTmaxNV: + return "gl_RayTmaxNV"; + case BuiltInInstanceCustomIndexNV: + return "gl_InstanceCustomIndexNV"; + case BuiltInObjectToWorldNV: + return "gl_ObjectToWorldNV"; + case BuiltInWorldToObjectNV: + return "gl_WorldToObjectNV"; + case BuiltInHitTNV: + return "gl_HitTNV"; + case BuiltInHitKindNV: + return "gl_HitKindNV"; + case BuiltInIncomingRayFlagsNV: + return "gl_IncomingRayFlagsNV"; + + case BuiltInBaryCoordNV: + { + if (options.es && options.version < 320) + SPIRV_CROSS_THROW("gl_BaryCoordNV requires ESSL 320."); + else if (!options.es && options.version < 450) + SPIRV_CROSS_THROW("gl_BaryCoordNV requires GLSL 450."); + require_extension_internal("GL_NV_fragment_shader_barycentric"); + return "gl_BaryCoordNV"; + } + + case BuiltInBaryCoordNoPerspNV: + { + if (options.es && options.version < 320) + SPIRV_CROSS_THROW("gl_BaryCoordNoPerspNV requires ESSL 320."); + else if (!options.es && options.version < 450) + SPIRV_CROSS_THROW("gl_BaryCoordNoPerspNV requires GLSL 450."); + 
require_extension_internal("GL_NV_fragment_shader_barycentric"); + return "gl_BaryCoordNoPerspNV"; + } + + case BuiltInFragStencilRefEXT: + { + if (!options.es) + { + require_extension_internal("GL_ARB_shader_stencil_export"); + return "gl_FragStencilRefARB"; + } + else + SPIRV_CROSS_THROW("Stencil export not supported in GLES."); + } + + case BuiltInDeviceIndex: + if (!options.vulkan_semantics) + SPIRV_CROSS_THROW("Need Vulkan semantics for device group support."); + require_extension_internal("GL_EXT_device_group"); + return "gl_DeviceIndex"; + + default: + return join("gl_BuiltIn_", convert_to_string(builtin)); + } +} + +const char *CompilerGLSL::index_to_swizzle(uint32_t index) +{ + switch (index) + { + case 0: + return "x"; + case 1: + return "y"; + case 2: + return "z"; + case 3: + return "w"; + default: + SPIRV_CROSS_THROW("Swizzle index out of range"); + } +} + +void CompilerGLSL::access_chain_internal_append_index(std::string &expr, uint32_t /*base*/, const SPIRType *type, + AccessChainFlags flags, bool & /*access_chain_is_arrayed*/, + uint32_t index) +{ + bool index_is_literal = (flags & ACCESS_CHAIN_INDEX_IS_LITERAL_BIT) != 0; + bool register_expression_read = (flags & ACCESS_CHAIN_SKIP_REGISTER_EXPRESSION_READ_BIT) == 0; + + expr += "["; + + // If we are indexing into an array of SSBOs or UBOs, we need to index it with a non-uniform qualifier. 
+ bool nonuniform_index = + has_decoration(index, DecorationNonUniformEXT) && + (has_decoration(type->self, DecorationBlock) || has_decoration(type->self, DecorationBufferBlock)); + if (nonuniform_index) + { + expr += backend.nonuniform_qualifier; + expr += "("; + } + + if (index_is_literal) + expr += convert_to_string(index); + else + expr += to_expression(index, register_expression_read); + + if (nonuniform_index) + expr += ")"; + + expr += "]"; +} + +string CompilerGLSL::access_chain_internal(uint32_t base, const uint32_t *indices, uint32_t count, + AccessChainFlags flags, AccessChainMeta *meta) +{ + string expr; + + bool index_is_literal = (flags & ACCESS_CHAIN_INDEX_IS_LITERAL_BIT) != 0; + bool msb_is_id = (flags & ACCESS_CHAIN_LITERAL_MSB_FORCE_ID) != 0; + bool chain_only = (flags & ACCESS_CHAIN_CHAIN_ONLY_BIT) != 0; + bool ptr_chain = (flags & ACCESS_CHAIN_PTR_CHAIN_BIT) != 0; + bool register_expression_read = (flags & ACCESS_CHAIN_SKIP_REGISTER_EXPRESSION_READ_BIT) == 0; + bool flatten_member_reference = (flags & ACCESS_CHAIN_FLATTEN_ALL_MEMBERS_BIT) != 0; + + if (!chain_only) + { + // We handle transpose explicitly, so don't resolve that here. + auto *e = maybe_get(base); + bool old_transpose = e && e->need_transpose; + if (e) + e->need_transpose = false; + expr = to_enclosed_expression(base, register_expression_read); + if (e) + e->need_transpose = old_transpose; + } + + // Start traversing type hierarchy at the proper non-pointer types, + // but keep type_id referencing the original pointer for use below. + uint32_t type_id = expression_type_id(base); + + if (!backend.native_pointers) + { + if (ptr_chain) + SPIRV_CROSS_THROW("Backend does not support native pointers and does not support OpPtrAccessChain."); + + // Wrapped buffer reference pointer types will need to poke into the internal "value" member before + // continuing the access chain. 
+ if (should_dereference(base)) + { + auto &type = get(type_id); + expr = dereference_expression(type, expr); + } + } + + const auto *type = &get_pointee_type(type_id); + + bool access_chain_is_arrayed = expr.find_first_of('[') != string::npos; + bool row_major_matrix_needs_conversion = is_non_native_row_major_matrix(base); + bool is_packed = has_extended_decoration(base, SPIRVCrossDecorationPhysicalTypePacked); + uint32_t physical_type = get_extended_decoration(base, SPIRVCrossDecorationPhysicalTypeID); + bool is_invariant = has_decoration(base, DecorationInvariant); + bool pending_array_enclose = false; + bool dimension_flatten = false; + + const auto append_index = [&](uint32_t index, bool is_literal) { + AccessChainFlags mod_flags = flags; + if (!is_literal) + mod_flags &= ~ACCESS_CHAIN_INDEX_IS_LITERAL_BIT; + access_chain_internal_append_index(expr, base, type, mod_flags, access_chain_is_arrayed, index); + }; + + for (uint32_t i = 0; i < count; i++) + { + uint32_t index = indices[i]; + + bool is_literal = index_is_literal; + if (is_literal && msb_is_id && (index >> 31u) != 0u) + { + is_literal = false; + index &= 0x7fffffffu; + } + + // Pointer chains + if (ptr_chain && i == 0) + { + // If we are flattening multidimensional arrays, only create opening bracket on first + // array index. + if (options.flatten_multidimensional_arrays) + { + dimension_flatten = type->array.size() >= 1; + pending_array_enclose = dimension_flatten; + if (pending_array_enclose) + expr += "["; + } + + if (options.flatten_multidimensional_arrays && dimension_flatten) + { + // If we are flattening multidimensional arrays, do manual stride computation. 
+ if (is_literal) + expr += convert_to_string(index); + else + expr += to_enclosed_expression(index, register_expression_read); + + for (auto j = uint32_t(type->array.size()); j; j--) + { + expr += " * "; + expr += enclose_expression(to_array_size(*type, j - 1)); + } + + if (type->array.empty()) + pending_array_enclose = false; + else + expr += " + "; + + if (!pending_array_enclose) + expr += "]"; + } + else + { + append_index(index, is_literal); + } + + if (type->basetype == SPIRType::ControlPointArray) + { + type_id = type->parent_type; + type = &get(type_id); + } + + access_chain_is_arrayed = true; + } + // Arrays + else if (!type->array.empty()) + { + // If we are flattening multidimensional arrays, only create opening bracket on first + // array index. + if (options.flatten_multidimensional_arrays && !pending_array_enclose) + { + dimension_flatten = type->array.size() > 1; + pending_array_enclose = dimension_flatten; + if (pending_array_enclose) + expr += "["; + } + + assert(type->parent_type); + + auto *var = maybe_get(base); + if (backend.force_gl_in_out_block && i == 0 && var && is_builtin_variable(*var) && + !has_decoration(type->self, DecorationBlock)) + { + // This deals with scenarios for tesc/geom where arrays of gl_Position[] are declared. + // Normally, these variables live in blocks when compiled from GLSL, + // but HLSL seems to just emit straight arrays here. + // We must pretend this access goes through gl_in/gl_out arrays + // to be able to access certain builtins as arrays. + auto builtin = ir.meta[base].decoration.builtin_type; + switch (builtin) + { + // case BuiltInCullDistance: // These are already arrays, need to figure out rules for these in tess/geom. 
+ // case BuiltInClipDistance: + case BuiltInPosition: + case BuiltInPointSize: + if (var->storage == StorageClassInput) + expr = join("gl_in[", to_expression(index, register_expression_read), "].", expr); + else if (var->storage == StorageClassOutput) + expr = join("gl_out[", to_expression(index, register_expression_read), "].", expr); + else + append_index(index, is_literal); + break; + + default: + append_index(index, is_literal); + break; + } + } + else if (options.flatten_multidimensional_arrays && dimension_flatten) + { + // If we are flattening multidimensional arrays, do manual stride computation. + auto &parent_type = get(type->parent_type); + + if (is_literal) + expr += convert_to_string(index); + else + expr += to_enclosed_expression(index, register_expression_read); + + for (auto j = uint32_t(parent_type.array.size()); j; j--) + { + expr += " * "; + expr += enclose_expression(to_array_size(parent_type, j - 1)); + } + + if (parent_type.array.empty()) + pending_array_enclose = false; + else + expr += " + "; + + if (!pending_array_enclose) + expr += "]"; + } + // Some builtins are arrays in SPIR-V but not in other languages, e.g. gl_SampleMask[] is an array in SPIR-V but not in Metal. + // By throwing away the index, we imply the index was 0, which it must be for gl_SampleMask. + else if (!builtin_translates_to_nonarray(BuiltIn(get_decoration(base, DecorationBuiltIn)))) + { + append_index(index, is_literal); + } + + type_id = type->parent_type; + type = &get(type_id); + + access_chain_is_arrayed = true; + } + // For structs, the index refers to a constant, which indexes into the members. + // We also check if this member is a builtin, since we then replace the entire expression with the builtin one. 
+ else if (type->basetype == SPIRType::Struct) + { + if (!is_literal) + index = evaluate_constant_u32(index); + + if (index >= type->member_types.size()) + SPIRV_CROSS_THROW("Member index is out of bounds!"); + + BuiltIn builtin; + if (is_member_builtin(*type, index, &builtin)) + { + if (access_chain_is_arrayed) + { + expr += "."; + expr += builtin_to_glsl(builtin, type->storage); + } + else + expr = builtin_to_glsl(builtin, type->storage); + } + else + { + // If the member has a qualified name, use it as the entire chain + string qual_mbr_name = get_member_qualified_name(type_id, index); + if (!qual_mbr_name.empty()) + expr = qual_mbr_name; + else if (flatten_member_reference) + expr += join("_", to_member_name(*type, index)); + else + expr += to_member_reference(base, *type, index, ptr_chain); + } + + if (has_member_decoration(type->self, index, DecorationInvariant)) + is_invariant = true; + + is_packed = member_is_packed_physical_type(*type, index); + if (member_is_remapped_physical_type(*type, index)) + physical_type = get_extended_member_decoration(type->self, index, SPIRVCrossDecorationPhysicalTypeID); + else + physical_type = 0; + + row_major_matrix_needs_conversion = member_is_non_native_row_major_matrix(*type, index); + type = &get(type->member_types[index]); + } + // Matrix -> Vector + else if (type->columns > 1) + { + // If we have a row-major matrix here, we need to defer any transpose in case this access chain + // is used to store a column. We can resolve it right here and now if we access a scalar directly, + // by flipping indexing order of the matrix. + + expr += "["; + if (is_literal) + expr += convert_to_string(index); + else + expr += to_expression(index, register_expression_read); + expr += "]"; + + type_id = type->parent_type; + type = &get(type_id); + } + // Vector -> Scalar + else if (type->vecsize > 1) + { + string deferred_index; + if (row_major_matrix_needs_conversion) + { + // Flip indexing order. 
+ auto column_index = expr.find_last_of('['); + if (column_index != string::npos) + { + deferred_index = expr.substr(column_index); + expr.resize(column_index); + } + } + + // Internally, access chain implementation can also be used on composites, + // ignore scalar access workarounds in this case. + StorageClass effective_storage; + if (expression_type(base).pointer) + effective_storage = get_expression_effective_storage_class(base); + else + effective_storage = StorageClassGeneric; + + if (!row_major_matrix_needs_conversion) + { + // On some backends, we might not be able to safely access individual scalars in a vector. + // To work around this, we might have to cast the access chain reference to something which can, + // like a pointer to scalar, which we can then index into. + prepare_access_chain_for_scalar_access(expr, get(type->parent_type), effective_storage, + is_packed); + } + + if (is_literal && !is_packed && !row_major_matrix_needs_conversion) + { + expr += "."; + expr += index_to_swizzle(index); + } + else if (ir.ids[index].get_type() == TypeConstant && !is_packed && !row_major_matrix_needs_conversion) + { + auto &c = get(index); + if (c.specialization) + { + // If the index is a spec constant, we cannot turn extract into a swizzle. + expr += join("[", to_expression(index), "]"); + } + else + { + expr += "."; + expr += index_to_swizzle(c.scalar()); + } + } + else if (is_literal) + { + // For packed vectors, we can only access them as an array, not by swizzle. 
+ expr += join("[", index, "]"); + } + else + { + expr += "["; + expr += to_expression(index, register_expression_read); + expr += "]"; + } + + if (row_major_matrix_needs_conversion) + { + prepare_access_chain_for_scalar_access(expr, get(type->parent_type), effective_storage, + is_packed); + } + + expr += deferred_index; + row_major_matrix_needs_conversion = false; + + is_packed = false; + physical_type = 0; + type_id = type->parent_type; + type = &get(type_id); + } + else if (!backend.allow_truncated_access_chain) + SPIRV_CROSS_THROW("Cannot subdivide a scalar value!"); + } + + if (pending_array_enclose) + { + SPIRV_CROSS_THROW("Flattening of multidimensional arrays were enabled, " + "but the access chain was terminated in the middle of a multidimensional array. " + "This is not supported."); + } + + if (meta) + { + meta->need_transpose = row_major_matrix_needs_conversion; + meta->storage_is_packed = is_packed; + meta->storage_is_invariant = is_invariant; + meta->storage_physical_type = physical_type; + } + + return expr; +} + +void CompilerGLSL::prepare_access_chain_for_scalar_access(std::string &, const SPIRType &, spv::StorageClass, bool &) +{ +} + +string CompilerGLSL::to_flattened_struct_member(const string &basename, const SPIRType &type, uint32_t index) +{ + auto ret = join(basename, "_", to_member_name(type, index)); + ParsedIR::sanitize_underscores(ret); + return ret; +} + +string CompilerGLSL::access_chain(uint32_t base, const uint32_t *indices, uint32_t count, const SPIRType &target_type, + AccessChainMeta *meta, bool ptr_chain) +{ + if (flattened_buffer_blocks.count(base)) + { + uint32_t matrix_stride = 0; + uint32_t array_stride = 0; + bool need_transpose = false; + flattened_access_chain_offset(expression_type(base), indices, count, 0, 16, &need_transpose, &matrix_stride, + &array_stride, ptr_chain); + + if (meta) + { + meta->need_transpose = target_type.columns > 1 && need_transpose; + meta->storage_is_packed = false; + } + + return 
flattened_access_chain(base, indices, count, target_type, 0, matrix_stride, array_stride, + need_transpose); + } + else if (flattened_structs.count(base) && count > 0) + { + AccessChainFlags flags = ACCESS_CHAIN_CHAIN_ONLY_BIT | ACCESS_CHAIN_SKIP_REGISTER_EXPRESSION_READ_BIT; + if (ptr_chain) + flags |= ACCESS_CHAIN_PTR_CHAIN_BIT; + + if (flattened_structs[base]) + { + flags |= ACCESS_CHAIN_FLATTEN_ALL_MEMBERS_BIT; + if (meta) + meta->flattened_struct = target_type.basetype == SPIRType::Struct; + } + + auto chain = access_chain_internal(base, indices, count, flags, nullptr).substr(1); + if (meta) + { + meta->need_transpose = false; + meta->storage_is_packed = false; + } + + auto basename = to_flattened_access_chain_expression(base); + auto ret = join(basename, "_", chain); + ParsedIR::sanitize_underscores(ret); + return ret; + } + else + { + AccessChainFlags flags = ACCESS_CHAIN_SKIP_REGISTER_EXPRESSION_READ_BIT; + if (ptr_chain) + flags |= ACCESS_CHAIN_PTR_CHAIN_BIT; + return access_chain_internal(base, indices, count, flags, meta); + } +} + +string CompilerGLSL::load_flattened_struct(const string &basename, const SPIRType &type) +{ + auto expr = type_to_glsl_constructor(type); + expr += '('; + + for (uint32_t i = 0; i < uint32_t(type.member_types.size()); i++) + { + if (i) + expr += ", "; + + auto &member_type = get(type.member_types[i]); + if (member_type.basetype == SPIRType::Struct) + expr += load_flattened_struct(to_flattened_struct_member(basename, type, i), member_type); + else + expr += to_flattened_struct_member(basename, type, i); + } + expr += ')'; + return expr; +} + +std::string CompilerGLSL::to_flattened_access_chain_expression(uint32_t id) +{ + // Do not use to_expression as that will unflatten access chains. 
+ string basename; + if (const auto *var = maybe_get(id)) + basename = to_name(var->self); + else if (const auto *expr = maybe_get(id)) + basename = expr->expression; + else + basename = to_expression(id); + + return basename; +} + +void CompilerGLSL::store_flattened_struct(const string &basename, uint32_t rhs_id, const SPIRType &type, + const SmallVector &indices) +{ + SmallVector sub_indices = indices; + sub_indices.push_back(0); + + auto *member_type = &type; + for (auto &index : indices) + member_type = &get(member_type->member_types[index]); + + for (uint32_t i = 0; i < uint32_t(member_type->member_types.size()); i++) + { + sub_indices.back() = i; + auto lhs = join(basename, "_", to_member_name(*member_type, i)); + ParsedIR::sanitize_underscores(lhs); + + if (get(member_type->member_types[i]).basetype == SPIRType::Struct) + { + store_flattened_struct(lhs, rhs_id, type, sub_indices); + } + else + { + auto rhs = to_expression(rhs_id) + to_multi_member_reference(type, sub_indices); + statement(lhs, " = ", rhs, ";"); + } + } +} + +void CompilerGLSL::store_flattened_struct(uint32_t lhs_id, uint32_t value) +{ + auto &type = expression_type(lhs_id); + auto basename = to_flattened_access_chain_expression(lhs_id); + store_flattened_struct(basename, value, type, {}); +} + +std::string CompilerGLSL::flattened_access_chain(uint32_t base, const uint32_t *indices, uint32_t count, + const SPIRType &target_type, uint32_t offset, uint32_t matrix_stride, + uint32_t /* array_stride */, bool need_transpose) +{ + if (!target_type.array.empty()) + SPIRV_CROSS_THROW("Access chains that result in an array can not be flattened"); + else if (target_type.basetype == SPIRType::Struct) + return flattened_access_chain_struct(base, indices, count, target_type, offset); + else if (target_type.columns > 1) + return flattened_access_chain_matrix(base, indices, count, target_type, offset, matrix_stride, need_transpose); + else + return flattened_access_chain_vector(base, indices, count, 
target_type, offset, matrix_stride, need_transpose); +} + +std::string CompilerGLSL::flattened_access_chain_struct(uint32_t base, const uint32_t *indices, uint32_t count, + const SPIRType &target_type, uint32_t offset) +{ + std::string expr; + + expr += type_to_glsl_constructor(target_type); + expr += "("; + + for (uint32_t i = 0; i < uint32_t(target_type.member_types.size()); ++i) + { + if (i != 0) + expr += ", "; + + const SPIRType &member_type = get(target_type.member_types[i]); + uint32_t member_offset = type_struct_member_offset(target_type, i); + + // The access chain terminates at the struct, so we need to find matrix strides and row-major information + // ahead of time. + bool need_transpose = false; + uint32_t matrix_stride = 0; + if (member_type.columns > 1) + { + need_transpose = combined_decoration_for_member(target_type, i).get(DecorationRowMajor); + matrix_stride = type_struct_member_matrix_stride(target_type, i); + } + + auto tmp = flattened_access_chain(base, indices, count, member_type, offset + member_offset, matrix_stride, + 0 /* array_stride */, need_transpose); + + // Cannot forward transpositions, so resolve them here. 
+ if (need_transpose) + expr += convert_row_major_matrix(tmp, member_type, 0, false); + else + expr += tmp; + } + + expr += ")"; + + return expr; +} + +std::string CompilerGLSL::flattened_access_chain_matrix(uint32_t base, const uint32_t *indices, uint32_t count, + const SPIRType &target_type, uint32_t offset, + uint32_t matrix_stride, bool need_transpose) +{ + assert(matrix_stride); + SPIRType tmp_type = target_type; + if (need_transpose) + swap(tmp_type.vecsize, tmp_type.columns); + + std::string expr; + + expr += type_to_glsl_constructor(tmp_type); + expr += "("; + + for (uint32_t i = 0; i < tmp_type.columns; i++) + { + if (i != 0) + expr += ", "; + + expr += flattened_access_chain_vector(base, indices, count, tmp_type, offset + i * matrix_stride, matrix_stride, + /* need_transpose= */ false); + } + + expr += ")"; + + return expr; +} + +std::string CompilerGLSL::flattened_access_chain_vector(uint32_t base, const uint32_t *indices, uint32_t count, + const SPIRType &target_type, uint32_t offset, + uint32_t matrix_stride, bool need_transpose) +{ + auto result = flattened_access_chain_offset(expression_type(base), indices, count, offset, 16); + + auto buffer_name = to_name(expression_type(base).self); + + if (need_transpose) + { + std::string expr; + + if (target_type.vecsize > 1) + { + expr += type_to_glsl_constructor(target_type); + expr += "("; + } + + for (uint32_t i = 0; i < target_type.vecsize; ++i) + { + if (i != 0) + expr += ", "; + + uint32_t component_offset = result.second + i * matrix_stride; + + assert(component_offset % (target_type.width / 8) == 0); + uint32_t index = component_offset / (target_type.width / 8); + + expr += buffer_name; + expr += "["; + expr += result.first; // this is a series of N1 * k1 + N2 * k2 + ... 
that is either empty or ends with a + + expr += convert_to_string(index / 4); + expr += "]"; + + expr += vector_swizzle(1, index % 4); + } + + if (target_type.vecsize > 1) + { + expr += ")"; + } + + return expr; + } + else + { + assert(result.second % (target_type.width / 8) == 0); + uint32_t index = result.second / (target_type.width / 8); + + std::string expr; + + expr += buffer_name; + expr += "["; + expr += result.first; // this is a series of N1 * k1 + N2 * k2 + ... that is either empty or ends with a + + expr += convert_to_string(index / 4); + expr += "]"; + + expr += vector_swizzle(target_type.vecsize, index % 4); + + return expr; + } +} + +std::pair CompilerGLSL::flattened_access_chain_offset( + const SPIRType &basetype, const uint32_t *indices, uint32_t count, uint32_t offset, uint32_t word_stride, + bool *need_transpose, uint32_t *out_matrix_stride, uint32_t *out_array_stride, bool ptr_chain) +{ + // Start traversing type hierarchy at the proper non-pointer types. + const auto *type = &get_pointee_type(basetype); + + std::string expr; + + // Inherit matrix information in case we are access chaining a vector which might have come from a row major layout. + bool row_major_matrix_needs_conversion = need_transpose ? *need_transpose : false; + uint32_t matrix_stride = out_matrix_stride ? *out_matrix_stride : 0; + uint32_t array_stride = out_array_stride ? *out_array_stride : 0; + + for (uint32_t i = 0; i < count; i++) + { + uint32_t index = indices[i]; + + // Pointers + if (ptr_chain && i == 0) + { + // Here, the pointer type will be decorated with an array stride. + array_stride = get_decoration(basetype.self, DecorationArrayStride); + if (!array_stride) + SPIRV_CROSS_THROW("SPIR-V does not define ArrayStride for buffer block."); + + auto *constant = maybe_get(index); + if (constant) + { + // Constant array access. + offset += constant->scalar() * array_stride; + } + else + { + // Dynamic array access. 
+ if (array_stride % word_stride) + { + SPIRV_CROSS_THROW("Array stride for dynamic indexing must be divisible by the size " + "of a 4-component vector. " + "Likely culprit here is a float or vec2 array inside a push " + "constant block which is std430. " + "This cannot be flattened. Try using std140 layout instead."); + } + + expr += to_enclosed_expression(index); + expr += " * "; + expr += convert_to_string(array_stride / word_stride); + expr += " + "; + } + } + // Arrays + else if (!type->array.empty()) + { + auto *constant = maybe_get(index); + if (constant) + { + // Constant array access. + offset += constant->scalar() * array_stride; + } + else + { + // Dynamic array access. + if (array_stride % word_stride) + { + SPIRV_CROSS_THROW("Array stride for dynamic indexing must be divisible by the size " + "of a 4-component vector. " + "Likely culprit here is a float or vec2 array inside a push " + "constant block which is std430. " + "This cannot be flattened. Try using std140 layout instead."); + } + + expr += to_enclosed_expression(index, false); + expr += " * "; + expr += convert_to_string(array_stride / word_stride); + expr += " + "; + } + + uint32_t parent_type = type->parent_type; + type = &get(parent_type); + + if (!type->array.empty()) + array_stride = get_decoration(parent_type, DecorationArrayStride); + } + // For structs, the index refers to a constant, which indexes into the members. + // We also check if this member is a builtin, since we then replace the entire expression with the builtin one. 
+ else if (type->basetype == SPIRType::Struct) + { + index = evaluate_constant_u32(index); + + if (index >= type->member_types.size()) + SPIRV_CROSS_THROW("Member index is out of bounds!"); + + offset += type_struct_member_offset(*type, index); + + auto &struct_type = *type; + type = &get(type->member_types[index]); + + if (type->columns > 1) + { + matrix_stride = type_struct_member_matrix_stride(struct_type, index); + row_major_matrix_needs_conversion = + combined_decoration_for_member(struct_type, index).get(DecorationRowMajor); + } + else + row_major_matrix_needs_conversion = false; + + if (!type->array.empty()) + array_stride = type_struct_member_array_stride(struct_type, index); + } + // Matrix -> Vector + else if (type->columns > 1) + { + auto *constant = maybe_get(index); + if (constant) + { + index = evaluate_constant_u32(index); + offset += index * (row_major_matrix_needs_conversion ? (type->width / 8) : matrix_stride); + } + else + { + uint32_t indexing_stride = row_major_matrix_needs_conversion ? (type->width / 8) : matrix_stride; + // Dynamic array access. + if (indexing_stride % word_stride) + { + SPIRV_CROSS_THROW("Matrix stride for dynamic indexing must be divisible by the size of a " + "4-component vector. " + "Likely culprit here is a row-major matrix being accessed dynamically. " + "This cannot be flattened. Try using std140 layout instead."); + } + + expr += to_enclosed_expression(index, false); + expr += " * "; + expr += convert_to_string(indexing_stride / word_stride); + expr += " + "; + } + + type = &get(type->parent_type); + } + // Vector -> Scalar + else if (type->vecsize > 1) + { + auto *constant = maybe_get(index); + if (constant) + { + index = evaluate_constant_u32(index); + offset += index * (row_major_matrix_needs_conversion ? matrix_stride : (type->width / 8)); + } + else + { + uint32_t indexing_stride = row_major_matrix_needs_conversion ? matrix_stride : (type->width / 8); + + // Dynamic array access. 
+ if (indexing_stride % word_stride) + { + SPIRV_CROSS_THROW("Stride for dynamic vector indexing must be divisible by the " + "size of a 4-component vector. " + "This cannot be flattened in legacy targets."); + } + + expr += to_enclosed_expression(index, false); + expr += " * "; + expr += convert_to_string(indexing_stride / word_stride); + expr += " + "; + } + + type = &get(type->parent_type); + } + else + SPIRV_CROSS_THROW("Cannot subdivide a scalar value!"); + } + + if (need_transpose) + *need_transpose = row_major_matrix_needs_conversion; + if (out_matrix_stride) + *out_matrix_stride = matrix_stride; + if (out_array_stride) + *out_array_stride = array_stride; + + return std::make_pair(expr, offset); +} + +bool CompilerGLSL::should_dereference(uint32_t id) +{ + const auto &type = expression_type(id); + // Non-pointer expressions don't need to be dereferenced. + if (!type.pointer) + return false; + + // Handles shouldn't be dereferenced either. + if (!expression_is_lvalue(id)) + return false; + + // If id is a variable but not a phi variable, we should not dereference it. + if (auto *var = maybe_get(id)) + return var->phi_variable; + + // If id is an access chain, we should not dereference it. + if (auto *expr = maybe_get(id)) + return !expr->access_chain; + + // Otherwise, we should dereference this pointer expression. + return true; +} + +bool CompilerGLSL::should_forward(uint32_t id) const +{ + // If id is a variable we will try to forward it regardless of force_temporary check below + // This is important because otherwise we'll get local sampler copies (highp sampler2D foo = bar) that are invalid in OpenGL GLSL + auto *var = maybe_get(id); + if (var && var->forwardable) + return true; + + // For debugging emit temporary variables for all expressions + if (options.force_temporary) + return false; + + // Immutable expression can always be forwarded. 
+ if (is_immutable(id)) + return true; + + return false; +} + +bool CompilerGLSL::should_suppress_usage_tracking(uint32_t id) const +{ + // Used only by opcodes which don't do any real "work", they just swizzle data in some fashion. + return !expression_is_forwarded(id) || expression_suppresses_usage_tracking(id); +} + +void CompilerGLSL::track_expression_read(uint32_t id) +{ + switch (ir.ids[id].get_type()) + { + case TypeExpression: + { + auto &e = get(id); + for (auto implied_read : e.implied_read_expressions) + track_expression_read(implied_read); + break; + } + + case TypeAccessChain: + { + auto &e = get(id); + for (auto implied_read : e.implied_read_expressions) + track_expression_read(implied_read); + break; + } + + default: + break; + } + + // If we try to read a forwarded temporary more than once we will stamp out possibly complex code twice. + // In this case, it's better to just bind the complex expression to the temporary and read that temporary twice. + if (expression_is_forwarded(id) && !expression_suppresses_usage_tracking(id)) + { + auto &v = expression_usage_counts[id]; + v++; + + // If we create an expression outside a loop, + // but access it inside a loop, we're implicitly reading it multiple times. + // If the expression in question is expensive, we should hoist it out to avoid relying on loop-invariant code motion + // working inside the backend compiler. + if (expression_read_implies_multiple_reads(id)) + v++; + + if (v >= 2) + { + //if (v == 2) + // fprintf(stderr, "ID %u was forced to temporary due to more than 1 expression use!\n", id); + + forced_temporaries.insert(id); + // Force a recompile after this pass to avoid forwarding this variable. 
+ force_recompile(); + } + } +} + +bool CompilerGLSL::args_will_forward(uint32_t id, const uint32_t *args, uint32_t num_args, bool pure) +{ + if (forced_temporaries.find(id) != end(forced_temporaries)) + return false; + + for (uint32_t i = 0; i < num_args; i++) + if (!should_forward(args[i])) + return false; + + // We need to forward globals as well. + if (!pure) + { + for (auto global : global_variables) + if (!should_forward(global)) + return false; + for (auto aliased : aliased_variables) + if (!should_forward(aliased)) + return false; + } + + return true; +} + +void CompilerGLSL::register_impure_function_call() +{ + // Impure functions can modify globals and aliased variables, so invalidate them as well. + for (auto global : global_variables) + flush_dependees(get(global)); + for (auto aliased : aliased_variables) + flush_dependees(get(aliased)); +} + +void CompilerGLSL::register_call_out_argument(uint32_t id) +{ + register_write(id); + + auto *var = maybe_get(id); + if (var) + flush_variable_declaration(var->self); +} + +string CompilerGLSL::variable_decl_function_local(SPIRVariable &var) +{ + // These variables are always function local, + // so make sure we emit the variable without storage qualifiers. + // Some backends will inject custom variables locally in a function + // with a storage qualifier which is not function-local. 
+ auto old_storage = var.storage; + var.storage = StorageClassFunction; + auto expr = variable_decl(var); + var.storage = old_storage; + return expr; +} + +void CompilerGLSL::emit_variable_temporary_copies(const SPIRVariable &var) +{ + // Ensure that we declare phi-variable copies even if the original declaration isn't deferred + if (var.allocate_temporary_copy && !flushed_phi_variables.count(var.self)) + { + auto &type = get(var.basetype); + auto &flags = get_decoration_bitset(var.self); + statement(flags_to_qualifiers_glsl(type, flags), variable_decl(type, join("_", var.self, "_copy")), ";"); + flushed_phi_variables.insert(var.self); + } +} + +void CompilerGLSL::flush_variable_declaration(uint32_t id) +{ + // Ensure that we declare phi-variable copies even if the original declaration isn't deferred + auto *var = maybe_get(id); + if (var && var->deferred_declaration) + { + string initializer; + if (options.force_zero_initialized_variables && + (var->storage == StorageClassFunction || var->storage == StorageClassGeneric || + var->storage == StorageClassPrivate) && + !var->initializer && type_can_zero_initialize(get_variable_data_type(*var))) + { + initializer = join(" = ", to_zero_initialized_expression(get_variable_data_type_id(*var))); + } + + statement(variable_decl_function_local(*var), initializer, ";"); + var->deferred_declaration = false; + } + if (var) + { + emit_variable_temporary_copies(*var); + } +} + +bool CompilerGLSL::remove_duplicate_swizzle(string &op) +{ + auto pos = op.find_last_of('.'); + if (pos == string::npos || pos == 0) + return false; + + string final_swiz = op.substr(pos + 1, string::npos); + + if (backend.swizzle_is_function) + { + if (final_swiz.size() < 2) + return false; + + if (final_swiz.substr(final_swiz.size() - 2, string::npos) == "()") + final_swiz.erase(final_swiz.size() - 2, string::npos); + else + return false; + } + + // Check if final swizzle is of form .x, .xy, .xyz, .xyzw or similar. 
+ // If so, and previous swizzle is of same length, + // we can drop the final swizzle altogether. + for (uint32_t i = 0; i < final_swiz.size(); i++) + { + static const char expected[] = { 'x', 'y', 'z', 'w' }; + if (i >= 4 || final_swiz[i] != expected[i]) + return false; + } + + auto prevpos = op.find_last_of('.', pos - 1); + if (prevpos == string::npos) + return false; + + prevpos++; + + // Make sure there are only swizzles here ... + for (auto i = prevpos; i < pos; i++) + { + if (op[i] < 'w' || op[i] > 'z') + { + // If swizzles are foo.xyz() like in C++ backend for example, check for that. + if (backend.swizzle_is_function && i + 2 == pos && op[i] == '(' && op[i + 1] == ')') + break; + return false; + } + } + + // If original swizzle is large enough, just carve out the components we need. + // E.g. foobar.wyx.xy will turn into foobar.wy. + if (pos - prevpos >= final_swiz.size()) + { + op.erase(prevpos + final_swiz.size(), string::npos); + + // Add back the function call ... + if (backend.swizzle_is_function) + op += "()"; + } + return true; +} + +// Optimizes away vector swizzles where we have something like +// vec3 foo; +// foo.xyz <-- swizzle expression does nothing. +// This is a very common pattern after OpCompositeCombine. +bool CompilerGLSL::remove_unity_swizzle(uint32_t base, string &op) +{ + auto pos = op.find_last_of('.'); + if (pos == string::npos || pos == 0) + return false; + + string final_swiz = op.substr(pos + 1, string::npos); + + if (backend.swizzle_is_function) + { + if (final_swiz.size() < 2) + return false; + + if (final_swiz.substr(final_swiz.size() - 2, string::npos) == "()") + final_swiz.erase(final_swiz.size() - 2, string::npos); + else + return false; + } + + // Check if final swizzle is of form .x, .xy, .xyz, .xyzw or similar. + // If so, and previous swizzle is of same length, + // we can drop the final swizzle altogether. 
+ for (uint32_t i = 0; i < final_swiz.size(); i++) + { + static const char expected[] = { 'x', 'y', 'z', 'w' }; + if (i >= 4 || final_swiz[i] != expected[i]) + return false; + } + + auto &type = expression_type(base); + + // Sanity checking ... + assert(type.columns == 1 && type.array.empty()); + + if (type.vecsize == final_swiz.size()) + op.erase(pos, string::npos); + return true; +} + +string CompilerGLSL::build_composite_combiner(uint32_t return_type, const uint32_t *elems, uint32_t length) +{ + ID base = 0; + string op; + string subop; + + // Can only merge swizzles for vectors. + auto &type = get(return_type); + bool can_apply_swizzle_opt = type.basetype != SPIRType::Struct && type.array.empty() && type.columns == 1; + bool swizzle_optimization = false; + + for (uint32_t i = 0; i < length; i++) + { + auto *e = maybe_get(elems[i]); + + // If we're merging another scalar which belongs to the same base + // object, just merge the swizzles to avoid triggering more than 1 expression read as much as possible! + if (can_apply_swizzle_opt && e && e->base_expression && e->base_expression == base) + { + // Only supposed to be used for vector swizzle -> scalar. + assert(!e->expression.empty() && e->expression.front() == '.'); + subop += e->expression.substr(1, string::npos); + swizzle_optimization = true; + } + else + { + // We'll likely end up with duplicated swizzles, e.g. + // foobar.xyz.xyz from patterns like + // OpVectorShuffle + // OpCompositeExtract x 3 + // OpCompositeConstruct 3x + other scalar. + // Just modify op in-place. + if (swizzle_optimization) + { + if (backend.swizzle_is_function) + subop += "()"; + + // Don't attempt to remove unity swizzling if we managed to remove duplicate swizzles. + // The base "foo" might be vec4, while foo.xyz is vec3 (OpVectorShuffle) and looks like a vec3 due to the .xyz tacked on. + // We only want to remove the swizzles if we're certain that the resulting base will be the same vecsize. 
+ // Essentially, we can only remove one set of swizzles, since that's what we have control over ... + // Case 1: + // foo.yxz.xyz: Duplicate swizzle kicks in, giving foo.yxz, we are done. + // foo.yxz was the result of OpVectorShuffle and we don't know the type of foo. + // Case 2: + // foo.xyz: Duplicate swizzle won't kick in. + // If foo is vec3, we can remove xyz, giving just foo. + if (!remove_duplicate_swizzle(subop)) + remove_unity_swizzle(base, subop); + + // Strips away redundant parens if we created them during component extraction. + strip_enclosed_expression(subop); + swizzle_optimization = false; + op += subop; + } + else + op += subop; + + if (i) + op += ", "; + + bool uses_buffer_offset = + type.basetype == SPIRType::Struct && has_member_decoration(type.self, i, DecorationOffset); + subop = to_composite_constructor_expression(elems[i], uses_buffer_offset); + } + + base = e ? e->base_expression : ID(0); + } + + if (swizzle_optimization) + { + if (backend.swizzle_is_function) + subop += "()"; + + if (!remove_duplicate_swizzle(subop)) + remove_unity_swizzle(base, subop); + // Strips away redundant parens if we created them during component extraction. + strip_enclosed_expression(subop); + } + + op += subop; + return op; +} + +bool CompilerGLSL::skip_argument(uint32_t id) const +{ + if (!combined_image_samplers.empty() || !options.vulkan_semantics) + { + auto &type = expression_type(id); + if (type.basetype == SPIRType::Sampler || (type.basetype == SPIRType::Image && type.image.sampled == 1)) + return true; + } + return false; +} + +bool CompilerGLSL::optimize_read_modify_write(const SPIRType &type, const string &lhs, const string &rhs) +{ + // Do this with strings because we have a very clear pattern we can check for and it avoids + // adding lots of special cases to the code emission. + if (rhs.size() < lhs.size() + 3) + return false; + + // Do not optimize matrices. 
They are a bit awkward to reason about in general + // (in which order does operation happen?), and it does not work on MSL anyways. + if (type.vecsize > 1 && type.columns > 1) + return false; + + auto index = rhs.find(lhs); + if (index != 0) + return false; + + // TODO: Shift operators, but it's not important for now. + auto op = rhs.find_first_of("+-/*%|&^", lhs.size() + 1); + if (op != lhs.size() + 1) + return false; + + // Check that the op is followed by space. This excludes && and ||. + if (rhs[op + 1] != ' ') + return false; + + char bop = rhs[op]; + auto expr = rhs.substr(lhs.size() + 3); + // Try to find increments and decrements. Makes it look neater as += 1, -= 1 is fairly rare to see in real code. + // Find some common patterns which are equivalent. + if ((bop == '+' || bop == '-') && (expr == "1" || expr == "uint(1)" || expr == "1u" || expr == "int(1u)")) + statement(lhs, bop, bop, ";"); + else + statement(lhs, " ", bop, "= ", expr, ";"); + return true; +} + +void CompilerGLSL::register_control_dependent_expression(uint32_t expr) +{ + if (forwarded_temporaries.find(expr) == end(forwarded_temporaries)) + return; + + assert(current_emitting_block); + current_emitting_block->invalidate_expressions.push_back(expr); +} + +void CompilerGLSL::emit_block_instructions(SPIRBlock &block) +{ + current_emitting_block = &block; + for (auto &op : block.ops) + emit_instruction(op); + current_emitting_block = nullptr; +} + +void CompilerGLSL::disallow_forwarding_in_expression_chain(const SPIRExpression &expr) +{ + // Allow trivially forwarded expressions like OpLoad or trivial shuffles, + // these will be marked as having suppressed usage tracking. + // Our only concern is to make sure arithmetic operations are done in similar ways. 
+ if (expression_is_forwarded(expr.self) && !expression_suppresses_usage_tracking(expr.self) && + forced_invariant_temporaries.count(expr.self) == 0) + { + forced_temporaries.insert(expr.self); + forced_invariant_temporaries.insert(expr.self); + force_recompile(); + + for (auto &dependent : expr.expression_dependencies) + disallow_forwarding_in_expression_chain(get(dependent)); + } +} + +void CompilerGLSL::handle_store_to_invariant_variable(uint32_t store_id, uint32_t value_id) +{ + // Variables or access chains marked invariant are complicated. We will need to make sure the code-gen leading up to + // this variable is consistent. The failure case for SPIRV-Cross is when an expression is forced to a temporary + // in one translation unit, but not another, e.g. due to multiple use of an expression. + // This causes variance despite the output variable being marked invariant, so the solution here is to force all dependent + // expressions to be temporaries. + // It is uncertain if this is enough to support invariant in all possible cases, but it should be good enough + // for all reasonable uses of invariant. + if (!has_decoration(store_id, DecorationInvariant)) + return; + + auto *expr = maybe_get(value_id); + if (!expr) + return; + + disallow_forwarding_in_expression_chain(*expr); +} + +void CompilerGLSL::emit_store_statement(uint32_t lhs_expression, uint32_t rhs_expression) +{ + auto rhs = to_pointer_expression(rhs_expression); + + // Statements to OpStore may be empty if it is a struct with zero members. Just forward the store to /dev/null. + if (!rhs.empty()) + { + handle_store_to_invariant_variable(lhs_expression, rhs_expression); + + auto lhs = to_dereferenced_expression(lhs_expression); + + // We might need to cast in order to store to a builtin. + cast_to_builtin_store(lhs_expression, rhs, expression_type(rhs_expression)); + + // Tries to optimize assignments like " = op expr". 
+ // While this is purely cosmetic, this is important for legacy ESSL where loop + // variable increments must be in either i++ or i += const-expr. + // Without this, we end up with i = i + 1, which is correct GLSL, but not correct GLES 2.0. + if (!optimize_read_modify_write(expression_type(rhs_expression), lhs, rhs)) + statement(lhs, " = ", rhs, ";"); + register_write(lhs_expression); + } +} + +uint32_t CompilerGLSL::get_integer_width_for_instruction(const Instruction &instr) const +{ + if (instr.length < 3) + return 32; + + auto *ops = stream(instr); + + switch (instr.op) + { + case OpSConvert: + case OpConvertSToF: + case OpUConvert: + case OpConvertUToF: + case OpIEqual: + case OpINotEqual: + case OpSLessThan: + case OpSLessThanEqual: + case OpSGreaterThan: + case OpSGreaterThanEqual: + case OpULessThan: + case OpULessThanEqual: + case OpUGreaterThan: + case OpUGreaterThanEqual: + return expression_type(ops[2]).width; + + default: + { + // We can look at result type which is more robust. + auto *type = maybe_get(ops[0]); + if (type && type_is_integral(*type)) + return type->width; + else + return 32; + } + } +} + +uint32_t CompilerGLSL::get_integer_width_for_glsl_instruction(GLSLstd450 op, const uint32_t *ops, uint32_t length) const +{ + if (length < 1) + return 32; + + switch (op) + { + case GLSLstd450SAbs: + case GLSLstd450SSign: + case GLSLstd450UMin: + case GLSLstd450SMin: + case GLSLstd450UMax: + case GLSLstd450SMax: + case GLSLstd450UClamp: + case GLSLstd450SClamp: + case GLSLstd450FindSMsb: + case GLSLstd450FindUMsb: + return expression_type(ops[0]).width; + + default: + { + // We don't need to care about other opcodes, just return 32. 
+ return 32; + } + } +} + +void CompilerGLSL::emit_instruction(const Instruction &instruction) +{ + auto ops = stream(instruction); + auto opcode = static_cast(instruction.op); + uint32_t length = instruction.length; + +#define GLSL_BOP(op) emit_binary_op(ops[0], ops[1], ops[2], ops[3], #op) +#define GLSL_BOP_CAST(op, type) \ + emit_binary_op_cast(ops[0], ops[1], ops[2], ops[3], #op, type, opcode_is_sign_invariant(opcode)) +#define GLSL_UOP(op) emit_unary_op(ops[0], ops[1], ops[2], #op) +#define GLSL_QFOP(op) emit_quaternary_func_op(ops[0], ops[1], ops[2], ops[3], ops[4], ops[5], #op) +#define GLSL_TFOP(op) emit_trinary_func_op(ops[0], ops[1], ops[2], ops[3], ops[4], #op) +#define GLSL_BFOP(op) emit_binary_func_op(ops[0], ops[1], ops[2], ops[3], #op) +#define GLSL_BFOP_CAST(op, type) \ + emit_binary_func_op_cast(ops[0], ops[1], ops[2], ops[3], #op, type, opcode_is_sign_invariant(opcode)) +#define GLSL_BFOP(op) emit_binary_func_op(ops[0], ops[1], ops[2], ops[3], #op) +#define GLSL_UFOP(op) emit_unary_func_op(ops[0], ops[1], ops[2], #op) + + // If we need to do implicit bitcasts, make sure we do it with the correct type. + uint32_t integer_width = get_integer_width_for_instruction(instruction); + auto int_type = to_signed_basetype(integer_width); + auto uint_type = to_unsigned_basetype(integer_width); + + switch (opcode) + { + // Dealing with memory + case OpLoad: + { + uint32_t result_type = ops[0]; + uint32_t id = ops[1]; + uint32_t ptr = ops[2]; + + flush_variable_declaration(ptr); + + // If we're loading from memory that cannot be changed by the shader, + // just forward the expression directly to avoid needless temporaries. + // If an expression is mutable and forwardable, we speculate that it is immutable. + bool forward = should_forward(ptr) && forced_temporaries.find(id) == end(forced_temporaries); + + // If loading a non-native row-major matrix, mark the expression as need_transpose. 
+ bool need_transpose = false; + bool old_need_transpose = false; + + auto *ptr_expression = maybe_get(ptr); + + if (forward) + { + // If we're forwarding the load, we're also going to forward transpose state, so don't transpose while + // taking the expression. + if (ptr_expression && ptr_expression->need_transpose) + { + old_need_transpose = true; + ptr_expression->need_transpose = false; + need_transpose = true; + } + else if (is_non_native_row_major_matrix(ptr)) + need_transpose = true; + } + + // If we are forwarding this load, + // don't register the read to access chain here, defer that to when we actually use the expression, + // using the add_implied_read_expression mechanism. + string expr; + + bool is_packed = has_extended_decoration(ptr, SPIRVCrossDecorationPhysicalTypePacked); + bool is_remapped = has_extended_decoration(ptr, SPIRVCrossDecorationPhysicalTypeID); + if (forward || (!is_packed && !is_remapped)) + { + // For the simple case, we do not need to deal with repacking. + expr = to_dereferenced_expression(ptr, false); + } + else + { + // If we are not forwarding the expression, we need to unpack and resolve any physical type remapping here before + // storing the expression to a temporary. + expr = to_unpacked_expression(ptr); + } + + auto &type = get(result_type); + auto &expr_type = expression_type(ptr); + + // If the expression has more vector components than the result type, insert + // a swizzle. This shouldn't happen normally on valid SPIR-V, but it might + // happen with e.g. the MSL backend replacing the type of an input variable. + if (expr_type.vecsize > type.vecsize) + expr = enclose_expression(expr + vector_swizzle(type.vecsize, 0)); + + // We might need to cast in order to load from a builtin. + cast_from_builtin_load(ptr, expr, type); + + // We might be trying to load a gl_Position[N], where we should be + // doing float4[](gl_in[i].gl_Position, ...) instead. + // Similar workarounds are required for input arrays in tessellation. 
+ unroll_array_from_complex_load(id, ptr, expr); + + // Shouldn't need to check for ID, but current glslang codegen requires it in some cases + // when loading Image/Sampler descriptors. It does not hurt to check ID as well. + if (has_decoration(id, DecorationNonUniformEXT) || has_decoration(ptr, DecorationNonUniformEXT)) + { + propagate_nonuniform_qualifier(ptr); + convert_non_uniform_expression(type, expr); + } + + if (forward && ptr_expression) + ptr_expression->need_transpose = old_need_transpose; + + bool flattened = ptr_expression && flattened_buffer_blocks.count(ptr_expression->loaded_from) != 0; + + if (backend.needs_row_major_load_workaround && !is_non_native_row_major_matrix(ptr) && !flattened) + rewrite_load_for_wrapped_row_major(expr, result_type, ptr); + + // By default, suppress usage tracking since using same expression multiple times does not imply any extra work. + // However, if we try to load a complex, composite object from a flattened buffer, + // we should avoid emitting the same code over and over and lower the result to a temporary. + bool usage_tracking = flattened && (type.basetype == SPIRType::Struct || (type.columns > 1)); + + SPIRExpression *e = nullptr; + if (!forward && expression_is_non_value_type_array(ptr)) + { + // Complicated load case where we need to make a copy of ptr, but we cannot, because + // it is an array, and our backend does not support arrays as value types. + // Emit the temporary, and copy it explicitly. + e = &emit_uninitialized_temporary_expression(result_type, id); + emit_array_copy(to_expression(id), ptr, StorageClassFunction, get_expression_effective_storage_class(ptr)); + } + else + e = &emit_op(result_type, id, expr, forward, !usage_tracking); + + e->need_transpose = need_transpose; + register_read(id, ptr, forward); + + if (forward) + { + // Pass through whether the result is of a packed type and the physical type ID. 
+ if (has_extended_decoration(ptr, SPIRVCrossDecorationPhysicalTypePacked)) + set_extended_decoration(id, SPIRVCrossDecorationPhysicalTypePacked); + if (has_extended_decoration(ptr, SPIRVCrossDecorationPhysicalTypeID)) + { + set_extended_decoration(id, SPIRVCrossDecorationPhysicalTypeID, + get_extended_decoration(ptr, SPIRVCrossDecorationPhysicalTypeID)); + } + } + else + { + // This might have been set on an earlier compilation iteration, force it to be unset. + unset_extended_decoration(id, SPIRVCrossDecorationPhysicalTypePacked); + unset_extended_decoration(id, SPIRVCrossDecorationPhysicalTypeID); + } + + inherit_expression_dependencies(id, ptr); + if (forward) + add_implied_read_expression(*e, ptr); + break; + } + + case OpInBoundsAccessChain: + case OpAccessChain: + case OpPtrAccessChain: + { + auto *var = maybe_get(ops[2]); + if (var) + flush_variable_declaration(var->self); + + // If the base is immutable, the access chain pointer must also be. + // If an expression is mutable and forwardable, we speculate that it is immutable. + AccessChainMeta meta; + bool ptr_chain = opcode == OpPtrAccessChain; + auto e = access_chain(ops[2], &ops[3], length - 3, get(ops[0]), &meta, ptr_chain); + + auto &expr = set(ops[1], move(e), ops[0], should_forward(ops[2])); + + auto *backing_variable = maybe_get_backing_variable(ops[2]); + expr.loaded_from = backing_variable ? backing_variable->self : ID(ops[2]); + expr.need_transpose = meta.need_transpose; + expr.access_chain = true; + + // Mark the result as being packed. Some platforms handled packed vectors differently than non-packed. 
+ if (meta.storage_is_packed) + set_extended_decoration(ops[1], SPIRVCrossDecorationPhysicalTypePacked); + if (meta.storage_physical_type != 0) + set_extended_decoration(ops[1], SPIRVCrossDecorationPhysicalTypeID, meta.storage_physical_type); + if (meta.storage_is_invariant) + set_decoration(ops[1], DecorationInvariant); + if (meta.flattened_struct) + flattened_structs[ops[1]] = true; + + // If we have some expression dependencies in our access chain, this access chain is technically a forwarded + // temporary which could be subject to invalidation. + // Need to assume we're forwarded while calling inherit_expression_dependencies. + forwarded_temporaries.insert(ops[1]); + // The access chain itself is never forced to a temporary, but its dependencies might. + suppressed_usage_tracking.insert(ops[1]); + + for (uint32_t i = 2; i < length; i++) + { + inherit_expression_dependencies(ops[1], ops[i]); + add_implied_read_expression(expr, ops[i]); + } + + // If we have no dependencies after all, i.e., all indices in the access chain are immutable temporaries, + // we're not forwarded after all. + if (expr.expression_dependencies.empty()) + forwarded_temporaries.erase(ops[1]); + + if (has_decoration(ops[1], DecorationNonUniformEXT)) + propagate_nonuniform_qualifier(ops[1]); + + break; + } + + case OpStore: + { + auto *var = maybe_get(ops[0]); + + if (var && var->statically_assigned) + var->static_expression = ops[1]; + else if (var && var->loop_variable && !var->loop_variable_enable) + var->static_expression = ops[1]; + else if (var && var->remapped_variable && var->static_expression) + { + // Skip the write. + } + else if (flattened_structs.count(ops[0])) + { + store_flattened_struct(ops[0], ops[1]); + register_write(ops[0]); + } + else + { + emit_store_statement(ops[0], ops[1]); + } + + // Storing a pointer results in a variable pointer, so we must conservatively assume + // we can write through it. 
+ if (expression_type(ops[1]).pointer) + register_write(ops[1]); + break; + } + + case OpArrayLength: + { + uint32_t result_type = ops[0]; + uint32_t id = ops[1]; + auto e = access_chain_internal(ops[2], &ops[3], length - 3, ACCESS_CHAIN_INDEX_IS_LITERAL_BIT, nullptr); + set(id, join(type_to_glsl(get(result_type)), "(", e, ".length())"), result_type, + true); + break; + } + + // Function calls + case OpFunctionCall: + { + uint32_t result_type = ops[0]; + uint32_t id = ops[1]; + uint32_t func = ops[2]; + const auto *arg = &ops[3]; + length -= 3; + + auto &callee = get(func); + auto &return_type = get(callee.return_type); + bool pure = function_is_pure(callee); + + bool callee_has_out_variables = false; + bool emit_return_value_as_argument = false; + + // Invalidate out variables passed to functions since they can be OpStore'd to. + for (uint32_t i = 0; i < length; i++) + { + if (callee.arguments[i].write_count) + { + register_call_out_argument(arg[i]); + callee_has_out_variables = true; + } + + flush_variable_declaration(arg[i]); + } + + if (!return_type.array.empty() && !backend.can_return_array) + { + callee_has_out_variables = true; + emit_return_value_as_argument = true; + } + + if (!pure) + register_impure_function_call(); + + string funexpr; + SmallVector arglist; + funexpr += to_name(func) + "("; + + if (emit_return_value_as_argument) + { + statement(type_to_glsl(return_type), " ", to_name(id), type_to_array_glsl(return_type), ";"); + arglist.push_back(to_name(id)); + } + + for (uint32_t i = 0; i < length; i++) + { + // Do not pass in separate images or samplers if we're remapping + // to combined image samplers. + if (skip_argument(arg[i])) + continue; + + arglist.push_back(to_func_call_arg(callee.arguments[i], arg[i])); + } + + for (auto &combined : callee.combined_parameters) + { + auto image_id = combined.global_image ? combined.image_id : VariableID(arg[combined.image_id]); + auto sampler_id = combined.global_sampler ? 
combined.sampler_id : VariableID(arg[combined.sampler_id]); + arglist.push_back(to_combined_image_sampler(image_id, sampler_id)); + } + + append_global_func_args(callee, length, arglist); + + funexpr += merge(arglist); + funexpr += ")"; + + // Check for function call constraints. + check_function_call_constraints(arg, length); + + if (return_type.basetype != SPIRType::Void) + { + // If the function actually writes to an out variable, + // take the conservative route and do not forward. + // The problem is that we might not read the function + // result (and emit the function) before an out variable + // is read (common case when return value is ignored! + // In order to avoid start tracking invalid variables, + // just avoid the forwarding problem altogether. + bool forward = args_will_forward(id, arg, length, pure) && !callee_has_out_variables && pure && + (forced_temporaries.find(id) == end(forced_temporaries)); + + if (emit_return_value_as_argument) + { + statement(funexpr, ";"); + set(id, to_name(id), result_type, true); + } + else + emit_op(result_type, id, funexpr, forward); + + // Function calls are implicit loads from all variables in question. + // Set dependencies for them. + for (uint32_t i = 0; i < length; i++) + register_read(id, arg[i], forward); + + // If we're going to forward the temporary result, + // put dependencies on every variable that must not change. + if (forward) + register_global_read_dependencies(callee, id); + } + else + statement(funexpr, ";"); + + break; + } + + // Composite munging + case OpCompositeConstruct: + { + uint32_t result_type = ops[0]; + uint32_t id = ops[1]; + const auto *const elems = &ops[2]; + length -= 2; + + bool forward = true; + for (uint32_t i = 0; i < length; i++) + forward = forward && should_forward(elems[i]); + + auto &out_type = get(result_type); + auto *in_type = length > 0 ? &expression_type(elems[0]) : nullptr; + + // Only splat if we have vector constructors. 
+ // Arrays and structs must be initialized properly in full. + bool composite = !out_type.array.empty() || out_type.basetype == SPIRType::Struct; + + bool splat = false; + bool swizzle_splat = false; + + if (in_type) + { + splat = in_type->vecsize == 1 && in_type->columns == 1 && !composite && backend.use_constructor_splatting; + swizzle_splat = in_type->vecsize == 1 && in_type->columns == 1 && backend.can_swizzle_scalar; + + if (ir.ids[elems[0]].get_type() == TypeConstant && !type_is_floating_point(*in_type)) + { + // Cannot swizzle literal integers as a special case. + swizzle_splat = false; + } + } + + if (splat || swizzle_splat) + { + uint32_t input = elems[0]; + for (uint32_t i = 0; i < length; i++) + { + if (input != elems[i]) + { + splat = false; + swizzle_splat = false; + } + } + } + + if (out_type.basetype == SPIRType::Struct && !backend.can_declare_struct_inline) + forward = false; + if (!out_type.array.empty() && !backend.can_declare_arrays_inline) + forward = false; + if (type_is_empty(out_type) && !backend.supports_empty_struct) + forward = false; + + string constructor_op; + if (backend.use_initializer_list && composite) + { + bool needs_trailing_tracket = false; + // Only use this path if we are building composites. + // This path cannot be used for arithmetic. + if (backend.use_typed_initializer_list && out_type.basetype == SPIRType::Struct && out_type.array.empty()) + constructor_op += type_to_glsl_constructor(get(result_type)); + else if (backend.use_typed_initializer_list && backend.array_is_value_type && !out_type.array.empty()) + { + // MSL path. Array constructor is baked into type here, do not use _constructor variant. 
+ constructor_op += type_to_glsl_constructor(get(result_type)) + "("; + needs_trailing_tracket = true; + } + constructor_op += "{ "; + + if (type_is_empty(out_type) && !backend.supports_empty_struct) + constructor_op += "0"; + else if (splat) + constructor_op += to_unpacked_expression(elems[0]); + else + constructor_op += build_composite_combiner(result_type, elems, length); + constructor_op += " }"; + if (needs_trailing_tracket) + constructor_op += ")"; + } + else if (swizzle_splat && !composite) + { + constructor_op = remap_swizzle(get(result_type), 1, to_unpacked_expression(elems[0])); + } + else + { + constructor_op = type_to_glsl_constructor(get(result_type)) + "("; + if (type_is_empty(out_type) && !backend.supports_empty_struct) + constructor_op += "0"; + else if (splat) + constructor_op += to_unpacked_expression(elems[0]); + else + constructor_op += build_composite_combiner(result_type, elems, length); + constructor_op += ")"; + } + + if (!constructor_op.empty()) + { + emit_op(result_type, id, constructor_op, forward); + for (uint32_t i = 0; i < length; i++) + inherit_expression_dependencies(id, elems[i]); + } + break; + } + + case OpVectorInsertDynamic: + { + uint32_t result_type = ops[0]; + uint32_t id = ops[1]; + uint32_t vec = ops[2]; + uint32_t comp = ops[3]; + uint32_t index = ops[4]; + + flush_variable_declaration(vec); + + // Make a copy, then use access chain to store the variable. 
+ statement(declare_temporary(result_type, id), to_expression(vec), ";"); + set(id, to_name(id), result_type, true); + auto chain = access_chain_internal(id, &index, 1, 0, nullptr); + statement(chain, " = ", to_unpacked_expression(comp), ";"); + break; + } + + case OpVectorExtractDynamic: + { + uint32_t result_type = ops[0]; + uint32_t id = ops[1]; + + auto expr = access_chain_internal(ops[2], &ops[3], 1, 0, nullptr); + emit_op(result_type, id, expr, should_forward(ops[2])); + inherit_expression_dependencies(id, ops[2]); + inherit_expression_dependencies(id, ops[3]); + break; + } + + case OpCompositeExtract: + { + uint32_t result_type = ops[0]; + uint32_t id = ops[1]; + length -= 3; + + auto &type = get(result_type); + + // We can only split the expression here if our expression is forwarded as a temporary. + bool allow_base_expression = forced_temporaries.find(id) == end(forced_temporaries); + + // Do not allow base expression for struct members. We risk doing "swizzle" optimizations in this case. + auto &composite_type = expression_type(ops[2]); + if (composite_type.basetype == SPIRType::Struct || !composite_type.array.empty()) + allow_base_expression = false; + + // Packed expressions or physical ID mapped expressions cannot be split up. + if (has_extended_decoration(ops[2], SPIRVCrossDecorationPhysicalTypePacked) || + has_extended_decoration(ops[2], SPIRVCrossDecorationPhysicalTypeID)) + allow_base_expression = false; + + // Cannot use base expression for row-major matrix row-extraction since we need to interleave access pattern + // into the base expression. + if (is_non_native_row_major_matrix(ops[2])) + allow_base_expression = false; + + AccessChainMeta meta; + SPIRExpression *e = nullptr; + + // Only apply this optimization if result is scalar. + if (allow_base_expression && should_forward(ops[2]) && type.vecsize == 1 && type.columns == 1 && length == 1) + { + // We want to split the access chain from the base. 
+ // This is so we can later combine different CompositeExtract results + // with CompositeConstruct without emitting code like + // + // vec3 temp = texture(...).xyz + // vec4(temp.x, temp.y, temp.z, 1.0). + // + // when we actually wanted to emit this + // vec4(texture(...).xyz, 1.0). + // + // Including the base will prevent this and would trigger multiple reads + // from expression causing it to be forced to an actual temporary in GLSL. + auto expr = access_chain_internal(ops[2], &ops[3], length, + ACCESS_CHAIN_INDEX_IS_LITERAL_BIT | ACCESS_CHAIN_CHAIN_ONLY_BIT, &meta); + e = &emit_op(result_type, id, expr, true, should_suppress_usage_tracking(ops[2])); + inherit_expression_dependencies(id, ops[2]); + e->base_expression = ops[2]; + } + else + { + auto expr = access_chain_internal(ops[2], &ops[3], length, ACCESS_CHAIN_INDEX_IS_LITERAL_BIT, &meta); + e = &emit_op(result_type, id, expr, should_forward(ops[2]), should_suppress_usage_tracking(ops[2])); + inherit_expression_dependencies(id, ops[2]); + } + + // Pass through some meta information to the loaded expression. + // We can still end up loading a buffer type to a variable, then CompositeExtract from it + // instead of loading everything through an access chain. + e->need_transpose = meta.need_transpose; + if (meta.storage_is_packed) + set_extended_decoration(id, SPIRVCrossDecorationPhysicalTypePacked); + if (meta.storage_physical_type != 0) + set_extended_decoration(id, SPIRVCrossDecorationPhysicalTypeID, meta.storage_physical_type); + if (meta.storage_is_invariant) + set_decoration(id, DecorationInvariant); + + break; + } + + case OpCompositeInsert: + { + uint32_t result_type = ops[0]; + uint32_t id = ops[1]; + uint32_t obj = ops[2]; + uint32_t composite = ops[3]; + const auto *elems = &ops[4]; + length -= 4; + + flush_variable_declaration(composite); + + // Make a copy, then use access chain to store the variable. 
+ statement(declare_temporary(result_type, id), to_expression(composite), ";"); + set(id, to_name(id), result_type, true); + auto chain = access_chain_internal(id, elems, length, ACCESS_CHAIN_INDEX_IS_LITERAL_BIT, nullptr); + statement(chain, " = ", to_unpacked_expression(obj), ";"); + + break; + } + + case OpCopyMemory: + { + uint32_t lhs = ops[0]; + uint32_t rhs = ops[1]; + if (lhs != rhs) + { + flush_variable_declaration(lhs); + flush_variable_declaration(rhs); + statement(to_expression(lhs), " = ", to_unpacked_expression(rhs), ";"); + register_write(lhs); + } + break; + } + + case OpCopyLogical: + { + // This is used for copying object of different types, arrays and structs. + // We need to unroll the copy, element-by-element. + uint32_t result_type = ops[0]; + uint32_t id = ops[1]; + uint32_t rhs = ops[2]; + + emit_uninitialized_temporary_expression(result_type, id); + emit_copy_logical_type(id, result_type, rhs, expression_type_id(rhs), {}); + break; + } + + case OpCopyObject: + { + uint32_t result_type = ops[0]; + uint32_t id = ops[1]; + uint32_t rhs = ops[2]; + bool pointer = get(result_type).pointer; + + auto *chain = maybe_get(rhs); + auto *imgsamp = maybe_get(rhs); + if (chain) + { + // Cannot lower to a SPIRExpression, just copy the object. + auto &e = set(id, *chain); + e.self = id; + } + else if (imgsamp) + { + // Cannot lower to a SPIRExpression, just copy the object. + // GLSL does not currently use this type and will never get here, but MSL does. + // Handled here instead of CompilerMSL for better integration and general handling, + // and in case GLSL or other subclasses require it in the future. + auto &e = set(id, *imgsamp); + e.self = id; + } + else if (expression_is_lvalue(rhs) && !pointer) + { + // Need a copy. + // For pointer types, we copy the pointer itself. 
+ statement(declare_temporary(result_type, id), to_unpacked_expression(rhs), ";"); + set(id, to_name(id), result_type, true); + } + else + { + // RHS expression is immutable, so just forward it. + // Copying these things really make no sense, but + // seems to be allowed anyways. + auto &e = set(id, to_expression(rhs), result_type, true); + if (pointer) + { + auto *var = maybe_get_backing_variable(rhs); + e.loaded_from = var ? var->self : ID(0); + } + + // If we're copying an access chain, need to inherit the read expressions. + auto *rhs_expr = maybe_get(rhs); + if (rhs_expr) + { + e.implied_read_expressions = rhs_expr->implied_read_expressions; + e.expression_dependencies = rhs_expr->expression_dependencies; + } + } + break; + } + + case OpVectorShuffle: + { + uint32_t result_type = ops[0]; + uint32_t id = ops[1]; + uint32_t vec0 = ops[2]; + uint32_t vec1 = ops[3]; + const auto *elems = &ops[4]; + length -= 4; + + auto &type0 = expression_type(vec0); + + // If we have the undefined swizzle index -1, we need to swizzle in undefined data, + // or in our case, T(0). + bool shuffle = false; + for (uint32_t i = 0; i < length; i++) + if (elems[i] >= type0.vecsize || elems[i] == 0xffffffffu) + shuffle = true; + + // Cannot use swizzles with packed expressions, force shuffle path. + if (!shuffle && has_extended_decoration(vec0, SPIRVCrossDecorationPhysicalTypePacked)) + shuffle = true; + + string expr; + bool should_fwd, trivial_forward; + + if (shuffle) + { + should_fwd = should_forward(vec0) && should_forward(vec1); + trivial_forward = should_suppress_usage_tracking(vec0) && should_suppress_usage_tracking(vec1); + + // Constructor style and shuffling from two different vectors. + SmallVector args; + for (uint32_t i = 0; i < length; i++) + { + if (elems[i] == 0xffffffffu) + { + // Use a constant 0 here. + // We could use the first component or similar, but then we risk propagating + // a value we might not need, and bog down codegen. 
+ SPIRConstant c; + c.constant_type = type0.parent_type; + assert(type0.parent_type != ID(0)); + args.push_back(constant_expression(c)); + } + else if (elems[i] >= type0.vecsize) + args.push_back(to_extract_component_expression(vec1, elems[i] - type0.vecsize)); + else + args.push_back(to_extract_component_expression(vec0, elems[i])); + } + expr += join(type_to_glsl_constructor(get(result_type)), "(", merge(args), ")"); + } + else + { + should_fwd = should_forward(vec0); + trivial_forward = should_suppress_usage_tracking(vec0); + + // We only source from first vector, so can use swizzle. + // If the vector is packed, unpack it before applying a swizzle (needed for MSL) + expr += to_enclosed_unpacked_expression(vec0); + expr += "."; + for (uint32_t i = 0; i < length; i++) + { + assert(elems[i] != 0xffffffffu); + expr += index_to_swizzle(elems[i]); + } + + if (backend.swizzle_is_function && length > 1) + expr += "()"; + } + + // A shuffle is trivial in that it doesn't actually *do* anything. + // We inherit the forwardedness from our arguments to avoid flushing out to temporaries when it's not really needed. + + emit_op(result_type, id, expr, should_fwd, trivial_forward); + + inherit_expression_dependencies(id, vec0); + if (vec0 != vec1) + inherit_expression_dependencies(id, vec1); + break; + } + + // ALU + case OpIsNan: + GLSL_UFOP(isnan); + break; + + case OpIsInf: + GLSL_UFOP(isinf); + break; + + case OpSNegate: + case OpFNegate: + GLSL_UOP(-); + break; + + case OpIAdd: + { + // For simple arith ops, prefer the output type if there's a mismatch to avoid extra bitcasts. 
+ auto type = get(ops[0]).basetype; + GLSL_BOP_CAST(+, type); + break; + } + + case OpFAdd: + GLSL_BOP(+); + break; + + case OpISub: + { + auto type = get(ops[0]).basetype; + GLSL_BOP_CAST(-, type); + break; + } + + case OpFSub: + GLSL_BOP(-); + break; + + case OpIMul: + { + auto type = get(ops[0]).basetype; + GLSL_BOP_CAST(*, type); + break; + } + + case OpVectorTimesMatrix: + case OpMatrixTimesVector: + { + // If the matrix needs transpose, just flip the multiply order. + auto *e = maybe_get(ops[opcode == OpMatrixTimesVector ? 2 : 3]); + if (e && e->need_transpose) + { + e->need_transpose = false; + string expr; + + if (opcode == OpMatrixTimesVector) + expr = join(to_enclosed_unpacked_expression(ops[3]), " * ", + enclose_expression(to_unpacked_row_major_matrix_expression(ops[2]))); + else + expr = join(enclose_expression(to_unpacked_row_major_matrix_expression(ops[3])), " * ", + to_enclosed_unpacked_expression(ops[2])); + + bool forward = should_forward(ops[2]) && should_forward(ops[3]); + emit_op(ops[0], ops[1], expr, forward); + e->need_transpose = true; + inherit_expression_dependencies(ops[1], ops[2]); + inherit_expression_dependencies(ops[1], ops[3]); + } + else + GLSL_BOP(*); + break; + } + + case OpMatrixTimesMatrix: + { + auto *a = maybe_get(ops[2]); + auto *b = maybe_get(ops[3]); + + // If both matrices need transpose, we can multiply in flipped order and tag the expression as transposed. + // a^T * b^T = (b * a)^T. 
+ if (a && b && a->need_transpose && b->need_transpose) + { + a->need_transpose = false; + b->need_transpose = false; + auto expr = join(enclose_expression(to_unpacked_row_major_matrix_expression(ops[3])), " * ", + enclose_expression(to_unpacked_row_major_matrix_expression(ops[2]))); + bool forward = should_forward(ops[2]) && should_forward(ops[3]); + auto &e = emit_op(ops[0], ops[1], expr, forward); + e.need_transpose = true; + a->need_transpose = true; + b->need_transpose = true; + inherit_expression_dependencies(ops[1], ops[2]); + inherit_expression_dependencies(ops[1], ops[3]); + } + else + GLSL_BOP(*); + + break; + } + + case OpFMul: + case OpMatrixTimesScalar: + case OpVectorTimesScalar: + GLSL_BOP(*); + break; + + case OpOuterProduct: + GLSL_BFOP(outerProduct); + break; + + case OpDot: + GLSL_BFOP(dot); + break; + + case OpTranspose: + if (options.version < 120) // Matches GLSL 1.10 / ESSL 1.00 + { + // transpose() is not available, so instead, flip need_transpose, + // which can later be turned into an emulated transpose op by + // convert_row_major_matrix(), if necessary. + uint32_t result_type = ops[0]; + uint32_t result_id = ops[1]; + uint32_t input = ops[2]; + + // Force need_transpose to false temporarily to prevent + // to_expression() from doing the transpose. + bool need_transpose = false; + auto *input_e = maybe_get(input); + if (input_e) + swap(need_transpose, input_e->need_transpose); + + bool forward = should_forward(input); + auto &e = emit_op(result_type, result_id, to_expression(input), forward); + e.need_transpose = !need_transpose; + + // Restore the old need_transpose flag. + if (input_e) + input_e->need_transpose = need_transpose; + } + else + GLSL_UFOP(transpose); + break; + + case OpSRem: + { + uint32_t result_type = ops[0]; + uint32_t result_id = ops[1]; + uint32_t op0 = ops[2]; + uint32_t op1 = ops[3]; + + // Needs special handling. 
+ bool forward = should_forward(op0) && should_forward(op1); + auto expr = join(to_enclosed_expression(op0), " - ", to_enclosed_expression(op1), " * ", "(", + to_enclosed_expression(op0), " / ", to_enclosed_expression(op1), ")"); + + emit_op(result_type, result_id, expr, forward); + inherit_expression_dependencies(result_id, op0); + inherit_expression_dependencies(result_id, op1); + break; + } + + case OpSDiv: + GLSL_BOP_CAST(/, int_type); + break; + + case OpUDiv: + GLSL_BOP_CAST(/, uint_type); + break; + + case OpIAddCarry: + case OpISubBorrow: + { + if (options.es && options.version < 310) + SPIRV_CROSS_THROW("Extended arithmetic is only available from ESSL 310."); + else if (!options.es && options.version < 400) + SPIRV_CROSS_THROW("Extended arithmetic is only available from GLSL 400."); + + uint32_t result_type = ops[0]; + uint32_t result_id = ops[1]; + uint32_t op0 = ops[2]; + uint32_t op1 = ops[3]; + auto &type = get(result_type); + emit_uninitialized_temporary_expression(result_type, result_id); + const char *op = opcode == OpIAddCarry ? "uaddCarry" : "usubBorrow"; + + statement(to_expression(result_id), ".", to_member_name(type, 0), " = ", op, "(", to_expression(op0), ", ", + to_expression(op1), ", ", to_expression(result_id), ".", to_member_name(type, 1), ");"); + break; + } + + case OpUMulExtended: + case OpSMulExtended: + { + if (options.es && options.version < 310) + SPIRV_CROSS_THROW("Extended arithmetic is only available from ESSL 310."); + else if (!options.es && options.version < 400) + SPIRV_CROSS_THROW("Extended arithmetic is only available from GLSL 4000."); + + uint32_t result_type = ops[0]; + uint32_t result_id = ops[1]; + uint32_t op0 = ops[2]; + uint32_t op1 = ops[3]; + auto &type = get(result_type); + emit_uninitialized_temporary_expression(result_type, result_id); + const char *op = opcode == OpUMulExtended ? 
"umulExtended" : "imulExtended"; + + statement(op, "(", to_expression(op0), ", ", to_expression(op1), ", ", to_expression(result_id), ".", + to_member_name(type, 1), ", ", to_expression(result_id), ".", to_member_name(type, 0), ");"); + break; + } + + case OpFDiv: + GLSL_BOP(/); + break; + + case OpShiftRightLogical: + GLSL_BOP_CAST(>>, uint_type); + break; + + case OpShiftRightArithmetic: + GLSL_BOP_CAST(>>, int_type); + break; + + case OpShiftLeftLogical: + { + auto type = get(ops[0]).basetype; + GLSL_BOP_CAST(<<, type); + break; + } + + case OpBitwiseOr: + { + auto type = get(ops[0]).basetype; + GLSL_BOP_CAST(|, type); + break; + } + + case OpBitwiseXor: + { + auto type = get(ops[0]).basetype; + GLSL_BOP_CAST(^, type); + break; + } + + case OpBitwiseAnd: + { + auto type = get(ops[0]).basetype; + GLSL_BOP_CAST(&, type); + break; + } + + case OpNot: + GLSL_UOP(~); + break; + + case OpUMod: + GLSL_BOP_CAST(%, uint_type); + break; + + case OpSMod: + GLSL_BOP_CAST(%, int_type); + break; + + case OpFMod: + GLSL_BFOP(mod); + break; + + case OpFRem: + { + if (is_legacy()) + SPIRV_CROSS_THROW("OpFRem requires trunc() and is only supported on non-legacy targets. A workaround is " + "needed for legacy."); + + uint32_t result_type = ops[0]; + uint32_t result_id = ops[1]; + uint32_t op0 = ops[2]; + uint32_t op1 = ops[3]; + + // Needs special handling. 
+ bool forward = should_forward(op0) && should_forward(op1); + auto expr = join(to_enclosed_expression(op0), " - ", to_enclosed_expression(op1), " * ", "trunc(", + to_enclosed_expression(op0), " / ", to_enclosed_expression(op1), ")"); + + emit_op(result_type, result_id, expr, forward); + inherit_expression_dependencies(result_id, op0); + inherit_expression_dependencies(result_id, op1); + break; + } + + // Relational + case OpAny: + GLSL_UFOP(any); + break; + + case OpAll: + GLSL_UFOP(all); + break; + + case OpSelect: + emit_mix_op(ops[0], ops[1], ops[4], ops[3], ops[2]); + break; + + case OpLogicalOr: + { + // No vector variant in GLSL for logical OR. + auto result_type = ops[0]; + auto id = ops[1]; + auto &type = get(result_type); + + if (type.vecsize > 1) + emit_unrolled_binary_op(result_type, id, ops[2], ops[3], "||", false, SPIRType::Unknown); + else + GLSL_BOP(||); + break; + } + + case OpLogicalAnd: + { + // No vector variant in GLSL for logical AND. + auto result_type = ops[0]; + auto id = ops[1]; + auto &type = get(result_type); + + if (type.vecsize > 1) + emit_unrolled_binary_op(result_type, id, ops[2], ops[3], "&&", false, SPIRType::Unknown); + else + GLSL_BOP(&&); + break; + } + + case OpLogicalNot: + { + auto &type = get(ops[0]); + if (type.vecsize > 1) + GLSL_UFOP(not ); + else + GLSL_UOP(!); + break; + } + + case OpIEqual: + { + if (expression_type(ops[2]).vecsize > 1) + GLSL_BFOP_CAST(equal, int_type); + else + GLSL_BOP_CAST(==, int_type); + break; + } + + case OpLogicalEqual: + case OpFOrdEqual: + { + if (expression_type(ops[2]).vecsize > 1) + GLSL_BFOP(equal); + else + GLSL_BOP(==); + break; + } + + case OpINotEqual: + { + if (expression_type(ops[2]).vecsize > 1) + GLSL_BFOP_CAST(notEqual, int_type); + else + GLSL_BOP_CAST(!=, int_type); + break; + } + + case OpLogicalNotEqual: + case OpFOrdNotEqual: + { + if (expression_type(ops[2]).vecsize > 1) + GLSL_BFOP(notEqual); + else + GLSL_BOP(!=); + break; + } + + case OpUGreaterThan: + case 
OpSGreaterThan: + { + auto type = opcode == OpUGreaterThan ? uint_type : int_type; + if (expression_type(ops[2]).vecsize > 1) + GLSL_BFOP_CAST(greaterThan, type); + else + GLSL_BOP_CAST(>, type); + break; + } + + case OpFOrdGreaterThan: + { + if (expression_type(ops[2]).vecsize > 1) + GLSL_BFOP(greaterThan); + else + GLSL_BOP(>); + break; + } + + case OpUGreaterThanEqual: + case OpSGreaterThanEqual: + { + auto type = opcode == OpUGreaterThanEqual ? uint_type : int_type; + if (expression_type(ops[2]).vecsize > 1) + GLSL_BFOP_CAST(greaterThanEqual, type); + else + GLSL_BOP_CAST(>=, type); + break; + } + + case OpFOrdGreaterThanEqual: + { + if (expression_type(ops[2]).vecsize > 1) + GLSL_BFOP(greaterThanEqual); + else + GLSL_BOP(>=); + break; + } + + case OpULessThan: + case OpSLessThan: + { + auto type = opcode == OpULessThan ? uint_type : int_type; + if (expression_type(ops[2]).vecsize > 1) + GLSL_BFOP_CAST(lessThan, type); + else + GLSL_BOP_CAST(<, type); + break; + } + + case OpFOrdLessThan: + { + if (expression_type(ops[2]).vecsize > 1) + GLSL_BFOP(lessThan); + else + GLSL_BOP(<); + break; + } + + case OpULessThanEqual: + case OpSLessThanEqual: + { + auto type = opcode == OpULessThanEqual ? uint_type : int_type; + if (expression_type(ops[2]).vecsize > 1) + GLSL_BFOP_CAST(lessThanEqual, type); + else + GLSL_BOP_CAST(<=, type); + break; + } + + case OpFOrdLessThanEqual: + { + if (expression_type(ops[2]).vecsize > 1) + GLSL_BFOP(lessThanEqual); + else + GLSL_BOP(<=); + break; + } + + // Conversion + case OpSConvert: + case OpConvertSToF: + case OpUConvert: + case OpConvertUToF: + { + auto input_type = opcode == OpSConvert || opcode == OpConvertSToF ? 
int_type : uint_type; + uint32_t result_type = ops[0]; + uint32_t id = ops[1]; + + auto &type = get(result_type); + auto &arg_type = expression_type(ops[2]); + auto func = type_to_glsl_constructor(type); + + if (arg_type.width < type.width || type_is_floating_point(type)) + emit_unary_func_op_cast(result_type, id, ops[2], func.c_str(), input_type, type.basetype); + else + emit_unary_func_op(result_type, id, ops[2], func.c_str()); + break; + } + + case OpConvertFToU: + case OpConvertFToS: + { + // Cast to expected arithmetic type, then potentially bitcast away to desired signedness. + uint32_t result_type = ops[0]; + uint32_t id = ops[1]; + auto &type = get(result_type); + auto expected_type = type; + auto &float_type = expression_type(ops[2]); + expected_type.basetype = + opcode == OpConvertFToS ? to_signed_basetype(type.width) : to_unsigned_basetype(type.width); + + auto func = type_to_glsl_constructor(expected_type); + emit_unary_func_op_cast(result_type, id, ops[2], func.c_str(), float_type.basetype, expected_type.basetype); + break; + } + + case OpFConvert: + { + uint32_t result_type = ops[0]; + uint32_t id = ops[1]; + + auto func = type_to_glsl_constructor(get(result_type)); + emit_unary_func_op(result_type, id, ops[2], func.c_str()); + break; + } + + case OpBitcast: + { + uint32_t result_type = ops[0]; + uint32_t id = ops[1]; + uint32_t arg = ops[2]; + + if (!emit_complex_bitcast(result_type, id, arg)) + { + auto op = bitcast_glsl_op(get(result_type), expression_type(arg)); + emit_unary_func_op(result_type, id, arg, op.c_str()); + } + break; + } + + case OpQuantizeToF16: + { + uint32_t result_type = ops[0]; + uint32_t id = ops[1]; + uint32_t arg = ops[2]; + + string op; + auto &type = get(result_type); + + switch (type.vecsize) + { + case 1: + op = join("unpackHalf2x16(packHalf2x16(vec2(", to_expression(arg), "))).x"); + break; + case 2: + op = join("unpackHalf2x16(packHalf2x16(", to_expression(arg), "))"); + break; + case 3: + { + auto op0 = 
join("unpackHalf2x16(packHalf2x16(", to_expression(arg), ".xy))"); + auto op1 = join("unpackHalf2x16(packHalf2x16(", to_expression(arg), ".zz)).x"); + op = join("vec3(", op0, ", ", op1, ")"); + break; + } + case 4: + { + auto op0 = join("unpackHalf2x16(packHalf2x16(", to_expression(arg), ".xy))"); + auto op1 = join("unpackHalf2x16(packHalf2x16(", to_expression(arg), ".zw))"); + op = join("vec4(", op0, ", ", op1, ")"); + break; + } + default: + SPIRV_CROSS_THROW("Illegal argument to OpQuantizeToF16."); + } + + emit_op(result_type, id, op, should_forward(arg)); + inherit_expression_dependencies(id, arg); + break; + } + + // Derivatives + case OpDPdx: + GLSL_UFOP(dFdx); + if (is_legacy_es()) + require_extension_internal("GL_OES_standard_derivatives"); + register_control_dependent_expression(ops[1]); + break; + + case OpDPdy: + GLSL_UFOP(dFdy); + if (is_legacy_es()) + require_extension_internal("GL_OES_standard_derivatives"); + register_control_dependent_expression(ops[1]); + break; + + case OpDPdxFine: + GLSL_UFOP(dFdxFine); + if (options.es) + { + SPIRV_CROSS_THROW("GL_ARB_derivative_control is unavailable in OpenGL ES."); + } + if (options.version < 450) + require_extension_internal("GL_ARB_derivative_control"); + register_control_dependent_expression(ops[1]); + break; + + case OpDPdyFine: + GLSL_UFOP(dFdyFine); + if (options.es) + { + SPIRV_CROSS_THROW("GL_ARB_derivative_control is unavailable in OpenGL ES."); + } + if (options.version < 450) + require_extension_internal("GL_ARB_derivative_control"); + register_control_dependent_expression(ops[1]); + break; + + case OpDPdxCoarse: + if (options.es) + { + SPIRV_CROSS_THROW("GL_ARB_derivative_control is unavailable in OpenGL ES."); + } + GLSL_UFOP(dFdxCoarse); + if (options.version < 450) + require_extension_internal("GL_ARB_derivative_control"); + register_control_dependent_expression(ops[1]); + break; + + case OpDPdyCoarse: + GLSL_UFOP(dFdyCoarse); + if (options.es) + { + SPIRV_CROSS_THROW("GL_ARB_derivative_control 
is unavailable in OpenGL ES."); + } + if (options.version < 450) + require_extension_internal("GL_ARB_derivative_control"); + register_control_dependent_expression(ops[1]); + break; + + case OpFwidth: + GLSL_UFOP(fwidth); + if (is_legacy_es()) + require_extension_internal("GL_OES_standard_derivatives"); + register_control_dependent_expression(ops[1]); + break; + + case OpFwidthCoarse: + GLSL_UFOP(fwidthCoarse); + if (options.es) + { + SPIRV_CROSS_THROW("GL_ARB_derivative_control is unavailable in OpenGL ES."); + } + if (options.version < 450) + require_extension_internal("GL_ARB_derivative_control"); + register_control_dependent_expression(ops[1]); + break; + + case OpFwidthFine: + GLSL_UFOP(fwidthFine); + if (options.es) + { + SPIRV_CROSS_THROW("GL_ARB_derivative_control is unavailable in OpenGL ES."); + } + if (options.version < 450) + require_extension_internal("GL_ARB_derivative_control"); + register_control_dependent_expression(ops[1]); + break; + + // Bitfield + case OpBitFieldInsert: + { + emit_bitfield_insert_op(ops[0], ops[1], ops[2], ops[3], ops[4], ops[5], "bitfieldInsert", SPIRType::Int); + break; + } + + case OpBitFieldSExtract: + { + emit_trinary_func_op_bitextract(ops[0], ops[1], ops[2], ops[3], ops[4], "bitfieldExtract", int_type, int_type, + SPIRType::Int, SPIRType::Int); + break; + } + + case OpBitFieldUExtract: + { + emit_trinary_func_op_bitextract(ops[0], ops[1], ops[2], ops[3], ops[4], "bitfieldExtract", uint_type, uint_type, + SPIRType::Int, SPIRType::Int); + break; + } + + case OpBitReverse: + // BitReverse does not have issues with sign since result type must match input type. 
+ GLSL_UFOP(bitfieldReverse); + break; + + case OpBitCount: + { + auto basetype = expression_type(ops[2]).basetype; + emit_unary_func_op_cast(ops[0], ops[1], ops[2], "bitCount", basetype, int_type); + break; + } + + // Atomics + case OpAtomicExchange: + { + uint32_t result_type = ops[0]; + uint32_t id = ops[1]; + uint32_t ptr = ops[2]; + // Ignore semantics for now, probably only relevant to CL. + uint32_t val = ops[5]; + const char *op = check_atomic_image(ptr) ? "imageAtomicExchange" : "atomicExchange"; + forced_temporaries.insert(id); + emit_binary_func_op(result_type, id, ptr, val, op); + flush_all_atomic_capable_variables(); + break; + } + + case OpAtomicCompareExchange: + { + uint32_t result_type = ops[0]; + uint32_t id = ops[1]; + uint32_t ptr = ops[2]; + uint32_t val = ops[6]; + uint32_t comp = ops[7]; + const char *op = check_atomic_image(ptr) ? "imageAtomicCompSwap" : "atomicCompSwap"; + + forced_temporaries.insert(id); + emit_trinary_func_op(result_type, id, ptr, comp, val, op); + flush_all_atomic_capable_variables(); + break; + } + + case OpAtomicLoad: + { + // In plain GLSL, we have no atomic loads, so emulate this by fetch adding by 0 and hope compiler figures it out. + // Alternatively, we could rely on KHR_memory_model, but that's not very helpful for GL. + auto &type = expression_type(ops[2]); + forced_temporaries.insert(ops[1]); + bool atomic_image = check_atomic_image(ops[2]); + bool unsigned_type = (type.basetype == SPIRType::UInt) || + (atomic_image && get(type.image.type).basetype == SPIRType::UInt); + const char *op = atomic_image ? "imageAtomicAdd" : "atomicAdd"; + const char *increment = unsigned_type ? "0u" : "0"; + emit_op(ops[0], ops[1], join(op, "(", to_expression(ops[2]), ", ", increment, ")"), false); + flush_all_atomic_capable_variables(); + break; + } + + case OpAtomicStore: + { + // In plain GLSL, we have no atomic stores, so emulate this with an atomic exchange where we don't consume the result. 
+ // Alternatively, we could rely on KHR_memory_model, but that's not very helpful for GL. + uint32_t ptr = ops[0]; + // Ignore semantics for now, probably only relevant to CL. + uint32_t val = ops[3]; + const char *op = check_atomic_image(ptr) ? "imageAtomicExchange" : "atomicExchange"; + statement(op, "(", to_expression(ptr), ", ", to_expression(val), ");"); + flush_all_atomic_capable_variables(); + break; + } + + case OpAtomicIIncrement: + case OpAtomicIDecrement: + { + forced_temporaries.insert(ops[1]); + auto &type = expression_type(ops[2]); + if (type.storage == StorageClassAtomicCounter) + { + // Legacy GLSL stuff, not sure if this is relevant to support. + if (opcode == OpAtomicIIncrement) + GLSL_UFOP(atomicCounterIncrement); + else + GLSL_UFOP(atomicCounterDecrement); + } + else + { + bool atomic_image = check_atomic_image(ops[2]); + bool unsigned_type = (type.basetype == SPIRType::UInt) || + (atomic_image && get(type.image.type).basetype == SPIRType::UInt); + const char *op = atomic_image ? "imageAtomicAdd" : "atomicAdd"; + + const char *increment = nullptr; + if (opcode == OpAtomicIIncrement && unsigned_type) + increment = "1u"; + else if (opcode == OpAtomicIIncrement) + increment = "1"; + else if (unsigned_type) + increment = "uint(-1)"; + else + increment = "-1"; + + emit_op(ops[0], ops[1], join(op, "(", to_expression(ops[2]), ", ", increment, ")"), false); + } + + flush_all_atomic_capable_variables(); + break; + } + + case OpAtomicIAdd: + { + const char *op = check_atomic_image(ops[2]) ? "imageAtomicAdd" : "atomicAdd"; + forced_temporaries.insert(ops[1]); + emit_binary_func_op(ops[0], ops[1], ops[2], ops[5], op); + flush_all_atomic_capable_variables(); + break; + } + + case OpAtomicISub: + { + const char *op = check_atomic_image(ops[2]) ? 
"imageAtomicAdd" : "atomicAdd"; + forced_temporaries.insert(ops[1]); + auto expr = join(op, "(", to_expression(ops[2]), ", -", to_enclosed_expression(ops[5]), ")"); + emit_op(ops[0], ops[1], expr, should_forward(ops[2]) && should_forward(ops[5])); + flush_all_atomic_capable_variables(); + break; + } + + case OpAtomicSMin: + case OpAtomicUMin: + { + const char *op = check_atomic_image(ops[2]) ? "imageAtomicMin" : "atomicMin"; + forced_temporaries.insert(ops[1]); + emit_binary_func_op(ops[0], ops[1], ops[2], ops[5], op); + flush_all_atomic_capable_variables(); + break; + } + + case OpAtomicSMax: + case OpAtomicUMax: + { + const char *op = check_atomic_image(ops[2]) ? "imageAtomicMax" : "atomicMax"; + forced_temporaries.insert(ops[1]); + emit_binary_func_op(ops[0], ops[1], ops[2], ops[5], op); + flush_all_atomic_capable_variables(); + break; + } + + case OpAtomicAnd: + { + const char *op = check_atomic_image(ops[2]) ? "imageAtomicAnd" : "atomicAnd"; + forced_temporaries.insert(ops[1]); + emit_binary_func_op(ops[0], ops[1], ops[2], ops[5], op); + flush_all_atomic_capable_variables(); + break; + } + + case OpAtomicOr: + { + const char *op = check_atomic_image(ops[2]) ? "imageAtomicOr" : "atomicOr"; + forced_temporaries.insert(ops[1]); + emit_binary_func_op(ops[0], ops[1], ops[2], ops[5], op); + flush_all_atomic_capable_variables(); + break; + } + + case OpAtomicXor: + { + const char *op = check_atomic_image(ops[2]) ? 
"imageAtomicXor" : "atomicXor"; + forced_temporaries.insert(ops[1]); + emit_binary_func_op(ops[0], ops[1], ops[2], ops[5], op); + flush_all_atomic_capable_variables(); + break; + } + + // Geometry shaders + case OpEmitVertex: + statement("EmitVertex();"); + break; + + case OpEndPrimitive: + statement("EndPrimitive();"); + break; + + case OpEmitStreamVertex: + { + if (options.es) + SPIRV_CROSS_THROW("Multi-stream geometry shaders not supported in ES."); + else if (!options.es && options.version < 400) + SPIRV_CROSS_THROW("Multi-stream geometry shaders only supported in GLSL 400."); + + auto stream_expr = to_expression(ops[0]); + if (expression_type(ops[0]).basetype != SPIRType::Int) + stream_expr = join("int(", stream_expr, ")"); + statement("EmitStreamVertex(", stream_expr, ");"); + break; + } + + case OpEndStreamPrimitive: + { + if (options.es) + SPIRV_CROSS_THROW("Multi-stream geometry shaders not supported in ES."); + else if (!options.es && options.version < 400) + SPIRV_CROSS_THROW("Multi-stream geometry shaders only supported in GLSL 400."); + + auto stream_expr = to_expression(ops[0]); + if (expression_type(ops[0]).basetype != SPIRType::Int) + stream_expr = join("int(", stream_expr, ")"); + statement("EndStreamPrimitive(", stream_expr, ");"); + break; + } + + // Textures + case OpImageSampleExplicitLod: + case OpImageSampleProjExplicitLod: + case OpImageSampleDrefExplicitLod: + case OpImageSampleProjDrefExplicitLod: + case OpImageSampleImplicitLod: + case OpImageSampleProjImplicitLod: + case OpImageSampleDrefImplicitLod: + case OpImageSampleProjDrefImplicitLod: + case OpImageFetch: + case OpImageGather: + case OpImageDrefGather: + // Gets a bit hairy, so move this to a separate instruction. 
+ emit_texture_op(instruction, false); + break; + + case OpImageSparseSampleExplicitLod: + case OpImageSparseSampleProjExplicitLod: + case OpImageSparseSampleDrefExplicitLod: + case OpImageSparseSampleProjDrefExplicitLod: + case OpImageSparseSampleImplicitLod: + case OpImageSparseSampleProjImplicitLod: + case OpImageSparseSampleDrefImplicitLod: + case OpImageSparseSampleProjDrefImplicitLod: + case OpImageSparseFetch: + case OpImageSparseGather: + case OpImageSparseDrefGather: + // Gets a bit hairy, so move this to a separate instruction. + emit_texture_op(instruction, true); + break; + + case OpImageSparseTexelsResident: + if (options.es) + SPIRV_CROSS_THROW("Sparse feedback is not supported in GLSL."); + require_extension_internal("GL_ARB_sparse_texture2"); + emit_unary_func_op_cast(ops[0], ops[1], ops[2], "sparseTexelsResidentARB", int_type, SPIRType::Boolean); + break; + + case OpImage: + { + uint32_t result_type = ops[0]; + uint32_t id = ops[1]; + + // Suppress usage tracking. + auto &e = emit_op(result_type, id, to_expression(ops[2]), true, true); + + // When using the image, we need to know which variable it is actually loaded from. + auto *var = maybe_get_backing_variable(ops[2]); + e.loaded_from = var ? var->self : ID(0); + break; + } + + case OpImageQueryLod: + { + if (!options.es && options.version < 400) + { + require_extension_internal("GL_ARB_texture_query_lod"); + // For some reason, the ARB spec is all-caps. 
+ GLSL_BFOP(textureQueryLOD); + } + else if (options.es) + SPIRV_CROSS_THROW("textureQueryLod not supported in ES profile."); + else + GLSL_BFOP(textureQueryLod); + register_control_dependent_expression(ops[1]); + break; + } + + case OpImageQueryLevels: + { + uint32_t result_type = ops[0]; + uint32_t id = ops[1]; + + if (!options.es && options.version < 430) + require_extension_internal("GL_ARB_texture_query_levels"); + if (options.es) + SPIRV_CROSS_THROW("textureQueryLevels not supported in ES profile."); + + auto expr = join("textureQueryLevels(", convert_separate_image_to_expression(ops[2]), ")"); + auto &restype = get(ops[0]); + expr = bitcast_expression(restype, SPIRType::Int, expr); + emit_op(result_type, id, expr, true); + break; + } + + case OpImageQuerySamples: + { + auto &type = expression_type(ops[2]); + uint32_t result_type = ops[0]; + uint32_t id = ops[1]; + + string expr; + if (type.image.sampled == 2) + expr = join("imageSamples(", to_expression(ops[2]), ")"); + else + expr = join("textureSamples(", convert_separate_image_to_expression(ops[2]), ")"); + + auto &restype = get(ops[0]); + expr = bitcast_expression(restype, SPIRType::Int, expr); + emit_op(result_type, id, expr, true); + break; + } + + case OpSampledImage: + { + uint32_t result_type = ops[0]; + uint32_t id = ops[1]; + emit_sampled_image_op(result_type, id, ops[2], ops[3]); + inherit_expression_dependencies(id, ops[2]); + inherit_expression_dependencies(id, ops[3]); + break; + } + + case OpImageQuerySizeLod: + { + uint32_t result_type = ops[0]; + uint32_t id = ops[1]; + uint32_t img = ops[2]; + + std::string fname = "textureSize"; + if (is_legacy_desktop()) + { + auto &type = expression_type(img); + auto &imgtype = get(type.self); + fname = legacy_tex_op(fname, imgtype, img); + } + else if (is_legacy_es()) + SPIRV_CROSS_THROW("textureSize is not supported in ESSL 100."); + + auto expr = join(fname, "(", convert_separate_image_to_expression(img), ", ", + bitcast_expression(SPIRType::Int, 
ops[3]), ")"); + auto &restype = get(ops[0]); + expr = bitcast_expression(restype, SPIRType::Int, expr); + emit_op(result_type, id, expr, true); + break; + } + + // Image load/store + case OpImageRead: + case OpImageSparseRead: + { + // We added Nonreadable speculatively to the OpImage variable due to glslangValidator + // not adding the proper qualifiers. + // If it turns out we need to read the image after all, remove the qualifier and recompile. + auto *var = maybe_get_backing_variable(ops[2]); + if (var) + { + auto &flags = ir.meta[var->self].decoration.decoration_flags; + if (flags.get(DecorationNonReadable)) + { + flags.clear(DecorationNonReadable); + force_recompile(); + } + } + + uint32_t result_type = ops[0]; + uint32_t id = ops[1]; + + bool pure; + string imgexpr; + auto &type = expression_type(ops[2]); + + if (var && var->remapped_variable) // Remapped input, just read as-is without any op-code + { + if (type.image.ms) + SPIRV_CROSS_THROW("Trying to remap multisampled image to variable, this is not possible."); + + auto itr = + find_if(begin(pls_inputs), end(pls_inputs), [var](const PlsRemap &pls) { return pls.id == var->self; }); + + if (itr == end(pls_inputs)) + { + // For non-PLS inputs, we rely on subpass type remapping information to get it right + // since ImageRead always returns 4-component vectors and the backing type is opaque. + if (!var->remapped_components) + SPIRV_CROSS_THROW("subpassInput was remapped, but remap_components is not set correctly."); + imgexpr = remap_swizzle(get(result_type), var->remapped_components, to_expression(ops[2])); + } + else + { + // PLS input could have different number of components than what the SPIR expects, swizzle to + // the appropriate vector size. 
+ uint32_t components = pls_format_to_components(itr->format); + imgexpr = remap_swizzle(get(result_type), components, to_expression(ops[2])); + } + pure = true; + } + else if (type.image.dim == DimSubpassData) + { + if (var && subpass_input_is_framebuffer_fetch(var->self)) + { + imgexpr = to_expression(var->self); + } + else if (options.vulkan_semantics) + { + // With Vulkan semantics, use the proper Vulkan GLSL construct. + if (type.image.ms) + { + uint32_t operands = ops[4]; + if (operands != ImageOperandsSampleMask || length != 6) + SPIRV_CROSS_THROW("Multisampled image used in OpImageRead, but unexpected " + "operand mask was used."); + + uint32_t samples = ops[5]; + imgexpr = join("subpassLoad(", to_expression(ops[2]), ", ", to_expression(samples), ")"); + } + else + imgexpr = join("subpassLoad(", to_expression(ops[2]), ")"); + } + else + { + if (type.image.ms) + { + uint32_t operands = ops[4]; + if (operands != ImageOperandsSampleMask || length != 6) + SPIRV_CROSS_THROW("Multisampled image used in OpImageRead, but unexpected " + "operand mask was used."); + + uint32_t samples = ops[5]; + imgexpr = join("texelFetch(", to_expression(ops[2]), ", ivec2(gl_FragCoord.xy), ", + to_expression(samples), ")"); + } + else + { + // Implement subpass loads via texture barrier style sampling. + imgexpr = join("texelFetch(", to_expression(ops[2]), ", ivec2(gl_FragCoord.xy), 0)"); + } + } + imgexpr = remap_swizzle(get(result_type), 4, imgexpr); + pure = true; + } + else + { + bool sparse = opcode == OpImageSparseRead; + uint32_t sparse_code_id = 0; + uint32_t sparse_texel_id = 0; + if (sparse) + emit_sparse_feedback_temporaries(ops[0], ops[1], sparse_code_id, sparse_texel_id); + + // imageLoad only accepts int coords, not uint. 
+ auto coord_expr = to_expression(ops[3]); + auto target_coord_type = expression_type(ops[3]); + target_coord_type.basetype = SPIRType::Int; + coord_expr = bitcast_expression(target_coord_type, expression_type(ops[3]).basetype, coord_expr); + + // Plain image load/store. + if (sparse) + { + if (type.image.ms) + { + uint32_t operands = ops[4]; + if (operands != ImageOperandsSampleMask || length != 6) + SPIRV_CROSS_THROW("Multisampled image used in OpImageRead, but unexpected " + "operand mask was used."); + + uint32_t samples = ops[5]; + statement(to_expression(sparse_code_id), " = sparseImageLoadARB(", to_expression(ops[2]), ", ", + coord_expr, ", ", to_expression(samples), ", ", to_expression(sparse_texel_id), ");"); + } + else + { + statement(to_expression(sparse_code_id), " = sparseImageLoadARB(", to_expression(ops[2]), ", ", + coord_expr, ", ", to_expression(sparse_texel_id), ");"); + } + imgexpr = join(type_to_glsl(get(result_type)), "(", to_expression(sparse_code_id), ", ", + to_expression(sparse_texel_id), ")"); + } + else + { + if (type.image.ms) + { + uint32_t operands = ops[4]; + if (operands != ImageOperandsSampleMask || length != 6) + SPIRV_CROSS_THROW("Multisampled image used in OpImageRead, but unexpected " + "operand mask was used."); + + uint32_t samples = ops[5]; + imgexpr = + join("imageLoad(", to_expression(ops[2]), ", ", coord_expr, ", ", to_expression(samples), ")"); + } + else + imgexpr = join("imageLoad(", to_expression(ops[2]), ", ", coord_expr, ")"); + } + + if (!sparse) + imgexpr = remap_swizzle(get(result_type), 4, imgexpr); + pure = false; + } + + if (var && var->forwardable) + { + bool forward = forced_temporaries.find(id) == end(forced_temporaries); + auto &e = emit_op(result_type, id, imgexpr, forward); + + // We only need to track dependencies if we're reading from image load/store. 
+ if (!pure) + { + e.loaded_from = var->self; + if (forward) + var->dependees.push_back(id); + } + } + else + emit_op(result_type, id, imgexpr, false); + + inherit_expression_dependencies(id, ops[2]); + if (type.image.ms) + inherit_expression_dependencies(id, ops[5]); + break; + } + + case OpImageTexelPointer: + { + uint32_t result_type = ops[0]; + uint32_t id = ops[1]; + + auto coord_expr = to_expression(ops[3]); + auto target_coord_type = expression_type(ops[3]); + target_coord_type.basetype = SPIRType::Int; + coord_expr = bitcast_expression(target_coord_type, expression_type(ops[3]).basetype, coord_expr); + + auto expr = join(to_expression(ops[2]), ", ", coord_expr); + if (has_decoration(id, DecorationNonUniformEXT) || has_decoration(ops[2], DecorationNonUniformEXT)) + convert_non_uniform_expression(expression_type(ops[2]), expr); + + auto &e = set(id, expr, result_type, true); + + // When using the pointer, we need to know which variable it is actually loaded from. + auto *var = maybe_get_backing_variable(ops[2]); + e.loaded_from = var ? var->self : ID(0); + inherit_expression_dependencies(id, ops[3]); + break; + } + + case OpImageWrite: + { + // We added Nonwritable speculatively to the OpImage variable due to glslangValidator + // not adding the proper qualifiers. + // If it turns out we need to write to the image after all, remove the qualifier and recompile. + auto *var = maybe_get_backing_variable(ops[0]); + if (var) + { + auto &flags = ir.meta[var->self].decoration.decoration_flags; + if (flags.get(DecorationNonWritable)) + { + flags.clear(DecorationNonWritable); + force_recompile(); + } + } + + auto &type = expression_type(ops[0]); + auto &value_type = expression_type(ops[2]); + auto store_type = value_type; + store_type.vecsize = 4; + + // imageStore only accepts int coords, not uint. 
+ auto coord_expr = to_expression(ops[1]); + auto target_coord_type = expression_type(ops[1]); + target_coord_type.basetype = SPIRType::Int; + coord_expr = bitcast_expression(target_coord_type, expression_type(ops[1]).basetype, coord_expr); + + if (type.image.ms) + { + uint32_t operands = ops[3]; + if (operands != ImageOperandsSampleMask || length != 5) + SPIRV_CROSS_THROW("Multisampled image used in OpImageWrite, but unexpected operand mask was used."); + uint32_t samples = ops[4]; + statement("imageStore(", to_expression(ops[0]), ", ", coord_expr, ", ", to_expression(samples), ", ", + remap_swizzle(store_type, value_type.vecsize, to_expression(ops[2])), ");"); + } + else + statement("imageStore(", to_expression(ops[0]), ", ", coord_expr, ", ", + remap_swizzle(store_type, value_type.vecsize, to_expression(ops[2])), ");"); + + if (var && variable_storage_is_aliased(*var)) + flush_all_aliased_variables(); + break; + } + + case OpImageQuerySize: + { + auto &type = expression_type(ops[2]); + uint32_t result_type = ops[0]; + uint32_t id = ops[1]; + + if (type.basetype == SPIRType::Image) + { + string expr; + if (type.image.sampled == 2) + { + if (!options.es && options.version < 430) + require_extension_internal("GL_ARB_shader_image_size"); + else if (options.es && options.version < 310) + SPIRV_CROSS_THROW("At least ESSL 3.10 required for imageSize."); + + // The size of an image is always constant. + expr = join("imageSize(", to_expression(ops[2]), ")"); + } + else + { + // This path is hit for samplerBuffers and multisampled images which do not have LOD. 
+ std::string fname = "textureSize"; + if (is_legacy()) + { + auto &imgtype = get(type.self); + fname = legacy_tex_op(fname, imgtype, ops[2]); + } + expr = join(fname, "(", convert_separate_image_to_expression(ops[2]), ")"); + } + + auto &restype = get(ops[0]); + expr = bitcast_expression(restype, SPIRType::Int, expr); + emit_op(result_type, id, expr, true); + } + else + SPIRV_CROSS_THROW("Invalid type for OpImageQuerySize."); + break; + } + + // Compute + case OpControlBarrier: + case OpMemoryBarrier: + { + uint32_t execution_scope = 0; + uint32_t memory; + uint32_t semantics; + + if (opcode == OpMemoryBarrier) + { + memory = evaluate_constant_u32(ops[0]); + semantics = evaluate_constant_u32(ops[1]); + } + else + { + execution_scope = evaluate_constant_u32(ops[0]); + memory = evaluate_constant_u32(ops[1]); + semantics = evaluate_constant_u32(ops[2]); + } + + if (execution_scope == ScopeSubgroup || memory == ScopeSubgroup) + { + // OpControlBarrier with ScopeSubgroup is subgroupBarrier() + if (opcode != OpControlBarrier) + { + request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupMemBarrier); + } + else + { + request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupBarrier); + } + } + + if (execution_scope != ScopeSubgroup && get_entry_point().model == ExecutionModelTessellationControl) + { + // Control shaders only have barriers, and it implies memory barriers. + if (opcode == OpControlBarrier) + statement("barrier();"); + break; + } + + // We only care about these flags, acquire/release and friends are not relevant to GLSL. + semantics = mask_relevant_memory_semantics(semantics); + + if (opcode == OpMemoryBarrier) + { + // If we are a memory barrier, and the next instruction is a control barrier, check if that memory barrier + // does what we need, so we avoid redundant barriers. 
+ const Instruction *next = get_next_instruction_in_block(instruction); + if (next && next->op == OpControlBarrier) + { + auto *next_ops = stream(*next); + uint32_t next_memory = evaluate_constant_u32(next_ops[1]); + uint32_t next_semantics = evaluate_constant_u32(next_ops[2]); + next_semantics = mask_relevant_memory_semantics(next_semantics); + + bool memory_scope_covered = false; + if (next_memory == memory) + memory_scope_covered = true; + else if (next_semantics == MemorySemanticsWorkgroupMemoryMask) + { + // If we only care about workgroup memory, either Device or Workgroup scope is fine, + // scope does not have to match. + if ((next_memory == ScopeDevice || next_memory == ScopeWorkgroup) && + (memory == ScopeDevice || memory == ScopeWorkgroup)) + { + memory_scope_covered = true; + } + } + else if (memory == ScopeWorkgroup && next_memory == ScopeDevice) + { + // The control barrier has device scope, but the memory barrier just has workgroup scope. + memory_scope_covered = true; + } + + // If we have the same memory scope, and all memory types are covered, we're good. + if (memory_scope_covered && (semantics & next_semantics) == semantics) + break; + } + } + + // We are synchronizing some memory or syncing execution, + // so we cannot forward any loads beyond the memory barrier. + if (semantics || opcode == OpControlBarrier) + { + assert(current_emitting_block); + flush_control_dependent_expressions(current_emitting_block->self); + flush_all_active_variables(); + } + + if (memory == ScopeWorkgroup) // Only need to consider memory within a group + { + if (semantics == MemorySemanticsWorkgroupMemoryMask) + { + // OpControlBarrier implies a memory barrier for shared memory as well. 
+ bool implies_shared_barrier = opcode == OpControlBarrier && execution_scope == ScopeWorkgroup; + if (!implies_shared_barrier) + statement("memoryBarrierShared();"); + } + else if (semantics != 0) + statement("groupMemoryBarrier();"); + } + else if (memory == ScopeSubgroup) + { + const uint32_t all_barriers = + MemorySemanticsWorkgroupMemoryMask | MemorySemanticsUniformMemoryMask | MemorySemanticsImageMemoryMask; + + if (semantics & (MemorySemanticsCrossWorkgroupMemoryMask | MemorySemanticsSubgroupMemoryMask)) + { + // These are not relevant for GLSL, but assume it means memoryBarrier(). + // memoryBarrier() does everything, so no need to test anything else. + statement("subgroupMemoryBarrier();"); + } + else if ((semantics & all_barriers) == all_barriers) + { + // Short-hand instead of emitting 3 barriers. + statement("subgroupMemoryBarrier();"); + } + else + { + // Pick out individual barriers. + if (semantics & MemorySemanticsWorkgroupMemoryMask) + statement("subgroupMemoryBarrierShared();"); + if (semantics & MemorySemanticsUniformMemoryMask) + statement("subgroupMemoryBarrierBuffer();"); + if (semantics & MemorySemanticsImageMemoryMask) + statement("subgroupMemoryBarrierImage();"); + } + } + else + { + const uint32_t all_barriers = + MemorySemanticsWorkgroupMemoryMask | MemorySemanticsUniformMemoryMask | MemorySemanticsImageMemoryMask; + + if (semantics & (MemorySemanticsCrossWorkgroupMemoryMask | MemorySemanticsSubgroupMemoryMask)) + { + // These are not relevant for GLSL, but assume it means memoryBarrier(). + // memoryBarrier() does everything, so no need to test anything else. + statement("memoryBarrier();"); + } + else if ((semantics & all_barriers) == all_barriers) + { + // Short-hand instead of emitting 4 barriers. + statement("memoryBarrier();"); + } + else + { + // Pick out individual barriers. 
+ if (semantics & MemorySemanticsWorkgroupMemoryMask) + statement("memoryBarrierShared();"); + if (semantics & MemorySemanticsUniformMemoryMask) + statement("memoryBarrierBuffer();"); + if (semantics & MemorySemanticsImageMemoryMask) + statement("memoryBarrierImage();"); + } + } + + if (opcode == OpControlBarrier) + { + if (execution_scope == ScopeSubgroup) + statement("subgroupBarrier();"); + else + statement("barrier();"); + } + break; + } + + case OpExtInst: + { + uint32_t extension_set = ops[2]; + + if (get(extension_set).ext == SPIRExtension::GLSL) + { + emit_glsl_op(ops[0], ops[1], ops[3], &ops[4], length - 4); + } + else if (get(extension_set).ext == SPIRExtension::SPV_AMD_shader_ballot) + { + emit_spv_amd_shader_ballot_op(ops[0], ops[1], ops[3], &ops[4], length - 4); + } + else if (get(extension_set).ext == SPIRExtension::SPV_AMD_shader_explicit_vertex_parameter) + { + emit_spv_amd_shader_explicit_vertex_parameter_op(ops[0], ops[1], ops[3], &ops[4], length - 4); + } + else if (get(extension_set).ext == SPIRExtension::SPV_AMD_shader_trinary_minmax) + { + emit_spv_amd_shader_trinary_minmax_op(ops[0], ops[1], ops[3], &ops[4], length - 4); + } + else if (get(extension_set).ext == SPIRExtension::SPV_AMD_gcn_shader) + { + emit_spv_amd_gcn_shader_op(ops[0], ops[1], ops[3], &ops[4], length - 4); + } + else if (get(extension_set).ext == SPIRExtension::SPV_debug_info) + { + break; // Ignore SPIR-V debug information extended instructions. + } + else + { + statement("// unimplemented ext op ", instruction.op); + break; + } + + break; + } + + // Legacy sub-group stuff ... 
+ case OpSubgroupBallotKHR: + { + uint32_t result_type = ops[0]; + uint32_t id = ops[1]; + string expr; + expr = join("uvec4(unpackUint2x32(ballotARB(" + to_expression(ops[2]) + ")), 0u, 0u)"); + emit_op(result_type, id, expr, should_forward(ops[2])); + + require_extension_internal("GL_ARB_shader_ballot"); + inherit_expression_dependencies(id, ops[2]); + register_control_dependent_expression(ops[1]); + break; + } + + case OpSubgroupFirstInvocationKHR: + { + uint32_t result_type = ops[0]; + uint32_t id = ops[1]; + emit_unary_func_op(result_type, id, ops[2], "readFirstInvocationARB"); + + require_extension_internal("GL_ARB_shader_ballot"); + register_control_dependent_expression(ops[1]); + break; + } + + case OpSubgroupReadInvocationKHR: + { + uint32_t result_type = ops[0]; + uint32_t id = ops[1]; + emit_binary_func_op(result_type, id, ops[2], ops[3], "readInvocationARB"); + + require_extension_internal("GL_ARB_shader_ballot"); + register_control_dependent_expression(ops[1]); + break; + } + + case OpSubgroupAllKHR: + { + uint32_t result_type = ops[0]; + uint32_t id = ops[1]; + emit_unary_func_op(result_type, id, ops[2], "allInvocationsARB"); + + require_extension_internal("GL_ARB_shader_group_vote"); + register_control_dependent_expression(ops[1]); + break; + } + + case OpSubgroupAnyKHR: + { + uint32_t result_type = ops[0]; + uint32_t id = ops[1]; + emit_unary_func_op(result_type, id, ops[2], "anyInvocationARB"); + + require_extension_internal("GL_ARB_shader_group_vote"); + register_control_dependent_expression(ops[1]); + break; + } + + case OpSubgroupAllEqualKHR: + { + uint32_t result_type = ops[0]; + uint32_t id = ops[1]; + emit_unary_func_op(result_type, id, ops[2], "allInvocationsEqualARB"); + + require_extension_internal("GL_ARB_shader_group_vote"); + register_control_dependent_expression(ops[1]); + break; + } + + case OpGroupIAddNonUniformAMD: + case OpGroupFAddNonUniformAMD: + { + uint32_t result_type = ops[0]; + uint32_t id = ops[1]; + 
emit_unary_func_op(result_type, id, ops[4], "addInvocationsNonUniformAMD"); + + require_extension_internal("GL_AMD_shader_ballot"); + register_control_dependent_expression(ops[1]); + break; + } + + case OpGroupFMinNonUniformAMD: + case OpGroupUMinNonUniformAMD: + case OpGroupSMinNonUniformAMD: + { + uint32_t result_type = ops[0]; + uint32_t id = ops[1]; + emit_unary_func_op(result_type, id, ops[4], "minInvocationsNonUniformAMD"); + + require_extension_internal("GL_AMD_shader_ballot"); + register_control_dependent_expression(ops[1]); + break; + } + + case OpGroupFMaxNonUniformAMD: + case OpGroupUMaxNonUniformAMD: + case OpGroupSMaxNonUniformAMD: + { + uint32_t result_type = ops[0]; + uint32_t id = ops[1]; + emit_unary_func_op(result_type, id, ops[4], "maxInvocationsNonUniformAMD"); + + require_extension_internal("GL_AMD_shader_ballot"); + register_control_dependent_expression(ops[1]); + break; + } + + case OpFragmentMaskFetchAMD: + { + auto &type = expression_type(ops[2]); + uint32_t result_type = ops[0]; + uint32_t id = ops[1]; + + if (type.image.dim == spv::DimSubpassData) + { + emit_unary_func_op(result_type, id, ops[2], "fragmentMaskFetchAMD"); + } + else + { + emit_binary_func_op(result_type, id, ops[2], ops[3], "fragmentMaskFetchAMD"); + } + + require_extension_internal("GL_AMD_shader_fragment_mask"); + break; + } + + case OpFragmentFetchAMD: + { + auto &type = expression_type(ops[2]); + uint32_t result_type = ops[0]; + uint32_t id = ops[1]; + + if (type.image.dim == spv::DimSubpassData) + { + emit_binary_func_op(result_type, id, ops[2], ops[4], "fragmentFetchAMD"); + } + else + { + emit_trinary_func_op(result_type, id, ops[2], ops[3], ops[4], "fragmentFetchAMD"); + } + + require_extension_internal("GL_AMD_shader_fragment_mask"); + break; + } + + // Vulkan 1.1 sub-group stuff ... 
+ case OpGroupNonUniformElect: + case OpGroupNonUniformBroadcast: + case OpGroupNonUniformBroadcastFirst: + case OpGroupNonUniformBallot: + case OpGroupNonUniformInverseBallot: + case OpGroupNonUniformBallotBitExtract: + case OpGroupNonUniformBallotBitCount: + case OpGroupNonUniformBallotFindLSB: + case OpGroupNonUniformBallotFindMSB: + case OpGroupNonUniformShuffle: + case OpGroupNonUniformShuffleXor: + case OpGroupNonUniformShuffleUp: + case OpGroupNonUniformShuffleDown: + case OpGroupNonUniformAll: + case OpGroupNonUniformAny: + case OpGroupNonUniformAllEqual: + case OpGroupNonUniformFAdd: + case OpGroupNonUniformIAdd: + case OpGroupNonUniformFMul: + case OpGroupNonUniformIMul: + case OpGroupNonUniformFMin: + case OpGroupNonUniformFMax: + case OpGroupNonUniformSMin: + case OpGroupNonUniformSMax: + case OpGroupNonUniformUMin: + case OpGroupNonUniformUMax: + case OpGroupNonUniformBitwiseAnd: + case OpGroupNonUniformBitwiseOr: + case OpGroupNonUniformBitwiseXor: + case OpGroupNonUniformQuadSwap: + case OpGroupNonUniformQuadBroadcast: + emit_subgroup_op(instruction); + break; + + case OpFUnordEqual: + case OpFUnordNotEqual: + case OpFUnordLessThan: + case OpFUnordGreaterThan: + case OpFUnordLessThanEqual: + case OpFUnordGreaterThanEqual: + { + // GLSL doesn't specify if floating point comparisons are ordered or unordered, + // but glslang always emits ordered floating point compares for GLSL. + // To get unordered compares, we can test the opposite thing and invert the result. + // This way, we force true when there is any NaN present. 
+ uint32_t op0 = ops[2]; + uint32_t op1 = ops[3]; + + string expr; + if (expression_type(op0).vecsize > 1) + { + const char *comp_op = nullptr; + switch (opcode) + { + case OpFUnordEqual: + comp_op = "notEqual"; + break; + + case OpFUnordNotEqual: + comp_op = "equal"; + break; + + case OpFUnordLessThan: + comp_op = "greaterThanEqual"; + break; + + case OpFUnordLessThanEqual: + comp_op = "greaterThan"; + break; + + case OpFUnordGreaterThan: + comp_op = "lessThanEqual"; + break; + + case OpFUnordGreaterThanEqual: + comp_op = "lessThan"; + break; + + default: + assert(0); + break; + } + + expr = join("not(", comp_op, "(", to_unpacked_expression(op0), ", ", to_unpacked_expression(op1), "))"); + } + else + { + const char *comp_op = nullptr; + switch (opcode) + { + case OpFUnordEqual: + comp_op = " != "; + break; + + case OpFUnordNotEqual: + comp_op = " == "; + break; + + case OpFUnordLessThan: + comp_op = " >= "; + break; + + case OpFUnordLessThanEqual: + comp_op = " > "; + break; + + case OpFUnordGreaterThan: + comp_op = " <= "; + break; + + case OpFUnordGreaterThanEqual: + comp_op = " < "; + break; + + default: + assert(0); + break; + } + + expr = join("!(", to_enclosed_unpacked_expression(op0), comp_op, to_enclosed_unpacked_expression(op1), ")"); + } + + emit_op(ops[0], ops[1], expr, should_forward(op0) && should_forward(op1)); + inherit_expression_dependencies(ops[1], op0); + inherit_expression_dependencies(ops[1], op1); + break; + } + + case OpReportIntersectionNV: + statement("reportIntersectionNV(", to_expression(ops[0]), ", ", to_expression(ops[1]), ");"); + flush_control_dependent_expressions(current_emitting_block->self); + break; + case OpIgnoreIntersectionNV: + statement("ignoreIntersectionNV();"); + flush_control_dependent_expressions(current_emitting_block->self); + break; + case OpTerminateRayNV: + statement("terminateRayNV();"); + flush_control_dependent_expressions(current_emitting_block->self); + break; + case OpTraceNV: + statement("traceNV(", 
to_expression(ops[0]), ", ", to_expression(ops[1]), ", ", to_expression(ops[2]), ", ", + to_expression(ops[3]), ", ", to_expression(ops[4]), ", ", to_expression(ops[5]), ", ", + to_expression(ops[6]), ", ", to_expression(ops[7]), ", ", to_expression(ops[8]), ", ", + to_expression(ops[9]), ", ", to_expression(ops[10]), ");"); + flush_control_dependent_expressions(current_emitting_block->self); + break; + case OpExecuteCallableNV: + statement("executeCallableNV(", to_expression(ops[0]), ", ", to_expression(ops[1]), ");"); + flush_control_dependent_expressions(current_emitting_block->self); + break; + + case OpConvertUToPtr: + { + auto &type = get(ops[0]); + if (type.storage != StorageClassPhysicalStorageBufferEXT) + SPIRV_CROSS_THROW("Only StorageClassPhysicalStorageBufferEXT is supported by OpConvertUToPtr."); + + auto op = type_to_glsl(type); + emit_unary_func_op(ops[0], ops[1], ops[2], op.c_str()); + break; + } + + case OpConvertPtrToU: + { + auto &type = get(ops[0]); + auto &ptr_type = expression_type(ops[2]); + if (ptr_type.storage != StorageClassPhysicalStorageBufferEXT) + SPIRV_CROSS_THROW("Only StorageClassPhysicalStorageBufferEXT is supported by OpConvertPtrToU."); + + auto op = type_to_glsl(type); + emit_unary_func_op(ops[0], ops[1], ops[2], op.c_str()); + break; + } + + case OpUndef: + // Undefined value has been declared. 
+ break; + + case OpLine: + { + emit_line_directive(ops[0], ops[1]); + break; + } + + case OpNoLine: + break; + + case OpDemoteToHelperInvocationEXT: + if (!options.vulkan_semantics) + SPIRV_CROSS_THROW("GL_EXT_demote_to_helper_invocation is only supported in Vulkan GLSL."); + require_extension_internal("GL_EXT_demote_to_helper_invocation"); + statement(backend.demote_literal, ";"); + break; + + case OpIsHelperInvocationEXT: + if (!options.vulkan_semantics) + SPIRV_CROSS_THROW("GL_EXT_demote_to_helper_invocation is only supported in Vulkan GLSL."); + require_extension_internal("GL_EXT_demote_to_helper_invocation"); + emit_op(ops[0], ops[1], "helperInvocationEXT()", false); + break; + + case OpBeginInvocationInterlockEXT: + // If the interlock is complex, we emit this elsewhere. + if (!interlocked_is_complex) + { + if (options.es) + statement("beginInvocationInterlockNV();"); + else + statement("beginInvocationInterlockARB();"); + + flush_all_active_variables(); + // Make sure forwarding doesn't propagate outside interlock region. + } + break; + + case OpEndInvocationInterlockEXT: + // If the interlock is complex, we emit this elsewhere. + if (!interlocked_is_complex) + { + if (options.es) + statement("endInvocationInterlockNV();"); + else + statement("endInvocationInterlockARB();"); + + flush_all_active_variables(); + // Make sure forwarding doesn't propagate outside interlock region. + } + break; + + default: + statement("// unimplemented op ", instruction.op); + break; + } +} + +// Appends function arguments, mapped from global variables, beyond the specified arg index. +// This is used when a function call uses fewer arguments than the function defines. +// This situation may occur if the function signature has been dynamically modified to +// extract global variables referenced from within the function, and convert them to +// function arguments. 
This is necessary for shader languages that do not support global +// access to shader input content from within a function (eg. Metal). Each additional +// function args uses the name of the global variable. Function nesting will modify the +// functions and function calls all the way up the nesting chain. +void CompilerGLSL::append_global_func_args(const SPIRFunction &func, uint32_t index, SmallVector &arglist) +{ + auto &args = func.arguments; + uint32_t arg_cnt = uint32_t(args.size()); + for (uint32_t arg_idx = index; arg_idx < arg_cnt; arg_idx++) + { + auto &arg = args[arg_idx]; + assert(arg.alias_global_variable); + + // If the underlying variable needs to be declared + // (ie. a local variable with deferred declaration), do so now. + uint32_t var_id = get(arg.id).basevariable; + if (var_id) + flush_variable_declaration(var_id); + + arglist.push_back(to_func_call_arg(arg, arg.id)); + } +} + +string CompilerGLSL::to_member_name(const SPIRType &type, uint32_t index) +{ + if (type.type_alias != TypeID(0) && + !has_extended_decoration(type.type_alias, SPIRVCrossDecorationBufferBlockRepacked)) + { + return to_member_name(get(type.type_alias), index); + } + + auto &memb = ir.meta[type.self].members; + if (index < memb.size() && !memb[index].alias.empty()) + return memb[index].alias; + else + return join("_m", index); +} + +string CompilerGLSL::to_member_reference(uint32_t, const SPIRType &type, uint32_t index, bool) +{ + return join(".", to_member_name(type, index)); +} + +string CompilerGLSL::to_multi_member_reference(const SPIRType &type, const SmallVector &indices) +{ + string ret; + auto *member_type = &type; + for (auto &index : indices) + { + ret += join(".", to_member_name(*member_type, index)); + member_type = &get(member_type->member_types[index]); + } + return ret; +} + +void CompilerGLSL::add_member_name(SPIRType &type, uint32_t index) +{ + auto &memb = ir.meta[type.self].members; + if (index < memb.size() && !memb[index].alias.empty()) + { + auto &name 
= memb[index].alias; + if (name.empty()) + return; + + ParsedIR::sanitize_identifier(name, true, true); + update_name_cache(type.member_name_cache, name); + } +} + +// Checks whether the ID is a row_major matrix that requires conversion before use +bool CompilerGLSL::is_non_native_row_major_matrix(uint32_t id) +{ + // Natively supported row-major matrices do not need to be converted. + // Legacy targets do not support row major. + if (backend.native_row_major_matrix && !is_legacy()) + return false; + + auto *e = maybe_get(id); + if (e) + return e->need_transpose; + else + return has_decoration(id, DecorationRowMajor); +} + +// Checks whether the member is a row_major matrix that requires conversion before use +bool CompilerGLSL::member_is_non_native_row_major_matrix(const SPIRType &type, uint32_t index) +{ + // Natively supported row-major matrices do not need to be converted. + if (backend.native_row_major_matrix && !is_legacy()) + return false; + + // Non-matrix or column-major matrix types do not need to be converted. + if (!has_member_decoration(type.self, index, DecorationRowMajor)) + return false; + + // Only square row-major matrices can be converted at this time. + // Converting non-square matrices will require defining custom GLSL function that + // swaps matrix elements while retaining the original dimensional form of the matrix. + const auto mbr_type = get(type.member_types[index]); + if (mbr_type.columns != mbr_type.vecsize) + SPIRV_CROSS_THROW("Row-major matrices must be square on this platform."); + + return true; +} + +// Checks if we need to remap physical type IDs when declaring the type in a buffer. +bool CompilerGLSL::member_is_remapped_physical_type(const SPIRType &type, uint32_t index) const +{ + return has_extended_member_decoration(type.self, index, SPIRVCrossDecorationPhysicalTypeID); +} + +// Checks whether the member is in packed data type, that might need to be unpacked. 
+bool CompilerGLSL::member_is_packed_physical_type(const SPIRType &type, uint32_t index) const +{ + return has_extended_member_decoration(type.self, index, SPIRVCrossDecorationPhysicalTypePacked); +} + +// Wraps the expression string in a function call that converts the +// row_major matrix result of the expression to a column_major matrix. +// Base implementation uses the standard library transpose() function. +// Subclasses may override to use a different function. +string CompilerGLSL::convert_row_major_matrix(string exp_str, const SPIRType &exp_type, uint32_t /* physical_type_id */, + bool /*is_packed*/) +{ + strip_enclosed_expression(exp_str); + if (!is_matrix(exp_type)) + { + auto column_index = exp_str.find_last_of('['); + if (column_index == string::npos) + return exp_str; + + auto column_expr = exp_str.substr(column_index); + exp_str.resize(column_index); + + auto transposed_expr = type_to_glsl_constructor(exp_type) + "("; + + // Loading a column from a row-major matrix. Unroll the load. + for (uint32_t c = 0; c < exp_type.vecsize; c++) + { + transposed_expr += join(exp_str, '[', c, ']', column_expr); + if (c + 1 < exp_type.vecsize) + transposed_expr += ", "; + } + + transposed_expr += ")"; + return transposed_expr; + } + else if (options.version < 120) + { + // GLSL 110, ES 100 do not have transpose(), so emulate it. Note that + // these GLSL versions do not support non-square matrices. 
+ if (exp_type.vecsize == 2 && exp_type.columns == 2) + { + if (!requires_transpose_2x2) + { + requires_transpose_2x2 = true; + force_recompile(); + } + } + else if (exp_type.vecsize == 3 && exp_type.columns == 3) + { + if (!requires_transpose_3x3) + { + requires_transpose_3x3 = true; + force_recompile(); + } + } + else if (exp_type.vecsize == 4 && exp_type.columns == 4) + { + if (!requires_transpose_4x4) + { + requires_transpose_4x4 = true; + force_recompile(); + } + } + else + SPIRV_CROSS_THROW("Non-square matrices are not supported in legacy GLSL, cannot transpose."); + return join("spvTranspose(", exp_str, ")"); + } + else + return join("transpose(", exp_str, ")"); +} + +string CompilerGLSL::variable_decl(const SPIRType &type, const string &name, uint32_t id) +{ + string type_name = type_to_glsl(type, id); + remap_variable_type_name(type, name, type_name); + return join(type_name, " ", name, type_to_array_glsl(type)); +} + +// Emit a structure member. Subclasses may override to modify output, +// or to dynamically add a padding member if needed. 
+void CompilerGLSL::emit_struct_member(const SPIRType &type, uint32_t member_type_id, uint32_t index, + const string &qualifier, uint32_t) +{ + auto &membertype = get(member_type_id); + + Bitset memberflags; + auto &memb = ir.meta[type.self].members; + if (index < memb.size()) + memberflags = memb[index].decoration_flags; + + string qualifiers; + bool is_block = ir.meta[type.self].decoration.decoration_flags.get(DecorationBlock) || + ir.meta[type.self].decoration.decoration_flags.get(DecorationBufferBlock); + + if (is_block) + qualifiers = to_interpolation_qualifiers(memberflags); + + statement(layout_for_member(type, index), qualifiers, qualifier, flags_to_qualifiers_glsl(membertype, memberflags), + variable_decl(membertype, to_member_name(type, index)), ";"); +} + +void CompilerGLSL::emit_struct_padding_target(const SPIRType &) +{ +} + +const char *CompilerGLSL::flags_to_qualifiers_glsl(const SPIRType &type, const Bitset &flags) +{ + // GL_EXT_buffer_reference variables can be marked as restrict. + if (flags.get(DecorationRestrictPointerEXT)) + return "restrict "; + + // Structs do not have precision qualifiers, neither do doubles (desktop only anyways, so no mediump/highp). + if (type.basetype != SPIRType::Float && type.basetype != SPIRType::Int && type.basetype != SPIRType::UInt && + type.basetype != SPIRType::Image && type.basetype != SPIRType::SampledImage && + type.basetype != SPIRType::Sampler) + return ""; + + if (options.es) + { + auto &execution = get_entry_point(); + + if (flags.get(DecorationRelaxedPrecision)) + { + bool implied_fmediump = type.basetype == SPIRType::Float && + options.fragment.default_float_precision == Options::Mediump && + execution.model == ExecutionModelFragment; + + bool implied_imediump = (type.basetype == SPIRType::Int || type.basetype == SPIRType::UInt) && + options.fragment.default_int_precision == Options::Mediump && + execution.model == ExecutionModelFragment; + + return implied_fmediump || implied_imediump ? 
"" : "mediump "; + } + else + { + bool implied_fhighp = + type.basetype == SPIRType::Float && ((options.fragment.default_float_precision == Options::Highp && + execution.model == ExecutionModelFragment) || + (execution.model != ExecutionModelFragment)); + + bool implied_ihighp = (type.basetype == SPIRType::Int || type.basetype == SPIRType::UInt) && + ((options.fragment.default_int_precision == Options::Highp && + execution.model == ExecutionModelFragment) || + (execution.model != ExecutionModelFragment)); + + return implied_fhighp || implied_ihighp ? "" : "highp "; + } + } + else if (backend.allow_precision_qualifiers) + { + // Vulkan GLSL supports precision qualifiers, even in desktop profiles, which is convenient. + // The default is highp however, so only emit mediump in the rare case that a shader has these. + if (flags.get(DecorationRelaxedPrecision)) + return "mediump "; + else + return ""; + } + else + return ""; +} + +const char *CompilerGLSL::to_precision_qualifiers_glsl(uint32_t id) +{ + auto &type = expression_type(id); + bool use_precision_qualifiers = backend.allow_precision_qualifiers || options.es; + if (use_precision_qualifiers && (type.basetype == SPIRType::Image || type.basetype == SPIRType::SampledImage)) + { + // Force mediump for the sampler type. We cannot declare 16-bit or smaller image types. 
+ auto &result_type = get(type.image.type); + if (result_type.width < 32) + return "mediump "; + } + return flags_to_qualifiers_glsl(type, ir.meta[id].decoration.decoration_flags); +} + +string CompilerGLSL::to_qualifiers_glsl(uint32_t id) +{ + auto &flags = ir.meta[id].decoration.decoration_flags; + string res; + + auto *var = maybe_get(id); + + if (var && var->storage == StorageClassWorkgroup && !backend.shared_is_implied) + res += "shared "; + + res += to_interpolation_qualifiers(flags); + if (var) + res += to_storage_qualifiers_glsl(*var); + + auto &type = expression_type(id); + if (type.image.dim != DimSubpassData && type.image.sampled == 2) + { + if (flags.get(DecorationCoherent)) + res += "coherent "; + if (flags.get(DecorationRestrict)) + res += "restrict "; + if (flags.get(DecorationNonWritable)) + res += "readonly "; + if (flags.get(DecorationNonReadable)) + res += "writeonly "; + } + + res += to_precision_qualifiers_glsl(id); + + return res; +} + +string CompilerGLSL::argument_decl(const SPIRFunction::Parameter &arg) +{ + // glslangValidator seems to make all arguments pointer no matter what which is rather bizarre ... 
+ auto &type = expression_type(arg.id); + const char *direction = ""; + + if (type.pointer) + { + if (arg.write_count && arg.read_count) + direction = "inout "; + else if (arg.write_count) + direction = "out "; + } + + return join(direction, to_qualifiers_glsl(arg.id), variable_decl(type, to_name(arg.id), arg.id)); +} + +string CompilerGLSL::to_initializer_expression(const SPIRVariable &var) +{ + return to_expression(var.initializer); +} + +string CompilerGLSL::to_zero_initialized_expression(uint32_t type_id) +{ +#ifndef NDEBUG + auto &type = get(type_id); + assert(type.storage == StorageClassPrivate || type.storage == StorageClassFunction || + type.storage == StorageClassGeneric); +#endif + uint32_t id = ir.increase_bound_by(1); + ir.make_constant_null(id, type_id, false); + return constant_expression(get(id)); +} + +bool CompilerGLSL::type_can_zero_initialize(const SPIRType &type) const +{ + if (type.pointer) + return false; + + if (!type.array.empty() && options.flatten_multidimensional_arrays) + return false; + + for (auto &literal : type.array_size_literal) + if (!literal) + return false; + + for (auto &memb : type.member_types) + if (!type_can_zero_initialize(get(memb))) + return false; + + return true; +} + +string CompilerGLSL::variable_decl(const SPIRVariable &variable) +{ + // Ignore the pointer type since GLSL doesn't have pointers. 
+ auto &type = get_variable_data_type(variable); + + if (type.pointer_depth > 1) + SPIRV_CROSS_THROW("Cannot declare pointer-to-pointer types."); + + auto res = join(to_qualifiers_glsl(variable.self), variable_decl(type, to_name(variable.self), variable.self)); + + if (variable.loop_variable && variable.static_expression) + { + uint32_t expr = variable.static_expression; + if (ir.ids[expr].get_type() != TypeUndef) + res += join(" = ", to_expression(variable.static_expression)); + else if (options.force_zero_initialized_variables && type_can_zero_initialize(type)) + res += join(" = ", to_zero_initialized_expression(get_variable_data_type_id(variable))); + } + else if (variable.initializer) + { + uint32_t expr = variable.initializer; + if (ir.ids[expr].get_type() != TypeUndef) + res += join(" = ", to_initializer_expression(variable)); + else if (options.force_zero_initialized_variables && type_can_zero_initialize(type)) + res += join(" = ", to_zero_initialized_expression(get_variable_data_type_id(variable))); + } + + return res; +} + +const char *CompilerGLSL::to_pls_qualifiers_glsl(const SPIRVariable &variable) +{ + auto &flags = ir.meta[variable.self].decoration.decoration_flags; + if (flags.get(DecorationRelaxedPrecision)) + return "mediump "; + else + return "highp "; +} + +string CompilerGLSL::pls_decl(const PlsRemap &var) +{ + auto &variable = get(var.id); + + SPIRType type; + type.vecsize = pls_format_to_components(var.format); + type.basetype = pls_format_to_basetype(var.format); + + return join(to_pls_layout(var.format), to_pls_qualifiers_glsl(variable), type_to_glsl(type), " ", + to_name(variable.self)); +} + +uint32_t CompilerGLSL::to_array_size_literal(const SPIRType &type) const +{ + return to_array_size_literal(type, uint32_t(type.array.size() - 1)); +} + +uint32_t CompilerGLSL::to_array_size_literal(const SPIRType &type, uint32_t index) const +{ + assert(type.array.size() == type.array_size_literal.size()); + + if (type.array_size_literal[index]) + { + 
return type.array[index]; + } + else + { + // Use the default spec constant value. + // This is the best we can do. + return evaluate_constant_u32(type.array[index]); + } +} + +string CompilerGLSL::to_array_size(const SPIRType &type, uint32_t index) +{ + assert(type.array.size() == type.array_size_literal.size()); + + auto &size = type.array[index]; + if (!type.array_size_literal[index]) + return to_expression(size); + else if (size) + return convert_to_string(size); + else if (!backend.unsized_array_supported) + { + // For runtime-sized arrays, we can work around + // lack of standard support for this by simply having + // a single element array. + // + // Runtime length arrays must always be the last element + // in an interface block. + return "1"; + } + else + return ""; +} + +string CompilerGLSL::type_to_array_glsl(const SPIRType &type) +{ + if (type.pointer && type.storage == StorageClassPhysicalStorageBufferEXT && type.basetype != SPIRType::Struct) + { + // We are using a wrapped pointer type, and we should not emit any array declarations here. + return ""; + } + + if (type.array.empty()) + return ""; + + if (options.flatten_multidimensional_arrays) + { + string res; + res += "["; + for (auto i = uint32_t(type.array.size()); i; i--) + { + res += enclose_expression(to_array_size(type, i - 1)); + if (i > 1) + res += " * "; + } + res += "]"; + return res; + } + else + { + if (type.array.size() > 1) + { + if (!options.es && options.version < 430) + require_extension_internal("GL_ARB_arrays_of_arrays"); + else if (options.es && options.version < 310) + SPIRV_CROSS_THROW("Arrays of arrays not supported before ESSL version 310. 
" + "Try using --flatten-multidimensional-arrays or set " + "options.flatten_multidimensional_arrays to true."); + } + + string res; + for (auto i = uint32_t(type.array.size()); i; i--) + { + res += "["; + res += to_array_size(type, i - 1); + res += "]"; + } + return res; + } +} + +string CompilerGLSL::image_type_glsl(const SPIRType &type, uint32_t id) +{ + auto &imagetype = get(type.image.type); + string res; + + switch (imagetype.basetype) + { + case SPIRType::Int: + case SPIRType::Short: + case SPIRType::SByte: + res = "i"; + break; + case SPIRType::UInt: + case SPIRType::UShort: + case SPIRType::UByte: + res = "u"; + break; + default: + break; + } + + // For half image types, we will force mediump for the sampler, and cast to f16 after any sampling operation. + // We cannot express a true half texture type in GLSL. Neither for short integer formats for that matter. + + if (type.basetype == SPIRType::Image && type.image.dim == DimSubpassData && options.vulkan_semantics) + return res + "subpassInput" + (type.image.ms ? "MS" : ""); + else if (type.basetype == SPIRType::Image && type.image.dim == DimSubpassData && + subpass_input_is_framebuffer_fetch(id)) + { + SPIRType sampled_type = get(type.image.type); + sampled_type.vecsize = 4; + return type_to_glsl(sampled_type); + } + + // If we're emulating subpassInput with samplers, force sampler2D + // so we don't have to specify format. + if (type.basetype == SPIRType::Image && type.image.dim != DimSubpassData) + { + // Sampler buffers are always declared as samplerBuffer even though they might be separate images in the SPIR-V. + if (type.image.dim == DimBuffer && type.image.sampled == 1) + res += "sampler"; + else + res += type.image.sampled == 2 ? 
"image" : "texture"; + } + else + res += "sampler"; + + switch (type.image.dim) + { + case Dim1D: + res += "1D"; + break; + case Dim2D: + res += "2D"; + break; + case Dim3D: + res += "3D"; + break; + case DimCube: + res += "Cube"; + break; + case DimRect: + if (options.es) + SPIRV_CROSS_THROW("Rectangle textures are not supported on OpenGL ES."); + + if (is_legacy_desktop()) + require_extension_internal("GL_ARB_texture_rectangle"); + + res += "2DRect"; + break; + + case DimBuffer: + if (options.es && options.version < 320) + require_extension_internal("GL_OES_texture_buffer"); + else if (!options.es && options.version < 300) + require_extension_internal("GL_EXT_texture_buffer_object"); + res += "Buffer"; + break; + + case DimSubpassData: + res += "2D"; + break; + default: + SPIRV_CROSS_THROW("Only 1D, 2D, 2DRect, 3D, Buffer, InputTarget and Cube textures supported."); + } + + if (type.image.ms) + res += "MS"; + if (type.image.arrayed) + { + if (is_legacy_desktop()) + require_extension_internal("GL_EXT_texture_array"); + res += "Array"; + } + + // "Shadow" state in GLSL only exists for samplers and combined image samplers. + if (((type.basetype == SPIRType::SampledImage) || (type.basetype == SPIRType::Sampler)) && + image_is_comparison(type, id)) + { + res += "Shadow"; + } + + return res; +} + +string CompilerGLSL::type_to_glsl_constructor(const SPIRType &type) +{ + if (backend.use_array_constructor && type.array.size() > 1) + { + if (options.flatten_multidimensional_arrays) + SPIRV_CROSS_THROW("Cannot flatten constructors of multidimensional array constructors, " + "e.g. 
float[][]()."); + else if (!options.es && options.version < 430) + require_extension_internal("GL_ARB_arrays_of_arrays"); + else if (options.es && options.version < 310) + SPIRV_CROSS_THROW("Arrays of arrays not supported before ESSL version 310."); + } + + auto e = type_to_glsl(type); + if (backend.use_array_constructor) + { + for (uint32_t i = 0; i < type.array.size(); i++) + e += "[]"; + } + return e; +} + +// The optional id parameter indicates the object whose type we are trying +// to find the description for. It is optional. Most type descriptions do not +// depend on a specific object's use of that type. +string CompilerGLSL::type_to_glsl(const SPIRType &type, uint32_t id) +{ + if (type.pointer && type.storage == StorageClassPhysicalStorageBufferEXT && type.basetype != SPIRType::Struct) + { + // Need to create a magic type name which compacts the entire type information. + string name = type_to_glsl(get_pointee_type(type)); + for (size_t i = 0; i < type.array.size(); i++) + { + if (type.array_size_literal[i]) + name += join(type.array[i], "_"); + else + name += join("id", type.array[i], "_"); + } + name += "Pointer"; + return name; + } + + switch (type.basetype) + { + case SPIRType::Struct: + // Need OpName lookup here to get a "sensible" name for a struct. + if (backend.explicit_struct_type) + return join("struct ", to_name(type.self)); + else + return to_name(type.self); + + case SPIRType::Image: + case SPIRType::SampledImage: + return image_type_glsl(type, id); + + case SPIRType::Sampler: + // The depth field is set by calling code based on the variable ID of the sampler, effectively reintroducing + // this distinction into the type system. + return comparison_ids.count(id) ? 
"samplerShadow" : "sampler"; + + case SPIRType::AccelerationStructure: + return "accelerationStructureNV"; + + case SPIRType::Void: + return "void"; + + default: + break; + } + + if (type.basetype == SPIRType::UInt && is_legacy()) + SPIRV_CROSS_THROW("Unsigned integers are not supported on legacy targets."); + + if (type.vecsize == 1 && type.columns == 1) // Scalar builtin + { + switch (type.basetype) + { + case SPIRType::Boolean: + return "bool"; + case SPIRType::SByte: + return backend.basic_int8_type; + case SPIRType::UByte: + return backend.basic_uint8_type; + case SPIRType::Short: + return backend.basic_int16_type; + case SPIRType::UShort: + return backend.basic_uint16_type; + case SPIRType::Int: + return backend.basic_int_type; + case SPIRType::UInt: + return backend.basic_uint_type; + case SPIRType::AtomicCounter: + return "atomic_uint"; + case SPIRType::Half: + return "float16_t"; + case SPIRType::Float: + return "float"; + case SPIRType::Double: + return "double"; + case SPIRType::Int64: + return "int64_t"; + case SPIRType::UInt64: + return "uint64_t"; + default: + return "???"; + } + } + else if (type.vecsize > 1 && type.columns == 1) // Vector builtin + { + switch (type.basetype) + { + case SPIRType::Boolean: + return join("bvec", type.vecsize); + case SPIRType::SByte: + return join("i8vec", type.vecsize); + case SPIRType::UByte: + return join("u8vec", type.vecsize); + case SPIRType::Short: + return join("i16vec", type.vecsize); + case SPIRType::UShort: + return join("u16vec", type.vecsize); + case SPIRType::Int: + return join("ivec", type.vecsize); + case SPIRType::UInt: + return join("uvec", type.vecsize); + case SPIRType::Half: + return join("f16vec", type.vecsize); + case SPIRType::Float: + return join("vec", type.vecsize); + case SPIRType::Double: + return join("dvec", type.vecsize); + case SPIRType::Int64: + return join("i64vec", type.vecsize); + case SPIRType::UInt64: + return join("u64vec", type.vecsize); + default: + return "???"; + } + } + else 
if (type.vecsize == type.columns) // Simple Matrix builtin + { + switch (type.basetype) + { + case SPIRType::Boolean: + return join("bmat", type.vecsize); + case SPIRType::Int: + return join("imat", type.vecsize); + case SPIRType::UInt: + return join("umat", type.vecsize); + case SPIRType::Half: + return join("f16mat", type.vecsize); + case SPIRType::Float: + return join("mat", type.vecsize); + case SPIRType::Double: + return join("dmat", type.vecsize); + // Matrix types not supported for int64/uint64. + default: + return "???"; + } + } + else + { + switch (type.basetype) + { + case SPIRType::Boolean: + return join("bmat", type.columns, "x", type.vecsize); + case SPIRType::Int: + return join("imat", type.columns, "x", type.vecsize); + case SPIRType::UInt: + return join("umat", type.columns, "x", type.vecsize); + case SPIRType::Half: + return join("f16mat", type.columns, "x", type.vecsize); + case SPIRType::Float: + return join("mat", type.columns, "x", type.vecsize); + case SPIRType::Double: + return join("dmat", type.columns, "x", type.vecsize); + // Matrix types not supported for int64/uint64. 
+ default: + return "???"; + } + } +} + +void CompilerGLSL::add_variable(unordered_set &variables_primary, + const unordered_set &variables_secondary, string &name) +{ + if (name.empty()) + return; + + ParsedIR::sanitize_underscores(name); + if (ParsedIR::is_globally_reserved_identifier(name, true)) + { + name.clear(); + return; + } + + update_name_cache(variables_primary, variables_secondary, name); +} + +void CompilerGLSL::add_local_variable_name(uint32_t id) +{ + add_variable(local_variable_names, block_names, ir.meta[id].decoration.alias); +} + +void CompilerGLSL::add_resource_name(uint32_t id) +{ + add_variable(resource_names, block_names, ir.meta[id].decoration.alias); +} + +void CompilerGLSL::add_header_line(const std::string &line) +{ + header_lines.push_back(line); +} + +bool CompilerGLSL::has_extension(const std::string &ext) const +{ + auto itr = find(begin(forced_extensions), end(forced_extensions), ext); + return itr != end(forced_extensions); +} + +void CompilerGLSL::require_extension(const std::string &ext) +{ + if (!has_extension(ext)) + forced_extensions.push_back(ext); +} + +void CompilerGLSL::require_extension_internal(const string &ext) +{ + if (backend.supports_extensions && !has_extension(ext)) + { + forced_extensions.push_back(ext); + force_recompile(); + } +} + +void CompilerGLSL::flatten_buffer_block(VariableID id) +{ + auto &var = get(id); + auto &type = get(var.basetype); + auto name = to_name(type.self, false); + auto &flags = ir.meta[type.self].decoration.decoration_flags; + + if (!type.array.empty()) + SPIRV_CROSS_THROW(name + " is an array of UBOs."); + if (type.basetype != SPIRType::Struct) + SPIRV_CROSS_THROW(name + " is not a struct."); + if (!flags.get(DecorationBlock)) + SPIRV_CROSS_THROW(name + " is not a block."); + if (type.member_types.empty()) + SPIRV_CROSS_THROW(name + " is an empty struct."); + + flattened_buffer_blocks.insert(id); +} + +bool CompilerGLSL::builtin_translates_to_nonarray(spv::BuiltIn /*builtin*/) const +{ + 
return false; // GLSL itself does not need to translate array builtin types to non-array builtin types +} + +bool CompilerGLSL::check_atomic_image(uint32_t id) +{ + auto &type = expression_type(id); + if (type.storage == StorageClassImage) + { + if (options.es && options.version < 320) + require_extension_internal("GL_OES_shader_image_atomic"); + + auto *var = maybe_get_backing_variable(id); + if (var) + { + auto &flags = ir.meta[var->self].decoration.decoration_flags; + if (flags.get(DecorationNonWritable) || flags.get(DecorationNonReadable)) + { + flags.clear(DecorationNonWritable); + flags.clear(DecorationNonReadable); + force_recompile(); + } + } + return true; + } + else + return false; +} + +void CompilerGLSL::add_function_overload(const SPIRFunction &func) +{ + Hasher hasher; + for (auto &arg : func.arguments) + { + // Parameters can vary with pointer type or not, + // but that will not change the signature in GLSL/HLSL, + // so strip the pointer type before hashing. + uint32_t type_id = get_pointee_type_id(arg.type); + auto &type = get(type_id); + + if (!combined_image_samplers.empty()) + { + // If we have combined image samplers, we cannot really trust the image and sampler arguments + // we pass down to callees, because they may be shuffled around. + // Ignore these arguments, to make sure that functions need to differ in some other way + // to be considered different overloads. + if (type.basetype == SPIRType::SampledImage || + (type.basetype == SPIRType::Image && type.image.sampled == 1) || type.basetype == SPIRType::Sampler) + { + continue; + } + } + + hasher.u32(type_id); + } + uint64_t types_hash = hasher.get(); + + auto function_name = to_name(func.self); + auto itr = function_overloads.find(function_name); + if (itr != end(function_overloads)) + { + // There exists a function with this name already. + auto &overloads = itr->second; + if (overloads.count(types_hash) != 0) + { + // Overload conflict, assign a new name. 
+ add_resource_name(func.self); + function_overloads[to_name(func.self)].insert(types_hash); + } + else + { + // Can reuse the name. + overloads.insert(types_hash); + } + } + else + { + // First time we see this function name. + add_resource_name(func.self); + function_overloads[to_name(func.self)].insert(types_hash); + } +} + +void CompilerGLSL::emit_function_prototype(SPIRFunction &func, const Bitset &return_flags) +{ + if (func.self != ir.default_entry_point) + add_function_overload(func); + + // Avoid shadow declarations. + local_variable_names = resource_names; + + string decl; + + auto &type = get(func.return_type); + decl += flags_to_qualifiers_glsl(type, return_flags); + decl += type_to_glsl(type); + decl += type_to_array_glsl(type); + decl += " "; + + if (func.self == ir.default_entry_point) + { + // If we need complex fallback in GLSL, we just wrap main() in a function + // and interlock the entire shader ... + if (interlocked_is_complex) + decl += "spvMainInterlockedBody"; + else + decl += "main"; + + processing_entry_point = true; + } + else + decl += to_name(func.self); + + decl += "("; + SmallVector arglist; + for (auto &arg : func.arguments) + { + // Do not pass in separate images or samplers if we're remapping + // to combined image samplers. + if (skip_argument(arg.id)) + continue; + + // Might change the variable name if it already exists in this function. + // SPIRV OpName doesn't have any semantic effect, so it's valid for an implementation + // to use same name for variables. + // Since we want to make the GLSL debuggable and somewhat sane, use fallback names for variables which are duplicates. + add_local_variable_name(arg.id); + + arglist.push_back(argument_decl(arg)); + + // Hold a pointer to the parameter so we can invalidate the readonly field if needed. 
+ auto *var = maybe_get(arg.id); + if (var) + var->parameter = &arg; + } + + for (auto &arg : func.shadow_arguments) + { + // Might change the variable name if it already exists in this function. + // SPIRV OpName doesn't have any semantic effect, so it's valid for an implementation + // to use same name for variables. + // Since we want to make the GLSL debuggable and somewhat sane, use fallback names for variables which are duplicates. + add_local_variable_name(arg.id); + + arglist.push_back(argument_decl(arg)); + + // Hold a pointer to the parameter so we can invalidate the readonly field if needed. + auto *var = maybe_get(arg.id); + if (var) + var->parameter = &arg; + } + + decl += merge(arglist); + decl += ")"; + statement(decl); +} + +void CompilerGLSL::emit_function(SPIRFunction &func, const Bitset &return_flags) +{ + // Avoid potential cycles. + if (func.active) + return; + func.active = true; + + // If we depend on a function, emit that function before we emit our own function. + for (auto block : func.blocks) + { + auto &b = get(block); + for (auto &i : b.ops) + { + auto ops = stream(i); + auto op = static_cast(i.op); + + if (op == OpFunctionCall) + { + // Recursively emit functions which are called. 
+ uint32_t id = ops[2]; + emit_function(get(id), ir.meta[ops[1]].decoration.decoration_flags); + } + } + } + + if (func.entry_line.file_id != 0) + emit_line_directive(func.entry_line.file_id, func.entry_line.line_literal); + emit_function_prototype(func, return_flags); + begin_scope(); + + if (func.self == ir.default_entry_point) + emit_entry_point_declarations(); + + current_function = &func; + auto &entry_block = get(func.entry_block); + + sort(begin(func.constant_arrays_needed_on_stack), end(func.constant_arrays_needed_on_stack)); + for (auto &array : func.constant_arrays_needed_on_stack) + { + auto &c = get(array); + auto &type = get(c.constant_type); + statement(variable_decl(type, join("_", array, "_array_copy")), " = ", constant_expression(c), ";"); + } + + for (auto &v : func.local_variables) + { + auto &var = get(v); + var.deferred_declaration = false; + + if (var.storage == StorageClassWorkgroup) + { + // Special variable type which cannot have initializer, + // need to be declared as standalone variables. + // Comes from MSL which can push global variables as local variables in main function. + add_local_variable_name(var.self); + statement(variable_decl(var), ";"); + var.deferred_declaration = false; + } + else if (var.storage == StorageClassPrivate) + { + // These variables will not have had their CFG usage analyzed, so move it to the entry block. + // Comes from MSL which can push global variables as local variables in main function. + // We could just declare them right now, but we would miss out on an important initialization case which is + // LUT declaration in MSL. + // If we don't declare the variable when it is assigned we're forced to go through a helper function + // which copies elements one by one. 
+ add_local_variable_name(var.self); + + if (var.initializer) + { + statement(variable_decl(var), ";"); + var.deferred_declaration = false; + } + else + { + auto &dominated = entry_block.dominated_variables; + if (find(begin(dominated), end(dominated), var.self) == end(dominated)) + entry_block.dominated_variables.push_back(var.self); + var.deferred_declaration = true; + } + } + else if (var.storage == StorageClassFunction && var.remapped_variable && var.static_expression) + { + // No need to declare this variable, it has a static expression. + var.deferred_declaration = false; + } + else if (expression_is_lvalue(v)) + { + add_local_variable_name(var.self); + + // Loop variables should never be declared early, they are explicitly emitted in a loop. + if (var.initializer && !var.loop_variable) + statement(variable_decl_function_local(var), ";"); + else + { + // Don't declare variable until first use to declutter the GLSL output quite a lot. + // If we don't touch the variable before first branch, + // declare it then since we need variable declaration to be in top scope. + var.deferred_declaration = true; + } + } + else + { + // HACK: SPIR-V in older glslang output likes to use samplers and images as local variables, but GLSL does not allow this. + // For these types (non-lvalue), we enforce forwarding through a shadowed variable. + // This means that when we OpStore to these variables, we just write in the expression ID directly. + // This breaks any kind of branching, since the variable must be statically assigned. + // Branching on samplers and images would be pretty much impossible to fake in GLSL. + var.statically_assigned = true; + } + + var.loop_variable_enable = false; + + // Loop variables are never declared outside their for-loop, so block any implicit declaration. + if (var.loop_variable) + var.deferred_declaration = false; + } + + // Enforce declaration order for regression testing purposes. 
+ for (auto &block_id : func.blocks) + { + auto &block = get(block_id); + sort(begin(block.dominated_variables), end(block.dominated_variables)); + } + + for (auto &line : current_function->fixup_hooks_in) + line(); + + emit_block_chain(entry_block); + + end_scope(); + processing_entry_point = false; + statement(""); + + // Make sure deferred declaration state for local variables is cleared when we are done with function. + // We risk declaring Private/Workgroup variables in places we are not supposed to otherwise. + for (auto &v : func.local_variables) + { + auto &var = get(v); + var.deferred_declaration = false; + } +} + +void CompilerGLSL::emit_fixup() +{ + if (is_vertex_like_shader()) + { + if (options.vertex.fixup_clipspace) + { + const char *suffix = backend.float_literal_suffix ? "f" : ""; + statement("gl_Position.z = 2.0", suffix, " * gl_Position.z - gl_Position.w;"); + } + + if (options.vertex.flip_vert_y) + statement("gl_Position.y = -gl_Position.y;"); + } +} + +void CompilerGLSL::flush_phi(BlockID from, BlockID to) +{ + auto &child = get(to); + if (child.ignore_phi_from_block == from) + return; + + unordered_set temporary_phi_variables; + + for (auto itr = begin(child.phi_variables); itr != end(child.phi_variables); ++itr) + { + auto &phi = *itr; + + if (phi.parent == from) + { + auto &var = get(phi.function_variable); + + // A Phi variable might be a loop variable, so flush to static expression. + if (var.loop_variable && !var.loop_variable_enable) + var.static_expression = phi.local_variable; + else + { + flush_variable_declaration(phi.function_variable); + + // Check if we are going to write to a Phi variable that another statement will read from + // as part of another Phi node in our target block. + // For this case, we will need to copy phi.function_variable to a temporary, and use that for future reads. + // This is judged to be extremely rare, so deal with it here using a simple, but suboptimal algorithm. 
+ bool need_saved_temporary = + find_if(itr + 1, end(child.phi_variables), [&](const SPIRBlock::Phi &future_phi) -> bool { + return future_phi.local_variable == ID(phi.function_variable) && future_phi.parent == from; + }) != end(child.phi_variables); + + if (need_saved_temporary) + { + // Need to make sure we declare the phi variable with a copy at the right scope. + // We cannot safely declare a temporary here since we might be inside a continue block. + if (!var.allocate_temporary_copy) + { + var.allocate_temporary_copy = true; + force_recompile(); + } + statement("_", phi.function_variable, "_copy", " = ", to_name(phi.function_variable), ";"); + temporary_phi_variables.insert(phi.function_variable); + } + + // This might be called in continue block, so make sure we + // use this to emit ESSL 1.0 compliant increments/decrements. + auto lhs = to_expression(phi.function_variable); + + string rhs; + if (temporary_phi_variables.count(phi.local_variable)) + rhs = join("_", phi.local_variable, "_copy"); + else + rhs = to_pointer_expression(phi.local_variable); + + if (!optimize_read_modify_write(get(var.basetype), lhs, rhs)) + statement(lhs, " = ", rhs, ";"); + } + + register_write(phi.function_variable); + } + } +} + +void CompilerGLSL::branch_to_continue(BlockID from, BlockID to) +{ + auto &to_block = get(to); + if (from == to) + return; + + assert(is_continue(to)); + if (to_block.complex_continue) + { + // Just emit the whole block chain as is. + auto usage_counts = expression_usage_counts; + + emit_block_chain(to_block); + + // Expression usage counts are moot after returning from the continue block. + expression_usage_counts = usage_counts; + } + else + { + auto &from_block = get(from); + bool outside_control_flow = false; + uint32_t loop_dominator = 0; + + // FIXME: Refactor this to not use the old loop_dominator tracking. + if (from_block.merge_block) + { + // If we are a loop header, we don't set the loop dominator, + // so just use "self" here. 
+ loop_dominator = from; + } + else if (from_block.loop_dominator != BlockID(SPIRBlock::NoDominator)) + { + loop_dominator = from_block.loop_dominator; + } + + if (loop_dominator != 0) + { + auto &cfg = get_cfg_for_current_function(); + + // For non-complex continue blocks, we implicitly branch to the continue block + // by having the continue block be part of the loop header in for (; ; continue-block). + outside_control_flow = cfg.node_terminates_control_flow_in_sub_graph(loop_dominator, from); + } + + // Some simplification for for-loops. We always end up with a useless continue; + // statement since we branch to a loop block. + // Walk the CFG, if we unconditionally execute the block calling continue assuming we're in the loop block, + // we can avoid writing out an explicit continue statement. + // Similar optimization to return statements if we know we're outside flow control. + if (!outside_control_flow) + statement("continue;"); + } +} + +void CompilerGLSL::branch(BlockID from, BlockID to) +{ + flush_phi(from, to); + flush_control_dependent_expressions(from); + + bool to_is_continue = is_continue(to); + + // This is only a continue if we branch to our loop dominator. + if ((ir.block_meta[to] & ParsedIR::BLOCK_META_LOOP_HEADER_BIT) != 0 && get(from).loop_dominator == to) + { + // This can happen if we had a complex continue block which was emitted. + // Once the continue block tries to branch to the loop header, just emit continue; + // and end the chain here. + statement("continue;"); + } + else if (from != to && is_break(to)) + { + // We cannot break to ourselves, so check explicitly for from != to. + // This case can trigger if a loop header is all three of these things: + // - Continue block + // - Loop header + // - Break merge target all at once ... + + // Very dirty workaround. + // Switch constructs are able to break, but they cannot break out of a loop at the same time. 
+ // Only sensible solution is to make a ladder variable, which we declare at the top of the switch block, + // write to the ladder here, and defer the break. + // The loop we're breaking out of must dominate the switch block, or there is no ladder breaking case. + if (current_emitting_switch && is_loop_break(to) && + current_emitting_switch->loop_dominator != BlockID(SPIRBlock::NoDominator) && + get(current_emitting_switch->loop_dominator).merge_block == to) + { + if (!current_emitting_switch->need_ladder_break) + { + force_recompile(); + current_emitting_switch->need_ladder_break = true; + } + + statement("_", current_emitting_switch->self, "_ladder_break = true;"); + } + statement("break;"); + } + else if (to_is_continue || from == to) + { + // For from == to case can happen for a do-while loop which branches into itself. + // We don't mark these cases as continue blocks, but the only possible way to branch into + // ourselves is through means of continue blocks. + + // If we are merging to a continue block, there is no need to emit the block chain for continue here. + // We can branch to the continue block after we merge execution. + + // Here we make use of structured control flow rules from spec: + // 2.11: - the merge block declared by a header block cannot be a merge block declared by any other header block + // - each header block must strictly dominate its merge block, unless the merge block is unreachable in the CFG + // If we are branching to a merge block, we must be inside a construct which dominates the merge block. + auto &block_meta = ir.block_meta[to]; + bool branching_to_merge = + (block_meta & (ParsedIR::BLOCK_META_SELECTION_MERGE_BIT | ParsedIR::BLOCK_META_MULTISELECT_MERGE_BIT | + ParsedIR::BLOCK_META_LOOP_MERGE_BIT)) != 0; + if (!to_is_continue || !branching_to_merge) + branch_to_continue(from, to); + } + else if (!is_conditional(to)) + emit_block_chain(get(to)); + + // It is important that we check for break before continue. 
+ // A block might serve two purposes, a break block for the inner scope, and + // a continue block in the outer scope. + // Inner scope always takes precedence. +} + +void CompilerGLSL::branch(BlockID from, uint32_t cond, BlockID true_block, BlockID false_block) +{ + auto &from_block = get(from); + BlockID merge_block = from_block.merge == SPIRBlock::MergeSelection ? from_block.next_block : BlockID(0); + + // If we branch directly to our selection merge target, we don't need a code path. + bool true_block_needs_code = true_block != merge_block || flush_phi_required(from, true_block); + bool false_block_needs_code = false_block != merge_block || flush_phi_required(from, false_block); + + if (!true_block_needs_code && !false_block_needs_code) + return; + + emit_block_hints(get(from)); + + if (true_block_needs_code) + { + statement("if (", to_expression(cond), ")"); + begin_scope(); + branch(from, true_block); + end_scope(); + + if (false_block_needs_code) + { + statement("else"); + begin_scope(); + branch(from, false_block); + end_scope(); + } + } + else if (false_block_needs_code) + { + // Only need false path, use negative conditional. + statement("if (!", to_enclosed_expression(cond), ")"); + begin_scope(); + branch(from, false_block); + end_scope(); + } +} + +// FIXME: This currently cannot handle complex continue blocks +// as in do-while. +// This should be seen as a "trivial" continue block. +string CompilerGLSL::emit_continue_block(uint32_t continue_block, bool follow_true_block, bool follow_false_block) +{ + auto *block = &get(continue_block); + + // While emitting the continue block, declare_temporary will check this + // if we have to emit temporaries. + current_continue_block = block; + + SmallVector statements; + + // Capture all statements into our list. + auto *old = redirect_statement; + redirect_statement = &statements; + + // Stamp out all blocks one after each other. 
+ while ((ir.block_meta[block->self] & ParsedIR::BLOCK_META_LOOP_HEADER_BIT) == 0) + { + // Write out all instructions we have in this block. + emit_block_instructions(*block); + + // For plain branchless for/while continue blocks. + if (block->next_block) + { + flush_phi(continue_block, block->next_block); + block = &get(block->next_block); + } + // For do while blocks. The last block will be a select block. + else if (block->true_block && follow_true_block) + { + flush_phi(continue_block, block->true_block); + block = &get(block->true_block); + } + else if (block->false_block && follow_false_block) + { + flush_phi(continue_block, block->false_block); + block = &get(block->false_block); + } + else + { + SPIRV_CROSS_THROW("Invalid continue block detected!"); + } + } + + // Restore old pointer. + redirect_statement = old; + + // Somewhat ugly, strip off the last ';' since we use ',' instead. + // Ideally, we should select this behavior in statement(). + for (auto &s : statements) + { + if (!s.empty() && s.back() == ';') + s.erase(s.size() - 1, 1); + } + + current_continue_block = nullptr; + return merge(statements); +} + +void CompilerGLSL::emit_while_loop_initializers(const SPIRBlock &block) +{ + // While loops do not take initializers, so declare all of them outside. + for (auto &loop_var : block.loop_variables) + { + auto &var = get(loop_var); + statement(variable_decl(var), ";"); + } +} + +string CompilerGLSL::emit_for_loop_initializers(const SPIRBlock &block) +{ + if (block.loop_variables.empty()) + return ""; + + bool same_types = for_loop_initializers_are_same_type(block); + // We can only declare for loop initializers if all variables are of same type. + // If we cannot do this, declare individual variables before the loop header. + + // We might have a loop variable candidate which was not assigned to for some reason. 
+ uint32_t missing_initializers = 0; + for (auto &variable : block.loop_variables) + { + uint32_t expr = get(variable).static_expression; + + // Sometimes loop variables are initialized with OpUndef, but we can just declare + // a plain variable without initializer in this case. + if (expr == 0 || ir.ids[expr].get_type() == TypeUndef) + missing_initializers++; + } + + if (block.loop_variables.size() == 1 && missing_initializers == 0) + { + return variable_decl(get(block.loop_variables.front())); + } + else if (!same_types || missing_initializers == uint32_t(block.loop_variables.size())) + { + for (auto &loop_var : block.loop_variables) + statement(variable_decl(get(loop_var)), ";"); + return ""; + } + else + { + // We have a mix of loop variables, either ones with a clear initializer, or ones without. + // Separate the two streams. + string expr; + + for (auto &loop_var : block.loop_variables) + { + uint32_t static_expr = get(loop_var).static_expression; + if (static_expr == 0 || ir.ids[static_expr].get_type() == TypeUndef) + { + statement(variable_decl(get(loop_var)), ";"); + } + else + { + auto &var = get(loop_var); + auto &type = get_variable_data_type(var); + if (expr.empty()) + { + // For loop initializers are of the form (block.true_block), get(block.merge_block))) + condition = join("!", enclose_expression(condition)); + + statement("while (", condition, ")"); + break; + } + + default: + block.disable_block_optimization = true; + force_recompile(); + begin_scope(); // We'll see an end_scope() later. + return false; + } + + begin_scope(); + return true; + } + else + { + block.disable_block_optimization = true; + force_recompile(); + begin_scope(); // We'll see an end_scope() later. + return false; + } + } + else if (method == SPIRBlock::MergeToDirectForLoop) + { + auto &child = get(block.next_block); + + // This block may be a dominating block, so make sure we flush undeclared variables before building the for loop header. 
+ flush_undeclared_variables(child); + + uint32_t current_count = statement_count; + + // If we're trying to create a true for loop, + // we need to make sure that all opcodes before branch statement do not actually emit any code. + // We can then take the condition expression and create a for (; cond ; ) { body; } structure instead. + emit_block_instructions(child); + + bool condition_is_temporary = forced_temporaries.find(child.condition) == end(forced_temporaries); + + if (current_count == statement_count && condition_is_temporary) + { + uint32_t target_block = child.true_block; + + switch (continue_type) + { + case SPIRBlock::ForLoop: + { + // Important that we do this in this order because + // emitting the continue block can invalidate the condition expression. + auto initializer = emit_for_loop_initializers(block); + auto condition = to_expression(child.condition); + + // Condition might have to be inverted. + if (execution_is_noop(get(child.true_block), get(block.merge_block))) + { + condition = join("!", enclose_expression(condition)); + target_block = child.false_block; + } + + auto continue_block = emit_continue_block(block.continue_block, false, false); + emit_block_hints(block); + statement("for (", initializer, "; ", condition, "; ", continue_block, ")"); + break; + } + + case SPIRBlock::WhileLoop: + { + emit_while_loop_initializers(block); + emit_block_hints(block); + + auto condition = to_expression(child.condition); + // Condition might have to be inverted. + if (execution_is_noop(get(child.true_block), get(block.merge_block))) + { + condition = join("!", enclose_expression(condition)); + target_block = child.false_block; + } + + statement("while (", condition, ")"); + break; + } + + default: + block.disable_block_optimization = true; + force_recompile(); + begin_scope(); // We'll see an end_scope() later. 
+ return false; + } + + begin_scope(); + branch(child.self, target_block); + return true; + } + else + { + block.disable_block_optimization = true; + force_recompile(); + begin_scope(); // We'll see an end_scope() later. + return false; + } + } + else + return false; +} + +void CompilerGLSL::flush_undeclared_variables(SPIRBlock &block) +{ + for (auto &v : block.dominated_variables) + flush_variable_declaration(v); +} + +void CompilerGLSL::emit_hoisted_temporaries(SmallVector> &temporaries) +{ + // If we need to force temporaries for certain IDs due to continue blocks, do it before starting loop header. + // Need to sort these to ensure that reference output is stable. + sort(begin(temporaries), end(temporaries), + [](const pair &a, const pair &b) { return a.second < b.second; }); + + for (auto &tmp : temporaries) + { + add_local_variable_name(tmp.second); + auto &flags = ir.meta[tmp.second].decoration.decoration_flags; + auto &type = get(tmp.first); + + // Not all targets support pointer literals, so don't bother with that case. + string initializer; + if (options.force_zero_initialized_variables && type_can_zero_initialize(type)) + initializer = join(" = ", to_zero_initialized_expression(tmp.first)); + + statement(flags_to_qualifiers_glsl(type, flags), variable_decl(type, to_name(tmp.second)), initializer, ";"); + + hoisted_temporaries.insert(tmp.second); + forced_temporaries.insert(tmp.second); + + // The temporary might be read from before it's assigned, set up the expression now. 
+ set(tmp.second, to_name(tmp.second), tmp.first, true); + } +} + +void CompilerGLSL::emit_block_chain(SPIRBlock &block) +{ + bool select_branch_to_true_block = false; + bool select_branch_to_false_block = false; + bool skip_direct_branch = false; + bool emitted_loop_header_variables = false; + bool force_complex_continue_block = false; + ValueSaver loop_level_saver(current_loop_level); + + if (block.merge == SPIRBlock::MergeLoop) + add_loop_level(); + + emit_hoisted_temporaries(block.declare_temporary); + + SPIRBlock::ContinueBlockType continue_type = SPIRBlock::ContinueNone; + if (block.continue_block) + { + continue_type = continue_block_type(get(block.continue_block)); + // If we know we cannot emit a loop, mark the block early as a complex loop so we don't force unnecessary recompiles. + if (continue_type == SPIRBlock::ComplexLoop) + block.complex_continue = true; + } + + // If we have loop variables, stop masking out access to the variable now. + for (auto var_id : block.loop_variables) + { + auto &var = get(var_id); + var.loop_variable_enable = true; + // We're not going to declare the variable directly, so emit a copy here. + emit_variable_temporary_copies(var); + } + + // Remember deferred declaration state. We will restore it before returning. + SmallVector rearm_dominated_variables(block.dominated_variables.size()); + for (size_t i = 0; i < block.dominated_variables.size(); i++) + { + uint32_t var_id = block.dominated_variables[i]; + auto &var = get(var_id); + rearm_dominated_variables[i] = var.deferred_declaration; + } + + // This is the method often used by spirv-opt to implement loops. + // The loop header goes straight into the continue block. + // However, don't attempt this on ESSL 1.0, because if a loop variable is used in a continue block, + // it *MUST* be used in the continue block. This loop method will not work. 
+ if (!is_legacy_es() && block_is_loop_candidate(block, SPIRBlock::MergeToSelectContinueForLoop)) + { + flush_undeclared_variables(block); + if (attempt_emit_loop_header(block, SPIRBlock::MergeToSelectContinueForLoop)) + { + if (execution_is_noop(get(block.true_block), get(block.merge_block))) + select_branch_to_false_block = true; + else + select_branch_to_true_block = true; + + emitted_loop_header_variables = true; + force_complex_continue_block = true; + } + } + // This is the older loop behavior in glslang which branches to loop body directly from the loop header. + else if (block_is_loop_candidate(block, SPIRBlock::MergeToSelectForLoop)) + { + flush_undeclared_variables(block); + if (attempt_emit_loop_header(block, SPIRBlock::MergeToSelectForLoop)) + { + // The body of while, is actually just the true (or false) block, so always branch there unconditionally. + if (execution_is_noop(get(block.true_block), get(block.merge_block))) + select_branch_to_false_block = true; + else + select_branch_to_true_block = true; + + emitted_loop_header_variables = true; + } + } + // This is the newer loop behavior in glslang which branches from Loop header directly to + // a new block, which in turn has a OpBranchSelection without a selection merge. + else if (block_is_loop_candidate(block, SPIRBlock::MergeToDirectForLoop)) + { + flush_undeclared_variables(block); + if (attempt_emit_loop_header(block, SPIRBlock::MergeToDirectForLoop)) + { + skip_direct_branch = true; + emitted_loop_header_variables = true; + } + } + else if (continue_type == SPIRBlock::DoWhileLoop) + { + flush_undeclared_variables(block); + emit_while_loop_initializers(block); + emitted_loop_header_variables = true; + // We have some temporaries where the loop header is the dominator. + // We risk a case where we have code like: + // for (;;) { create-temporary; break; } consume-temporary; + // so force-declare temporaries here. 
+ emit_hoisted_temporaries(block.potential_declare_temporary); + statement("do"); + begin_scope(); + + emit_block_instructions(block); + } + else if (block.merge == SPIRBlock::MergeLoop) + { + flush_undeclared_variables(block); + emit_while_loop_initializers(block); + emitted_loop_header_variables = true; + + // We have a generic loop without any distinguishable pattern like for, while or do while. + get(block.continue_block).complex_continue = true; + continue_type = SPIRBlock::ComplexLoop; + + // We have some temporaries where the loop header is the dominator. + // We risk a case where we have code like: + // for (;;) { create-temporary; break; } consume-temporary; + // so force-declare temporaries here. + emit_hoisted_temporaries(block.potential_declare_temporary); + statement("for (;;)"); + begin_scope(); + + emit_block_instructions(block); + } + else + { + emit_block_instructions(block); + } + + // If we didn't successfully emit a loop header and we had loop variable candidates, we have a problem + // as writes to said loop variables might have been masked out, we need a recompile. + if (!emitted_loop_header_variables && !block.loop_variables.empty()) + { + force_recompile(); + for (auto var : block.loop_variables) + get(var).loop_variable = false; + block.loop_variables.clear(); + } + + flush_undeclared_variables(block); + bool emit_next_block = true; + + // Handle end of block. + switch (block.terminator) + { + case SPIRBlock::Direct: + // True when emitting complex continue block. + if (block.loop_dominator == block.next_block) + { + branch(block.self, block.next_block); + emit_next_block = false; + } + // True if MergeToDirectForLoop succeeded. 
+ else if (skip_direct_branch) + emit_next_block = false; + else if (is_continue(block.next_block) || is_break(block.next_block) || is_conditional(block.next_block)) + { + branch(block.self, block.next_block); + emit_next_block = false; + } + break; + + case SPIRBlock::Select: + // True if MergeToSelectForLoop or MergeToSelectContinueForLoop succeeded. + if (select_branch_to_true_block) + { + if (force_complex_continue_block) + { + assert(block.true_block == block.continue_block); + + // We're going to emit a continue block directly here, so make sure it's marked as complex. + auto &complex_continue = get(block.continue_block).complex_continue; + bool old_complex = complex_continue; + complex_continue = true; + branch(block.self, block.true_block); + complex_continue = old_complex; + } + else + branch(block.self, block.true_block); + } + else if (select_branch_to_false_block) + { + if (force_complex_continue_block) + { + assert(block.false_block == block.continue_block); + + // We're going to emit a continue block directly here, so make sure it's marked as complex. + auto &complex_continue = get(block.continue_block).complex_continue; + bool old_complex = complex_continue; + complex_continue = true; + branch(block.self, block.false_block); + complex_continue = old_complex; + } + else + branch(block.self, block.false_block); + } + else + branch(block.self, block.condition, block.true_block, block.false_block); + break; + + case SPIRBlock::MultiSelect: + { + auto &type = expression_type(block.condition); + bool unsigned_case = + type.basetype == SPIRType::UInt || type.basetype == SPIRType::UShort || type.basetype == SPIRType::UByte; + + if (block.merge == SPIRBlock::MergeNone) + SPIRV_CROSS_THROW("Switch statement is not structured"); + + if (type.basetype == SPIRType::UInt64 || type.basetype == SPIRType::Int64) + { + // SPIR-V spec suggests this is allowed, but we cannot support it in higher level languages. 
+ SPIRV_CROSS_THROW("Cannot use 64-bit switch selectors."); + } + + const char *label_suffix = ""; + if (type.basetype == SPIRType::UInt && backend.uint32_t_literal_suffix) + label_suffix = "u"; + else if (type.basetype == SPIRType::UShort) + label_suffix = backend.uint16_t_literal_suffix; + else if (type.basetype == SPIRType::Short) + label_suffix = backend.int16_t_literal_suffix; + + SPIRBlock *old_emitting_switch = current_emitting_switch; + current_emitting_switch = &block; + + if (block.need_ladder_break) + statement("bool _", block.self, "_ladder_break = false;"); + + // Find all unique case constructs. + unordered_map<uint32_t, SmallVector<uint32_t>> case_constructs; + SmallVector<uint32_t> block_declaration_order; + SmallVector<uint32_t> literals_to_merge; + + // If a switch case branches to the default block for some reason, we can just remove that literal from consideration + // and let the default: block handle it. + // 2.11 in SPIR-V spec states that for fall-through cases, there is a very strict declaration order which we can take advantage of here. + // We only need to consider possible fallthrough if order[i] branches to order[i + 1]. + for (auto &c : block.cases) + { + if (c.block != block.next_block && c.block != block.default_block) + { + if (!case_constructs.count(c.block)) + block_declaration_order.push_back(c.block); + case_constructs[c.block].push_back(c.value); + } + else if (c.block == block.next_block && block.default_block != block.next_block) + { + // We might have to flush phi inside specific case labels. + // If we can piggyback on default:, do so instead. + literals_to_merge.push_back(c.value); + } + } + + // Empty literal array -> default. + if (block.default_block != block.next_block) + { + auto &default_block = get<SPIRBlock>(block.default_block); + + // We need to slide in the default block somewhere in this chain + // if there are fall-through scenarios since the default is declared separately in OpSwitch. + // Only consider trivial fall-through cases here. 
+ size_t num_blocks = block_declaration_order.size(); + bool injected_block = false; + + for (size_t i = 0; i < num_blocks; i++) + { + auto &case_block = get(block_declaration_order[i]); + if (execution_is_direct_branch(case_block, default_block)) + { + // Fallthrough to default block, we must inject the default block here. + block_declaration_order.insert(begin(block_declaration_order) + i + 1, block.default_block); + injected_block = true; + break; + } + else if (execution_is_direct_branch(default_block, case_block)) + { + // Default case is falling through to another case label, we must inject the default block here. + block_declaration_order.insert(begin(block_declaration_order) + i, block.default_block); + injected_block = true; + break; + } + } + + // Order does not matter. + if (!injected_block) + block_declaration_order.push_back(block.default_block); + else if (is_legacy_es()) + SPIRV_CROSS_THROW("Default case label fallthrough to other case label is not supported in ESSL 1.0."); + + case_constructs[block.default_block] = {}; + } + + size_t num_blocks = block_declaration_order.size(); + + const auto to_case_label = [](uint32_t literal, bool is_unsigned_case) -> string { + return is_unsigned_case ? convert_to_string(literal) : convert_to_string(int32_t(literal)); + }; + + const auto to_legacy_case_label = [&](uint32_t condition, const SmallVector &labels, + const char *suffix) -> string { + string ret; + size_t count = labels.size(); + for (size_t i = 0; i < count; i++) + { + if (i) + ret += " || "; + ret += join(count > 1 ? "(" : "", to_enclosed_expression(condition), " == ", labels[i], suffix, + count > 1 ? ")" : ""); + } + return ret; + }; + + // We need to deal with a complex scenario for OpPhi. If we have case-fallthrough and Phi in the picture, + // we need to flush phi nodes outside the switch block in a branch, + // and skip any Phi handling inside the case label to make fall-through work as expected. 
+ // This kind of code-gen is super awkward and it's a last resort. Normally we would want to handle this + // inside the case label if at all possible. + for (size_t i = 1; backend.support_case_fallthrough && i < num_blocks; i++) + { + if (flush_phi_required(block.self, block_declaration_order[i]) && + flush_phi_required(block_declaration_order[i - 1], block_declaration_order[i])) + { + uint32_t target_block = block_declaration_order[i]; + + // Make sure we flush Phi, it might have been marked to be ignored earlier. + get(target_block).ignore_phi_from_block = 0; + + auto &literals = case_constructs[target_block]; + + if (literals.empty()) + { + // Oh boy, gotta make a complete negative test instead! o.o + // Find all possible literals that would *not* make us enter the default block. + // If none of those literals match, we flush Phi ... + SmallVector conditions; + for (size_t j = 0; j < num_blocks; j++) + { + auto &negative_literals = case_constructs[block_declaration_order[j]]; + for (auto &case_label : negative_literals) + conditions.push_back(join(to_enclosed_expression(block.condition), + " != ", to_case_label(case_label, unsigned_case))); + } + + statement("if (", merge(conditions, " && "), ")"); + begin_scope(); + flush_phi(block.self, target_block); + end_scope(); + } + else + { + SmallVector conditions; + conditions.reserve(literals.size()); + for (auto &case_label : literals) + conditions.push_back(join(to_enclosed_expression(block.condition), + " == ", to_case_label(case_label, unsigned_case))); + statement("if (", merge(conditions, " || "), ")"); + begin_scope(); + flush_phi(block.self, target_block); + end_scope(); + } + + // Mark the block so that we don't flush Phi from header to case label. + get(target_block).ignore_phi_from_block = block.self; + } + } + + // If there is only one default block, and no cases, this is a case where SPIRV-opt decided to emulate + // non-structured exits with the help of a switch block. 
+ // This is buggy on FXC, so just emit the logical equivalent of a do { } while(false), which is more idiomatic. + bool degenerate_switch = block.default_block != block.merge_block && block.cases.empty(); + + if (degenerate_switch || is_legacy_es()) + { + // ESSL 1.0 is not guaranteed to support do/while. + if (is_legacy_es()) + { + uint32_t counter = statement_count; + statement("for (int spvDummy", counter, " = 0; spvDummy", counter, + " < 1; spvDummy", counter, "++)"); + } + else + statement("do"); + } + else + { + emit_block_hints(block); + statement("switch (", to_expression(block.condition), ")"); + } + begin_scope(); + + for (size_t i = 0; i < num_blocks; i++) + { + uint32_t target_block = block_declaration_order[i]; + auto &literals = case_constructs[target_block]; + + if (literals.empty()) + { + // Default case. + if (!degenerate_switch) + { + if (is_legacy_es()) + statement("else"); + else + statement("default:"); + } + } + else + { + if (is_legacy_es()) + { + statement((i ? "else " : ""), "if (", to_legacy_case_label(block.condition, literals, label_suffix), + ")"); + } + else + { + for (auto &case_literal : literals) + { + // The case label value must be sign-extended properly in SPIR-V, so we can assume 32-bit values here. + statement("case ", to_case_label(case_literal, unsigned_case), label_suffix, ":"); + } + } + } + + auto &case_block = get(target_block); + if (backend.support_case_fallthrough && i + 1 < num_blocks && + execution_is_direct_branch(case_block, get(block_declaration_order[i + 1]))) + { + // We will fall through here, so just terminate the block chain early. + // We still need to deal with Phi potentially. + // No need for a stack-like thing here since we only do fall-through when there is a + // single trivial branch to fall-through target.. 
+ current_emitting_switch_fallthrough = true; + } + else + current_emitting_switch_fallthrough = false; + + if (!degenerate_switch) + begin_scope(); + branch(block.self, target_block); + if (!degenerate_switch) + end_scope(); + + current_emitting_switch_fallthrough = false; + } + + // Might still have to flush phi variables if we branch from loop header directly to merge target. + if (flush_phi_required(block.self, block.next_block)) + { + if (block.default_block == block.next_block || !literals_to_merge.empty()) + { + for (auto &case_literal : literals_to_merge) + statement("case ", to_case_label(case_literal, unsigned_case), label_suffix, ":"); + + if (block.default_block == block.next_block) + { + if (is_legacy_es()) + statement("else"); + else + statement("default:"); + } + + begin_scope(); + flush_phi(block.self, block.next_block); + statement("break;"); + end_scope(); + } + } + + if (degenerate_switch && !is_legacy_es()) + end_scope_decl("while(false)"); + else + end_scope(); + + if (block.need_ladder_break) + { + statement("if (_", block.self, "_ladder_break)"); + begin_scope(); + statement("break;"); + end_scope(); + } + + current_emitting_switch = old_emitting_switch; + break; + } + + case SPIRBlock::Return: + { + for (auto &line : current_function->fixup_hooks_out) + line(); + + if (processing_entry_point) + emit_fixup(); + + auto &cfg = get_cfg_for_current_function(); + + if (block.return_value) + { + auto &type = expression_type(block.return_value); + if (!type.array.empty() && !backend.can_return_array) + { + // If we cannot return arrays, we will have a special out argument we can write to instead. + // The backend is responsible for setting this up, and redirection the return values as appropriate. 
+ if (ir.ids[block.return_value].get_type() != TypeUndef) + { + emit_array_copy("spvReturnValue", block.return_value, StorageClassFunction, + get_expression_effective_storage_class(block.return_value)); + } + + if (!cfg.node_terminates_control_flow_in_sub_graph(current_function->entry_block, block.self) || + block.loop_dominator != BlockID(SPIRBlock::NoDominator)) + { + statement("return;"); + } + } + else + { + // OpReturnValue can return Undef, so don't emit anything for this case. + if (ir.ids[block.return_value].get_type() != TypeUndef) + statement("return ", to_expression(block.return_value), ";"); + } + } + else if (!cfg.node_terminates_control_flow_in_sub_graph(current_function->entry_block, block.self) || + block.loop_dominator != BlockID(SPIRBlock::NoDominator)) + { + // If this block is the very final block and not called from control flow, + // we do not need an explicit return which looks out of place. Just end the function here. + // In the very weird case of for(;;) { return; } executing return is unconditional, + // but we actually need a return here ... + statement("return;"); + } + break; + } + + case SPIRBlock::Kill: + statement(backend.discard_literal, ";"); + break; + + case SPIRBlock::Unreachable: + emit_next_block = false; + break; + + default: + SPIRV_CROSS_THROW("Unimplemented block terminator."); + } + + if (block.next_block && emit_next_block) + { + // If we hit this case, we're dealing with an unconditional branch, which means we will output + // that block after this. If we had selection merge, we already flushed phi variables. + if (block.merge != SPIRBlock::MergeSelection) + { + flush_phi(block.self, block.next_block); + // For a direct branch, need to remember to invalidate expressions in the next linear block instead. + get(block.next_block).invalidate_expressions = block.invalidate_expressions; + } + + // For switch fallthrough cases, we terminate the chain here, but we still need to handle Phi. 
+ if (!current_emitting_switch_fallthrough) + { + // For merge selects we might have ignored the fact that a merge target + // could have been a break; or continue; + // We will need to deal with it here. + if (is_loop_break(block.next_block)) + { + // Cannot check for just break, because switch statements will also use break. + assert(block.merge == SPIRBlock::MergeSelection); + statement("break;"); + } + else if (is_continue(block.next_block)) + { + assert(block.merge == SPIRBlock::MergeSelection); + branch_to_continue(block.self, block.next_block); + } + else if (BlockID(block.self) != block.next_block) + emit_block_chain(get(block.next_block)); + } + } + + if (block.merge == SPIRBlock::MergeLoop) + { + if (continue_type == SPIRBlock::DoWhileLoop) + { + // Make sure that we run the continue block to get the expressions set, but this + // should become an empty string. + // We have no fallbacks if we cannot forward everything to temporaries ... + const auto &continue_block = get(block.continue_block); + bool positive_test = execution_is_noop(get(continue_block.true_block), + get(continue_block.loop_dominator)); + + uint32_t current_count = statement_count; + auto statements = emit_continue_block(block.continue_block, positive_test, !positive_test); + if (statement_count != current_count) + { + // The DoWhile block has side effects, force ComplexLoop pattern next pass. + get(block.continue_block).complex_continue = true; + force_recompile(); + } + + // Might have to invert the do-while test here. + auto condition = to_expression(continue_block.condition); + if (!positive_test) + condition = join("!", enclose_expression(condition)); + + end_scope_decl(join("while (", condition, ")")); + } + else + end_scope(); + + loop_level_saver.release(); + + // We cannot break out of two loops at once, so don't check for break; here. + // Using block.self as the "from" block isn't quite right, but it has the same scope + // and dominance structure, so it's fine. 
+ if (is_continue(block.merge_block)) + branch_to_continue(block.self, block.merge_block); + else + emit_block_chain(get(block.merge_block)); + } + + // Forget about control dependent expressions now. + block.invalidate_expressions.clear(); + + // After we return, we must be out of scope, so if we somehow have to re-emit this function, + // re-declare variables if necessary. + assert(rearm_dominated_variables.size() == block.dominated_variables.size()); + for (size_t i = 0; i < block.dominated_variables.size(); i++) + { + uint32_t var = block.dominated_variables[i]; + get(var).deferred_declaration = rearm_dominated_variables[i]; + } + + // Just like for deferred declaration, we need to forget about loop variable enable + // if our block chain is reinstantiated later. + for (auto &var_id : block.loop_variables) + get(var_id).loop_variable_enable = false; +} + +void CompilerGLSL::begin_scope() +{ + statement("{"); + indent++; +} + +void CompilerGLSL::end_scope() +{ + if (!indent) + SPIRV_CROSS_THROW("Popping empty indent stack."); + indent--; + statement("}"); +} + +void CompilerGLSL::end_scope(const string &trailer) +{ + if (!indent) + SPIRV_CROSS_THROW("Popping empty indent stack."); + indent--; + statement("}", trailer); +} + +void CompilerGLSL::end_scope_decl() +{ + if (!indent) + SPIRV_CROSS_THROW("Popping empty indent stack."); + indent--; + statement("};"); +} + +void CompilerGLSL::end_scope_decl(const string &decl) +{ + if (!indent) + SPIRV_CROSS_THROW("Popping empty indent stack."); + indent--; + statement("} ", decl, ";"); +} + +void CompilerGLSL::check_function_call_constraints(const uint32_t *args, uint32_t length) +{ + // If our variable is remapped, and we rely on type-remapping information as + // well, then we cannot pass the variable as a function parameter. + // Fixing this is non-trivial without stamping out variants of the same function, + // so for now warn about this and suggest workarounds instead. 
+ for (uint32_t i = 0; i < length; i++) + { + auto *var = maybe_get<SPIRVariable>(args[i]); + if (!var || !var->remapped_variable) + continue; + + auto &type = get<SPIRType>(var->basetype); + if (type.basetype == SPIRType::Image && type.image.dim == DimSubpassData) + { + SPIRV_CROSS_THROW("Tried passing a remapped subpassInput variable to a function. " + "This will not work correctly because type-remapping information is lost. " + "To workaround, please consider not passing the subpass input as a function parameter, " + "or use in/out variables instead which do not need type remapping information."); + } + } +} + +const Instruction *CompilerGLSL::get_next_instruction_in_block(const Instruction &instr) +{ + // FIXME: This is kind of hacky. There should be a cleaner way. + auto offset = uint32_t(&instr - current_emitting_block->ops.data()); + if ((offset + 1) < current_emitting_block->ops.size()) + return &current_emitting_block->ops[offset + 1]; + else + return nullptr; +} + +uint32_t CompilerGLSL::mask_relevant_memory_semantics(uint32_t semantics) +{ + return semantics & (MemorySemanticsAtomicCounterMemoryMask | MemorySemanticsImageMemoryMask | + MemorySemanticsWorkgroupMemoryMask | MemorySemanticsUniformMemoryMask | + MemorySemanticsCrossWorkgroupMemoryMask | MemorySemanticsSubgroupMemoryMask); +} + +void CompilerGLSL::emit_array_copy(const string &lhs, uint32_t rhs_id, StorageClass, StorageClass) +{ + statement(lhs, " = ", to_expression(rhs_id), ";"); +} + +void CompilerGLSL::unroll_array_from_complex_load(uint32_t target_id, uint32_t source_id, std::string &expr) +{ + if (!backend.force_gl_in_out_block) + return; + // This path is only relevant for GL backends. 
+ + auto *var = maybe_get(source_id); + if (!var) + return; + + if (var->storage != StorageClassInput) + return; + + auto &type = get_variable_data_type(*var); + if (type.array.empty()) + return; + + auto builtin = BuiltIn(get_decoration(var->self, DecorationBuiltIn)); + bool is_builtin = is_builtin_variable(*var) && (builtin == BuiltInPointSize || builtin == BuiltInPosition); + bool is_tess = is_tessellation_shader(); + bool is_patch = has_decoration(var->self, DecorationPatch); + + // Tessellation input arrays are special in that they are unsized, so we cannot directly copy from it. + // We must unroll the array load. + // For builtins, we couldn't catch this case normally, + // because this is resolved in the OpAccessChain in most cases. + // If we load the entire array, we have no choice but to unroll here. + if (!is_patch && (is_builtin || is_tess)) + { + auto new_expr = join("_", target_id, "_unrolled"); + statement(variable_decl(type, new_expr, target_id), ";"); + string array_expr; + if (type.array_size_literal.back()) + { + array_expr = convert_to_string(type.array.back()); + if (type.array.back() == 0) + SPIRV_CROSS_THROW("Cannot unroll an array copy from unsized array."); + } + else + array_expr = to_expression(type.array.back()); + + // The array size might be a specialization constant, so use a for-loop instead. + statement("for (int i = 0; i < int(", array_expr, "); i++)"); + begin_scope(); + if (is_builtin) + statement(new_expr, "[i] = gl_in[i].", expr, ";"); + else + statement(new_expr, "[i] = ", expr, "[i];"); + end_scope(); + + expr = move(new_expr); + } +} + +void CompilerGLSL::cast_from_builtin_load(uint32_t source_id, std::string &expr, const SPIRType &expr_type) +{ + auto *var = maybe_get_backing_variable(source_id); + if (var) + source_id = var->self; + + // Only interested in standalone builtin variables. 
+ if (!has_decoration(source_id, DecorationBuiltIn)) + return; + + auto builtin = static_cast(get_decoration(source_id, DecorationBuiltIn)); + auto expected_type = expr_type.basetype; + + // TODO: Fill in for more builtins. + switch (builtin) + { + case BuiltInLayer: + case BuiltInPrimitiveId: + case BuiltInViewportIndex: + case BuiltInInstanceId: + case BuiltInInstanceIndex: + case BuiltInVertexId: + case BuiltInVertexIndex: + case BuiltInSampleId: + case BuiltInBaseVertex: + case BuiltInBaseInstance: + case BuiltInDrawIndex: + case BuiltInFragStencilRefEXT: + case BuiltInInstanceCustomIndexNV: + expected_type = SPIRType::Int; + break; + + case BuiltInGlobalInvocationId: + case BuiltInLocalInvocationId: + case BuiltInWorkgroupId: + case BuiltInLocalInvocationIndex: + case BuiltInWorkgroupSize: + case BuiltInNumWorkgroups: + case BuiltInIncomingRayFlagsNV: + case BuiltInLaunchIdNV: + case BuiltInLaunchSizeNV: + expected_type = SPIRType::UInt; + break; + + default: + break; + } + + if (expected_type != expr_type.basetype) + expr = bitcast_expression(expr_type, expected_type, expr); +} + +void CompilerGLSL::cast_to_builtin_store(uint32_t target_id, std::string &expr, const SPIRType &expr_type) +{ + // Only interested in standalone builtin variables. + if (!has_decoration(target_id, DecorationBuiltIn)) + return; + + auto builtin = static_cast(get_decoration(target_id, DecorationBuiltIn)); + auto expected_type = expr_type.basetype; + + // TODO: Fill in for more builtins. 
+ switch (builtin) + { + case BuiltInLayer: + case BuiltInPrimitiveId: + case BuiltInViewportIndex: + case BuiltInFragStencilRefEXT: + expected_type = SPIRType::Int; + break; + + default: + break; + } + + if (expected_type != expr_type.basetype) + { + auto type = expr_type; + type.basetype = expected_type; + expr = bitcast_expression(type, expr_type.basetype, expr); + } +} + +void CompilerGLSL::convert_non_uniform_expression(const SPIRType &type, std::string &expr) +{ + if (*backend.nonuniform_qualifier == '\0') + return; + + // Handle SPV_EXT_descriptor_indexing. + if (type.basetype == SPIRType::Sampler || type.basetype == SPIRType::SampledImage || + type.basetype == SPIRType::Image) + { + // The image/sampler ID must be declared as non-uniform. + // However, it is not legal GLSL to have + // nonuniformEXT(samplers[index]), so we must move the nonuniform qualifier + // to the array indexing, like + // samplers[nonuniformEXT(index)]. + // While the access chain will generally be nonuniformEXT, it's not necessarily so, + // so we might have to fixup the OpLoad-ed expression late. + + auto start_array_index = expr.find_first_of('['); + + if (start_array_index == string::npos) + return; + + // Check for the edge case that a non-arrayed resource was marked to be nonuniform, + // and the bracket we found is actually part of non-resource related data. + if (expr.find_first_of(',') < start_array_index) + return; + + // We've opened a bracket, track expressions until we can close the bracket. + // This must be our image index. 
+ size_t end_array_index = string::npos; + unsigned bracket_count = 1; + for (size_t index = start_array_index + 1; index < expr.size(); index++) + { + if (expr[index] == ']') + { + if (--bracket_count == 0) + { + end_array_index = index; + break; + } + } + else if (expr[index] == '[') + bracket_count++; + } + + assert(bracket_count == 0); + + // Doesn't really make sense to declare a non-arrayed image with nonuniformEXT, but there's + // nothing we can do here to express that. + if (start_array_index == string::npos || end_array_index == string::npos || end_array_index < start_array_index) + return; + + start_array_index++; + + expr = join(expr.substr(0, start_array_index), backend.nonuniform_qualifier, "(", + expr.substr(start_array_index, end_array_index - start_array_index), ")", + expr.substr(end_array_index, string::npos)); + } +} + +void CompilerGLSL::emit_block_hints(const SPIRBlock &) +{ +} + +void CompilerGLSL::preserve_alias_on_reset(uint32_t id) +{ + preserved_aliases[id] = get_name(id); +} + +void CompilerGLSL::reset_name_caches() +{ + for (auto &preserved : preserved_aliases) + set_name(preserved.first, preserved.second); + + preserved_aliases.clear(); + resource_names.clear(); + block_input_names.clear(); + block_output_names.clear(); + block_ubo_names.clear(); + block_ssbo_names.clear(); + block_names.clear(); + function_overloads.clear(); +} + +void CompilerGLSL::fixup_type_alias() +{ + // Due to how some backends work, the "master" type of type_alias must be a block-like type if it exists. + ir.for_each_typed_id([&](uint32_t self, SPIRType &type) { + if (!type.type_alias) + return; + + if (has_decoration(type.self, DecorationBlock) || has_decoration(type.self, DecorationBufferBlock)) + { + // Top-level block types should never alias anything else. + type.type_alias = 0; + } + else if (type_is_block_like(type) && type.self == ID(self)) + { + // A block-like type is any type which contains Offset decoration, but not top-level blocks, + // i.e. 
blocks which are placed inside buffers. + // Become the master. + ir.for_each_typed_id([&](uint32_t other_id, SPIRType &other_type) { + if (other_id == self) + return; + + if (other_type.type_alias == type.type_alias) + other_type.type_alias = self; + }); + + this->get(type.type_alias).type_alias = self; + type.type_alias = 0; + } + }); +} + +void CompilerGLSL::reorder_type_alias() +{ + // Reorder declaration of types so that the master of the type alias is always emitted first. + // We need this in case a type B depends on type A (A must come before in the vector), but A is an alias of a type Abuffer, which + // means declaration of A doesn't happen (yet), and order would be B, ABuffer and not ABuffer, B. Fix this up here. + auto loop_lock = ir.create_loop_hard_lock(); + + auto &type_ids = ir.ids_for_type[TypeType]; + for (auto alias_itr = begin(type_ids); alias_itr != end(type_ids); ++alias_itr) + { + auto &type = get(*alias_itr); + if (type.type_alias != TypeID(0) && + !has_extended_decoration(type.type_alias, SPIRVCrossDecorationBufferBlockRepacked)) + { + // We will skip declaring this type, so make sure the type_alias type comes before. + auto master_itr = find(begin(type_ids), end(type_ids), ID(type.type_alias)); + assert(master_itr != end(type_ids)); + + if (alias_itr < master_itr) + { + // Must also swap the type order for the constant-type joined array. + auto &joined_types = ir.ids_for_constant_or_type; + auto alt_alias_itr = find(begin(joined_types), end(joined_types), *alias_itr); + auto alt_master_itr = find(begin(joined_types), end(joined_types), *master_itr); + assert(alt_alias_itr != end(joined_types)); + assert(alt_master_itr != end(joined_types)); + + swap(*alias_itr, *master_itr); + swap(*alt_alias_itr, *alt_master_itr); + } + } + } +} + +void CompilerGLSL::emit_line_directive(uint32_t file_id, uint32_t line_literal) +{ + // If we are redirecting statements, ignore the line directive. + // Common case here is continue blocks. 
+ if (redirect_statement) + return; + + if (options.emit_line_directives) + { + require_extension_internal("GL_GOOGLE_cpp_style_line_directive"); + statement_no_indent("#line ", line_literal, " \"", get(file_id).str, "\""); + } +} + +void CompilerGLSL::propagate_nonuniform_qualifier(uint32_t id) +{ + // SPIR-V might only tag the very last ID with NonUniformEXT, but for codegen, + // we need to know NonUniformEXT a little earlier, when the resource is actually loaded. + // Back-propagate the qualifier based on the expression dependency chain. + + if (!has_decoration(id, DecorationNonUniformEXT)) + { + set_decoration(id, DecorationNonUniformEXT); + force_recompile(); + } + + auto *e = maybe_get(id); + auto *combined = maybe_get(id); + auto *chain = maybe_get(id); + if (e) + { + for (auto &expr : e->expression_dependencies) + propagate_nonuniform_qualifier(expr); + for (auto &expr : e->implied_read_expressions) + propagate_nonuniform_qualifier(expr); + } + else if (combined) + { + propagate_nonuniform_qualifier(combined->image); + propagate_nonuniform_qualifier(combined->sampler); + } + else if (chain) + { + for (auto &expr : chain->implied_read_expressions) + propagate_nonuniform_qualifier(expr); + } +} + +void CompilerGLSL::emit_copy_logical_type(uint32_t lhs_id, uint32_t lhs_type_id, uint32_t rhs_id, uint32_t rhs_type_id, + SmallVector chain) +{ + // Fully unroll all member/array indices one by one. + + auto &lhs_type = get(lhs_type_id); + auto &rhs_type = get(rhs_type_id); + + if (!lhs_type.array.empty()) + { + // Could use a loop here to support specialization constants, but it gets rather complicated with nested array types, + // and this is a rather obscure opcode anyways, keep it simple unless we are forced to. 
+ uint32_t array_size = to_array_size_literal(lhs_type); + chain.push_back(0); + + for (uint32_t i = 0; i < array_size; i++) + { + chain.back() = i; + emit_copy_logical_type(lhs_id, lhs_type.parent_type, rhs_id, rhs_type.parent_type, chain); + } + } + else if (lhs_type.basetype == SPIRType::Struct) + { + chain.push_back(0); + uint32_t member_count = uint32_t(lhs_type.member_types.size()); + for (uint32_t i = 0; i < member_count; i++) + { + chain.back() = i; + emit_copy_logical_type(lhs_id, lhs_type.member_types[i], rhs_id, rhs_type.member_types[i], chain); + } + } + else + { + // Need to handle unpack/packing fixups since this can differ wildly between the logical types, + // particularly in MSL. + // To deal with this, we emit access chains and go through emit_store_statement + // to deal with all the special cases we can encounter. + + AccessChainMeta lhs_meta, rhs_meta; + auto lhs = access_chain_internal(lhs_id, chain.data(), uint32_t(chain.size()), + ACCESS_CHAIN_INDEX_IS_LITERAL_BIT, &lhs_meta); + auto rhs = access_chain_internal(rhs_id, chain.data(), uint32_t(chain.size()), + ACCESS_CHAIN_INDEX_IS_LITERAL_BIT, &rhs_meta); + + uint32_t id = ir.increase_bound_by(2); + lhs_id = id; + rhs_id = id + 1; + + { + auto &lhs_expr = set(lhs_id, move(lhs), lhs_type_id, true); + lhs_expr.need_transpose = lhs_meta.need_transpose; + + if (lhs_meta.storage_is_packed) + set_extended_decoration(lhs_id, SPIRVCrossDecorationPhysicalTypePacked); + if (lhs_meta.storage_physical_type != 0) + set_extended_decoration(lhs_id, SPIRVCrossDecorationPhysicalTypeID, lhs_meta.storage_physical_type); + + forwarded_temporaries.insert(lhs_id); + suppressed_usage_tracking.insert(lhs_id); + } + + { + auto &rhs_expr = set(rhs_id, move(rhs), rhs_type_id, true); + rhs_expr.need_transpose = rhs_meta.need_transpose; + + if (rhs_meta.storage_is_packed) + set_extended_decoration(rhs_id, SPIRVCrossDecorationPhysicalTypePacked); + if (rhs_meta.storage_physical_type != 0) + set_extended_decoration(rhs_id, 
SPIRVCrossDecorationPhysicalTypeID, rhs_meta.storage_physical_type); + + forwarded_temporaries.insert(rhs_id); + suppressed_usage_tracking.insert(rhs_id); + } + + emit_store_statement(lhs_id, rhs_id); + } +} + +bool CompilerGLSL::subpass_input_is_framebuffer_fetch(uint32_t id) const +{ + if (!has_decoration(id, DecorationInputAttachmentIndex)) + return false; + + uint32_t input_attachment_index = get_decoration(id, DecorationInputAttachmentIndex); + for (auto &remap : subpass_to_framebuffer_fetch_attachment) + if (remap.first == input_attachment_index) + return true; + + return false; +} + +const SPIRVariable *CompilerGLSL::find_subpass_input_by_attachment_index(uint32_t index) const +{ + const SPIRVariable *ret = nullptr; + ir.for_each_typed_id([&](uint32_t, const SPIRVariable &var) { + if (has_decoration(var.self, DecorationInputAttachmentIndex) && + get_decoration(var.self, DecorationInputAttachmentIndex) == index) + { + ret = &var; + } + }); + return ret; +} + +const SPIRVariable *CompilerGLSL::find_color_output_by_location(uint32_t location) const +{ + const SPIRVariable *ret = nullptr; + ir.for_each_typed_id([&](uint32_t, const SPIRVariable &var) { + if (var.storage == StorageClassOutput && get_decoration(var.self, DecorationLocation) == location) + ret = &var; + }); + return ret; +} + +void CompilerGLSL::emit_inout_fragment_outputs_copy_to_subpass_inputs() +{ + for (auto &remap : subpass_to_framebuffer_fetch_attachment) + { + auto *subpass_var = find_subpass_input_by_attachment_index(remap.first); + auto *output_var = find_color_output_by_location(remap.second); + if (!subpass_var) + continue; + if (!output_var) + SPIRV_CROSS_THROW("Need to declare the corresponding fragment output variable to be able " + "to read from it."); + if (is_array(get(output_var->basetype))) + SPIRV_CROSS_THROW("Cannot use GL_EXT_shader_framebuffer_fetch with arrays of color outputs."); + + auto &func = get(get_entry_point().self); + func.fixup_hooks_in.push_back([=]() { + if 
(is_legacy()) + { + statement(to_expression(subpass_var->self), " = ", "gl_LastFragData[", + get_decoration(output_var->self, DecorationLocation), "];"); + } + else + { + uint32_t num_rt_components = this->get(output_var->basetype).vecsize; + statement(to_expression(subpass_var->self), vector_swizzle(num_rt_components, 0), " = ", + to_expression(output_var->self), ";"); + } + }); + } +} + +bool CompilerGLSL::variable_is_depth_or_compare(VariableID id) const +{ + return image_is_comparison(get(get(id).basetype), id); +} + +const char *CompilerGLSL::ShaderSubgroupSupportHelper::get_extension_name(Candidate c) +{ + static const char *const retval[CandidateCount] = { "GL_KHR_shader_subgroup_ballot", + "GL_KHR_shader_subgroup_basic", + "GL_KHR_shader_subgroup_vote", + "GL_NV_gpu_shader_5", + "GL_NV_shader_thread_group", + "GL_NV_shader_thread_shuffle", + "GL_ARB_shader_ballot", + "GL_ARB_shader_group_vote", + "GL_AMD_gcn_shader" }; + return retval[c]; +} + +SmallVector CompilerGLSL::ShaderSubgroupSupportHelper::get_extra_required_extension_names(Candidate c) +{ + switch (c) + { + case ARB_shader_ballot: + return { "GL_ARB_shader_int64" }; + case AMD_gcn_shader: + return { "GL_AMD_gpu_shader_int64", "GL_NV_gpu_shader5" }; + default: + return {}; + } +} + +const char *CompilerGLSL::ShaderSubgroupSupportHelper::get_extra_required_extension_predicate(Candidate c) +{ + switch (c) + { + case ARB_shader_ballot: + return "defined(GL_ARB_shader_int64)"; + case AMD_gcn_shader: + return "(defined(GL_AMD_gpu_shader_int64) || defined(GL_NV_gpu_shader5))"; + default: + return ""; + } +} + +CompilerGLSL::ShaderSubgroupSupportHelper::FeatureVector CompilerGLSL::ShaderSubgroupSupportHelper:: + get_feature_dependencies(Feature feature) +{ + switch (feature) + { + case SubgroupAllEqualT: + return { SubgroupBrodcast_First, SubgroupAll_Any_AllEqualBool }; + case SubgroupElect: + return { SubgroupBallotFindLSB_MSB, SubgroupBallot, SubgroupInvocationID }; + case 
SubgroupInverseBallot_InclBitCount_ExclBitCout: + return { SubgroupMask }; + case SubgroupBallotBitCount: + return { SubgroupBallot }; + default: + return {}; + } +} + +CompilerGLSL::ShaderSubgroupSupportHelper::FeatureMask CompilerGLSL::ShaderSubgroupSupportHelper:: + get_feature_dependency_mask(Feature feature) +{ + return build_mask(get_feature_dependencies(feature)); +} + +bool CompilerGLSL::ShaderSubgroupSupportHelper::can_feature_be_implemented_without_extensions(Feature feature) +{ + static const bool retval[FeatureCount] = { false, false, false, false, false, false, + true, // SubgroupBalloFindLSB_MSB + false, false, false, false, + true, // SubgroupMemBarrier - replaced with workgroup memory barriers + false, false, true, false }; + + return retval[feature]; +} + +CompilerGLSL::ShaderSubgroupSupportHelper::Candidate CompilerGLSL::ShaderSubgroupSupportHelper:: + get_KHR_extension_for_feature(Feature feature) +{ + static const Candidate extensions[FeatureCount] = { + KHR_shader_subgroup_ballot, KHR_shader_subgroup_basic, KHR_shader_subgroup_basic, KHR_shader_subgroup_basic, + KHR_shader_subgroup_basic, KHR_shader_subgroup_ballot, KHR_shader_subgroup_ballot, KHR_shader_subgroup_vote, + KHR_shader_subgroup_vote, KHR_shader_subgroup_basic, KHR_shader_subgroup_ballot, KHR_shader_subgroup_basic, + KHR_shader_subgroup_basic, KHR_shader_subgroup_ballot, KHR_shader_subgroup_ballot, KHR_shader_subgroup_ballot + }; + + return extensions[feature]; +} + +void CompilerGLSL::ShaderSubgroupSupportHelper::request_feature(Feature feature) +{ + feature_mask |= (FeatureMask(1) << feature) | get_feature_dependency_mask(feature); +} + +bool CompilerGLSL::ShaderSubgroupSupportHelper::is_feature_requested(Feature feature) const +{ + return (feature_mask & (1u << feature)) != 0; +} + +CompilerGLSL::ShaderSubgroupSupportHelper::Result CompilerGLSL::ShaderSubgroupSupportHelper::resolve() const +{ + Result res; + + for (uint32_t i = 0u; i < FeatureCount; ++i) + { + if (feature_mask & 
(1u << i)) + { + auto feature = static_cast(i); + std::unordered_set unique_candidates; + + auto candidates = get_candidates_for_feature(feature); + unique_candidates.insert(candidates.begin(), candidates.end()); + + auto deps = get_feature_dependencies(feature); + for (Feature d : deps) + { + candidates = get_candidates_for_feature(d); + if (!candidates.empty()) + unique_candidates.insert(candidates.begin(), candidates.end()); + } + + for (uint32_t c : unique_candidates) + ++res.weights[static_cast(c)]; + } + } + + return res; +} + +CompilerGLSL::ShaderSubgroupSupportHelper::CandidateVector CompilerGLSL::ShaderSubgroupSupportHelper:: + get_candidates_for_feature(Feature ft, const Result &r) +{ + auto c = get_candidates_for_feature(ft); + auto cmp = [&r](Candidate a, Candidate b) { + if (r.weights[a] == r.weights[b]) + return a < b; // Prefer candidates with lower enum value + return r.weights[a] > r.weights[b]; + }; + std::sort(c.begin(), c.end(), cmp); + return c; +} + +CompilerGLSL::ShaderSubgroupSupportHelper::CandidateVector CompilerGLSL::ShaderSubgroupSupportHelper:: + get_candidates_for_feature(Feature feature) +{ + switch (feature) + { + case SubgroupMask: + return { KHR_shader_subgroup_ballot, NV_shader_thread_group, ARB_shader_ballot }; + case SubgroupSize: + return { KHR_shader_subgroup_basic, NV_shader_thread_group, AMD_gcn_shader, ARB_shader_ballot }; + case SubgroupInvocationID: + return { KHR_shader_subgroup_basic, NV_shader_thread_group, ARB_shader_ballot }; + case SubgroupID: + return { KHR_shader_subgroup_basic, NV_shader_thread_group }; + case NumSubgroups: + return { KHR_shader_subgroup_basic, NV_shader_thread_group }; + case SubgroupBrodcast_First: + return { KHR_shader_subgroup_ballot, NV_shader_thread_shuffle, ARB_shader_ballot }; + case SubgroupBallotFindLSB_MSB: + return { KHR_shader_subgroup_ballot, NV_shader_thread_group }; + case SubgroupAll_Any_AllEqualBool: + return { KHR_shader_subgroup_vote, NV_gpu_shader_5, ARB_shader_group_vote, 
AMD_gcn_shader }; + case SubgroupAllEqualT: + return {}; // depends on other features only + case SubgroupElect: + return {}; // depends on other features only + case SubgroupBallot: + return { KHR_shader_subgroup_ballot, NV_shader_thread_group, ARB_shader_ballot }; + case SubgroupBarrier: + return { KHR_shader_subgroup_basic, NV_shader_thread_group, ARB_shader_ballot, AMD_gcn_shader }; + case SubgroupMemBarrier: + return { KHR_shader_subgroup_basic }; + case SubgroupInverseBallot_InclBitCount_ExclBitCout: + return {}; + case SubgroupBallotBitExtract: + return { NV_shader_thread_group }; + case SubgroupBallotBitCount: + return {}; + default: + return {}; + } +} + +CompilerGLSL::ShaderSubgroupSupportHelper::FeatureMask CompilerGLSL::ShaderSubgroupSupportHelper::build_mask( + const SmallVector &features) +{ + FeatureMask mask = 0; + for (Feature f : features) + mask |= FeatureMask(1) << f; + return mask; +} + +CompilerGLSL::ShaderSubgroupSupportHelper::Result::Result() +{ + for (auto &weight : weights) + weight = 0; + + // Make sure KHR_shader_subgroup extensions are always prefered. + const uint32_t big_num = FeatureCount; + weights[KHR_shader_subgroup_ballot] = big_num; + weights[KHR_shader_subgroup_basic] = big_num; + weights[KHR_shader_subgroup_vote] = big_num; +} + +void CompilerGLSL::request_workaround_wrapper_overload(TypeID id) +{ + // Must be ordered to maintain deterministic output, so vector is appropriate. + if (find(begin(workaround_ubo_load_overload_types), end(workaround_ubo_load_overload_types), id) == + end(workaround_ubo_load_overload_types)) + { + force_recompile(); + workaround_ubo_load_overload_types.push_back(id); + } +} + +void CompilerGLSL::rewrite_load_for_wrapped_row_major(std::string &expr, TypeID loaded_type, ID ptr) +{ + // Loading row-major matrices from UBOs on older AMD Windows OpenGL drivers is problematic. 
+ // To load these types correctly, we must first wrap them in a dummy function which only purpose is to + // ensure row_major decoration is actually respected. + auto *var = maybe_get_backing_variable(ptr); + if (!var) + return; + + auto &backing_type = get(var->basetype); + bool is_ubo = backing_type.basetype == SPIRType::Struct && backing_type.storage == StorageClassUniform && + has_decoration(backing_type.self, DecorationBlock); + if (!is_ubo) + return; + + auto *type = &get(loaded_type); + bool rewrite = false; + + if (is_matrix(*type)) + { + // To avoid adding a lot of unnecessary meta tracking to forward the row_major state, + // we will simply look at the base struct itself. It is exceptionally rare to mix and match row-major/col-major state. + // If there is any row-major action going on, we apply the workaround. + // It is harmless to apply the workaround to column-major matrices, so this is still a valid solution. + // If an access chain occurred, the workaround is not required, so loading vectors or scalars don't need workaround. + type = &backing_type; + } + + if (type->basetype == SPIRType::Struct) + { + // If we're loading a struct where any member is a row-major matrix, apply the workaround. + for (uint32_t i = 0; i < uint32_t(type->member_types.size()); i++) + { + if (combined_decoration_for_member(*type, i).get(DecorationRowMajor)) + { + rewrite = true; + break; + } + } + } + + if (rewrite) + { + request_workaround_wrapper_overload(loaded_type); + expr = join("spvWorkaroundRowMajor(", expr, ")"); + } +} diff --git a/dep/spirv-cross/spirv_glsl.hpp b/dep/spirv-cross/spirv_glsl.hpp new file mode 100644 index 000000000..3c41efef7 --- /dev/null +++ b/dep/spirv-cross/spirv_glsl.hpp @@ -0,0 +1,903 @@ +/* + * Copyright 2015-2020 Arm Limited + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * At your option, you may choose to accept this material under either: + * 1. The Apache License, Version 2.0, found at , or + * 2. The MIT License, found at . + * SPDX-License-Identifier: Apache-2.0 OR MIT. + */ + +#ifndef SPIRV_CROSS_GLSL_HPP +#define SPIRV_CROSS_GLSL_HPP + +#include "GLSL.std.450.h" +#include "spirv_cross.hpp" +#include +#include +#include + +namespace SPIRV_CROSS_NAMESPACE +{ +enum PlsFormat +{ + PlsNone = 0, + + PlsR11FG11FB10F, + PlsR32F, + PlsRG16F, + PlsRGB10A2, + PlsRGBA8, + PlsRG16, + + PlsRGBA8I, + PlsRG16I, + + PlsRGB10A2UI, + PlsRGBA8UI, + PlsRG16UI, + PlsR32UI +}; + +struct PlsRemap +{ + uint32_t id; + PlsFormat format; +}; + +enum AccessChainFlagBits +{ + ACCESS_CHAIN_INDEX_IS_LITERAL_BIT = 1 << 0, + ACCESS_CHAIN_CHAIN_ONLY_BIT = 1 << 1, + ACCESS_CHAIN_PTR_CHAIN_BIT = 1 << 2, + ACCESS_CHAIN_SKIP_REGISTER_EXPRESSION_READ_BIT = 1 << 3, + ACCESS_CHAIN_LITERAL_MSB_FORCE_ID = 1 << 4, + ACCESS_CHAIN_FLATTEN_ALL_MEMBERS_BIT = 1 << 5 +}; +typedef uint32_t AccessChainFlags; + +class CompilerGLSL : public Compiler +{ +public: + struct Options + { + // The shading language version. Corresponds to #version $VALUE. + uint32_t version = 450; + + // Emit the OpenGL ES shading language instead of desktop OpenGL. + bool es = false; + + // Debug option to always emit temporary variables for all expressions. + bool force_temporary = false; + + // If true, Vulkan GLSL features are used instead of GL-compatible features. + // Mostly useful for debugging SPIR-V files. 
+ bool vulkan_semantics = false; + + // If true, gl_PerVertex is explicitly redeclared in vertex, geometry and tessellation shaders. + // The members of gl_PerVertex is determined by which built-ins are declared by the shader. + // This option is ignored in ES versions, as redeclaration in ES is not required, and it depends on a different extension + // (EXT_shader_io_blocks) which makes things a bit more fuzzy. + bool separate_shader_objects = false; + + // Flattens multidimensional arrays, e.g. float foo[a][b][c] into single-dimensional arrays, + // e.g. float foo[a * b * c]. + // This function does not change the actual SPIRType of any object. + // Only the generated code, including declarations of interface variables are changed to be single array dimension. + bool flatten_multidimensional_arrays = false; + + // For older desktop GLSL targets than version 420, the + // GL_ARB_shading_language_420pack extensions is used to be able to support + // layout(binding) on UBOs and samplers. + // If disabled on older targets, binding decorations will be stripped. + bool enable_420pack_extension = true; + + // In non-Vulkan GLSL, emit push constant blocks as UBOs rather than plain uniforms. + bool emit_push_constant_as_uniform_buffer = false; + + // Always emit uniform blocks as plain uniforms, regardless of the GLSL version, even when UBOs are supported. + // Does not apply to shader storage or push constant blocks. + bool emit_uniform_buffer_as_plain_uniforms = false; + + // Emit OpLine directives if present in the module. + // May not correspond exactly to original source, but should be a good approximation. + bool emit_line_directives = false; + + // In cases where readonly/writeonly decoration are not used at all, + // we try to deduce which qualifier(s) we should actually used, since actually emitting + // read-write decoration is very rare, and older glslang/HLSL compilers tend to just emit readwrite as a matter of fact. 
+ // The default (true) is to enable automatic deduction for these cases, but if you trust the decorations set + // by the SPIR-V, it's recommended to set this to false. + bool enable_storage_image_qualifier_deduction = true; + + // On some targets (WebGPU), uninitialized variables are banned. + // If this is enabled, all variables (temporaries, Private, Function) + // which would otherwise be uninitialized will now be initialized to 0 instead. + bool force_zero_initialized_variables = false; + + // In GLSL, force use of I/O block flattening, similar to + // what happens on legacy GLSL targets for blocks and structs. + bool force_flattened_io_blocks = false; + + enum Precision + { + DontCare, + Lowp, + Mediump, + Highp + }; + + struct VertexOptions + { + // "Vertex-like shader" here is any shader stage that can write BuiltInPosition. + + // GLSL: In vertex-like shaders, rewrite [0, w] depth (Vulkan/D3D style) to [-w, w] depth (GL style). + // MSL: In vertex-like shaders, rewrite [-w, w] depth (GL style) to [0, w] depth. + // HLSL: In vertex-like shaders, rewrite [-w, w] depth (GL style) to [0, w] depth. + bool fixup_clipspace = false; + + // In vertex-like shaders, inverts gl_Position.y or equivalent. + bool flip_vert_y = false; + + // GLSL only, for HLSL version of this option, see CompilerHLSL. + // If true, the backend will assume that InstanceIndex will need to apply + // a base instance offset. Set to false if you know you will never use base instance + // functionality as it might remove some internal uniforms. + bool support_nonzero_base_instance = true; + } vertex; + + struct FragmentOptions + { + // Add precision mediump float in ES targets when emitting GLES source. + // Add precision highp int in ES targets when emitting GLES source. 
+ Precision default_float_precision = Mediump; + Precision default_int_precision = Highp; + } fragment; + }; + + void remap_pixel_local_storage(std::vector inputs, std::vector outputs) + { + pls_inputs = std::move(inputs); + pls_outputs = std::move(outputs); + remap_pls_variables(); + } + + // Redirect a subpassInput reading from input_attachment_index to instead load its value from + // the color attachment at location = color_location. Requires ESSL. + void remap_ext_framebuffer_fetch(uint32_t input_attachment_index, uint32_t color_location); + + explicit CompilerGLSL(std::vector spirv_) + : Compiler(std::move(spirv_)) + { + init(); + } + + CompilerGLSL(const uint32_t *ir_, size_t word_count) + : Compiler(ir_, word_count) + { + init(); + } + + explicit CompilerGLSL(const ParsedIR &ir_) + : Compiler(ir_) + { + init(); + } + + explicit CompilerGLSL(ParsedIR &&ir_) + : Compiler(std::move(ir_)) + { + init(); + } + + const Options &get_common_options() const + { + return options; + } + + void set_common_options(const Options &opts) + { + options = opts; + } + + std::string compile() override; + + // Returns the current string held in the conversion buffer. Useful for + // capturing what has been converted so far when compile() throws an error. + std::string get_partial_source(); + + // Adds a line to be added right after #version in GLSL backend. + // This is useful for enabling custom extensions which are outside the scope of SPIRV-Cross. + // This can be combined with variable remapping. + // A new-line will be added. + // + // While add_header_line() is a more generic way of adding arbitrary text to the header + // of a GLSL file, require_extension() should be used when adding extensions since it will + // avoid creating collisions with SPIRV-Cross generated extensions. + // + // Code added via add_header_line() is typically backend-specific. + void add_header_line(const std::string &str); + + // Adds an extension which is required to run this shader, e.g. 
+ // require_extension("GL_KHR_my_extension"); + void require_extension(const std::string &ext); + + // Legacy GLSL compatibility method. + // Takes a uniform or push constant variable and flattens it into a (i|u)vec4 array[N]; array instead. + // For this to work, all types in the block must be the same basic type, e.g. mixing vec2 and vec4 is fine, but + // mixing int and float is not. + // The name of the uniform array will be the same as the interface block name. + void flatten_buffer_block(VariableID id); + + // After compilation, query if a variable ID was used as a depth resource. + // This is meaningful for MSL since descriptor types depend on this knowledge. + // Cases which return true: + // - Images which are declared with depth = 1 image type. + // - Samplers which are statically used at least once with Dref opcodes. + // - Images which are statically used at least once with Dref opcodes. + bool variable_is_depth_or_compare(VariableID id) const; + +protected: + struct ShaderSubgroupSupportHelper + { + // lower enum value = greater priority + enum Candidate + { + KHR_shader_subgroup_ballot, + KHR_shader_subgroup_basic, + KHR_shader_subgroup_vote, + NV_gpu_shader_5, + NV_shader_thread_group, + NV_shader_thread_shuffle, + ARB_shader_ballot, + ARB_shader_group_vote, + AMD_gcn_shader, + + CandidateCount + }; + + static const char *get_extension_name(Candidate c); + static SmallVector get_extra_required_extension_names(Candidate c); + static const char *get_extra_required_extension_predicate(Candidate c); + + enum Feature + { + SubgroupMask, + SubgroupSize, + SubgroupInvocationID, + SubgroupID, + NumSubgroups, + SubgroupBrodcast_First, + SubgroupBallotFindLSB_MSB, + SubgroupAll_Any_AllEqualBool, + SubgroupAllEqualT, + SubgroupElect, + SubgroupBarrier, + SubgroupMemBarrier, + SubgroupBallot, + SubgroupInverseBallot_InclBitCount_ExclBitCout, + SubgroupBallotBitExtract, + SubgroupBallotBitCount, + + FeatureCount + }; + + using FeatureMask = uint32_t; + 
static_assert(sizeof(FeatureMask) * 8u >= FeatureCount, "Mask type needs more bits."); + + using CandidateVector = SmallVector; + using FeatureVector = SmallVector; + + static FeatureVector get_feature_dependencies(Feature feature); + static FeatureMask get_feature_dependency_mask(Feature feature); + static bool can_feature_be_implemented_without_extensions(Feature feature); + static Candidate get_KHR_extension_for_feature(Feature feature); + + struct Result + { + Result(); + uint32_t weights[CandidateCount]; + }; + + void request_feature(Feature feature); + bool is_feature_requested(Feature feature) const; + Result resolve() const; + + static CandidateVector get_candidates_for_feature(Feature ft, const Result &r); + + private: + static CandidateVector get_candidates_for_feature(Feature ft); + static FeatureMask build_mask(const SmallVector &features); + FeatureMask feature_mask = 0; + }; + + // TODO remove this function when all subgroup ops are supported (or make it always return true) + static bool is_supported_subgroup_op_in_opengl(spv::Op op); + + void reset(); + void emit_function(SPIRFunction &func, const Bitset &return_flags); + + bool has_extension(const std::string &ext) const; + void require_extension_internal(const std::string &ext); + + // Virtualize methods which need to be overridden by subclass targets like C++ and such. 
+ virtual void emit_function_prototype(SPIRFunction &func, const Bitset &return_flags); + + SPIRBlock *current_emitting_block = nullptr; + SPIRBlock *current_emitting_switch = nullptr; + bool current_emitting_switch_fallthrough = false; + + virtual void emit_instruction(const Instruction &instr); + void emit_block_instructions(SPIRBlock &block); + virtual void emit_glsl_op(uint32_t result_type, uint32_t result_id, uint32_t op, const uint32_t *args, + uint32_t count); + virtual void emit_spv_amd_shader_ballot_op(uint32_t result_type, uint32_t result_id, uint32_t op, + const uint32_t *args, uint32_t count); + virtual void emit_spv_amd_shader_explicit_vertex_parameter_op(uint32_t result_type, uint32_t result_id, uint32_t op, + const uint32_t *args, uint32_t count); + virtual void emit_spv_amd_shader_trinary_minmax_op(uint32_t result_type, uint32_t result_id, uint32_t op, + const uint32_t *args, uint32_t count); + virtual void emit_spv_amd_gcn_shader_op(uint32_t result_type, uint32_t result_id, uint32_t op, const uint32_t *args, + uint32_t count); + virtual void emit_header(); + void emit_line_directive(uint32_t file_id, uint32_t line_literal); + void build_workgroup_size(SmallVector &arguments, const SpecializationConstant &x, + const SpecializationConstant &y, const SpecializationConstant &z); + + void request_subgroup_feature(ShaderSubgroupSupportHelper::Feature feature); + + virtual void emit_sampled_image_op(uint32_t result_type, uint32_t result_id, uint32_t image_id, uint32_t samp_id); + virtual void emit_texture_op(const Instruction &i, bool sparse); + virtual std::string to_texture_op(const Instruction &i, bool sparse, bool *forward, + SmallVector &inherited_expressions); + virtual void emit_subgroup_op(const Instruction &i); + virtual std::string type_to_glsl(const SPIRType &type, uint32_t id = 0); + virtual std::string builtin_to_glsl(spv::BuiltIn builtin, spv::StorageClass storage); + virtual void emit_struct_member(const SPIRType &type, uint32_t 
member_type_id, uint32_t index, + const std::string &qualifier = "", uint32_t base_offset = 0); + virtual void emit_struct_padding_target(const SPIRType &type); + virtual std::string image_type_glsl(const SPIRType &type, uint32_t id = 0); + std::string constant_expression(const SPIRConstant &c); + std::string constant_op_expression(const SPIRConstantOp &cop); + virtual std::string constant_expression_vector(const SPIRConstant &c, uint32_t vector); + virtual void emit_fixup(); + virtual std::string variable_decl(const SPIRType &type, const std::string &name, uint32_t id = 0); + virtual std::string to_func_call_arg(const SPIRFunction::Parameter &arg, uint32_t id); + + struct TextureFunctionBaseArguments + { + // GCC 4.8 workarounds, it doesn't understand '{}' constructor here, use explicit default constructor. + TextureFunctionBaseArguments() = default; + VariableID img = 0; + const SPIRType *imgtype = nullptr; + bool is_fetch = false, is_gather = false, is_proj = false; + }; + + struct TextureFunctionNameArguments + { + // GCC 4.8 workarounds, it doesn't understand '{}' constructor here, use explicit default constructor. + TextureFunctionNameArguments() = default; + TextureFunctionBaseArguments base; + bool has_array_offsets = false, has_offset = false, has_grad = false; + bool has_dref = false, is_sparse_feedback = false, has_min_lod = false; + uint32_t lod = 0; + }; + virtual std::string to_function_name(const TextureFunctionNameArguments &args); + + struct TextureFunctionArguments + { + // GCC 4.8 workarounds, it doesn't understand '{}' constructor here, use explicit default constructor. 
+ TextureFunctionArguments() = default; + TextureFunctionBaseArguments base; + uint32_t coord = 0, coord_components = 0, dref = 0; + uint32_t grad_x = 0, grad_y = 0, lod = 0, coffset = 0, offset = 0; + uint32_t bias = 0, component = 0, sample = 0, sparse_texel = 0, min_lod = 0; + }; + virtual std::string to_function_args(const TextureFunctionArguments &args, bool *p_forward); + + void emit_sparse_feedback_temporaries(uint32_t result_type_id, uint32_t id, uint32_t &feedback_id, + uint32_t &texel_id); + uint32_t get_sparse_feedback_texel_id(uint32_t id) const; + virtual void emit_buffer_block(const SPIRVariable &type); + virtual void emit_push_constant_block(const SPIRVariable &var); + virtual void emit_uniform(const SPIRVariable &var); + virtual std::string unpack_expression_type(std::string expr_str, const SPIRType &type, uint32_t physical_type_id, + bool packed_type, bool row_major); + + virtual bool builtin_translates_to_nonarray(spv::BuiltIn builtin) const; + + void emit_copy_logical_type(uint32_t lhs_id, uint32_t lhs_type_id, uint32_t rhs_id, uint32_t rhs_type_id, + SmallVector chain); + + StringStream<> buffer; + + template + inline void statement_inner(T &&t) + { + buffer << std::forward(t); + statement_count++; + } + + template + inline void statement_inner(T &&t, Ts &&... ts) + { + buffer << std::forward(t); + statement_count++; + statement_inner(std::forward(ts)...); + } + + template + inline void statement(Ts &&... ts) + { + if (is_forcing_recompilation()) + { + // Do not bother emitting code while force_recompile is active. + // We will compile again. + statement_count++; + return; + } + + if (redirect_statement) + { + redirect_statement->push_back(join(std::forward(ts)...)); + statement_count++; + } + else + { + for (uint32_t i = 0; i < indent; i++) + buffer << " "; + statement_inner(std::forward(ts)...); + buffer << '\n'; + } + } + + template + inline void statement_no_indent(Ts &&... 
ts) + { + auto old_indent = indent; + indent = 0; + statement(std::forward(ts)...); + indent = old_indent; + } + + // Used for implementing continue blocks where + // we want to obtain a list of statements we can merge + // on a single line separated by comma. + SmallVector *redirect_statement = nullptr; + const SPIRBlock *current_continue_block = nullptr; + + void begin_scope(); + void end_scope(); + void end_scope(const std::string &trailer); + void end_scope_decl(); + void end_scope_decl(const std::string &decl); + + Options options; + + virtual std::string type_to_array_glsl( + const SPIRType &type); // Allow Metal to use the array template to make arrays a value type + std::string to_array_size(const SPIRType &type, uint32_t index); + uint32_t to_array_size_literal(const SPIRType &type, uint32_t index) const; + uint32_t to_array_size_literal(const SPIRType &type) const; + virtual std::string variable_decl(const SPIRVariable &variable); // Threadgroup arrays can't have a wrapper type + std::string variable_decl_function_local(SPIRVariable &variable); + + void add_local_variable_name(uint32_t id); + void add_resource_name(uint32_t id); + void add_member_name(SPIRType &type, uint32_t name); + void add_function_overload(const SPIRFunction &func); + + virtual bool is_non_native_row_major_matrix(uint32_t id); + virtual bool member_is_non_native_row_major_matrix(const SPIRType &type, uint32_t index); + bool member_is_remapped_physical_type(const SPIRType &type, uint32_t index) const; + bool member_is_packed_physical_type(const SPIRType &type, uint32_t index) const; + virtual std::string convert_row_major_matrix(std::string exp_str, const SPIRType &exp_type, + uint32_t physical_type_id, bool is_packed); + + std::unordered_set local_variable_names; + std::unordered_set resource_names; + std::unordered_set block_input_names; + std::unordered_set block_output_names; + std::unordered_set block_ubo_names; + std::unordered_set block_ssbo_names; + std::unordered_set 
block_names; // A union of all block_*_names. + std::unordered_map> function_overloads; + std::unordered_map preserved_aliases; + void preserve_alias_on_reset(uint32_t id); + void reset_name_caches(); + + bool processing_entry_point = false; + + // Can be overridden by subclass backends for trivial things which + // shouldn't need polymorphism. + struct BackendVariations + { + std::string discard_literal = "discard"; + std::string demote_literal = "demote"; + std::string null_pointer_literal = ""; + bool float_literal_suffix = false; + bool double_literal_suffix = true; + bool uint32_t_literal_suffix = true; + bool long_long_literal_suffix = false; + const char *basic_int_type = "int"; + const char *basic_uint_type = "uint"; + const char *basic_int8_type = "int8_t"; + const char *basic_uint8_type = "uint8_t"; + const char *basic_int16_type = "int16_t"; + const char *basic_uint16_type = "uint16_t"; + const char *int16_t_literal_suffix = "s"; + const char *uint16_t_literal_suffix = "us"; + const char *nonuniform_qualifier = "nonuniformEXT"; + const char *boolean_mix_function = "mix"; + bool swizzle_is_function = false; + bool shared_is_implied = false; + bool unsized_array_supported = true; + bool explicit_struct_type = false; + bool use_initializer_list = false; + bool use_typed_initializer_list = false; + bool can_declare_struct_inline = true; + bool can_declare_arrays_inline = true; + bool native_row_major_matrix = true; + bool use_constructor_splatting = true; + bool allow_precision_qualifiers = false; + bool can_swizzle_scalar = false; + bool force_gl_in_out_block = false; + bool can_return_array = true; + bool allow_truncated_access_chain = false; + bool supports_extensions = false; + bool supports_empty_struct = false; + bool array_is_value_type = true; + bool buffer_offset_array_is_value_type = true; + bool comparison_image_samples_scalar = false; + bool native_pointers = false; + bool support_small_type_sampling_result = false; + bool support_case_fallthrough
= true; + bool use_array_constructor = false; + bool needs_row_major_load_workaround = false; + } backend; + + void emit_struct(SPIRType &type); + void emit_resources(); + void emit_extension_workarounds(spv::ExecutionModel model); + void emit_buffer_block_native(const SPIRVariable &var); + void emit_buffer_reference_block(SPIRType &type, bool forward_declaration); + void emit_buffer_block_legacy(const SPIRVariable &var); + void emit_buffer_block_flattened(const SPIRVariable &type); + void fixup_implicit_builtin_block_names(); + void emit_declared_builtin_block(spv::StorageClass storage, spv::ExecutionModel model); + bool should_force_emit_builtin_block(spv::StorageClass storage); + void emit_push_constant_block_vulkan(const SPIRVariable &var); + void emit_push_constant_block_glsl(const SPIRVariable &var); + void emit_interface_block(const SPIRVariable &type); + void emit_flattened_io_block(const SPIRVariable &var, const char *qual); + void emit_flattened_io_block_struct(const std::string &basename, const SPIRType &type, const char *qual, + const SmallVector &indices); + void emit_flattened_io_block_member(const std::string &basename, const SPIRType &type, const char *qual, + const SmallVector &indices); + void emit_block_chain(SPIRBlock &block); + void emit_hoisted_temporaries(SmallVector> &temporaries); + std::string constant_value_macro_name(uint32_t id); + void emit_constant(const SPIRConstant &constant); + void emit_specialization_constant_op(const SPIRConstantOp &constant); + std::string emit_continue_block(uint32_t continue_block, bool follow_true_block, bool follow_false_block); + bool attempt_emit_loop_header(SPIRBlock &block, SPIRBlock::Method method); + + void branch(BlockID from, BlockID to); + void branch_to_continue(BlockID from, BlockID to); + void branch(BlockID from, uint32_t cond, BlockID true_block, BlockID false_block); + void flush_phi(BlockID from, BlockID to); + void flush_variable_declaration(uint32_t id); + void 
flush_undeclared_variables(SPIRBlock &block); + void emit_variable_temporary_copies(const SPIRVariable &var); + + bool should_dereference(uint32_t id); + bool should_forward(uint32_t id) const; + bool should_suppress_usage_tracking(uint32_t id) const; + void emit_mix_op(uint32_t result_type, uint32_t id, uint32_t left, uint32_t right, uint32_t lerp); + void emit_nminmax_op(uint32_t result_type, uint32_t id, uint32_t op0, uint32_t op1, GLSLstd450 op); + bool to_trivial_mix_op(const SPIRType &type, std::string &op, uint32_t left, uint32_t right, uint32_t lerp); + void emit_quaternary_func_op(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1, uint32_t op2, + uint32_t op3, const char *op); + void emit_trinary_func_op(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1, uint32_t op2, + const char *op); + void emit_binary_func_op(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1, const char *op); + + void emit_unary_func_op_cast(uint32_t result_type, uint32_t result_id, uint32_t op0, const char *op, + SPIRType::BaseType input_type, SPIRType::BaseType expected_result_type); + void emit_binary_func_op_cast(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1, const char *op, + SPIRType::BaseType input_type, bool skip_cast_if_equal_type); + void emit_binary_func_op_cast_clustered(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1, + const char *op, SPIRType::BaseType input_type); + void emit_trinary_func_op_cast(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1, uint32_t op2, + const char *op, SPIRType::BaseType input_type); + void emit_trinary_func_op_bitextract(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1, + uint32_t op2, const char *op, SPIRType::BaseType expected_result_type, + SPIRType::BaseType input_type0, SPIRType::BaseType input_type1, + SPIRType::BaseType input_type2); + void emit_bitfield_insert_op(uint32_t result_type, uint32_t 
result_id, uint32_t op0, uint32_t op1, uint32_t op2, + uint32_t op3, const char *op, SPIRType::BaseType offset_count_type); + + void emit_unary_func_op(uint32_t result_type, uint32_t result_id, uint32_t op0, const char *op); + void emit_unrolled_unary_op(uint32_t result_type, uint32_t result_id, uint32_t operand, const char *op); + void emit_binary_op(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1, const char *op); + void emit_unrolled_binary_op(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1, const char *op, + bool negate, SPIRType::BaseType expected_type); + void emit_binary_op_cast(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1, const char *op, + SPIRType::BaseType input_type, bool skip_cast_if_equal_type); + + SPIRType binary_op_bitcast_helper(std::string &cast_op0, std::string &cast_op1, SPIRType::BaseType &input_type, + uint32_t op0, uint32_t op1, bool skip_cast_if_equal_type); + + virtual bool emit_complex_bitcast(uint32_t result_type, uint32_t id, uint32_t op0); + + std::string to_ternary_expression(const SPIRType &result_type, uint32_t select, uint32_t true_value, + uint32_t false_value); + + void emit_unary_op(uint32_t result_type, uint32_t result_id, uint32_t op0, const char *op); + bool expression_is_forwarded(uint32_t id) const; + bool expression_suppresses_usage_tracking(uint32_t id) const; + bool expression_read_implies_multiple_reads(uint32_t id) const; + SPIRExpression &emit_op(uint32_t result_type, uint32_t result_id, const std::string &rhs, bool forward_rhs, + bool suppress_usage_tracking = false); + + void access_chain_internal_append_index(std::string &expr, uint32_t base, const SPIRType *type, + AccessChainFlags flags, bool &access_chain_is_arrayed, uint32_t index); + + std::string access_chain_internal(uint32_t base, const uint32_t *indices, uint32_t count, AccessChainFlags flags, + AccessChainMeta *meta); + + virtual void prepare_access_chain_for_scalar_access(std::string 
&expr, const SPIRType &type, + spv::StorageClass storage, bool &is_packed); + + std::string access_chain(uint32_t base, const uint32_t *indices, uint32_t count, const SPIRType &target_type, + AccessChainMeta *meta = nullptr, bool ptr_chain = false); + + std::string flattened_access_chain(uint32_t base, const uint32_t *indices, uint32_t count, + const SPIRType &target_type, uint32_t offset, uint32_t matrix_stride, + uint32_t array_stride, bool need_transpose); + std::string flattened_access_chain_struct(uint32_t base, const uint32_t *indices, uint32_t count, + const SPIRType &target_type, uint32_t offset); + std::string flattened_access_chain_matrix(uint32_t base, const uint32_t *indices, uint32_t count, + const SPIRType &target_type, uint32_t offset, uint32_t matrix_stride, + bool need_transpose); + std::string flattened_access_chain_vector(uint32_t base, const uint32_t *indices, uint32_t count, + const SPIRType &target_type, uint32_t offset, uint32_t matrix_stride, + bool need_transpose); + std::pair flattened_access_chain_offset(const SPIRType &basetype, const uint32_t *indices, + uint32_t count, uint32_t offset, + uint32_t word_stride, bool *need_transpose = nullptr, + uint32_t *matrix_stride = nullptr, + uint32_t *array_stride = nullptr, + bool ptr_chain = false); + + const char *index_to_swizzle(uint32_t index); + std::string remap_swizzle(const SPIRType &result_type, uint32_t input_components, const std::string &expr); + std::string declare_temporary(uint32_t type, uint32_t id); + void emit_uninitialized_temporary(uint32_t type, uint32_t id); + SPIRExpression &emit_uninitialized_temporary_expression(uint32_t type, uint32_t id); + void append_global_func_args(const SPIRFunction &func, uint32_t index, SmallVector &arglist); + std::string to_expression(uint32_t id, bool register_expression_read = true); + std::string to_composite_constructor_expression(uint32_t id, bool uses_buffer_offset); + std::string to_rerolled_array_expression(const std::string &expr, 
const SPIRType &type); + std::string to_enclosed_expression(uint32_t id, bool register_expression_read = true); + std::string to_unpacked_expression(uint32_t id, bool register_expression_read = true); + std::string to_unpacked_row_major_matrix_expression(uint32_t id); + std::string to_enclosed_unpacked_expression(uint32_t id, bool register_expression_read = true); + std::string to_dereferenced_expression(uint32_t id, bool register_expression_read = true); + std::string to_pointer_expression(uint32_t id, bool register_expression_read = true); + std::string to_enclosed_pointer_expression(uint32_t id, bool register_expression_read = true); + std::string to_extract_component_expression(uint32_t id, uint32_t index); + std::string enclose_expression(const std::string &expr); + std::string dereference_expression(const SPIRType &expression_type, const std::string &expr); + std::string address_of_expression(const std::string &expr); + void strip_enclosed_expression(std::string &expr); + std::string to_member_name(const SPIRType &type, uint32_t index); + virtual std::string to_member_reference(uint32_t base, const SPIRType &type, uint32_t index, bool ptr_chain); + std::string to_multi_member_reference(const SPIRType &type, const SmallVector &indices); + std::string type_to_glsl_constructor(const SPIRType &type); + std::string argument_decl(const SPIRFunction::Parameter &arg); + virtual std::string to_qualifiers_glsl(uint32_t id); + const char *to_precision_qualifiers_glsl(uint32_t id); + virtual const char *to_storage_qualifiers_glsl(const SPIRVariable &var); + const char *flags_to_qualifiers_glsl(const SPIRType &type, const Bitset &flags); + const char *format_to_glsl(spv::ImageFormat format); + virtual std::string layout_for_member(const SPIRType &type, uint32_t index); + virtual std::string to_interpolation_qualifiers(const Bitset &flags); + std::string layout_for_variable(const SPIRVariable &variable); + std::string to_combined_image_sampler(VariableID image_id, 
VariableID samp_id); + virtual bool skip_argument(uint32_t id) const; + virtual void emit_array_copy(const std::string &lhs, uint32_t rhs_id, spv::StorageClass lhs_storage, + spv::StorageClass rhs_storage); + virtual void emit_block_hints(const SPIRBlock &block); + virtual std::string to_initializer_expression(const SPIRVariable &var); + virtual std::string to_zero_initialized_expression(uint32_t type_id); + bool type_can_zero_initialize(const SPIRType &type) const; + + bool buffer_is_packing_standard(const SPIRType &type, BufferPackingStandard packing, + uint32_t *failed_index = nullptr, uint32_t start_offset = 0, + uint32_t end_offset = ~(0u)); + std::string buffer_to_packing_standard(const SPIRType &type, bool support_std430_without_scalar_layout); + + uint32_t type_to_packed_base_size(const SPIRType &type, BufferPackingStandard packing); + uint32_t type_to_packed_alignment(const SPIRType &type, const Bitset &flags, BufferPackingStandard packing); + uint32_t type_to_packed_array_stride(const SPIRType &type, const Bitset &flags, BufferPackingStandard packing); + uint32_t type_to_packed_size(const SPIRType &type, const Bitset &flags, BufferPackingStandard packing); + + std::string bitcast_glsl(const SPIRType &result_type, uint32_t arg); + virtual std::string bitcast_glsl_op(const SPIRType &result_type, const SPIRType &argument_type); + + std::string bitcast_expression(SPIRType::BaseType target_type, uint32_t arg); + std::string bitcast_expression(const SPIRType &target_type, SPIRType::BaseType expr_type, const std::string &expr); + + std::string build_composite_combiner(uint32_t result_type, const uint32_t *elems, uint32_t length); + bool remove_duplicate_swizzle(std::string &op); + bool remove_unity_swizzle(uint32_t base, std::string &op); + + // Can modify flags to remove readonly/writeonly if image type + // and force recompile.
+ bool check_atomic_image(uint32_t id); + + virtual void replace_illegal_names(); + void replace_illegal_names(const std::unordered_set &keywords); + virtual void emit_entry_point_declarations(); + + void replace_fragment_output(SPIRVariable &var); + void replace_fragment_outputs(); + std::string legacy_tex_op(const std::string &op, const SPIRType &imgtype, uint32_t id); + + uint32_t indent = 0; + + std::unordered_set emitted_functions; + + // Ensure that we declare phi-variable copies even if the original declaration isn't deferred + std::unordered_set flushed_phi_variables; + + std::unordered_set flattened_buffer_blocks; + std::unordered_map flattened_structs; + + ShaderSubgroupSupportHelper shader_subgroup_supporter; + + std::string load_flattened_struct(const std::string &basename, const SPIRType &type); + std::string to_flattened_struct_member(const std::string &basename, const SPIRType &type, uint32_t index); + void store_flattened_struct(uint32_t lhs_id, uint32_t value); + void store_flattened_struct(const std::string &basename, uint32_t rhs, const SPIRType &type, + const SmallVector &indices); + std::string to_flattened_access_chain_expression(uint32_t id); + + // Usage tracking. If a temporary is used more than once, use the temporary instead to + // avoid AST explosion when SPIRV is generated with pure SSA and doesn't write stuff to variables. + std::unordered_map expression_usage_counts; + void track_expression_read(uint32_t id); + + SmallVector forced_extensions; + SmallVector header_lines; + + // Used when expressions emit extra opcodes with their own unique IDs, + // and we need to reuse the IDs across recompilation loops. + // Currently used by NMin/Max/Clamp implementations. 
+ std::unordered_map extra_sub_expressions; + + SmallVector workaround_ubo_load_overload_types; + void request_workaround_wrapper_overload(TypeID id); + void rewrite_load_for_wrapped_row_major(std::string &expr, TypeID loaded_type, ID ptr); + + uint32_t statement_count = 0; + + inline bool is_legacy() const + { + return (options.es && options.version < 300) || (!options.es && options.version < 130); + } + + inline bool is_legacy_es() const + { + return options.es && options.version < 300; + } + + inline bool is_legacy_desktop() const + { + return !options.es && options.version < 130; + } + + bool requires_transpose_2x2 = false; + bool requires_transpose_3x3 = false; + bool requires_transpose_4x4 = false; + + bool args_will_forward(uint32_t id, const uint32_t *args, uint32_t num_args, bool pure); + void register_call_out_argument(uint32_t id); + void register_impure_function_call(); + void register_control_dependent_expression(uint32_t expr); + + // GL_EXT_shader_pixel_local_storage support. + std::vector pls_inputs; + std::vector pls_outputs; + std::string pls_decl(const PlsRemap &variable); + const char *to_pls_qualifiers_glsl(const SPIRVariable &variable); + void emit_pls(); + void remap_pls_variables(); + + // GL_EXT_shader_framebuffer_fetch support. + std::vector> subpass_to_framebuffer_fetch_attachment; + std::unordered_set inout_color_attachments; + bool subpass_input_is_framebuffer_fetch(uint32_t id) const; + void emit_inout_fragment_outputs_copy_to_subpass_inputs(); + const SPIRVariable *find_subpass_input_by_attachment_index(uint32_t index) const; + const SPIRVariable *find_color_output_by_location(uint32_t location) const; + + // A variant which takes two sets of names. The secondary is only used to verify there are no collisions, + // but the set is not updated when we have found a new name. + // Used primarily when adding block interface names.
+ void add_variable(std::unordered_set &variables_primary, + const std::unordered_set &variables_secondary, std::string &name); + + void check_function_call_constraints(const uint32_t *args, uint32_t length); + void handle_invalid_expression(uint32_t id); + void find_static_extensions(); + + std::string emit_for_loop_initializers(const SPIRBlock &block); + void emit_while_loop_initializers(const SPIRBlock &block); + bool for_loop_initializers_are_same_type(const SPIRBlock &block); + bool optimize_read_modify_write(const SPIRType &type, const std::string &lhs, const std::string &rhs); + void fixup_image_load_store_access(); + + bool type_is_empty(const SPIRType &type); + + virtual void declare_undefined_values(); + + bool can_use_io_location(spv::StorageClass storage, bool block); + const Instruction *get_next_instruction_in_block(const Instruction &instr); + static uint32_t mask_relevant_memory_semantics(uint32_t semantics); + + std::string convert_half_to_string(const SPIRConstant &value, uint32_t col, uint32_t row); + std::string convert_float_to_string(const SPIRConstant &value, uint32_t col, uint32_t row); + std::string convert_double_to_string(const SPIRConstant &value, uint32_t col, uint32_t row); + + std::string convert_separate_image_to_expression(uint32_t id); + + // Builtins in GLSL are always specific signedness, but the SPIR-V can declare them + // as either unsigned or signed. + // Sometimes we will need to automatically perform casts on load and store to make this work. 
+ virtual void cast_to_builtin_store(uint32_t target_id, std::string &expr, const SPIRType &expr_type); + virtual void cast_from_builtin_load(uint32_t source_id, std::string &expr, const SPIRType &expr_type); + void unroll_array_from_complex_load(uint32_t target_id, uint32_t source_id, std::string &expr); + void convert_non_uniform_expression(const SPIRType &type, std::string &expr); + + void handle_store_to_invariant_variable(uint32_t store_id, uint32_t value_id); + void disallow_forwarding_in_expression_chain(const SPIRExpression &expr); + + bool expression_is_constant_null(uint32_t id) const; + bool expression_is_non_value_type_array(uint32_t ptr); + virtual void emit_store_statement(uint32_t lhs_expression, uint32_t rhs_expression); + + uint32_t get_integer_width_for_instruction(const Instruction &instr) const; + uint32_t get_integer_width_for_glsl_instruction(GLSLstd450 op, const uint32_t *arguments, uint32_t length) const; + + bool variable_is_lut(const SPIRVariable &var) const; + + char current_locale_radix_character = '.'; + + void fixup_type_alias(); + void reorder_type_alias(); + + void propagate_nonuniform_qualifier(uint32_t id); + + static const char *vector_swizzle(int vecsize, int index); + +private: + void init(); +}; +} // namespace SPIRV_CROSS_NAMESPACE + +#endif diff --git a/dep/spirv-cross/spirv_hlsl.cpp b/dep/spirv-cross/spirv_hlsl.cpp new file mode 100644 index 000000000..f79e2be7a --- /dev/null +++ b/dep/spirv-cross/spirv_hlsl.cpp @@ -0,0 +1,5782 @@ +/* + * Copyright 2016-2020 Robert Konrad + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +/* + * At your option, you may choose to accept this material under either: + * 1. The Apache License, Version 2.0, found at , or + * 2. The MIT License, found at . + * SPDX-License-Identifier: Apache-2.0 OR MIT. + */ + +#include "spirv_hlsl.hpp" +#include "GLSL.std.450.h" +#include +#include + +using namespace spv; +using namespace SPIRV_CROSS_NAMESPACE; +using namespace std; + +enum class ImageFormatNormalizedState +{ + None = 0, + Unorm = 1, + Snorm = 2 +}; + +static ImageFormatNormalizedState image_format_to_normalized_state(ImageFormat fmt) +{ + switch (fmt) + { + case ImageFormatR8: + case ImageFormatR16: + case ImageFormatRg8: + case ImageFormatRg16: + case ImageFormatRgba8: + case ImageFormatRgba16: + case ImageFormatRgb10A2: + return ImageFormatNormalizedState::Unorm; + + case ImageFormatR8Snorm: + case ImageFormatR16Snorm: + case ImageFormatRg8Snorm: + case ImageFormatRg16Snorm: + case ImageFormatRgba8Snorm: + case ImageFormatRgba16Snorm: + return ImageFormatNormalizedState::Snorm; + + default: + break; + } + + return ImageFormatNormalizedState::None; +} + +static unsigned image_format_to_components(ImageFormat fmt) +{ + switch (fmt) + { + case ImageFormatR8: + case ImageFormatR16: + case ImageFormatR8Snorm: + case ImageFormatR16Snorm: + case ImageFormatR16f: + case ImageFormatR32f: + case ImageFormatR8i: + case ImageFormatR16i: + case ImageFormatR32i: + case ImageFormatR8ui: + case ImageFormatR16ui: + case ImageFormatR32ui: + return 1; + + case ImageFormatRg8: + case ImageFormatRg16: + case ImageFormatRg8Snorm: + case ImageFormatRg16Snorm: + case ImageFormatRg16f: + case ImageFormatRg32f: + case ImageFormatRg8i: + case ImageFormatRg16i: + case ImageFormatRg32i: + case ImageFormatRg8ui: + case ImageFormatRg16ui: + case ImageFormatRg32ui: + return 2; + + case ImageFormatR11fG11fB10f: + return 3; + + case ImageFormatRgba8: + case 
ImageFormatRgba16: + case ImageFormatRgb10A2: + case ImageFormatRgba8Snorm: + case ImageFormatRgba16Snorm: + case ImageFormatRgba16f: + case ImageFormatRgba32f: + case ImageFormatRgba8i: + case ImageFormatRgba16i: + case ImageFormatRgba32i: + case ImageFormatRgba8ui: + case ImageFormatRgba16ui: + case ImageFormatRgba32ui: + case ImageFormatRgb10a2ui: + return 4; + + case ImageFormatUnknown: + return 4; // Assume 4. + + default: + SPIRV_CROSS_THROW("Unrecognized typed image format."); + } +} + +static string image_format_to_type(ImageFormat fmt, SPIRType::BaseType basetype) +{ + switch (fmt) + { + case ImageFormatR8: + case ImageFormatR16: + if (basetype != SPIRType::Float) + SPIRV_CROSS_THROW("Mismatch in image type and base type of image."); + return "unorm float"; + case ImageFormatRg8: + case ImageFormatRg16: + if (basetype != SPIRType::Float) + SPIRV_CROSS_THROW("Mismatch in image type and base type of image."); + return "unorm float2"; + case ImageFormatRgba8: + case ImageFormatRgba16: + if (basetype != SPIRType::Float) + SPIRV_CROSS_THROW("Mismatch in image type and base type of image."); + return "unorm float4"; + case ImageFormatRgb10A2: + if (basetype != SPIRType::Float) + SPIRV_CROSS_THROW("Mismatch in image type and base type of image."); + return "unorm float4"; + + case ImageFormatR8Snorm: + case ImageFormatR16Snorm: + if (basetype != SPIRType::Float) + SPIRV_CROSS_THROW("Mismatch in image type and base type of image."); + return "snorm float"; + case ImageFormatRg8Snorm: + case ImageFormatRg16Snorm: + if (basetype != SPIRType::Float) + SPIRV_CROSS_THROW("Mismatch in image type and base type of image."); + return "snorm float2"; + case ImageFormatRgba8Snorm: + case ImageFormatRgba16Snorm: + if (basetype != SPIRType::Float) + SPIRV_CROSS_THROW("Mismatch in image type and base type of image."); + return "snorm float4"; + + case ImageFormatR16f: + case ImageFormatR32f: + if (basetype != SPIRType::Float) + SPIRV_CROSS_THROW("Mismatch in image type and base 
type of image."); + return "float"; + case ImageFormatRg16f: + case ImageFormatRg32f: + if (basetype != SPIRType::Float) + SPIRV_CROSS_THROW("Mismatch in image type and base type of image."); + return "float2"; + case ImageFormatRgba16f: + case ImageFormatRgba32f: + if (basetype != SPIRType::Float) + SPIRV_CROSS_THROW("Mismatch in image type and base type of image."); + return "float4"; + + case ImageFormatR11fG11fB10f: + if (basetype != SPIRType::Float) + SPIRV_CROSS_THROW("Mismatch in image type and base type of image."); + return "float3"; + + case ImageFormatR8i: + case ImageFormatR16i: + case ImageFormatR32i: + if (basetype != SPIRType::Int) + SPIRV_CROSS_THROW("Mismatch in image type and base type of image."); + return "int"; + case ImageFormatRg8i: + case ImageFormatRg16i: + case ImageFormatRg32i: + if (basetype != SPIRType::Int) + SPIRV_CROSS_THROW("Mismatch in image type and base type of image."); + return "int2"; + case ImageFormatRgba8i: + case ImageFormatRgba16i: + case ImageFormatRgba32i: + if (basetype != SPIRType::Int) + SPIRV_CROSS_THROW("Mismatch in image type and base type of image."); + return "int4"; + + case ImageFormatR8ui: + case ImageFormatR16ui: + case ImageFormatR32ui: + if (basetype != SPIRType::UInt) + SPIRV_CROSS_THROW("Mismatch in image type and base type of image."); + return "uint"; + case ImageFormatRg8ui: + case ImageFormatRg16ui: + case ImageFormatRg32ui: + if (basetype != SPIRType::UInt) + SPIRV_CROSS_THROW("Mismatch in image type and base type of image."); + return "uint2"; + case ImageFormatRgba8ui: + case ImageFormatRgba16ui: + case ImageFormatRgba32ui: + if (basetype != SPIRType::UInt) + SPIRV_CROSS_THROW("Mismatch in image type and base type of image."); + return "uint4"; + case ImageFormatRgb10a2ui: + if (basetype != SPIRType::UInt) + SPIRV_CROSS_THROW("Mismatch in image type and base type of image."); + return "uint4"; + + case ImageFormatUnknown: + switch (basetype) + { + case SPIRType::Float: + return "float4"; + case 
SPIRType::Int: + return "int4"; + case SPIRType::UInt: + return "uint4"; + default: + SPIRV_CROSS_THROW("Unsupported base type for image."); + } + + default: + SPIRV_CROSS_THROW("Unrecognized typed image format."); + } +} + +string CompilerHLSL::image_type_hlsl_modern(const SPIRType &type, uint32_t id) +{ + auto &imagetype = get(type.image.type); + const char *dim = nullptr; + bool typed_load = false; + uint32_t components = 4; + + bool force_image_srv = hlsl_options.nonwritable_uav_texture_as_srv && has_decoration(id, DecorationNonWritable); + + switch (type.image.dim) + { + case Dim1D: + typed_load = type.image.sampled == 2; + dim = "1D"; + break; + case Dim2D: + typed_load = type.image.sampled == 2; + dim = "2D"; + break; + case Dim3D: + typed_load = type.image.sampled == 2; + dim = "3D"; + break; + case DimCube: + if (type.image.sampled == 2) + SPIRV_CROSS_THROW("RWTextureCube does not exist in HLSL."); + dim = "Cube"; + break; + case DimRect: + SPIRV_CROSS_THROW("Rectangle texture support is not yet implemented for HLSL."); // TODO + case DimBuffer: + if (type.image.sampled == 1) + return join("Buffer<", type_to_glsl(imagetype), components, ">"); + else if (type.image.sampled == 2) + { + if (interlocked_resources.count(id)) + return join("RasterizerOrderedBuffer<", image_format_to_type(type.image.format, imagetype.basetype), + ">"); + + typed_load = !force_image_srv && type.image.sampled == 2; + + const char *rw = force_image_srv ? "" : "RW"; + return join(rw, "Buffer<", + typed_load ? image_format_to_type(type.image.format, imagetype.basetype) : + join(type_to_glsl(imagetype), components), + ">"); + } + else + SPIRV_CROSS_THROW("Sampler buffers must be either sampled or unsampled. Cannot deduce in runtime."); + case DimSubpassData: + dim = "2D"; + typed_load = false; + break; + default: + SPIRV_CROSS_THROW("Invalid dimension."); + } + const char *arrayed = type.image.arrayed ? "Array" : ""; + const char *ms = type.image.ms ? 
"MS" : ""; + const char *rw = typed_load && !force_image_srv ? "RW" : ""; + + if (force_image_srv) + typed_load = false; + + if (typed_load && interlocked_resources.count(id)) + rw = "RasterizerOrdered"; + + return join(rw, "Texture", dim, ms, arrayed, "<", + typed_load ? image_format_to_type(type.image.format, imagetype.basetype) : + join(type_to_glsl(imagetype), components), + ">"); +} + +string CompilerHLSL::image_type_hlsl_legacy(const SPIRType &type, uint32_t /*id*/) +{ + auto &imagetype = get(type.image.type); + string res; + + switch (imagetype.basetype) + { + case SPIRType::Int: + res = "i"; + break; + case SPIRType::UInt: + res = "u"; + break; + default: + break; + } + + if (type.basetype == SPIRType::Image && type.image.dim == DimSubpassData) + return res + "subpassInput" + (type.image.ms ? "MS" : ""); + + // If we're emulating subpassInput with samplers, force sampler2D + // so we don't have to specify format. + if (type.basetype == SPIRType::Image && type.image.dim != DimSubpassData) + { + // Sampler buffers are always declared as samplerBuffer even though they might be separate images in the SPIR-V. + if (type.image.dim == DimBuffer && type.image.sampled == 1) + res += "sampler"; + else + res += type.image.sampled == 2 ? 
"image" : "texture"; + } + else + res += "sampler"; + + switch (type.image.dim) + { + case Dim1D: + res += "1D"; + break; + case Dim2D: + res += "2D"; + break; + case Dim3D: + res += "3D"; + break; + case DimCube: + res += "CUBE"; + break; + + case DimBuffer: + res += "Buffer"; + break; + + case DimSubpassData: + res += "2D"; + break; + default: + SPIRV_CROSS_THROW("Only 1D, 2D, 3D, Buffer, InputTarget and Cube textures supported."); + } + + if (type.image.ms) + res += "MS"; + if (type.image.arrayed) + res += "Array"; + + return res; +} + +string CompilerHLSL::image_type_hlsl(const SPIRType &type, uint32_t id) +{ + if (hlsl_options.shader_model <= 30) + return image_type_hlsl_legacy(type, id); + else + return image_type_hlsl_modern(type, id); +} + +// The optional id parameter indicates the object whose type we are trying +// to find the description for. It is optional. Most type descriptions do not +// depend on a specific object's use of that type. +string CompilerHLSL::type_to_glsl(const SPIRType &type, uint32_t id) +{ + // Ignore the pointer type since GLSL doesn't have pointers. + + switch (type.basetype) + { + case SPIRType::Struct: + // Need OpName lookup here to get a "sensible" name for a struct. + if (backend.explicit_struct_type) + return join("struct ", to_name(type.self)); + else + return to_name(type.self); + + case SPIRType::Image: + case SPIRType::SampledImage: + return image_type_hlsl(type, id); + + case SPIRType::Sampler: + return comparison_ids.count(id) ? 
"SamplerComparisonState" : "SamplerState"; + + case SPIRType::Void: + return "void"; + + default: + break; + } + + if (type.vecsize == 1 && type.columns == 1) // Scalar builtin + { + switch (type.basetype) + { + case SPIRType::Boolean: + return "bool"; + case SPIRType::Int: + return backend.basic_int_type; + case SPIRType::UInt: + return backend.basic_uint_type; + case SPIRType::AtomicCounter: + return "atomic_uint"; + case SPIRType::Half: + if (hlsl_options.enable_16bit_types) + return "half"; + else + return "min16float"; + case SPIRType::Short: + if (hlsl_options.enable_16bit_types) + return "int16_t"; + else + return "min16int"; + case SPIRType::UShort: + if (hlsl_options.enable_16bit_types) + return "uint16_t"; + else + return "min16uint"; + case SPIRType::Float: + return "float"; + case SPIRType::Double: + return "double"; + case SPIRType::Int64: + if (hlsl_options.shader_model < 60) + SPIRV_CROSS_THROW("64-bit integers only supported in SM 6.0."); + return "int64_t"; + case SPIRType::UInt64: + if (hlsl_options.shader_model < 60) + SPIRV_CROSS_THROW("64-bit integers only supported in SM 6.0."); + return "uint64_t"; + default: + return "???"; + } + } + else if (type.vecsize > 1 && type.columns == 1) // Vector builtin + { + switch (type.basetype) + { + case SPIRType::Boolean: + return join("bool", type.vecsize); + case SPIRType::Int: + return join("int", type.vecsize); + case SPIRType::UInt: + return join("uint", type.vecsize); + case SPIRType::Half: + return join(hlsl_options.enable_16bit_types ? "half" : "min16float", type.vecsize); + case SPIRType::Short: + return join(hlsl_options.enable_16bit_types ? "int16_t" : "min16int", type.vecsize); + case SPIRType::UShort: + return join(hlsl_options.enable_16bit_types ? 
"uint16_t" : "min16uint", type.vecsize); + case SPIRType::Float: + return join("float", type.vecsize); + case SPIRType::Double: + return join("double", type.vecsize); + case SPIRType::Int64: + return join("i64vec", type.vecsize); + case SPIRType::UInt64: + return join("u64vec", type.vecsize); + default: + return "???"; + } + } + else + { + switch (type.basetype) + { + case SPIRType::Boolean: + return join("bool", type.columns, "x", type.vecsize); + case SPIRType::Int: + return join("int", type.columns, "x", type.vecsize); + case SPIRType::UInt: + return join("uint", type.columns, "x", type.vecsize); + case SPIRType::Half: + return join(hlsl_options.enable_16bit_types ? "half" : "min16float", type.columns, "x", type.vecsize); + case SPIRType::Short: + return join(hlsl_options.enable_16bit_types ? "int16_t" : "min16int", type.columns, "x", type.vecsize); + case SPIRType::UShort: + return join(hlsl_options.enable_16bit_types ? "uint16_t" : "min16uint", type.columns, "x", type.vecsize); + case SPIRType::Float: + return join("float", type.columns, "x", type.vecsize); + case SPIRType::Double: + return join("double", type.columns, "x", type.vecsize); + // Matrix types not supported for int64/uint64. + default: + return "???"; + } + } +} + +void CompilerHLSL::emit_header() +{ + for (auto &header : header_lines) + statement(header); + + if (header_lines.size() > 0) + { + statement(""); + } +} + +void CompilerHLSL::emit_interface_block_globally(const SPIRVariable &var) +{ + add_resource_name(var.self); + + // The global copies of I/O variables should not contain interpolation qualifiers. + // These are emitted inside the interface structs. + auto &flags = ir.meta[var.self].decoration.decoration_flags; + auto old_flags = flags; + flags.reset(); + statement("static ", variable_decl(var), ";"); + flags = old_flags; +} + +const char *CompilerHLSL::to_storage_qualifiers_glsl(const SPIRVariable &var) +{ + // Input and output variables are handled specially in HLSL backend. 
+ // The variables are declared as global, private variables, and do not need any qualifiers. + if (var.storage == StorageClassUniformConstant || var.storage == StorageClassUniform || + var.storage == StorageClassPushConstant) + { + return "uniform "; + } + + return ""; +} + +void CompilerHLSL::emit_builtin_outputs_in_struct() +{ + auto &execution = get_entry_point(); + + bool legacy = hlsl_options.shader_model <= 30; + active_output_builtins.for_each_bit([&](uint32_t i) { + const char *type = nullptr; + const char *semantic = nullptr; + auto builtin = static_cast(i); + switch (builtin) + { + case BuiltInPosition: + type = "float4"; + semantic = legacy ? "POSITION" : "SV_Position"; + break; + + case BuiltInSampleMask: + if (hlsl_options.shader_model < 41 || execution.model != ExecutionModelFragment) + SPIRV_CROSS_THROW("Sample Mask output is only supported in PS 4.1 or higher."); + type = "uint"; + semantic = "SV_Coverage"; + break; + + case BuiltInFragDepth: + type = "float"; + if (legacy) + { + semantic = "DEPTH"; + } + else + { + if (hlsl_options.shader_model >= 50 && execution.flags.get(ExecutionModeDepthGreater)) + semantic = "SV_DepthGreaterEqual"; + else if (hlsl_options.shader_model >= 50 && execution.flags.get(ExecutionModeDepthLess)) + semantic = "SV_DepthLessEqual"; + else + semantic = "SV_Depth"; + } + break; + + case BuiltInClipDistance: + // HLSL is a bit weird here, use SV_ClipDistance0, SV_ClipDistance1 and so on with vectors. 
+ for (uint32_t clip = 0; clip < clip_distance_count; clip += 4) + { + uint32_t to_declare = clip_distance_count - clip; + if (to_declare > 4) + to_declare = 4; + + uint32_t semantic_index = clip / 4; + + static const char *types[] = { "float", "float2", "float3", "float4" }; + statement(types[to_declare - 1], " ", builtin_to_glsl(builtin, StorageClassOutput), semantic_index, + " : SV_ClipDistance", semantic_index, ";"); + } + break; + + case BuiltInCullDistance: + // HLSL is a bit weird here, use SV_CullDistance0, SV_CullDistance1 and so on with vectors. + for (uint32_t cull = 0; cull < cull_distance_count; cull += 4) + { + uint32_t to_declare = cull_distance_count - cull; + if (to_declare > 4) + to_declare = 4; + + uint32_t semantic_index = cull / 4; + + static const char *types[] = { "float", "float2", "float3", "float4" }; + statement(types[to_declare - 1], " ", builtin_to_glsl(builtin, StorageClassOutput), semantic_index, + " : SV_CullDistance", semantic_index, ";"); + } + break; + + case BuiltInPointSize: + // If point_size_compat is enabled, just ignore PointSize. + // PointSize does not exist in HLSL, but some code bases might want to be able to use these shaders, + // even if it means working around the missing feature. + if (hlsl_options.point_size_compat) + break; + else + SPIRV_CROSS_THROW("Unsupported builtin in HLSL."); + + default: + SPIRV_CROSS_THROW("Unsupported builtin in HLSL."); + break; + } + + if (type && semantic) + statement(type, " ", builtin_to_glsl(builtin, StorageClassOutput), " : ", semantic, ";"); + }); +} + +void CompilerHLSL::emit_builtin_inputs_in_struct() +{ + bool legacy = hlsl_options.shader_model <= 30; + active_input_builtins.for_each_bit([&](uint32_t i) { + const char *type = nullptr; + const char *semantic = nullptr; + auto builtin = static_cast(i); + switch (builtin) + { + case BuiltInFragCoord: + type = "float4"; + semantic = legacy ? 
"VPOS" : "SV_Position"; + break; + + case BuiltInVertexId: + case BuiltInVertexIndex: + if (legacy) + SPIRV_CROSS_THROW("Vertex index not supported in SM 3.0 or lower."); + type = "uint"; + semantic = "SV_VertexID"; + break; + + case BuiltInInstanceId: + case BuiltInInstanceIndex: + if (legacy) + SPIRV_CROSS_THROW("Instance index not supported in SM 3.0 or lower."); + type = "uint"; + semantic = "SV_InstanceID"; + break; + + case BuiltInSampleId: + if (legacy) + SPIRV_CROSS_THROW("Sample ID not supported in SM 3.0 or lower."); + type = "uint"; + semantic = "SV_SampleIndex"; + break; + + case BuiltInSampleMask: + if (hlsl_options.shader_model < 50 || get_entry_point().model != ExecutionModelFragment) + SPIRV_CROSS_THROW("Sample Mask input is only supported in PS 5.0 or higher."); + type = "uint"; + semantic = "SV_Coverage"; + break; + + case BuiltInGlobalInvocationId: + type = "uint3"; + semantic = "SV_DispatchThreadID"; + break; + + case BuiltInLocalInvocationId: + type = "uint3"; + semantic = "SV_GroupThreadID"; + break; + + case BuiltInLocalInvocationIndex: + type = "uint"; + semantic = "SV_GroupIndex"; + break; + + case BuiltInWorkgroupId: + type = "uint3"; + semantic = "SV_GroupID"; + break; + + case BuiltInFrontFacing: + type = "bool"; + semantic = "SV_IsFrontFace"; + break; + + case BuiltInNumWorkgroups: + case BuiltInSubgroupSize: + case BuiltInSubgroupLocalInvocationId: + case BuiltInSubgroupEqMask: + case BuiltInSubgroupLtMask: + case BuiltInSubgroupLeMask: + case BuiltInSubgroupGtMask: + case BuiltInSubgroupGeMask: + // Handled specially. + break; + + case BuiltInClipDistance: + // HLSL is a bit weird here, use SV_ClipDistance0, SV_ClipDistance1 and so on with vectors. 
+ for (uint32_t clip = 0; clip < clip_distance_count; clip += 4) + { + uint32_t to_declare = clip_distance_count - clip; + if (to_declare > 4) + to_declare = 4; + + uint32_t semantic_index = clip / 4; + + static const char *types[] = { "float", "float2", "float3", "float4" }; + statement(types[to_declare - 1], " ", builtin_to_glsl(builtin, StorageClassInput), semantic_index, + " : SV_ClipDistance", semantic_index, ";"); + } + break; + + case BuiltInCullDistance: + // HLSL is a bit weird here, use SV_CullDistance0, SV_CullDistance1 and so on with vectors. + for (uint32_t cull = 0; cull < cull_distance_count; cull += 4) + { + uint32_t to_declare = cull_distance_count - cull; + if (to_declare > 4) + to_declare = 4; + + uint32_t semantic_index = cull / 4; + + static const char *types[] = { "float", "float2", "float3", "float4" }; + statement(types[to_declare - 1], " ", builtin_to_glsl(builtin, StorageClassInput), semantic_index, + " : SV_CullDistance", semantic_index, ";"); + } + break; + + case BuiltInPointCoord: + // PointCoord is not supported, but provide a way to just ignore that, similar to PointSize. + if (hlsl_options.point_coord_compat) + break; + else + SPIRV_CROSS_THROW("Unsupported builtin in HLSL."); + + default: + SPIRV_CROSS_THROW("Unsupported builtin in HLSL."); + break; + } + + if (type && semantic) + statement(type, " ", builtin_to_glsl(builtin, StorageClassInput), " : ", semantic, ";"); + }); +} + +uint32_t CompilerHLSL::type_to_consumed_locations(const SPIRType &type) const +{ + // TODO: Need to verify correctness. 
+ uint32_t elements = 0; + + if (type.basetype == SPIRType::Struct) + { + for (uint32_t i = 0; i < uint32_t(type.member_types.size()); i++) + elements += type_to_consumed_locations(get(type.member_types[i])); + } + else + { + uint32_t array_multiplier = 1; + for (uint32_t i = 0; i < uint32_t(type.array.size()); i++) + { + if (type.array_size_literal[i]) + array_multiplier *= type.array[i]; + else + array_multiplier *= evaluate_constant_u32(type.array[i]); + } + elements += array_multiplier * type.columns; + } + return elements; +} + +string CompilerHLSL::to_interpolation_qualifiers(const Bitset &flags) +{ + string res; + //if (flags & (1ull << DecorationSmooth)) + // res += "linear "; + if (flags.get(DecorationFlat)) + res += "nointerpolation "; + if (flags.get(DecorationNoPerspective)) + res += "noperspective "; + if (flags.get(DecorationCentroid)) + res += "centroid "; + if (flags.get(DecorationPatch)) + res += "patch "; // Seems to be different in actual HLSL. + if (flags.get(DecorationSample)) + res += "sample "; + if (flags.get(DecorationInvariant)) + res += "invariant "; // Not supported? + + return res; +} + +std::string CompilerHLSL::to_semantic(uint32_t location, ExecutionModel em, StorageClass sc) +{ + if (em == ExecutionModelVertex && sc == StorageClassInput) + { + // We have a vertex attribute - we should look at remapping it if the user provided + // vertex attribute hints. + for (auto &attribute : remap_vertex_attributes) + if (attribute.location == location) + return attribute.semantic; + } + + // Not a vertex attribute, or no remap_vertex_attributes entry. 
+ return join("TEXCOORD", location); +} + +void CompilerHLSL::emit_io_block(const SPIRVariable &var) +{ + auto &execution = get_entry_point(); + + auto &type = get(var.basetype); + add_resource_name(type.self); + + statement("struct ", to_name(type.self)); + begin_scope(); + type.member_name_cache.clear(); + + uint32_t base_location = get_decoration(var.self, DecorationLocation); + + for (uint32_t i = 0; i < uint32_t(type.member_types.size()); i++) + { + string semantic; + if (has_member_decoration(type.self, i, DecorationLocation)) + { + uint32_t location = get_member_decoration(type.self, i, DecorationLocation); + semantic = join(" : ", to_semantic(location, execution.model, var.storage)); + } + else + { + // If the block itself has a location, but not its members, use the implicit location. + // There could be a conflict if the block members partially specialize the locations. + // It is unclear how SPIR-V deals with this. Assume this does not happen for now. + uint32_t location = base_location + i; + semantic = join(" : ", to_semantic(location, execution.model, var.storage)); + } + + add_member_name(type, i); + + auto &membertype = get(type.member_types[i]); + statement(to_interpolation_qualifiers(get_member_decoration_bitset(type.self, i)), + variable_decl(membertype, to_member_name(type, i)), semantic, ";"); + } + + end_scope_decl(); + statement(""); + + statement("static ", variable_decl(var), ";"); + statement(""); +} + +void CompilerHLSL::emit_interface_block_in_struct(const SPIRVariable &var, unordered_set &active_locations) +{ + auto &execution = get_entry_point(); + auto type = get(var.basetype); + + string binding; + bool use_location_number = true; + bool legacy = hlsl_options.shader_model <= 30; + if (execution.model == ExecutionModelFragment && var.storage == StorageClassOutput) + { + // Dual-source blending is achieved in HLSL by emitting to SV_Target0 and 1. 
+ uint32_t index = get_decoration(var.self, DecorationIndex); + uint32_t location = get_decoration(var.self, DecorationLocation); + + if (index != 0 && location != 0) + SPIRV_CROSS_THROW("Dual-source blending is only supported on MRT #0 in HLSL."); + + binding = join(legacy ? "COLOR" : "SV_Target", location + index); + use_location_number = false; + if (legacy) // COLOR must be a four-component vector on legacy shader model targets (HLSL ERR_COLOR_4COMP) + type.vecsize = 4; + } + + const auto get_vacant_location = [&]() -> uint32_t { + for (uint32_t i = 0; i < 64; i++) + if (!active_locations.count(i)) + return i; + SPIRV_CROSS_THROW("All locations from 0 to 63 are exhausted."); + }; + + bool need_matrix_unroll = var.storage == StorageClassInput && execution.model == ExecutionModelVertex; + + auto &m = ir.meta[var.self].decoration; + auto name = to_name(var.self); + if (use_location_number) + { + uint32_t location_number; + + // If an explicit location exists, use it with TEXCOORD[N] semantic. + // Otherwise, pick a vacant location. + if (m.decoration_flags.get(DecorationLocation)) + location_number = m.location; + else + location_number = get_vacant_location(); + + // Allow semantic remap if specified. + auto semantic = to_semantic(location_number, execution.model, var.storage); + + if (need_matrix_unroll && type.columns > 1) + { + if (!type.array.empty()) + SPIRV_CROSS_THROW("Arrays of matrices used as input/output. This is not supported."); + + // Unroll matrices. 
+ for (uint32_t i = 0; i < type.columns; i++) + { + SPIRType newtype = type; + newtype.columns = 1; + + string effective_semantic; + if (hlsl_options.flatten_matrix_vertex_input_semantics) + effective_semantic = to_semantic(location_number, execution.model, var.storage); + else + effective_semantic = join(semantic, "_", i); + + statement(to_interpolation_qualifiers(get_decoration_bitset(var.self)), + variable_decl(newtype, join(name, "_", i)), " : ", effective_semantic, ";"); + active_locations.insert(location_number++); + } + } + else + { + statement(to_interpolation_qualifiers(get_decoration_bitset(var.self)), variable_decl(type, name), " : ", + semantic, ";"); + + // Structs and arrays should consume more locations. + uint32_t consumed_locations = type_to_consumed_locations(type); + for (uint32_t i = 0; i < consumed_locations; i++) + active_locations.insert(location_number + i); + } + } + else + statement(variable_decl(type, name), " : ", binding, ";"); +} + +std::string CompilerHLSL::builtin_to_glsl(spv::BuiltIn builtin, spv::StorageClass storage) +{ + switch (builtin) + { + case BuiltInVertexId: + return "gl_VertexID"; + case BuiltInInstanceId: + return "gl_InstanceID"; + case BuiltInNumWorkgroups: + { + if (!num_workgroups_builtin) + SPIRV_CROSS_THROW("NumWorkgroups builtin is used, but remap_num_workgroups_builtin() was not called. " + "Cannot emit code for this builtin."); + + auto &var = get(num_workgroups_builtin); + auto &type = get(var.basetype); + auto ret = join(to_name(num_workgroups_builtin), "_", get_member_name(type.self, 0)); + ParsedIR::sanitize_underscores(ret); + return ret; + } + case BuiltInPointCoord: + // Crude hack, but there is no real alternative. This path is only enabled if point_coord_compat is set. 
+ return "float2(0.5f, 0.5f)"; + case BuiltInSubgroupLocalInvocationId: + return "WaveGetLaneIndex()"; + case BuiltInSubgroupSize: + return "WaveGetLaneCount()"; + + default: + return CompilerGLSL::builtin_to_glsl(builtin, storage); + } +} + +void CompilerHLSL::emit_builtin_variables() +{ + Bitset builtins = active_input_builtins; + builtins.merge_or(active_output_builtins); + + bool need_base_vertex_info = false; + + // Emit global variables for the interface variables which are statically used by the shader. + builtins.for_each_bit([&](uint32_t i) { + const char *type = nullptr; + auto builtin = static_cast(i); + uint32_t array_size = 0; + + switch (builtin) + { + case BuiltInFragCoord: + case BuiltInPosition: + type = "float4"; + break; + + case BuiltInFragDepth: + type = "float"; + break; + + case BuiltInVertexId: + case BuiltInVertexIndex: + case BuiltInInstanceIndex: + type = "int"; + if (hlsl_options.support_nonzero_base_vertex_base_instance) + need_base_vertex_info = true; + break; + + case BuiltInInstanceId: + case BuiltInSampleId: + type = "int"; + break; + + case BuiltInPointSize: + if (hlsl_options.point_size_compat) + { + // Just emit the global variable, it will be ignored. + type = "float"; + break; + } + else + SPIRV_CROSS_THROW(join("Unsupported builtin in HLSL: ", unsigned(builtin))); + + case BuiltInGlobalInvocationId: + case BuiltInLocalInvocationId: + case BuiltInWorkgroupId: + type = "uint3"; + break; + + case BuiltInLocalInvocationIndex: + type = "uint"; + break; + + case BuiltInFrontFacing: + type = "bool"; + break; + + case BuiltInNumWorkgroups: + case BuiltInPointCoord: + // Handled specially. 
+ break; + + case BuiltInSubgroupLocalInvocationId: + case BuiltInSubgroupSize: + if (hlsl_options.shader_model < 60) + SPIRV_CROSS_THROW("Need SM 6.0 for Wave ops."); + break; + + case BuiltInSubgroupEqMask: + case BuiltInSubgroupLtMask: + case BuiltInSubgroupLeMask: + case BuiltInSubgroupGtMask: + case BuiltInSubgroupGeMask: + if (hlsl_options.shader_model < 60) + SPIRV_CROSS_THROW("Need SM 6.0 for Wave ops."); + type = "uint4"; + break; + + case BuiltInClipDistance: + array_size = clip_distance_count; + type = "float"; + break; + + case BuiltInCullDistance: + array_size = cull_distance_count; + type = "float"; + break; + + case BuiltInSampleMask: + type = "int"; + break; + + default: + SPIRV_CROSS_THROW(join("Unsupported builtin in HLSL: ", unsigned(builtin))); + } + + StorageClass storage = active_input_builtins.get(i) ? StorageClassInput : StorageClassOutput; + + if (type) + { + if (array_size) + statement("static ", type, " ", builtin_to_glsl(builtin, storage), "[", array_size, "];"); + else + statement("static ", type, " ", builtin_to_glsl(builtin, storage), ";"); + } + + // SampleMask can be both in and out with sample builtin, in this case we have already + // declared the input variable and we need to add the output one now. + if (builtin == BuiltInSampleMask && storage == StorageClassInput && this->active_output_builtins.get(i)) + { + statement("static ", type, " ", this->builtin_to_glsl(builtin, StorageClassOutput), ";"); + } + }); + + if (need_base_vertex_info) + { + statement("cbuffer SPIRV_Cross_VertexInfo"); + begin_scope(); + statement("int SPIRV_Cross_BaseVertex;"); + statement("int SPIRV_Cross_BaseInstance;"); + end_scope_decl(); + statement(""); + } +} + +void CompilerHLSL::emit_composite_constants() +{ + // HLSL cannot declare structs or arrays inline, so we must move them out to + // global constants directly. 
+ bool emitted = false; + + ir.for_each_typed_id([&](uint32_t, SPIRConstant &c) { + if (c.specialization) + return; + + auto &type = this->get(c.constant_type); + if (type.basetype == SPIRType::Struct || !type.array.empty()) + { + auto name = to_name(c.self); + statement("static const ", variable_decl(type, name), " = ", constant_expression(c), ";"); + emitted = true; + } + }); + + if (emitted) + statement(""); +} + +void CompilerHLSL::emit_specialization_constants_and_structs() +{ + bool emitted = false; + SpecializationConstant wg_x, wg_y, wg_z; + ID workgroup_size_id = get_work_group_size_specialization_constants(wg_x, wg_y, wg_z); + + auto loop_lock = ir.create_loop_hard_lock(); + for (auto &id_ : ir.ids_for_constant_or_type) + { + auto &id = ir.ids[id_]; + + if (id.get_type() == TypeConstant) + { + auto &c = id.get(); + + if (c.self == workgroup_size_id) + { + statement("static const uint3 gl_WorkGroupSize = ", + constant_expression(get(workgroup_size_id)), ";"); + emitted = true; + } + else if (c.specialization) + { + auto &type = get(c.constant_type); + auto name = to_name(c.self); + + // HLSL does not support specialization constants, so fallback to macros. 
+ c.specialization_constant_macro_name = + constant_value_macro_name(get_decoration(c.self, DecorationSpecId)); + + statement("#ifndef ", c.specialization_constant_macro_name); + statement("#define ", c.specialization_constant_macro_name, " ", constant_expression(c)); + statement("#endif"); + statement("static const ", variable_decl(type, name), " = ", c.specialization_constant_macro_name, ";"); + emitted = true; + } + } + else if (id.get_type() == TypeConstantOp) + { + auto &c = id.get(); + auto &type = get(c.basetype); + auto name = to_name(c.self); + statement("static const ", variable_decl(type, name), " = ", constant_op_expression(c), ";"); + emitted = true; + } + else if (id.get_type() == TypeType) + { + auto &type = id.get(); + if (type.basetype == SPIRType::Struct && type.array.empty() && !type.pointer && + (!ir.meta[type.self].decoration.decoration_flags.get(DecorationBlock) && + !ir.meta[type.self].decoration.decoration_flags.get(DecorationBufferBlock))) + { + if (emitted) + statement(""); + emitted = false; + + emit_struct(type); + } + } + } + + if (emitted) + statement(""); +} + +void CompilerHLSL::replace_illegal_names() +{ + static const unordered_set keywords = { + // Additional HLSL specific keywords. + "line", "linear", "matrix", "point", "row_major", "sampler", + }; + + CompilerGLSL::replace_illegal_names(keywords); + CompilerGLSL::replace_illegal_names(); +} + +void CompilerHLSL::declare_undefined_values() +{ + bool emitted = false; + ir.for_each_typed_id([&](uint32_t, const SPIRUndef &undef) { + auto &type = this->get(undef.basetype); + // OpUndef can be void for some reason ... 
+ if (type.basetype == SPIRType::Void) + return; + + string initializer; + if (options.force_zero_initialized_variables && type_can_zero_initialize(type)) + initializer = join(" = ", to_zero_initialized_expression(undef.basetype)); + + statement("static ", variable_decl(type, to_name(undef.self), undef.self), initializer, ";"); + emitted = true; + }); + + if (emitted) + statement(""); +} + +void CompilerHLSL::emit_resources() +{ + auto &execution = get_entry_point(); + + replace_illegal_names(); + + emit_specialization_constants_and_structs(); + emit_composite_constants(); + + bool emitted = false; + + // Output UBOs and SSBOs + ir.for_each_typed_id([&](uint32_t, SPIRVariable &var) { + auto &type = this->get(var.basetype); + + bool is_block_storage = type.storage == StorageClassStorageBuffer || type.storage == StorageClassUniform; + bool has_block_flags = ir.meta[type.self].decoration.decoration_flags.get(DecorationBlock) || + ir.meta[type.self].decoration.decoration_flags.get(DecorationBufferBlock); + + if (var.storage != StorageClassFunction && type.pointer && is_block_storage && !is_hidden_variable(var) && + has_block_flags) + { + emit_buffer_block(var); + emitted = true; + } + }); + + // Output push constant blocks + ir.for_each_typed_id([&](uint32_t, SPIRVariable &var) { + auto &type = this->get(var.basetype); + if (var.storage != StorageClassFunction && type.pointer && type.storage == StorageClassPushConstant && + !is_hidden_variable(var)) + { + emit_push_constant_block(var); + emitted = true; + } + }); + + if (execution.model == ExecutionModelVertex && hlsl_options.shader_model <= 30) + { + statement("uniform float4 gl_HalfPixel;"); + emitted = true; + } + + bool skip_separate_image_sampler = !combined_image_samplers.empty() || hlsl_options.shader_model <= 30; + + // Output Uniform Constants (values, samplers, images, etc). 
+ ir.for_each_typed_id([&](uint32_t, SPIRVariable &var) { + auto &type = this->get(var.basetype); + + // If we're remapping separate samplers and images, only emit the combined samplers. + if (skip_separate_image_sampler) + { + // Sampler buffers are always used without a sampler, and they will also work in regular D3D. + bool sampler_buffer = type.basetype == SPIRType::Image && type.image.dim == DimBuffer; + bool separate_image = type.basetype == SPIRType::Image && type.image.sampled == 1; + bool separate_sampler = type.basetype == SPIRType::Sampler; + if (!sampler_buffer && (separate_image || separate_sampler)) + return; + } + + if (var.storage != StorageClassFunction && !is_builtin_variable(var) && !var.remapped_variable && + type.pointer && (type.storage == StorageClassUniformConstant || type.storage == StorageClassAtomicCounter)) + { + emit_uniform(var); + emitted = true; + } + }); + + if (emitted) + statement(""); + emitted = false; + + // Emit builtin input and output variables here. + emit_builtin_variables(); + + ir.for_each_typed_id([&](uint32_t, SPIRVariable &var) { + auto &type = this->get(var.basetype); + bool block = ir.meta[type.self].decoration.decoration_flags.get(DecorationBlock); + + // Do not emit I/O blocks here. + // I/O blocks can be arrayed, so we must deal with them separately to support geometry shaders + // and tessellation down the line. + if (!block && var.storage != StorageClassFunction && !var.remapped_variable && type.pointer && + (var.storage == StorageClassInput || var.storage == StorageClassOutput) && !is_builtin_variable(var) && + interface_variable_exists_in_entry_point(var.self)) + { + // Only emit non-builtins which are not blocks here. Builtin variables are handled separately. 
+ emit_interface_block_globally(var); + emitted = true; + } + }); + + if (emitted) + statement(""); + emitted = false; + + require_input = false; + require_output = false; + unordered_set active_inputs; + unordered_set active_outputs; + SmallVector input_variables; + SmallVector output_variables; + ir.for_each_typed_id([&](uint32_t, SPIRVariable &var) { + auto &type = this->get(var.basetype); + bool block = ir.meta[type.self].decoration.decoration_flags.get(DecorationBlock); + + if (var.storage != StorageClassInput && var.storage != StorageClassOutput) + return; + + // Do not emit I/O blocks here. + // I/O blocks can be arrayed, so we must deal with them separately to support geometry shaders + // and tessellation down the line. + if (!block && !var.remapped_variable && type.pointer && !is_builtin_variable(var) && + interface_variable_exists_in_entry_point(var.self)) + { + if (var.storage == StorageClassInput) + input_variables.push_back(&var); + else + output_variables.push_back(&var); + } + + // Reserve input and output locations for block variables as necessary. + if (block && !is_builtin_variable(var) && interface_variable_exists_in_entry_point(var.self)) + { + auto &active = var.storage == StorageClassInput ? active_inputs : active_outputs; + for (uint32_t i = 0; i < uint32_t(type.member_types.size()); i++) + { + if (has_member_decoration(type.self, i, DecorationLocation)) + { + uint32_t location = get_member_decoration(type.self, i, DecorationLocation); + active.insert(location); + } + } + + // Emit the block struct and a global variable here. 
+ emit_io_block(var); + } + }); + + const auto variable_compare = [&](const SPIRVariable *a, const SPIRVariable *b) -> bool { + // Sort input and output variables based on, from more robust to less robust: + // - Location + // - Variable has a location + // - Name comparison + // - Variable has a name + // - Fallback: ID + bool has_location_a = has_decoration(a->self, DecorationLocation); + bool has_location_b = has_decoration(b->self, DecorationLocation); + + if (has_location_a && has_location_b) + { + return get_decoration(a->self, DecorationLocation) < get_decoration(b->self, DecorationLocation); + } + else if (has_location_a && !has_location_b) + return true; + else if (!has_location_a && has_location_b) + return false; + + const auto &name1 = to_name(a->self); + const auto &name2 = to_name(b->self); + + if (name1.empty() && name2.empty()) + return a->self < b->self; + else if (name1.empty()) + return true; + else if (name2.empty()) + return false; + + return name1.compare(name2) < 0; + }; + + auto input_builtins = active_input_builtins; + input_builtins.clear(BuiltInNumWorkgroups); + input_builtins.clear(BuiltInPointCoord); + input_builtins.clear(BuiltInSubgroupSize); + input_builtins.clear(BuiltInSubgroupLocalInvocationId); + input_builtins.clear(BuiltInSubgroupEqMask); + input_builtins.clear(BuiltInSubgroupLtMask); + input_builtins.clear(BuiltInSubgroupLeMask); + input_builtins.clear(BuiltInSubgroupGtMask); + input_builtins.clear(BuiltInSubgroupGeMask); + + if (!input_variables.empty() || !input_builtins.empty()) + { + require_input = true; + statement("struct SPIRV_Cross_Input"); + + begin_scope(); + sort(input_variables.begin(), input_variables.end(), variable_compare); + for (auto var : input_variables) + emit_interface_block_in_struct(*var, active_inputs); + emit_builtin_inputs_in_struct(); + end_scope_decl(); + statement(""); + } + + if (!output_variables.empty() || !active_output_builtins.empty()) + { + require_output = true; + statement("struct 
SPIRV_Cross_Output"); + + begin_scope(); + // FIXME: Use locations properly if they exist. + sort(output_variables.begin(), output_variables.end(), variable_compare); + for (auto var : output_variables) + emit_interface_block_in_struct(*var, active_outputs); + emit_builtin_outputs_in_struct(); + end_scope_decl(); + statement(""); + } + + // Global variables. + for (auto global : global_variables) + { + auto &var = get(global); + if (var.storage != StorageClassOutput) + { + if (!variable_is_lut(var)) + { + add_resource_name(var.self); + + const char *storage = nullptr; + switch (var.storage) + { + case StorageClassWorkgroup: + storage = "groupshared"; + break; + + default: + storage = "static"; + break; + } + + string initializer; + if (options.force_zero_initialized_variables && var.storage == StorageClassPrivate && + !var.initializer && !var.static_expression && type_can_zero_initialize(get_variable_data_type(var))) + { + initializer = join(" = ", to_zero_initialized_expression(get_variable_data_type_id(var))); + } + statement(storage, " ", variable_decl(var), initializer, ";"); + + emitted = true; + } + } + } + + if (emitted) + statement(""); + + declare_undefined_values(); + + if (requires_op_fmod) + { + static const char *types[] = { + "float", + "float2", + "float3", + "float4", + }; + + for (auto &type : types) + { + statement(type, " mod(", type, " x, ", type, " y)"); + begin_scope(); + statement("return x - y * floor(x / y);"); + end_scope(); + statement(""); + } + } + + emit_texture_size_variants(required_texture_size_variants.srv, "4", false, ""); + for (uint32_t norm = 0; norm < 3; norm++) + { + for (uint32_t comp = 0; comp < 4; comp++) + { + static const char *qualifiers[] = { "", "unorm ", "snorm " }; + static const char *vecsizes[] = { "", "2", "3", "4" }; + emit_texture_size_variants(required_texture_size_variants.uav[norm][comp], vecsizes[comp], true, + qualifiers[norm]); + } + } + + if (requires_fp16_packing) + { + // HLSL does not pack into a 
single word sadly :( + statement("uint spvPackHalf2x16(float2 value)"); + begin_scope(); + statement("uint2 Packed = f32tof16(value);"); + statement("return Packed.x | (Packed.y << 16);"); + end_scope(); + statement(""); + + statement("float2 spvUnpackHalf2x16(uint value)"); + begin_scope(); + statement("return f16tof32(uint2(value & 0xffff, value >> 16));"); + end_scope(); + statement(""); + } + + if (requires_uint2_packing) + { + statement("uint64_t spvPackUint2x32(uint2 value)"); + begin_scope(); + statement("return (uint64_t(value.y) << 32) | uint64_t(value.x);"); + end_scope(); + statement(""); + + statement("uint2 spvUnpackUint2x32(uint64_t value)"); + begin_scope(); + statement("uint2 Unpacked;"); + statement("Unpacked.x = uint(value & 0xffffffff);"); + statement("Unpacked.y = uint(value >> 32);"); + statement("return Unpacked;"); + end_scope(); + statement(""); + } + + if (requires_explicit_fp16_packing) + { + // HLSL does not pack into a single word sadly :( + statement("uint spvPackFloat2x16(min16float2 value)"); + begin_scope(); + statement("uint2 Packed = f32tof16(value);"); + statement("return Packed.x | (Packed.y << 16);"); + end_scope(); + statement(""); + + statement("min16float2 spvUnpackFloat2x16(uint value)"); + begin_scope(); + statement("return min16float2(f16tof32(uint2(value & 0xffff, value >> 16)));"); + end_scope(); + statement(""); + } + + // HLSL does not seem to have builtins for these operation, so roll them by hand ... 
+ if (requires_unorm8_packing) + { + statement("uint spvPackUnorm4x8(float4 value)"); + begin_scope(); + statement("uint4 Packed = uint4(round(saturate(value) * 255.0));"); + statement("return Packed.x | (Packed.y << 8) | (Packed.z << 16) | (Packed.w << 24);"); + end_scope(); + statement(""); + + statement("float4 spvUnpackUnorm4x8(uint value)"); + begin_scope(); + statement("uint4 Packed = uint4(value & 0xff, (value >> 8) & 0xff, (value >> 16) & 0xff, value >> 24);"); + statement("return float4(Packed) / 255.0;"); + end_scope(); + statement(""); + } + + if (requires_snorm8_packing) + { + statement("uint spvPackSnorm4x8(float4 value)"); + begin_scope(); + statement("int4 Packed = int4(round(clamp(value, -1.0, 1.0) * 127.0)) & 0xff;"); + statement("return uint(Packed.x | (Packed.y << 8) | (Packed.z << 16) | (Packed.w << 24));"); + end_scope(); + statement(""); + + statement("float4 spvUnpackSnorm4x8(uint value)"); + begin_scope(); + statement("int SignedValue = int(value);"); + statement("int4 Packed = int4(SignedValue << 24, SignedValue << 16, SignedValue << 8, SignedValue) >> 24;"); + statement("return clamp(float4(Packed) / 127.0, -1.0, 1.0);"); + end_scope(); + statement(""); + } + + if (requires_unorm16_packing) + { + statement("uint spvPackUnorm2x16(float2 value)"); + begin_scope(); + statement("uint2 Packed = uint2(round(saturate(value) * 65535.0));"); + statement("return Packed.x | (Packed.y << 16);"); + end_scope(); + statement(""); + + statement("float2 spvUnpackUnorm2x16(uint value)"); + begin_scope(); + statement("uint2 Packed = uint2(value & 0xffff, value >> 16);"); + statement("return float2(Packed) / 65535.0;"); + end_scope(); + statement(""); + } + + if (requires_snorm16_packing) + { + statement("uint spvPackSnorm2x16(float2 value)"); + begin_scope(); + statement("int2 Packed = int2(round(clamp(value, -1.0, 1.0) * 32767.0)) & 0xffff;"); + statement("return uint(Packed.x | (Packed.y << 16));"); + end_scope(); + statement(""); + + statement("float2 
spvUnpackSnorm2x16(uint value)"); + begin_scope(); + statement("int SignedValue = int(value);"); + statement("int2 Packed = int2(SignedValue << 16, SignedValue) >> 16;"); + statement("return clamp(float2(Packed) / 32767.0, -1.0, 1.0);"); + end_scope(); + statement(""); + } + + if (requires_bitfield_insert) + { + static const char *types[] = { "uint", "uint2", "uint3", "uint4" }; + for (auto &type : types) + { + statement(type, " spvBitfieldInsert(", type, " Base, ", type, " Insert, uint Offset, uint Count)"); + begin_scope(); + statement("uint Mask = Count == 32 ? 0xffffffff : (((1u << Count) - 1) << (Offset & 31));"); + statement("return (Base & ~Mask) | ((Insert << Offset) & Mask);"); + end_scope(); + statement(""); + } + } + + if (requires_bitfield_extract) + { + static const char *unsigned_types[] = { "uint", "uint2", "uint3", "uint4" }; + for (auto &type : unsigned_types) + { + statement(type, " spvBitfieldUExtract(", type, " Base, uint Offset, uint Count)"); + begin_scope(); + statement("uint Mask = Count == 32 ? 0xffffffff : ((1 << Count) - 1);"); + statement("return (Base >> Offset) & Mask;"); + end_scope(); + statement(""); + } + + // In this overload, we will have to do sign-extension, which we will emulate by shifting up and down. + static const char *signed_types[] = { "int", "int2", "int3", "int4" }; + for (auto &type : signed_types) + { + statement(type, " spvBitfieldSExtract(", type, " Base, int Offset, int Count)"); + begin_scope(); + statement("int Mask = Count == 32 ? -1 : ((1 << Count) - 1);"); + statement(type, " Masked = (Base >> Offset) & Mask;"); + statement("int ExtendShift = (32 - Count) & 31;"); + statement("return (Masked << ExtendShift) >> ExtendShift;"); + end_scope(); + statement(""); + } + } + + if (requires_inverse_2x2) + { + statement("// Returns the inverse of a matrix, by using the algorithm of calculating the classical"); + statement("// adjoint and dividing by the determinant. 
The contents of the matrix are changed."); + statement("float2x2 spvInverse(float2x2 m)"); + begin_scope(); + statement("float2x2 adj; // The adjoint matrix (inverse after dividing by determinant)"); + statement_no_indent(""); + statement("// Create the transpose of the cofactors, as the classical adjoint of the matrix."); + statement("adj[0][0] = m[1][1];"); + statement("adj[0][1] = -m[0][1];"); + statement_no_indent(""); + statement("adj[1][0] = -m[1][0];"); + statement("adj[1][1] = m[0][0];"); + statement_no_indent(""); + statement("// Calculate the determinant as a combination of the cofactors of the first row."); + statement("float det = (adj[0][0] * m[0][0]) + (adj[0][1] * m[1][0]);"); + statement_no_indent(""); + statement("// Divide the classical adjoint matrix by the determinant."); + statement("// If determinant is zero, matrix is not invertable, so leave it unchanged."); + statement("return (det != 0.0f) ? (adj * (1.0f / det)) : m;"); + end_scope(); + statement(""); + } + + if (requires_inverse_3x3) + { + statement("// Returns the determinant of a 2x2 matrix."); + statement("float spvDet2x2(float a1, float a2, float b1, float b2)"); + begin_scope(); + statement("return a1 * b2 - b1 * a2;"); + end_scope(); + statement_no_indent(""); + statement("// Returns the inverse of a matrix, by using the algorithm of calculating the classical"); + statement("// adjoint and dividing by the determinant. 
The contents of the matrix are changed."); + statement("float3x3 spvInverse(float3x3 m)"); + begin_scope(); + statement("float3x3 adj; // The adjoint matrix (inverse after dividing by determinant)"); + statement_no_indent(""); + statement("// Create the transpose of the cofactors, as the classical adjoint of the matrix."); + statement("adj[0][0] = spvDet2x2(m[1][1], m[1][2], m[2][1], m[2][2]);"); + statement("adj[0][1] = -spvDet2x2(m[0][1], m[0][2], m[2][1], m[2][2]);"); + statement("adj[0][2] = spvDet2x2(m[0][1], m[0][2], m[1][1], m[1][2]);"); + statement_no_indent(""); + statement("adj[1][0] = -spvDet2x2(m[1][0], m[1][2], m[2][0], m[2][2]);"); + statement("adj[1][1] = spvDet2x2(m[0][0], m[0][2], m[2][0], m[2][2]);"); + statement("adj[1][2] = -spvDet2x2(m[0][0], m[0][2], m[1][0], m[1][2]);"); + statement_no_indent(""); + statement("adj[2][0] = spvDet2x2(m[1][0], m[1][1], m[2][0], m[2][1]);"); + statement("adj[2][1] = -spvDet2x2(m[0][0], m[0][1], m[2][0], m[2][1]);"); + statement("adj[2][2] = spvDet2x2(m[0][0], m[0][1], m[1][0], m[1][1]);"); + statement_no_indent(""); + statement("// Calculate the determinant as a combination of the cofactors of the first row."); + statement("float det = (adj[0][0] * m[0][0]) + (adj[0][1] * m[1][0]) + (adj[0][2] * m[2][0]);"); + statement_no_indent(""); + statement("// Divide the classical adjoint matrix by the determinant."); + statement("// If determinant is zero, matrix is not invertable, so leave it unchanged."); + statement("return (det != 0.0f) ? 
(adj * (1.0f / det)) : m;"); + end_scope(); + statement(""); + } + + if (requires_inverse_4x4) + { + if (!requires_inverse_3x3) + { + statement("// Returns the determinant of a 2x2 matrix."); + statement("float spvDet2x2(float a1, float a2, float b1, float b2)"); + begin_scope(); + statement("return a1 * b2 - b1 * a2;"); + end_scope(); + statement(""); + } + + statement("// Returns the determinant of a 3x3 matrix."); + statement("float spvDet3x3(float a1, float a2, float a3, float b1, float b2, float b3, float c1, " + "float c2, float c3)"); + begin_scope(); + statement("return a1 * spvDet2x2(b2, b3, c2, c3) - b1 * spvDet2x2(a2, a3, c2, c3) + c1 * " + "spvDet2x2(a2, a3, " + "b2, b3);"); + end_scope(); + statement_no_indent(""); + statement("// Returns the inverse of a matrix, by using the algorithm of calculating the classical"); + statement("// adjoint and dividing by the determinant. The contents of the matrix are changed."); + statement("float4x4 spvInverse(float4x4 m)"); + begin_scope(); + statement("float4x4 adj; // The adjoint matrix (inverse after dividing by determinant)"); + statement_no_indent(""); + statement("// Create the transpose of the cofactors, as the classical adjoint of the matrix."); + statement( + "adj[0][0] = spvDet3x3(m[1][1], m[1][2], m[1][3], m[2][1], m[2][2], m[2][3], m[3][1], m[3][2], " + "m[3][3]);"); + statement( + "adj[0][1] = -spvDet3x3(m[0][1], m[0][2], m[0][3], m[2][1], m[2][2], m[2][3], m[3][1], m[3][2], " + "m[3][3]);"); + statement( + "adj[0][2] = spvDet3x3(m[0][1], m[0][2], m[0][3], m[1][1], m[1][2], m[1][3], m[3][1], m[3][2], " + "m[3][3]);"); + statement( + "adj[0][3] = -spvDet3x3(m[0][1], m[0][2], m[0][3], m[1][1], m[1][2], m[1][3], m[2][1], m[2][2], " + "m[2][3]);"); + statement_no_indent(""); + statement( + "adj[1][0] = -spvDet3x3(m[1][0], m[1][2], m[1][3], m[2][0], m[2][2], m[2][3], m[3][0], m[3][2], " + "m[3][3]);"); + statement( + "adj[1][1] = spvDet3x3(m[0][0], m[0][2], m[0][3], m[2][0], m[2][2], m[2][3], m[3][0], 
m[3][2], " + "m[3][3]);"); + statement( + "adj[1][2] = -spvDet3x3(m[0][0], m[0][2], m[0][3], m[1][0], m[1][2], m[1][3], m[3][0], m[3][2], " + "m[3][3]);"); + statement( + "adj[1][3] = spvDet3x3(m[0][0], m[0][2], m[0][3], m[1][0], m[1][2], m[1][3], m[2][0], m[2][2], " + "m[2][3]);"); + statement_no_indent(""); + statement( + "adj[2][0] = spvDet3x3(m[1][0], m[1][1], m[1][3], m[2][0], m[2][1], m[2][3], m[3][0], m[3][1], " + "m[3][3]);"); + statement( + "adj[2][1] = -spvDet3x3(m[0][0], m[0][1], m[0][3], m[2][0], m[2][1], m[2][3], m[3][0], m[3][1], " + "m[3][3]);"); + statement( + "adj[2][2] = spvDet3x3(m[0][0], m[0][1], m[0][3], m[1][0], m[1][1], m[1][3], m[3][0], m[3][1], " + "m[3][3]);"); + statement( + "adj[2][3] = -spvDet3x3(m[0][0], m[0][1], m[0][3], m[1][0], m[1][1], m[1][3], m[2][0], m[2][1], " + "m[2][3]);"); + statement_no_indent(""); + statement( + "adj[3][0] = -spvDet3x3(m[1][0], m[1][1], m[1][2], m[2][0], m[2][1], m[2][2], m[3][0], m[3][1], " + "m[3][2]);"); + statement( + "adj[3][1] = spvDet3x3(m[0][0], m[0][1], m[0][2], m[2][0], m[2][1], m[2][2], m[3][0], m[3][1], " + "m[3][2]);"); + statement( + "adj[3][2] = -spvDet3x3(m[0][0], m[0][1], m[0][2], m[1][0], m[1][1], m[1][2], m[3][0], m[3][1], " + "m[3][2]);"); + statement( + "adj[3][3] = spvDet3x3(m[0][0], m[0][1], m[0][2], m[1][0], m[1][1], m[1][2], m[2][0], m[2][1], " + "m[2][2]);"); + statement_no_indent(""); + statement("// Calculate the determinant as a combination of the cofactors of the first row."); + statement("float det = (adj[0][0] * m[0][0]) + (adj[0][1] * m[1][0]) + (adj[0][2] * m[2][0]) + (adj[0][3] " + "* m[3][0]);"); + statement_no_indent(""); + statement("// Divide the classical adjoint matrix by the determinant."); + statement("// If determinant is zero, matrix is not invertable, so leave it unchanged."); + statement("return (det != 0.0f) ? (adj * (1.0f / det)) : m;"); + end_scope(); + statement(""); + } + + if (requires_scalar_reflect) + { + // FP16/FP64? No templates in HLSL. 
+ statement("float spvReflect(float i, float n)"); + begin_scope(); + statement("return i - 2.0 * dot(n, i) * n;"); + end_scope(); + statement(""); + } + + if (requires_scalar_refract) + { + // FP16/FP64? No templates in HLSL. + statement("float spvRefract(float i, float n, float eta)"); + begin_scope(); + statement("float NoI = n * i;"); + statement("float NoI2 = NoI * NoI;"); + statement("float k = 1.0 - eta * eta * (1.0 - NoI2);"); + statement("if (k < 0.0)"); + begin_scope(); + statement("return 0.0;"); + end_scope(); + statement("else"); + begin_scope(); + statement("return eta * i - (eta * NoI + sqrt(k)) * n;"); + end_scope(); + end_scope(); + statement(""); + } + + if (requires_scalar_faceforward) + { + // FP16/FP64? No templates in HLSL. + statement("float spvFaceForward(float n, float i, float nref)"); + begin_scope(); + statement("return i * nref < 0.0 ? n : -n;"); + end_scope(); + statement(""); + } +} + +void CompilerHLSL::emit_texture_size_variants(uint64_t variant_mask, const char *vecsize_qualifier, bool uav, + const char *type_qualifier) +{ + if (variant_mask == 0) + return; + + static const char *types[QueryTypeCount] = { "float", "int", "uint" }; + static const char *dims[QueryDimCount] = { "Texture1D", "Texture1DArray", "Texture2D", "Texture2DArray", + "Texture3D", "Buffer", "TextureCube", "TextureCubeArray", + "Texture2DMS", "Texture2DMSArray" }; + + static const bool has_lod[QueryDimCount] = { true, true, true, true, true, false, true, true, false, false }; + + static const char *ret_types[QueryDimCount] = { + "uint", "uint2", "uint2", "uint3", "uint3", "uint", "uint2", "uint3", "uint2", "uint3", + }; + + static const uint32_t return_arguments[QueryDimCount] = { + 1, 2, 2, 3, 3, 1, 2, 3, 2, 3, + }; + + for (uint32_t index = 0; index < QueryDimCount; index++) + { + for (uint32_t type_index = 0; type_index < QueryTypeCount; type_index++) + { + uint32_t bit = 16 * type_index + index; + uint64_t mask = 1ull << bit; + + if ((variant_mask & mask) == 
0) + continue; + + statement(ret_types[index], " spv", (uav ? "Image" : "Texture"), "Size(", (uav ? "RW" : ""), + dims[index], "<", type_qualifier, types[type_index], vecsize_qualifier, "> Tex, ", + (uav ? "" : "uint Level, "), "out uint Param)"); + begin_scope(); + statement(ret_types[index], " ret;"); + switch (return_arguments[index]) + { + case 1: + if (has_lod[index] && !uav) + statement("Tex.GetDimensions(Level, ret.x, Param);"); + else + { + statement("Tex.GetDimensions(ret.x);"); + statement("Param = 0u;"); + } + break; + case 2: + if (has_lod[index] && !uav) + statement("Tex.GetDimensions(Level, ret.x, ret.y, Param);"); + else if (!uav) + statement("Tex.GetDimensions(ret.x, ret.y, Param);"); + else + { + statement("Tex.GetDimensions(ret.x, ret.y);"); + statement("Param = 0u;"); + } + break; + case 3: + if (has_lod[index] && !uav) + statement("Tex.GetDimensions(Level, ret.x, ret.y, ret.z, Param);"); + else if (!uav) + statement("Tex.GetDimensions(ret.x, ret.y, ret.z, Param);"); + else + { + statement("Tex.GetDimensions(ret.x, ret.y, ret.z);"); + statement("Param = 0u;"); + } + break; + } + + statement("return ret;"); + end_scope(); + statement(""); + } + } +} + +string CompilerHLSL::layout_for_member(const SPIRType &type, uint32_t index) +{ + auto &flags = get_member_decoration_bitset(type.self, index); + + // HLSL can emit row_major or column_major decoration in any struct. + // Do not try to merge combined decorations for children like in GLSL. + + // Flip the convention. HLSL is a bit odd in that the memory layout is column major ... but the language API is "row-major". + // The way to deal with this is to multiply everything in inverse order, and reverse the memory layout. 
+ if (flags.get(DecorationColMajor)) + return "row_major "; + else if (flags.get(DecorationRowMajor)) + return "column_major "; + + return ""; +} + +void CompilerHLSL::emit_struct_member(const SPIRType &type, uint32_t member_type_id, uint32_t index, + const string &qualifier, uint32_t base_offset) +{ + auto &membertype = get(member_type_id); + + Bitset memberflags; + auto &memb = ir.meta[type.self].members; + if (index < memb.size()) + memberflags = memb[index].decoration_flags; + + string qualifiers; + bool is_block = ir.meta[type.self].decoration.decoration_flags.get(DecorationBlock) || + ir.meta[type.self].decoration.decoration_flags.get(DecorationBufferBlock); + + if (is_block) + qualifiers = to_interpolation_qualifiers(memberflags); + + string packing_offset; + bool is_push_constant = type.storage == StorageClassPushConstant; + + if ((has_extended_decoration(type.self, SPIRVCrossDecorationExplicitOffset) || is_push_constant) && + has_member_decoration(type.self, index, DecorationOffset)) + { + uint32_t offset = memb[index].offset - base_offset; + if (offset & 3) + SPIRV_CROSS_THROW("Cannot pack on tighter bounds than 4 bytes in HLSL."); + + static const char *packing_swizzle[] = { "", ".y", ".z", ".w" }; + packing_offset = join(" : packoffset(c", offset / 16, packing_swizzle[(offset & 15) >> 2], ")"); + } + + statement(layout_for_member(type, index), qualifiers, qualifier, + variable_decl(membertype, to_member_name(type, index)), packing_offset, ";"); +} + +void CompilerHLSL::emit_buffer_block(const SPIRVariable &var) +{ + auto &type = get(var.basetype); + + bool is_uav = var.storage == StorageClassStorageBuffer || has_decoration(type.self, DecorationBufferBlock); + + if (is_uav) + { + Bitset flags = ir.get_buffer_block_flags(var); + bool is_readonly = flags.get(DecorationNonWritable) && !is_hlsl_force_storage_buffer_as_uav(var.self); + bool is_coherent = flags.get(DecorationCoherent) && !is_readonly; + bool is_interlocked = 
interlocked_resources.count(var.self) > 0; + const char *type_name = "ByteAddressBuffer "; + if (!is_readonly) + type_name = is_interlocked ? "RasterizerOrderedByteAddressBuffer " : "RWByteAddressBuffer "; + add_resource_name(var.self); + statement(is_coherent ? "globallycoherent " : "", type_name, to_name(var.self), type_to_array_glsl(type), + to_resource_binding(var), ";"); + } + else + { + if (type.array.empty()) + { + // Flatten the top-level struct so we can use packoffset, + // this restriction is similar to GLSL where layout(offset) is not possible on sub-structs. + flattened_structs[var.self] = false; + + // Prefer the block name if possible. + auto buffer_name = to_name(type.self, false); + if (ir.meta[type.self].decoration.alias.empty() || + resource_names.find(buffer_name) != end(resource_names) || + block_names.find(buffer_name) != end(block_names)) + { + buffer_name = get_block_fallback_name(var.self); + } + + add_variable(block_names, resource_names, buffer_name); + + // If for some reason buffer_name is an illegal name, make a final fallback to a workaround name. + // This cannot conflict with anything else, so we're safe now. + if (buffer_name.empty()) + buffer_name = join("_", get(var.basetype).self, "_", var.self); + + uint32_t failed_index = 0; + if (buffer_is_packing_standard(type, BufferPackingHLSLCbufferPackOffset, &failed_index)) + set_extended_decoration(type.self, SPIRVCrossDecorationExplicitOffset); + else + { + SPIRV_CROSS_THROW(join("cbuffer ID ", var.self, " (name: ", buffer_name, "), member index ", + failed_index, " (name: ", to_member_name(type, failed_index), + ") cannot be expressed with either HLSL packing layout or packoffset.")); + } + + block_names.insert(buffer_name); + + // Save for post-reflection later. + declared_block_names[var.self] = buffer_name; + + type.member_name_cache.clear(); + // var.self can be used as a backup name for the block name, + // so we need to make sure we don't disturb the name here on a recompile. 
+ // It will need to be reset if we have to recompile. + preserve_alias_on_reset(var.self); + add_resource_name(var.self); + statement("cbuffer ", buffer_name, to_resource_binding(var)); + begin_scope(); + + uint32_t i = 0; + for (auto &member : type.member_types) + { + add_member_name(type, i); + auto backup_name = get_member_name(type.self, i); + auto member_name = to_member_name(type, i); + member_name = join(to_name(var.self), "_", member_name); + ParsedIR::sanitize_underscores(member_name); + set_member_name(type.self, i, member_name); + emit_struct_member(type, member, i, ""); + set_member_name(type.self, i, backup_name); + i++; + } + + end_scope_decl(); + statement(""); + } + else + { + if (hlsl_options.shader_model < 51) + SPIRV_CROSS_THROW( + "Need ConstantBuffer to use arrays of UBOs, but this is only supported in SM 5.1."); + + add_resource_name(type.self); + add_resource_name(var.self); + + // ConstantBuffer does not support packoffset, so it is unuseable unless everything aligns as we expect. 
+ uint32_t failed_index = 0; + if (!buffer_is_packing_standard(type, BufferPackingHLSLCbuffer, &failed_index)) + { + SPIRV_CROSS_THROW(join("HLSL ConstantBuffer ID ", var.self, " (name: ", to_name(type.self), + "), member index ", failed_index, " (name: ", to_member_name(type, failed_index), + ") cannot be expressed with normal HLSL packing rules.")); + } + + emit_struct(get(type.self)); + statement("ConstantBuffer<", to_name(type.self), "> ", to_name(var.self), type_to_array_glsl(type), + to_resource_binding(var), ";"); + } + } +} + +void CompilerHLSL::emit_push_constant_block(const SPIRVariable &var) +{ + if (root_constants_layout.empty()) + { + emit_buffer_block(var); + } + else + { + for (const auto &layout : root_constants_layout) + { + auto &type = get(var.basetype); + + uint32_t failed_index = 0; + if (buffer_is_packing_standard(type, BufferPackingHLSLCbufferPackOffset, &failed_index, layout.start, + layout.end)) + set_extended_decoration(type.self, SPIRVCrossDecorationExplicitOffset); + else + { + SPIRV_CROSS_THROW(join("Root constant cbuffer ID ", var.self, " (name: ", to_name(type.self), ")", + ", member index ", failed_index, " (name: ", to_member_name(type, failed_index), + ") cannot be expressed with either HLSL packing layout or packoffset.")); + } + + flattened_structs[var.self] = false; + type.member_name_cache.clear(); + add_resource_name(var.self); + auto &memb = ir.meta[type.self].members; + + statement("cbuffer SPIRV_CROSS_RootConstant_", to_name(var.self), + to_resource_register(HLSL_BINDING_AUTO_PUSH_CONSTANT_BIT, 'b', layout.binding, layout.space)); + begin_scope(); + + // Index of the next field in the generated root constant constant buffer + auto constant_index = 0u; + + // Iterate over all member of the push constant and check which of the fields + // fit into the given root constant layout. 
+ for (auto i = 0u; i < memb.size(); i++) + { + const auto offset = memb[i].offset; + if (layout.start <= offset && offset < layout.end) + { + const auto &member = type.member_types[i]; + + add_member_name(type, constant_index); + auto backup_name = get_member_name(type.self, i); + auto member_name = to_member_name(type, i); + member_name = join(to_name(var.self), "_", member_name); + ParsedIR::sanitize_underscores(member_name); + set_member_name(type.self, constant_index, member_name); + emit_struct_member(type, member, i, "", layout.start); + set_member_name(type.self, constant_index, backup_name); + + constant_index++; + } + } + + end_scope_decl(); + } + } +} + +string CompilerHLSL::to_sampler_expression(uint32_t id) +{ + auto expr = join("_", to_expression(id)); + auto index = expr.find_first_of('['); + if (index == string::npos) + { + return expr + "_sampler"; + } + else + { + // We have an expression like _ident[array], so we cannot tack on _sampler, insert it inside the string instead. + return expr.insert(index, "_sampler"); + } +} + +void CompilerHLSL::emit_sampled_image_op(uint32_t result_type, uint32_t result_id, uint32_t image_id, uint32_t samp_id) +{ + if (hlsl_options.shader_model >= 40 && combined_image_samplers.empty()) + { + set(result_id, result_type, image_id, samp_id); + } + else + { + // Make sure to suppress usage tracking. It is illegal to create temporaries of opaque types. + emit_op(result_type, result_id, to_combined_image_sampler(image_id, samp_id), true, true); + } +} + +string CompilerHLSL::to_func_call_arg(const SPIRFunction::Parameter &arg, uint32_t id) +{ + string arg_str = CompilerGLSL::to_func_call_arg(arg, id); + + if (hlsl_options.shader_model <= 30) + return arg_str; + + // Manufacture automatic sampler arg if the arg is a SampledImage texture and we're in modern HLSL. 
+ auto &type = expression_type(id); + + // We don't have to consider combined image samplers here via OpSampledImage because + // those variables cannot be passed as arguments to functions. + // Only global SampledImage variables may be used as arguments. + if (type.basetype == SPIRType::SampledImage && type.image.dim != DimBuffer) + arg_str += ", " + to_sampler_expression(id); + + return arg_str; +} + +void CompilerHLSL::emit_function_prototype(SPIRFunction &func, const Bitset &return_flags) +{ + if (func.self != ir.default_entry_point) + add_function_overload(func); + + auto &execution = get_entry_point(); + // Avoid shadow declarations. + local_variable_names = resource_names; + + string decl; + + auto &type = get(func.return_type); + if (type.array.empty()) + { + decl += flags_to_qualifiers_glsl(type, return_flags); + decl += type_to_glsl(type); + decl += " "; + } + else + { + // We cannot return arrays in HLSL, so "return" through an out variable. + decl = "void "; + } + + if (func.self == ir.default_entry_point) + { + if (execution.model == ExecutionModelVertex) + decl += "vert_main"; + else if (execution.model == ExecutionModelFragment) + decl += "frag_main"; + else if (execution.model == ExecutionModelGLCompute) + decl += "comp_main"; + else + SPIRV_CROSS_THROW("Unsupported execution model."); + processing_entry_point = true; + } + else + decl += to_name(func.self); + + decl += "("; + SmallVector arglist; + + if (!type.array.empty()) + { + // Fake array returns by writing to an out array instead. + string out_argument; + out_argument += "out "; + out_argument += type_to_glsl(type); + out_argument += " "; + out_argument += "spvReturnValue"; + out_argument += type_to_array_glsl(type); + arglist.push_back(move(out_argument)); + } + + for (auto &arg : func.arguments) + { + // Do not pass in separate images or samplers if we're remapping + // to combined image samplers. 
+ if (skip_argument(arg.id)) + continue; + + // Might change the variable name if it already exists in this function. + // SPIRV OpName doesn't have any semantic effect, so it's valid for an implementation + // to use same name for variables. + // Since we want to make the GLSL debuggable and somewhat sane, use fallback names for variables which are duplicates. + add_local_variable_name(arg.id); + + arglist.push_back(argument_decl(arg)); + + // Flatten a combined sampler to two separate arguments in modern HLSL. + auto &arg_type = get(arg.type); + if (hlsl_options.shader_model > 30 && arg_type.basetype == SPIRType::SampledImage && + arg_type.image.dim != DimBuffer) + { + // Manufacture automatic sampler arg for SampledImage texture + arglist.push_back(join(image_is_comparison(arg_type, arg.id) ? "SamplerComparisonState " : "SamplerState ", + to_sampler_expression(arg.id), type_to_array_glsl(arg_type))); + } + + // Hold a pointer to the parameter so we can invalidate the readonly field if needed. + auto *var = maybe_get(arg.id); + if (var) + var->parameter = &arg; + } + + for (auto &arg : func.shadow_arguments) + { + // Might change the variable name if it already exists in this function. + // SPIRV OpName doesn't have any semantic effect, so it's valid for an implementation + // to use same name for variables. + // Since we want to make the GLSL debuggable and somewhat sane, use fallback names for variables which are duplicates. + add_local_variable_name(arg.id); + + arglist.push_back(argument_decl(arg)); + + // Hold a pointer to the parameter so we can invalidate the readonly field if needed. + auto *var = maybe_get(arg.id); + if (var) + var->parameter = &arg; + } + + decl += merge(arglist); + decl += ")"; + statement(decl); +} + +void CompilerHLSL::emit_hlsl_entry_point() +{ + SmallVector arguments; + + if (require_input) + arguments.push_back("SPIRV_Cross_Input stage_input"); + + // Add I/O blocks as separate arguments with appropriate storage qualifier. 
+ ir.for_each_typed_id([&](uint32_t, SPIRVariable &var) { + auto &type = this->get(var.basetype); + bool block = ir.meta[type.self].decoration.decoration_flags.get(DecorationBlock); + + if (var.storage != StorageClassInput && var.storage != StorageClassOutput) + return; + + if (block && !is_builtin_variable(var) && interface_variable_exists_in_entry_point(var.self)) + { + if (var.storage == StorageClassInput) + { + arguments.push_back(join("in ", variable_decl(type, join("stage_input", to_name(var.self))))); + } + else if (var.storage == StorageClassOutput) + { + arguments.push_back(join("out ", variable_decl(type, join("stage_output", to_name(var.self))))); + } + } + }); + + auto &execution = get_entry_point(); + + switch (execution.model) + { + case ExecutionModelGLCompute: + { + SpecializationConstant wg_x, wg_y, wg_z; + get_work_group_size_specialization_constants(wg_x, wg_y, wg_z); + + uint32_t x = execution.workgroup_size.x; + uint32_t y = execution.workgroup_size.y; + uint32_t z = execution.workgroup_size.z; + + auto x_expr = wg_x.id ? get(wg_x.id).specialization_constant_macro_name : to_string(x); + auto y_expr = wg_y.id ? get(wg_y.id).specialization_constant_macro_name : to_string(y); + auto z_expr = wg_z.id ? get(wg_z.id).specialization_constant_macro_name : to_string(z); + + statement("[numthreads(", x_expr, ", ", y_expr, ", ", z_expr, ")]"); + break; + } + case ExecutionModelFragment: + if (execution.flags.get(ExecutionModeEarlyFragmentTests)) + statement("[earlydepthstencil]"); + break; + default: + break; + } + + statement(require_output ? "SPIRV_Cross_Output " : "void ", "main(", merge(arguments), ")"); + begin_scope(); + bool legacy = hlsl_options.shader_model <= 30; + + // Copy builtins from entry point arguments to globals. 
+ active_input_builtins.for_each_bit([&](uint32_t i) { + auto builtin = builtin_to_glsl(static_cast(i), StorageClassInput); + switch (static_cast(i)) + { + case BuiltInFragCoord: + // VPOS in D3D9 is sampled at integer locations, apply half-pixel offset to be consistent. + // TODO: Do we need an option here? Any reason why a D3D9 shader would be used + // on a D3D10+ system with a different rasterization config? + if (legacy) + statement(builtin, " = stage_input.", builtin, " + float4(0.5f, 0.5f, 0.0f, 0.0f);"); + else + { + statement(builtin, " = stage_input.", builtin, ";"); + // ZW are undefined in D3D9, only do this fixup here. + statement(builtin, ".w = 1.0 / ", builtin, ".w;"); + } + break; + + case BuiltInVertexId: + case BuiltInVertexIndex: + case BuiltInInstanceIndex: + // D3D semantics are uint, but shader wants int. + if (hlsl_options.support_nonzero_base_vertex_base_instance) + { + if (static_cast(i) == BuiltInInstanceIndex) + statement(builtin, " = int(stage_input.", builtin, ") + SPIRV_Cross_BaseInstance;"); + else + statement(builtin, " = int(stage_input.", builtin, ") + SPIRV_Cross_BaseVertex;"); + } + else + statement(builtin, " = int(stage_input.", builtin, ");"); + break; + + case BuiltInInstanceId: + // D3D semantics are uint, but shader wants int. + statement(builtin, " = int(stage_input.", builtin, ");"); + break; + + case BuiltInNumWorkgroups: + case BuiltInPointCoord: + case BuiltInSubgroupSize: + case BuiltInSubgroupLocalInvocationId: + break; + + case BuiltInSubgroupEqMask: + // Emulate these ... + // No 64-bit in HLSL, so have to do it in 32-bit and unroll. 
+ statement("gl_SubgroupEqMask = 1u << (WaveGetLaneIndex() - uint4(0, 32, 64, 96));"); + statement("if (WaveGetLaneIndex() >= 32) gl_SubgroupEqMask.x = 0;"); + statement("if (WaveGetLaneIndex() >= 64 || WaveGetLaneIndex() < 32) gl_SubgroupEqMask.y = 0;"); + statement("if (WaveGetLaneIndex() >= 96 || WaveGetLaneIndex() < 64) gl_SubgroupEqMask.z = 0;"); + statement("if (WaveGetLaneIndex() < 96) gl_SubgroupEqMask.w = 0;"); + break; + + case BuiltInSubgroupGeMask: + // Emulate these ... + // No 64-bit in HLSL, so have to do it in 32-bit and unroll. + statement("gl_SubgroupGeMask = ~((1u << (WaveGetLaneIndex() - uint4(0, 32, 64, 96))) - 1u);"); + statement("if (WaveGetLaneIndex() >= 32) gl_SubgroupGeMask.x = 0u;"); + statement("if (WaveGetLaneIndex() >= 64) gl_SubgroupGeMask.y = 0u;"); + statement("if (WaveGetLaneIndex() >= 96) gl_SubgroupGeMask.z = 0u;"); + statement("if (WaveGetLaneIndex() < 32) gl_SubgroupGeMask.y = ~0u;"); + statement("if (WaveGetLaneIndex() < 64) gl_SubgroupGeMask.z = ~0u;"); + statement("if (WaveGetLaneIndex() < 96) gl_SubgroupGeMask.w = ~0u;"); + break; + + case BuiltInSubgroupGtMask: + // Emulate these ... + // No 64-bit in HLSL, so have to do it in 32-bit and unroll. + statement("uint gt_lane_index = WaveGetLaneIndex() + 1;"); + statement("gl_SubgroupGtMask = ~((1u << (gt_lane_index - uint4(0, 32, 64, 96))) - 1u);"); + statement("if (gt_lane_index >= 32) gl_SubgroupGtMask.x = 0u;"); + statement("if (gt_lane_index >= 64) gl_SubgroupGtMask.y = 0u;"); + statement("if (gt_lane_index >= 96) gl_SubgroupGtMask.z = 0u;"); + statement("if (gt_lane_index >= 128) gl_SubgroupGtMask.w = 0u;"); + statement("if (gt_lane_index < 32) gl_SubgroupGtMask.y = ~0u;"); + statement("if (gt_lane_index < 64) gl_SubgroupGtMask.z = ~0u;"); + statement("if (gt_lane_index < 96) gl_SubgroupGtMask.w = ~0u;"); + break; + + case BuiltInSubgroupLeMask: + // Emulate these ... + // No 64-bit in HLSL, so have to do it in 32-bit and unroll. 
+ statement("uint le_lane_index = WaveGetLaneIndex() + 1;"); + statement("gl_SubgroupLeMask = (1u << (le_lane_index - uint4(0, 32, 64, 96))) - 1u;"); + statement("if (le_lane_index >= 32) gl_SubgroupLeMask.x = ~0u;"); + statement("if (le_lane_index >= 64) gl_SubgroupLeMask.y = ~0u;"); + statement("if (le_lane_index >= 96) gl_SubgroupLeMask.z = ~0u;"); + statement("if (le_lane_index >= 128) gl_SubgroupLeMask.w = ~0u;"); + statement("if (le_lane_index < 32) gl_SubgroupLeMask.y = 0u;"); + statement("if (le_lane_index < 64) gl_SubgroupLeMask.z = 0u;"); + statement("if (le_lane_index < 96) gl_SubgroupLeMask.w = 0u;"); + break; + + case BuiltInSubgroupLtMask: + // Emulate these ... + // No 64-bit in HLSL, so have to do it in 32-bit and unroll. + statement("gl_SubgroupLtMask = (1u << (WaveGetLaneIndex() - uint4(0, 32, 64, 96))) - 1u;"); + statement("if (WaveGetLaneIndex() >= 32) gl_SubgroupLtMask.x = ~0u;"); + statement("if (WaveGetLaneIndex() >= 64) gl_SubgroupLtMask.y = ~0u;"); + statement("if (WaveGetLaneIndex() >= 96) gl_SubgroupLtMask.z = ~0u;"); + statement("if (WaveGetLaneIndex() < 32) gl_SubgroupLtMask.y = 0u;"); + statement("if (WaveGetLaneIndex() < 64) gl_SubgroupLtMask.z = 0u;"); + statement("if (WaveGetLaneIndex() < 96) gl_SubgroupLtMask.w = 0u;"); + break; + + case BuiltInClipDistance: + for (uint32_t clip = 0; clip < clip_distance_count; clip++) + statement("gl_ClipDistance[", clip, "] = stage_input.gl_ClipDistance", clip / 4, ".", "xyzw"[clip & 3], + ";"); + break; + + case BuiltInCullDistance: + for (uint32_t cull = 0; cull < cull_distance_count; cull++) + statement("gl_CullDistance[", cull, "] = stage_input.gl_CullDistance", cull / 4, ".", "xyzw"[cull & 3], + ";"); + break; + + default: + statement(builtin, " = stage_input.", builtin, ";"); + break; + } + }); + + // Copy from stage input struct to globals. 
+ ir.for_each_typed_id([&](uint32_t, SPIRVariable &var) { + auto &type = this->get(var.basetype); + bool block = ir.meta[type.self].decoration.decoration_flags.get(DecorationBlock); + + if (var.storage != StorageClassInput) + return; + + bool need_matrix_unroll = var.storage == StorageClassInput && execution.model == ExecutionModelVertex; + + if (!block && !var.remapped_variable && type.pointer && !is_builtin_variable(var) && + interface_variable_exists_in_entry_point(var.self)) + { + auto name = to_name(var.self); + auto &mtype = this->get(var.basetype); + if (need_matrix_unroll && mtype.columns > 1) + { + // Unroll matrices. + for (uint32_t col = 0; col < mtype.columns; col++) + statement(name, "[", col, "] = stage_input.", name, "_", col, ";"); + } + else + { + statement(name, " = stage_input.", name, ";"); + } + } + + // I/O blocks don't use the common stage input/output struct, but separate outputs. + if (block && !is_builtin_variable(var) && interface_variable_exists_in_entry_point(var.self)) + { + auto name = to_name(var.self); + statement(name, " = stage_input", name, ";"); + } + }); + + // Run the shader. + if (execution.model == ExecutionModelVertex) + statement("vert_main();"); + else if (execution.model == ExecutionModelFragment) + statement("frag_main();"); + else if (execution.model == ExecutionModelGLCompute) + statement("comp_main();"); + else + SPIRV_CROSS_THROW("Unsupported shader stage."); + + // Copy block outputs. + ir.for_each_typed_id([&](uint32_t, SPIRVariable &var) { + auto &type = this->get(var.basetype); + bool block = ir.meta[type.self].decoration.decoration_flags.get(DecorationBlock); + + if (var.storage != StorageClassOutput) + return; + + // I/O blocks don't use the common stage input/output struct, but separate outputs. 
+ if (block && !is_builtin_variable(var) && interface_variable_exists_in_entry_point(var.self)) + { + auto name = to_name(var.self); + statement("stage_output", name, " = ", name, ";"); + } + }); + + // Copy stage outputs. + if (require_output) + { + statement("SPIRV_Cross_Output stage_output;"); + + // Copy builtins from globals to return struct. + active_output_builtins.for_each_bit([&](uint32_t i) { + // PointSize doesn't exist in HLSL. + if (i == BuiltInPointSize) + return; + + switch (static_cast(i)) + { + case BuiltInClipDistance: + for (uint32_t clip = 0; clip < clip_distance_count; clip++) + statement("stage_output.gl_ClipDistance", clip / 4, ".", "xyzw"[clip & 3], " = gl_ClipDistance[", + clip, "];"); + break; + + case BuiltInCullDistance: + for (uint32_t cull = 0; cull < cull_distance_count; cull++) + statement("stage_output.gl_CullDistance", cull / 4, ".", "xyzw"[cull & 3], " = gl_CullDistance[", + cull, "];"); + break; + + default: + { + auto builtin_expr = builtin_to_glsl(static_cast(i), StorageClassOutput); + statement("stage_output.", builtin_expr, " = ", builtin_expr, ";"); + break; + } + } + }); + + ir.for_each_typed_id([&](uint32_t, SPIRVariable &var) { + auto &type = this->get(var.basetype); + bool block = ir.meta[type.self].decoration.decoration_flags.get(DecorationBlock); + + if (var.storage != StorageClassOutput) + return; + + if (!block && var.storage != StorageClassFunction && !var.remapped_variable && type.pointer && + !is_builtin_variable(var) && interface_variable_exists_in_entry_point(var.self)) + { + auto name = to_name(var.self); + + if (legacy && execution.model == ExecutionModelFragment) + { + string output_filler; + for (uint32_t size = type.vecsize; size < 4; ++size) + output_filler += ", 0.0"; + + statement("stage_output.", name, " = float4(", name, output_filler, ");"); + } + else + { + statement("stage_output.", name, " = ", name, ";"); + } + } + }); + + statement("return stage_output;"); + } + + end_scope(); +} + +void 
CompilerHLSL::emit_fixup() +{ + if (is_vertex_like_shader()) + { + // Do various mangling on the gl_Position. + if (hlsl_options.shader_model <= 30) + { + statement("gl_Position.x = gl_Position.x - gl_HalfPixel.x * " + "gl_Position.w;"); + statement("gl_Position.y = gl_Position.y + gl_HalfPixel.y * " + "gl_Position.w;"); + } + + if (options.vertex.flip_vert_y) + statement("gl_Position.y = -gl_Position.y;"); + if (options.vertex.fixup_clipspace) + statement("gl_Position.z = (gl_Position.z + gl_Position.w) * 0.5;"); + } +} + +void CompilerHLSL::emit_texture_op(const Instruction &i, bool sparse) +{ + if (sparse) + SPIRV_CROSS_THROW("Sparse feedback not yet supported in HLSL."); + + auto *ops = stream(i); + auto op = static_cast(i.op); + uint32_t length = i.length; + + SmallVector inherited_expressions; + + uint32_t result_type = ops[0]; + uint32_t id = ops[1]; + VariableID img = ops[2]; + uint32_t coord = ops[3]; + uint32_t dref = 0; + uint32_t comp = 0; + bool gather = false; + bool proj = false; + const uint32_t *opt = nullptr; + auto *combined_image = maybe_get(img); + auto img_expr = to_expression(combined_image ? combined_image->image : img); + + inherited_expressions.push_back(coord); + + // Make sure non-uniform decoration is back-propagated to where it needs to be. 
+ if (has_decoration(img, DecorationNonUniformEXT)) + propagate_nonuniform_qualifier(img); + + switch (op) + { + case OpImageSampleDrefImplicitLod: + case OpImageSampleDrefExplicitLod: + dref = ops[4]; + opt = &ops[5]; + length -= 5; + break; + + case OpImageSampleProjDrefImplicitLod: + case OpImageSampleProjDrefExplicitLod: + dref = ops[4]; + proj = true; + opt = &ops[5]; + length -= 5; + break; + + case OpImageDrefGather: + dref = ops[4]; + opt = &ops[5]; + gather = true; + length -= 5; + break; + + case OpImageGather: + comp = ops[4]; + opt = &ops[5]; + gather = true; + length -= 5; + break; + + case OpImageSampleProjImplicitLod: + case OpImageSampleProjExplicitLod: + opt = &ops[4]; + length -= 4; + proj = true; + break; + + case OpImageQueryLod: + opt = &ops[4]; + length -= 4; + break; + + default: + opt = &ops[4]; + length -= 4; + break; + } + + auto &imgtype = expression_type(img); + uint32_t coord_components = 0; + switch (imgtype.image.dim) + { + case spv::Dim1D: + coord_components = 1; + break; + case spv::Dim2D: + coord_components = 2; + break; + case spv::Dim3D: + coord_components = 3; + break; + case spv::DimCube: + coord_components = 3; + break; + case spv::DimBuffer: + coord_components = 1; + break; + default: + coord_components = 2; + break; + } + + if (dref) + inherited_expressions.push_back(dref); + + if (imgtype.image.arrayed) + coord_components++; + + uint32_t bias = 0; + uint32_t lod = 0; + uint32_t grad_x = 0; + uint32_t grad_y = 0; + uint32_t coffset = 0; + uint32_t offset = 0; + uint32_t coffsets = 0; + uint32_t sample = 0; + uint32_t minlod = 0; + uint32_t flags = 0; + + if (length) + { + flags = opt[0]; + opt++; + length--; + } + + auto test = [&](uint32_t &v, uint32_t flag) { + if (length && (flags & flag)) + { + v = *opt++; + inherited_expressions.push_back(v); + length--; + } + }; + + test(bias, ImageOperandsBiasMask); + test(lod, ImageOperandsLodMask); + test(grad_x, ImageOperandsGradMask); + test(grad_y, ImageOperandsGradMask); + 
test(coffset, ImageOperandsConstOffsetMask); + test(offset, ImageOperandsOffsetMask); + test(coffsets, ImageOperandsConstOffsetsMask); + test(sample, ImageOperandsSampleMask); + test(minlod, ImageOperandsMinLodMask); + + string expr; + string texop; + + if (minlod != 0) + SPIRV_CROSS_THROW("MinLod texture operand not supported in HLSL."); + + if (op == OpImageFetch) + { + if (hlsl_options.shader_model < 40) + { + SPIRV_CROSS_THROW("texelFetch is not supported in HLSL shader model 2/3."); + } + texop += img_expr; + texop += ".Load"; + } + else if (op == OpImageQueryLod) + { + texop += img_expr; + texop += ".CalculateLevelOfDetail"; + } + else + { + auto &imgformat = get(imgtype.image.type); + if (imgformat.basetype != SPIRType::Float) + { + SPIRV_CROSS_THROW("Sampling non-float textures is not supported in HLSL."); + } + + if (hlsl_options.shader_model >= 40) + { + texop += img_expr; + + if (image_is_comparison(imgtype, img)) + { + if (gather) + { + SPIRV_CROSS_THROW("GatherCmp does not exist in HLSL."); + } + else if (lod || grad_x || grad_y) + { + // Assume we want a fixed level, and the only thing we can get in HLSL is SampleCmpLevelZero. 
+ texop += ".SampleCmpLevelZero"; + } + else + texop += ".SampleCmp"; + } + else if (gather) + { + uint32_t comp_num = evaluate_constant_u32(comp); + if (hlsl_options.shader_model >= 50) + { + switch (comp_num) + { + case 0: + texop += ".GatherRed"; + break; + case 1: + texop += ".GatherGreen"; + break; + case 2: + texop += ".GatherBlue"; + break; + case 3: + texop += ".GatherAlpha"; + break; + default: + SPIRV_CROSS_THROW("Invalid component."); + } + } + else + { + if (comp_num == 0) + texop += ".Gather"; + else + SPIRV_CROSS_THROW("HLSL shader model 4 can only gather from the red component."); + } + } + else if (bias) + texop += ".SampleBias"; + else if (grad_x || grad_y) + texop += ".SampleGrad"; + else if (lod) + texop += ".SampleLevel"; + else + texop += ".Sample"; + } + else + { + switch (imgtype.image.dim) + { + case Dim1D: + texop += "tex1D"; + break; + case Dim2D: + texop += "tex2D"; + break; + case Dim3D: + texop += "tex3D"; + break; + case DimCube: + texop += "texCUBE"; + break; + case DimRect: + case DimBuffer: + case DimSubpassData: + SPIRV_CROSS_THROW("Buffer texture support is not yet implemented for HLSL"); // TODO + default: + SPIRV_CROSS_THROW("Invalid dimension."); + } + + if (gather) + SPIRV_CROSS_THROW("textureGather is not supported in HLSL shader model 2/3."); + if (offset || coffset) + SPIRV_CROSS_THROW("textureOffset is not supported in HLSL shader model 2/3."); + + if (grad_x || grad_y) + texop += "grad"; + else if (lod) + texop += "lod"; + else if (bias) + texop += "bias"; + else if (proj || dref) + texop += "proj"; + } + } + + expr += texop; + expr += "("; + if (hlsl_options.shader_model < 40) + { + if (combined_image) + SPIRV_CROSS_THROW("Separate images/samplers are not supported in HLSL shader model 2/3."); + expr += to_expression(img); + } + else if (op != OpImageFetch) + { + string sampler_expr; + if (combined_image) + sampler_expr = to_expression(combined_image->sampler); + else + sampler_expr = to_sampler_expression(img); + expr 
+= sampler_expr; + } + + auto swizzle = [](uint32_t comps, uint32_t in_comps) -> const char * { + if (comps == in_comps) + return ""; + + switch (comps) + { + case 1: + return ".x"; + case 2: + return ".xy"; + case 3: + return ".xyz"; + default: + return ""; + } + }; + + bool forward = should_forward(coord); + + // The IR can give us more components than we need, so chop them off as needed. + string coord_expr; + auto &coord_type = expression_type(coord); + if (coord_components != coord_type.vecsize) + coord_expr = to_enclosed_expression(coord) + swizzle(coord_components, expression_type(coord).vecsize); + else + coord_expr = to_expression(coord); + + if (proj && hlsl_options.shader_model >= 40) // Legacy HLSL has "proj" operations which do this for us. + coord_expr = coord_expr + " / " + to_extract_component_expression(coord, coord_components); + + if (hlsl_options.shader_model < 40) + { + if (dref) + { + if (imgtype.image.dim != spv::Dim1D && imgtype.image.dim != spv::Dim2D) + { + SPIRV_CROSS_THROW( + "Depth comparison is only supported for 1D and 2D textures in HLSL shader model 2/3."); + } + + if (grad_x || grad_y) + SPIRV_CROSS_THROW("Depth comparison is not supported for grad sampling in HLSL shader model 2/3."); + + for (uint32_t size = coord_components; size < 2; ++size) + coord_expr += ", 0.0"; + + forward = forward && should_forward(dref); + coord_expr += ", " + to_expression(dref); + } + else if (lod || bias || proj) + { + for (uint32_t size = coord_components; size < 3; ++size) + coord_expr += ", 0.0"; + } + + if (lod) + { + coord_expr = "float4(" + coord_expr + ", " + to_expression(lod) + ")"; + } + else if (bias) + { + coord_expr = "float4(" + coord_expr + ", " + to_expression(bias) + ")"; + } + else if (proj) + { + coord_expr = "float4(" + coord_expr + ", " + to_extract_component_expression(coord, coord_components) + ")"; + } + else if (dref) + { + // A "normal" sample gets fed into tex2Dproj as well, because the + // regular tex2D accepts only two 
coordinates. + coord_expr = "float4(" + coord_expr + ", 1.0)"; + } + + if (!!lod + !!bias + !!proj > 1) + SPIRV_CROSS_THROW("Legacy HLSL can only use one of lod/bias/proj modifiers."); + } + + if (op == OpImageFetch) + { + if (imgtype.image.dim != DimBuffer && !imgtype.image.ms) + coord_expr = + join("int", coord_components + 1, "(", coord_expr, ", ", lod ? to_expression(lod) : string("0"), ")"); + } + else + expr += ", "; + expr += coord_expr; + + if (dref && hlsl_options.shader_model >= 40) + { + forward = forward && should_forward(dref); + expr += ", "; + + if (proj) + expr += to_enclosed_expression(dref) + " / " + to_extract_component_expression(coord, coord_components); + else + expr += to_expression(dref); + } + + if (!dref && (grad_x || grad_y)) + { + forward = forward && should_forward(grad_x); + forward = forward && should_forward(grad_y); + expr += ", "; + expr += to_expression(grad_x); + expr += ", "; + expr += to_expression(grad_y); + } + + if (!dref && lod && hlsl_options.shader_model >= 40 && op != OpImageFetch) + { + forward = forward && should_forward(lod); + expr += ", "; + expr += to_expression(lod); + } + + if (!dref && bias && hlsl_options.shader_model >= 40) + { + forward = forward && should_forward(bias); + expr += ", "; + expr += to_expression(bias); + } + + if (coffset) + { + forward = forward && should_forward(coffset); + expr += ", "; + expr += to_expression(coffset); + } + else if (offset) + { + forward = forward && should_forward(offset); + expr += ", "; + expr += to_expression(offset); + } + + if (sample) + { + expr += ", "; + expr += to_expression(sample); + } + + expr += ")"; + + if (dref && hlsl_options.shader_model < 40) + expr += ".x"; + + if (op == OpImageQueryLod) + { + // This is rather awkward. + // textureQueryLod returns two values, the "accessed level", + // as well as the actual LOD lambda. + // As far as I can tell, there is no way to get the .x component + // according to GLSL spec, and it depends on the sampler itself. 
+ // Just assume X == Y, so we will need to splat the result to a float2. + statement("float _", id, "_tmp = ", expr, ";"); + statement("float2 _", id, " = _", id, "_tmp.xx;"); + set(id, join("_", id), result_type, true); + } + else + { + emit_op(result_type, id, expr, forward, false); + } + + for (auto &inherit : inherited_expressions) + inherit_expression_dependencies(id, inherit); + + switch (op) + { + case OpImageSampleDrefImplicitLod: + case OpImageSampleImplicitLod: + case OpImageSampleProjImplicitLod: + case OpImageSampleProjDrefImplicitLod: + register_control_dependent_expression(id); + break; + + default: + break; + } +} + +string CompilerHLSL::to_resource_binding(const SPIRVariable &var) +{ + const auto &type = get(var.basetype); + + // We can remap push constant blocks, even if they don't have any binding decoration. + if (type.storage != StorageClassPushConstant && !has_decoration(var.self, DecorationBinding)) + return ""; + + char space = '\0'; + + HLSLBindingFlagBits resource_flags = HLSL_BINDING_AUTO_NONE_BIT; + + switch (type.basetype) + { + case SPIRType::SampledImage: + space = 't'; // SRV + resource_flags = HLSL_BINDING_AUTO_SRV_BIT; + break; + + case SPIRType::Image: + if (type.image.sampled == 2 && type.image.dim != DimSubpassData) + { + if (has_decoration(var.self, DecorationNonWritable) && hlsl_options.nonwritable_uav_texture_as_srv) + { + space = 't'; // SRV + resource_flags = HLSL_BINDING_AUTO_SRV_BIT; + } + else + { + space = 'u'; // UAV + resource_flags = HLSL_BINDING_AUTO_UAV_BIT; + } + } + else + { + space = 't'; // SRV + resource_flags = HLSL_BINDING_AUTO_SRV_BIT; + } + break; + + case SPIRType::Sampler: + space = 's'; + resource_flags = HLSL_BINDING_AUTO_SAMPLER_BIT; + break; + + case SPIRType::Struct: + { + auto storage = type.storage; + if (storage == StorageClassUniform) + { + if (has_decoration(type.self, DecorationBufferBlock)) + { + Bitset flags = ir.get_buffer_block_flags(var); + bool is_readonly = 
flags.get(DecorationNonWritable) && !is_hlsl_force_storage_buffer_as_uav(var.self); + space = is_readonly ? 't' : 'u'; // UAV + resource_flags = is_readonly ? HLSL_BINDING_AUTO_SRV_BIT : HLSL_BINDING_AUTO_UAV_BIT; + } + else if (has_decoration(type.self, DecorationBlock)) + { + space = 'b'; // Constant buffers + resource_flags = HLSL_BINDING_AUTO_CBV_BIT; + } + } + else if (storage == StorageClassPushConstant) + { + space = 'b'; // Constant buffers + resource_flags = HLSL_BINDING_AUTO_PUSH_CONSTANT_BIT; + } + else if (storage == StorageClassStorageBuffer) + { + // UAV or SRV depending on readonly flag. + Bitset flags = ir.get_buffer_block_flags(var); + bool is_readonly = flags.get(DecorationNonWritable) && !is_hlsl_force_storage_buffer_as_uav(var.self); + space = is_readonly ? 't' : 'u'; + resource_flags = is_readonly ? HLSL_BINDING_AUTO_SRV_BIT : HLSL_BINDING_AUTO_UAV_BIT; + } + + break; + } + default: + break; + } + + if (!space) + return ""; + + uint32_t desc_set = + resource_flags == HLSL_BINDING_AUTO_PUSH_CONSTANT_BIT ? ResourceBindingPushConstantDescriptorSet : 0u; + uint32_t binding = resource_flags == HLSL_BINDING_AUTO_PUSH_CONSTANT_BIT ? ResourceBindingPushConstantBinding : 0u; + + if (has_decoration(var.self, DecorationBinding)) + binding = get_decoration(var.self, DecorationBinding); + if (has_decoration(var.self, DecorationDescriptorSet)) + desc_set = get_decoration(var.self, DecorationDescriptorSet); + + return to_resource_register(resource_flags, space, binding, desc_set); +} + +string CompilerHLSL::to_resource_binding_sampler(const SPIRVariable &var) +{ + // For combined image samplers. 
+ if (!has_decoration(var.self, DecorationBinding)) + return ""; + + return to_resource_register(HLSL_BINDING_AUTO_SAMPLER_BIT, 's', get_decoration(var.self, DecorationBinding), + get_decoration(var.self, DecorationDescriptorSet)); +} + +void CompilerHLSL::remap_hlsl_resource_binding(HLSLBindingFlagBits type, uint32_t &desc_set, uint32_t &binding) +{ + auto itr = resource_bindings.find({ get_execution_model(), desc_set, binding }); + if (itr != end(resource_bindings)) + { + auto &remap = itr->second; + remap.second = true; + + switch (type) + { + case HLSL_BINDING_AUTO_PUSH_CONSTANT_BIT: + case HLSL_BINDING_AUTO_CBV_BIT: + desc_set = remap.first.cbv.register_space; + binding = remap.first.cbv.register_binding; + break; + + case HLSL_BINDING_AUTO_SRV_BIT: + desc_set = remap.first.srv.register_space; + binding = remap.first.srv.register_binding; + break; + + case HLSL_BINDING_AUTO_SAMPLER_BIT: + desc_set = remap.first.sampler.register_space; + binding = remap.first.sampler.register_binding; + break; + + case HLSL_BINDING_AUTO_UAV_BIT: + desc_set = remap.first.uav.register_space; + binding = remap.first.uav.register_binding; + break; + + default: + break; + } + } +} + +string CompilerHLSL::to_resource_register(HLSLBindingFlagBits flag, char space, uint32_t binding, uint32_t space_set) +{ + if ((flag & resource_binding_flags) == 0) + { + remap_hlsl_resource_binding(flag, space_set, binding); + + // The push constant block did not have a binding, and there were no remap for it, + // so, declare without register binding. 
+ if (flag == HLSL_BINDING_AUTO_PUSH_CONSTANT_BIT && space_set == ResourceBindingPushConstantDescriptorSet) + return ""; + + if (hlsl_options.shader_model >= 51) + return join(" : register(", space, binding, ", space", space_set, ")"); + else + return join(" : register(", space, binding, ")"); + } + else + return ""; +} + +void CompilerHLSL::emit_modern_uniform(const SPIRVariable &var) +{ + auto &type = get(var.basetype); + switch (type.basetype) + { + case SPIRType::SampledImage: + case SPIRType::Image: + { + bool is_coherent = false; + if (type.basetype == SPIRType::Image && type.image.sampled == 2) + is_coherent = has_decoration(var.self, DecorationCoherent); + + statement(is_coherent ? "globallycoherent " : "", image_type_hlsl_modern(type, var.self), " ", + to_name(var.self), type_to_array_glsl(type), to_resource_binding(var), ";"); + + if (type.basetype == SPIRType::SampledImage && type.image.dim != DimBuffer) + { + // For combined image samplers, also emit a combined image sampler. 
+ if (image_is_comparison(type, var.self)) + statement("SamplerComparisonState ", to_sampler_expression(var.self), type_to_array_glsl(type), + to_resource_binding_sampler(var), ";"); + else + statement("SamplerState ", to_sampler_expression(var.self), type_to_array_glsl(type), + to_resource_binding_sampler(var), ";"); + } + break; + } + + case SPIRType::Sampler: + if (comparison_ids.count(var.self)) + statement("SamplerComparisonState ", to_name(var.self), type_to_array_glsl(type), to_resource_binding(var), + ";"); + else + statement("SamplerState ", to_name(var.self), type_to_array_glsl(type), to_resource_binding(var), ";"); + break; + + default: + statement(variable_decl(var), to_resource_binding(var), ";"); + break; + } +} + +void CompilerHLSL::emit_legacy_uniform(const SPIRVariable &var) +{ + auto &type = get(var.basetype); + switch (type.basetype) + { + case SPIRType::Sampler: + case SPIRType::Image: + SPIRV_CROSS_THROW("Separate image and samplers not supported in legacy HLSL."); + + default: + statement(variable_decl(var), ";"); + break; + } +} + +void CompilerHLSL::emit_uniform(const SPIRVariable &var) +{ + add_resource_name(var.self); + if (hlsl_options.shader_model >= 40) + emit_modern_uniform(var); + else + emit_legacy_uniform(var); +} + +bool CompilerHLSL::emit_complex_bitcast(uint32_t, uint32_t, uint32_t) +{ + return false; +} + +string CompilerHLSL::bitcast_glsl_op(const SPIRType &out_type, const SPIRType &in_type) +{ + if (out_type.basetype == SPIRType::UInt && in_type.basetype == SPIRType::Int) + return type_to_glsl(out_type); + else if (out_type.basetype == SPIRType::UInt64 && in_type.basetype == SPIRType::Int64) + return type_to_glsl(out_type); + else if (out_type.basetype == SPIRType::UInt && in_type.basetype == SPIRType::Float) + return "asuint"; + else if (out_type.basetype == SPIRType::Int && in_type.basetype == SPIRType::UInt) + return type_to_glsl(out_type); + else if (out_type.basetype == SPIRType::Int64 && in_type.basetype == 
SPIRType::UInt64) + return type_to_glsl(out_type); + else if (out_type.basetype == SPIRType::Int && in_type.basetype == SPIRType::Float) + return "asint"; + else if (out_type.basetype == SPIRType::Float && in_type.basetype == SPIRType::UInt) + return "asfloat"; + else if (out_type.basetype == SPIRType::Float && in_type.basetype == SPIRType::Int) + return "asfloat"; + else if (out_type.basetype == SPIRType::Int64 && in_type.basetype == SPIRType::Double) + SPIRV_CROSS_THROW("Double to Int64 is not supported in HLSL."); + else if (out_type.basetype == SPIRType::UInt64 && in_type.basetype == SPIRType::Double) + SPIRV_CROSS_THROW("Double to UInt64 is not supported in HLSL."); + else if (out_type.basetype == SPIRType::Double && in_type.basetype == SPIRType::Int64) + return "asdouble"; + else if (out_type.basetype == SPIRType::Double && in_type.basetype == SPIRType::UInt64) + return "asdouble"; + else if (out_type.basetype == SPIRType::Half && in_type.basetype == SPIRType::UInt && in_type.vecsize == 1) + { + if (!requires_explicit_fp16_packing) + { + requires_explicit_fp16_packing = true; + force_recompile(); + } + return "spvUnpackFloat2x16"; + } + else if (out_type.basetype == SPIRType::UInt && in_type.basetype == SPIRType::Half && in_type.vecsize == 2) + { + if (!requires_explicit_fp16_packing) + { + requires_explicit_fp16_packing = true; + force_recompile(); + } + return "spvPackFloat2x16"; + } + else + return ""; +} + +void CompilerHLSL::emit_glsl_op(uint32_t result_type, uint32_t id, uint32_t eop, const uint32_t *args, uint32_t count) +{ + auto op = static_cast(eop); + + // If we need to do implicit bitcasts, make sure we do it with the correct type. 
+ uint32_t integer_width = get_integer_width_for_glsl_instruction(op, args, count); + auto int_type = to_signed_basetype(integer_width); + auto uint_type = to_unsigned_basetype(integer_width); + + switch (op) + { + case GLSLstd450InverseSqrt: + emit_unary_func_op(result_type, id, args[0], "rsqrt"); + break; + + case GLSLstd450Fract: + emit_unary_func_op(result_type, id, args[0], "frac"); + break; + + case GLSLstd450RoundEven: + if (hlsl_options.shader_model < 40) + SPIRV_CROSS_THROW("roundEven is not supported in HLSL shader model 2/3."); + emit_unary_func_op(result_type, id, args[0], "round"); + break; + + case GLSLstd450Acosh: + case GLSLstd450Asinh: + case GLSLstd450Atanh: + SPIRV_CROSS_THROW("Inverse hyperbolics are not supported on HLSL."); + + case GLSLstd450FMix: + case GLSLstd450IMix: + emit_trinary_func_op(result_type, id, args[0], args[1], args[2], "lerp"); + break; + + case GLSLstd450Atan2: + emit_binary_func_op(result_type, id, args[0], args[1], "atan2"); + break; + + case GLSLstd450Fma: + emit_trinary_func_op(result_type, id, args[0], args[1], args[2], "mad"); + break; + + case GLSLstd450InterpolateAtCentroid: + emit_unary_func_op(result_type, id, args[0], "EvaluateAttributeAtCentroid"); + break; + case GLSLstd450InterpolateAtSample: + emit_binary_func_op(result_type, id, args[0], args[1], "EvaluateAttributeAtSample"); + break; + case GLSLstd450InterpolateAtOffset: + emit_binary_func_op(result_type, id, args[0], args[1], "EvaluateAttributeSnapped"); + break; + + case GLSLstd450PackHalf2x16: + if (!requires_fp16_packing) + { + requires_fp16_packing = true; + force_recompile(); + } + emit_unary_func_op(result_type, id, args[0], "spvPackHalf2x16"); + break; + + case GLSLstd450UnpackHalf2x16: + if (!requires_fp16_packing) + { + requires_fp16_packing = true; + force_recompile(); + } + emit_unary_func_op(result_type, id, args[0], "spvUnpackHalf2x16"); + break; + + case GLSLstd450PackSnorm4x8: + if (!requires_snorm8_packing) + { + requires_snorm8_packing = 
true; + force_recompile(); + } + emit_unary_func_op(result_type, id, args[0], "spvPackSnorm4x8"); + break; + + case GLSLstd450UnpackSnorm4x8: + if (!requires_snorm8_packing) + { + requires_snorm8_packing = true; + force_recompile(); + } + emit_unary_func_op(result_type, id, args[0], "spvUnpackSnorm4x8"); + break; + + case GLSLstd450PackUnorm4x8: + if (!requires_unorm8_packing) + { + requires_unorm8_packing = true; + force_recompile(); + } + emit_unary_func_op(result_type, id, args[0], "spvPackUnorm4x8"); + break; + + case GLSLstd450UnpackUnorm4x8: + if (!requires_unorm8_packing) + { + requires_unorm8_packing = true; + force_recompile(); + } + emit_unary_func_op(result_type, id, args[0], "spvUnpackUnorm4x8"); + break; + + case GLSLstd450PackSnorm2x16: + if (!requires_snorm16_packing) + { + requires_snorm16_packing = true; + force_recompile(); + } + emit_unary_func_op(result_type, id, args[0], "spvPackSnorm2x16"); + break; + + case GLSLstd450UnpackSnorm2x16: + if (!requires_snorm16_packing) + { + requires_snorm16_packing = true; + force_recompile(); + } + emit_unary_func_op(result_type, id, args[0], "spvUnpackSnorm2x16"); + break; + + case GLSLstd450PackUnorm2x16: + if (!requires_unorm16_packing) + { + requires_unorm16_packing = true; + force_recompile(); + } + emit_unary_func_op(result_type, id, args[0], "spvPackUnorm2x16"); + break; + + case GLSLstd450UnpackUnorm2x16: + if (!requires_unorm16_packing) + { + requires_unorm16_packing = true; + force_recompile(); + } + emit_unary_func_op(result_type, id, args[0], "spvUnpackUnorm2x16"); + break; + + case GLSLstd450PackDouble2x32: + case GLSLstd450UnpackDouble2x32: + SPIRV_CROSS_THROW("packDouble2x32/unpackDouble2x32 not supported in HLSL."); + + case GLSLstd450FindILsb: + { + auto basetype = expression_type(args[0]).basetype; + emit_unary_func_op_cast(result_type, id, args[0], "firstbitlow", basetype, basetype); + break; + } + + case GLSLstd450FindSMsb: + emit_unary_func_op_cast(result_type, id, args[0], "firstbithigh", 
int_type, int_type); + break; + + case GLSLstd450FindUMsb: + emit_unary_func_op_cast(result_type, id, args[0], "firstbithigh", uint_type, uint_type); + break; + + case GLSLstd450MatrixInverse: + { + auto &type = get(result_type); + if (type.vecsize == 2 && type.columns == 2) + { + if (!requires_inverse_2x2) + { + requires_inverse_2x2 = true; + force_recompile(); + } + } + else if (type.vecsize == 3 && type.columns == 3) + { + if (!requires_inverse_3x3) + { + requires_inverse_3x3 = true; + force_recompile(); + } + } + else if (type.vecsize == 4 && type.columns == 4) + { + if (!requires_inverse_4x4) + { + requires_inverse_4x4 = true; + force_recompile(); + } + } + emit_unary_func_op(result_type, id, args[0], "spvInverse"); + break; + } + + case GLSLstd450Normalize: + // HLSL does not support scalar versions here. + if (expression_type(args[0]).vecsize == 1) + { + // Returns -1 or 1 for valid input, sign() does the job. + emit_unary_func_op(result_type, id, args[0], "sign"); + } + else + CompilerGLSL::emit_glsl_op(result_type, id, eop, args, count); + break; + + case GLSLstd450Reflect: + if (get(result_type).vecsize == 1) + { + if (!requires_scalar_reflect) + { + requires_scalar_reflect = true; + force_recompile(); + } + emit_binary_func_op(result_type, id, args[0], args[1], "spvReflect"); + } + else + CompilerGLSL::emit_glsl_op(result_type, id, eop, args, count); + break; + + case GLSLstd450Refract: + if (get(result_type).vecsize == 1) + { + if (!requires_scalar_refract) + { + requires_scalar_refract = true; + force_recompile(); + } + emit_trinary_func_op(result_type, id, args[0], args[1], args[2], "spvRefract"); + } + else + CompilerGLSL::emit_glsl_op(result_type, id, eop, args, count); + break; + + case GLSLstd450FaceForward: + if (get(result_type).vecsize == 1) + { + if (!requires_scalar_faceforward) + { + requires_scalar_faceforward = true; + force_recompile(); + } + emit_trinary_func_op(result_type, id, args[0], args[1], args[2], "spvFaceForward"); + } + else + 
CompilerGLSL::emit_glsl_op(result_type, id, eop, args, count); + break; + + default: + CompilerGLSL::emit_glsl_op(result_type, id, eop, args, count); + break; + } +} + +void CompilerHLSL::read_access_chain_array(const string &lhs, const SPIRAccessChain &chain) +{ + auto &type = get(chain.basetype); + + // Need to use a reserved identifier here since it might shadow an identifier in the access chain input or other loops. + auto ident = get_unique_identifier(); + + statement("[unroll]"); + statement("for (int ", ident, " = 0; ", ident, " < ", to_array_size(type, uint32_t(type.array.size() - 1)), "; ", + ident, "++)"); + begin_scope(); + auto subchain = chain; + subchain.dynamic_index = join(ident, " * ", chain.array_stride, " + ", chain.dynamic_index); + subchain.basetype = type.parent_type; + if (!get(subchain.basetype).array.empty()) + subchain.array_stride = get_decoration(subchain.basetype, DecorationArrayStride); + read_access_chain(nullptr, join(lhs, "[", ident, "]"), subchain); + end_scope(); +} + +void CompilerHLSL::read_access_chain_struct(const string &lhs, const SPIRAccessChain &chain) +{ + auto &type = get(chain.basetype); + auto subchain = chain; + uint32_t member_count = uint32_t(type.member_types.size()); + + for (uint32_t i = 0; i < member_count; i++) + { + uint32_t offset = type_struct_member_offset(type, i); + subchain.static_index = chain.static_index + offset; + subchain.basetype = type.member_types[i]; + + subchain.matrix_stride = 0; + subchain.array_stride = 0; + subchain.row_major_matrix = false; + + auto &member_type = get(subchain.basetype); + if (member_type.columns > 1) + { + subchain.matrix_stride = type_struct_member_matrix_stride(type, i); + subchain.row_major_matrix = has_member_decoration(type.self, i, DecorationRowMajor); + } + + if (!member_type.array.empty()) + subchain.array_stride = type_struct_member_array_stride(type, i); + + read_access_chain(nullptr, join(lhs, ".", to_member_name(type, i)), subchain); + } +} + +void 
CompilerHLSL::read_access_chain(string *expr, const string &lhs, const SPIRAccessChain &chain) +{ + auto &type = get(chain.basetype); + + SPIRType target_type; + target_type.basetype = SPIRType::UInt; + target_type.vecsize = type.vecsize; + target_type.columns = type.columns; + + if (!type.array.empty()) + { + read_access_chain_array(lhs, chain); + return; + } + else if (type.basetype == SPIRType::Struct) + { + read_access_chain_struct(lhs, chain); + return; + } + else if (type.width != 32 && !hlsl_options.enable_16bit_types) + SPIRV_CROSS_THROW("Reading types other than 32-bit from ByteAddressBuffer not yet supported, unless SM 6.2 and " + "native 16-bit types are enabled."); + + bool templated_load = hlsl_options.shader_model >= 62; + string load_expr; + + string template_expr; + if (templated_load) + template_expr = join("<", type_to_glsl(type), ">"); + + // Load a vector or scalar. + if (type.columns == 1 && !chain.row_major_matrix) + { + const char *load_op = nullptr; + switch (type.vecsize) + { + case 1: + load_op = "Load"; + break; + case 2: + load_op = "Load2"; + break; + case 3: + load_op = "Load3"; + break; + case 4: + load_op = "Load4"; + break; + default: + SPIRV_CROSS_THROW("Unknown vector size."); + } + + if (templated_load) + load_op = "Load"; + + load_expr = join(chain.base, ".", load_op, template_expr, "(", chain.dynamic_index, chain.static_index, ")"); + } + else if (type.columns == 1) + { + // Strided load since we are loading a column from a row-major matrix. 
+ if (templated_load) + { + auto scalar_type = type; + scalar_type.vecsize = 1; + scalar_type.columns = 1; + template_expr = join("<", type_to_glsl(scalar_type), ">"); + if (type.vecsize > 1) + load_expr += type_to_glsl(type) + "("; + } + else if (type.vecsize > 1) + { + load_expr = type_to_glsl(target_type); + load_expr += "("; + } + + for (uint32_t r = 0; r < type.vecsize; r++) + { + load_expr += join(chain.base, ".Load", template_expr, "(", chain.dynamic_index, + chain.static_index + r * chain.matrix_stride, ")"); + if (r + 1 < type.vecsize) + load_expr += ", "; + } + + if (type.vecsize > 1) + load_expr += ")"; + } + else if (!chain.row_major_matrix) + { + // Load a matrix, column-major, the easy case. + const char *load_op = nullptr; + switch (type.vecsize) + { + case 1: + load_op = "Load"; + break; + case 2: + load_op = "Load2"; + break; + case 3: + load_op = "Load3"; + break; + case 4: + load_op = "Load4"; + break; + default: + SPIRV_CROSS_THROW("Unknown vector size."); + } + + if (templated_load) + { + auto vector_type = type; + vector_type.columns = 1; + template_expr = join("<", type_to_glsl(vector_type), ">"); + load_expr = type_to_glsl(type); + load_op = "Load"; + } + else + { + // Note, this loading style in HLSL is *actually* row-major, but we always treat matrices as transposed in this backend, + // so row-major is technically column-major ... + load_expr = type_to_glsl(target_type); + } + load_expr += "("; + + for (uint32_t c = 0; c < type.columns; c++) + { + load_expr += join(chain.base, ".", load_op, template_expr, "(", chain.dynamic_index, + chain.static_index + c * chain.matrix_stride, ")"); + if (c + 1 < type.columns) + load_expr += ", "; + } + load_expr += ")"; + } + else + { + // Pick out elements one by one ... Hopefully compilers are smart enough to recognize this pattern + // considering HLSL is "row-major decl", but "column-major" memory layout (basically implicit transpose model, ugh) ... 
+ + if (templated_load) + { + load_expr = type_to_glsl(type); + auto scalar_type = type; + scalar_type.vecsize = 1; + scalar_type.columns = 1; + template_expr = join("<", type_to_glsl(scalar_type), ">"); + } + else + load_expr = type_to_glsl(target_type); + + load_expr += "("; + + for (uint32_t c = 0; c < type.columns; c++) + { + for (uint32_t r = 0; r < type.vecsize; r++) + { + load_expr += join(chain.base, ".Load", template_expr, "(", chain.dynamic_index, + chain.static_index + c * (type.width / 8) + r * chain.matrix_stride, ")"); + + if ((r + 1 < type.vecsize) || (c + 1 < type.columns)) + load_expr += ", "; + } + } + load_expr += ")"; + } + + if (!templated_load) + { + auto bitcast_op = bitcast_glsl_op(type, target_type); + if (!bitcast_op.empty()) + load_expr = join(bitcast_op, "(", load_expr, ")"); + } + + if (lhs.empty()) + { + assert(expr); + *expr = move(load_expr); + } + else + statement(lhs, " = ", load_expr, ";"); +} + +void CompilerHLSL::emit_load(const Instruction &instruction) +{ + auto ops = stream(instruction); + + auto *chain = maybe_get(ops[2]); + if (chain) + { + uint32_t result_type = ops[0]; + uint32_t id = ops[1]; + uint32_t ptr = ops[2]; + + if (has_decoration(ptr, DecorationNonUniformEXT)) + propagate_nonuniform_qualifier(ptr); + + auto &type = get(result_type); + bool composite_load = !type.array.empty() || type.basetype == SPIRType::Struct; + + if (composite_load) + { + // We cannot make this work in one single expression as we might have nested structures and arrays, + // so unroll the load to an uninitialized temporary. 
+ emit_uninitialized_temporary_expression(result_type, id); + read_access_chain(nullptr, to_expression(id), *chain); + track_expression_read(chain->self); + } + else + { + string load_expr; + read_access_chain(&load_expr, "", *chain); + + bool forward = should_forward(ptr) && forced_temporaries.find(id) == end(forced_temporaries); + + // If we are forwarding this load, + // don't register the read to access chain here, defer that to when we actually use the expression, + // using the add_implied_read_expression mechanism. + if (!forward) + track_expression_read(chain->self); + + // Do not forward complex load sequences like matrices, structs and arrays. + if (type.columns > 1) + forward = false; + + auto &e = emit_op(result_type, id, load_expr, forward, true); + e.need_transpose = false; + register_read(id, ptr, forward); + inherit_expression_dependencies(id, ptr); + if (forward) + add_implied_read_expression(e, chain->self); + } + } + else + CompilerGLSL::emit_instruction(instruction); +} + +void CompilerHLSL::write_access_chain_array(const SPIRAccessChain &chain, uint32_t value, + const SmallVector &composite_chain) +{ + auto &type = get(chain.basetype); + + // Need to use a reserved identifier here since it might shadow an identifier in the access chain input or other loops. 
+ auto ident = get_unique_identifier(); + + uint32_t id = ir.increase_bound_by(2); + uint32_t int_type_id = id + 1; + SPIRType int_type; + int_type.basetype = SPIRType::Int; + int_type.width = 32; + set(int_type_id, int_type); + set(id, ident, int_type_id, true); + set_name(id, ident); + suppressed_usage_tracking.insert(id); + + statement("[unroll]"); + statement("for (int ", ident, " = 0; ", ident, " < ", to_array_size(type, uint32_t(type.array.size() - 1)), "; ", + ident, "++)"); + begin_scope(); + auto subchain = chain; + subchain.dynamic_index = join(ident, " * ", chain.array_stride, " + ", chain.dynamic_index); + subchain.basetype = type.parent_type; + + // Forcefully allow us to use an ID here by setting MSB. + auto subcomposite_chain = composite_chain; + subcomposite_chain.push_back(0x80000000u | id); + + if (!get(subchain.basetype).array.empty()) + subchain.array_stride = get_decoration(subchain.basetype, DecorationArrayStride); + + write_access_chain(subchain, value, subcomposite_chain); + end_scope(); +} + +void CompilerHLSL::write_access_chain_struct(const SPIRAccessChain &chain, uint32_t value, + const SmallVector &composite_chain) +{ + auto &type = get(chain.basetype); + uint32_t member_count = uint32_t(type.member_types.size()); + auto subchain = chain; + + auto subcomposite_chain = composite_chain; + subcomposite_chain.push_back(0); + + for (uint32_t i = 0; i < member_count; i++) + { + uint32_t offset = type_struct_member_offset(type, i); + subchain.static_index = chain.static_index + offset; + subchain.basetype = type.member_types[i]; + + subchain.matrix_stride = 0; + subchain.array_stride = 0; + subchain.row_major_matrix = false; + + auto &member_type = get(subchain.basetype); + if (member_type.columns > 1) + { + subchain.matrix_stride = type_struct_member_matrix_stride(type, i); + subchain.row_major_matrix = has_member_decoration(type.self, i, DecorationRowMajor); + } + + if (!member_type.array.empty()) + subchain.array_stride = 
type_struct_member_array_stride(type, i); + + subcomposite_chain.back() = i; + write_access_chain(subchain, value, subcomposite_chain); + } +} + +string CompilerHLSL::write_access_chain_value(uint32_t value, const SmallVector &composite_chain, + bool enclose) +{ + string ret; + if (composite_chain.empty()) + ret = to_expression(value); + else + { + AccessChainMeta meta; + ret = access_chain_internal(value, composite_chain.data(), uint32_t(composite_chain.size()), + ACCESS_CHAIN_INDEX_IS_LITERAL_BIT | ACCESS_CHAIN_LITERAL_MSB_FORCE_ID, &meta); + } + + if (enclose) + ret = enclose_expression(ret); + return ret; +} + +void CompilerHLSL::write_access_chain(const SPIRAccessChain &chain, uint32_t value, + const SmallVector &composite_chain) +{ + auto &type = get(chain.basetype); + + // Make sure we trigger a read of the constituents in the access chain. + track_expression_read(chain.self); + + if (has_decoration(chain.self, DecorationNonUniformEXT)) + propagate_nonuniform_qualifier(chain.self); + + SPIRType target_type; + target_type.basetype = SPIRType::UInt; + target_type.vecsize = type.vecsize; + target_type.columns = type.columns; + + if (!type.array.empty()) + { + write_access_chain_array(chain, value, composite_chain); + register_write(chain.self); + return; + } + else if (type.basetype == SPIRType::Struct) + { + write_access_chain_struct(chain, value, composite_chain); + register_write(chain.self); + return; + } + else if (type.width != 32 && !hlsl_options.enable_16bit_types) + SPIRV_CROSS_THROW("Writing types other than 32-bit to RWByteAddressBuffer not yet supported, unless SM 6.2 and " + "native 16-bit types are enabled."); + + bool templated_store = hlsl_options.shader_model >= 62; + + string template_expr; + if (templated_store) + template_expr = join("<", type_to_glsl(type), ">"); + + if (type.columns == 1 && !chain.row_major_matrix) + { + const char *store_op = nullptr; + switch (type.vecsize) + { + case 1: + store_op = "Store"; + break; + case 2: + 
store_op = "Store2"; + break; + case 3: + store_op = "Store3"; + break; + case 4: + store_op = "Store4"; + break; + default: + SPIRV_CROSS_THROW("Unknown vector size."); + } + + auto store_expr = write_access_chain_value(value, composite_chain, false); + + if (!templated_store) + { + auto bitcast_op = bitcast_glsl_op(target_type, type); + if (!bitcast_op.empty()) + store_expr = join(bitcast_op, "(", store_expr, ")"); + } + else + store_op = "Store"; + statement(chain.base, ".", store_op, template_expr, "(", chain.dynamic_index, chain.static_index, ", ", + store_expr, ");"); + } + else if (type.columns == 1) + { + if (templated_store) + { + auto scalar_type = type; + scalar_type.vecsize = 1; + scalar_type.columns = 1; + template_expr = join("<", type_to_glsl(scalar_type), ">"); + } + + // Strided store. + for (uint32_t r = 0; r < type.vecsize; r++) + { + auto store_expr = write_access_chain_value(value, composite_chain, true); + if (type.vecsize > 1) + { + store_expr += "."; + store_expr += index_to_swizzle(r); + } + remove_duplicate_swizzle(store_expr); + + if (!templated_store) + { + auto bitcast_op = bitcast_glsl_op(target_type, type); + if (!bitcast_op.empty()) + store_expr = join(bitcast_op, "(", store_expr, ")"); + } + + statement(chain.base, ".Store", template_expr, "(", chain.dynamic_index, + chain.static_index + chain.matrix_stride * r, ", ", store_expr, ");"); + } + } + else if (!chain.row_major_matrix) + { + const char *store_op = nullptr; + switch (type.vecsize) + { + case 1: + store_op = "Store"; + break; + case 2: + store_op = "Store2"; + break; + case 3: + store_op = "Store3"; + break; + case 4: + store_op = "Store4"; + break; + default: + SPIRV_CROSS_THROW("Unknown vector size."); + } + + if (templated_store) + { + store_op = "Store"; + auto vector_type = type; + vector_type.columns = 1; + template_expr = join("<", type_to_glsl(vector_type), ">"); + } + + for (uint32_t c = 0; c < type.columns; c++) + { + auto store_expr = 
join(write_access_chain_value(value, composite_chain, true), "[", c, "]"); + + if (!templated_store) + { + auto bitcast_op = bitcast_glsl_op(target_type, type); + if (!bitcast_op.empty()) + store_expr = join(bitcast_op, "(", store_expr, ")"); + } + + statement(chain.base, ".", store_op, template_expr, "(", chain.dynamic_index, + chain.static_index + c * chain.matrix_stride, ", ", store_expr, ");"); + } + } + else + { + if (templated_store) + { + auto scalar_type = type; + scalar_type.vecsize = 1; + scalar_type.columns = 1; + template_expr = join("<", type_to_glsl(scalar_type), ">"); + } + + for (uint32_t r = 0; r < type.vecsize; r++) + { + for (uint32_t c = 0; c < type.columns; c++) + { + auto store_expr = + join(write_access_chain_value(value, composite_chain, true), "[", c, "].", index_to_swizzle(r)); + remove_duplicate_swizzle(store_expr); + auto bitcast_op = bitcast_glsl_op(target_type, type); + if (!bitcast_op.empty()) + store_expr = join(bitcast_op, "(", store_expr, ")"); + statement(chain.base, ".Store", template_expr, "(", chain.dynamic_index, + chain.static_index + c * (type.width / 8) + r * chain.matrix_stride, ", ", store_expr, ");"); + } + } + } + + register_write(chain.self); +} + +void CompilerHLSL::emit_store(const Instruction &instruction) +{ + auto ops = stream(instruction); + auto *chain = maybe_get(ops[0]); + if (chain) + write_access_chain(*chain, ops[1], {}); + else + CompilerGLSL::emit_instruction(instruction); +} + +void CompilerHLSL::emit_access_chain(const Instruction &instruction) +{ + auto ops = stream(instruction); + uint32_t length = instruction.length; + + bool need_byte_access_chain = false; + auto &type = expression_type(ops[2]); + const auto *chain = maybe_get(ops[2]); + + if (chain) + { + // Keep tacking on an existing access chain. 
+ need_byte_access_chain = true; + } + else if (type.storage == StorageClassStorageBuffer || has_decoration(type.self, DecorationBufferBlock)) + { + // If we are starting to poke into an SSBO, we are dealing with ByteAddressBuffers, and we need + // to emit SPIRAccessChain rather than a plain SPIRExpression. + uint32_t chain_arguments = length - 3; + if (chain_arguments > type.array.size()) + need_byte_access_chain = true; + } + + if (need_byte_access_chain) + { + // If we have a chain variable, we are already inside the SSBO, and any array type will refer to arrays within a block, + // and not array of SSBO. + uint32_t to_plain_buffer_length = chain ? 0u : static_cast(type.array.size()); + + auto *backing_variable = maybe_get_backing_variable(ops[2]); + + string base; + if (to_plain_buffer_length != 0) + base = access_chain(ops[2], &ops[3], to_plain_buffer_length, get(ops[0])); + else if (chain) + base = chain->base; + else + base = to_expression(ops[2]); + + // Start traversing type hierarchy at the proper non-pointer types. + auto *basetype = &get_pointee_type(type); + + // Traverse the type hierarchy down to the actual buffer types. + for (uint32_t i = 0; i < to_plain_buffer_length; i++) + { + assert(basetype->parent_type); + basetype = &get(basetype->parent_type); + } + + uint32_t matrix_stride = 0; + uint32_t array_stride = 0; + bool row_major_matrix = false; + + // Inherit matrix information. 
+ if (chain) + { + matrix_stride = chain->matrix_stride; + row_major_matrix = chain->row_major_matrix; + array_stride = chain->array_stride; + } + + auto offsets = flattened_access_chain_offset(*basetype, &ops[3 + to_plain_buffer_length], + length - 3 - to_plain_buffer_length, 0, 1, &row_major_matrix, + &matrix_stride, &array_stride); + + auto &e = set(ops[1], ops[0], type.storage, base, offsets.first, offsets.second); + e.row_major_matrix = row_major_matrix; + e.matrix_stride = matrix_stride; + e.array_stride = array_stride; + e.immutable = should_forward(ops[2]); + e.loaded_from = backing_variable ? backing_variable->self : ID(0); + + if (chain) + { + e.dynamic_index += chain->dynamic_index; + e.static_index += chain->static_index; + } + + for (uint32_t i = 2; i < length; i++) + { + inherit_expression_dependencies(ops[1], ops[i]); + add_implied_read_expression(e, ops[i]); + } + + if (has_decoration(ops[1], DecorationNonUniformEXT)) + propagate_nonuniform_qualifier(ops[1]); + } + else + { + CompilerGLSL::emit_instruction(instruction); + } +} + +void CompilerHLSL::emit_atomic(const uint32_t *ops, uint32_t length, spv::Op op) +{ + const char *atomic_op = nullptr; + + string value_expr; + if (op != OpAtomicIDecrement && op != OpAtomicIIncrement && op != OpAtomicLoad && op != OpAtomicStore) + value_expr = to_expression(ops[op == OpAtomicCompareExchange ? 
6 : 5]); + + bool is_atomic_store = false; + + switch (op) + { + case OpAtomicIIncrement: + atomic_op = "InterlockedAdd"; + value_expr = "1"; + break; + + case OpAtomicIDecrement: + atomic_op = "InterlockedAdd"; + value_expr = "-1"; + break; + + case OpAtomicLoad: + atomic_op = "InterlockedAdd"; + value_expr = "0"; + break; + + case OpAtomicISub: + atomic_op = "InterlockedAdd"; + value_expr = join("-", enclose_expression(value_expr)); + break; + + case OpAtomicSMin: + case OpAtomicUMin: + atomic_op = "InterlockedMin"; + break; + + case OpAtomicSMax: + case OpAtomicUMax: + atomic_op = "InterlockedMax"; + break; + + case OpAtomicAnd: + atomic_op = "InterlockedAnd"; + break; + + case OpAtomicOr: + atomic_op = "InterlockedOr"; + break; + + case OpAtomicXor: + atomic_op = "InterlockedXor"; + break; + + case OpAtomicIAdd: + atomic_op = "InterlockedAdd"; + break; + + case OpAtomicExchange: + atomic_op = "InterlockedExchange"; + break; + + case OpAtomicStore: + atomic_op = "InterlockedExchange"; + is_atomic_store = true; + break; + + case OpAtomicCompareExchange: + if (length < 8) + SPIRV_CROSS_THROW("Not enough data for opcode."); + atomic_op = "InterlockedCompareExchange"; + value_expr = join(to_expression(ops[7]), ", ", value_expr); + break; + + default: + SPIRV_CROSS_THROW("Unknown atomic opcode."); + } + + if (is_atomic_store) + { + auto &data_type = expression_type(ops[0]); + auto *chain = maybe_get(ops[0]); + + auto &tmp_id = extra_sub_expressions[ops[0]]; + if (!tmp_id) + { + tmp_id = ir.increase_bound_by(1); + emit_uninitialized_temporary_expression(get_pointee_type(data_type).self, tmp_id); + } + + if (data_type.storage == StorageClassImage || !chain) + { + statement(atomic_op, "(", to_expression(ops[0]), ", ", to_expression(ops[3]), ", ", to_expression(tmp_id), + ");"); + } + else + { + // RWByteAddress buffer is always uint in its underlying type. 
+ statement(chain->base, ".", atomic_op, "(", chain->dynamic_index, chain->static_index, ", ", + to_expression(ops[3]), ", ", to_expression(tmp_id), ");"); + } + } + else + { + uint32_t result_type = ops[0]; + uint32_t id = ops[1]; + forced_temporaries.insert(ops[1]); + + auto &type = get(result_type); + statement(variable_decl(type, to_name(id)), ";"); + + auto &data_type = expression_type(ops[2]); + auto *chain = maybe_get(ops[2]); + SPIRType::BaseType expr_type; + if (data_type.storage == StorageClassImage || !chain) + { + statement(atomic_op, "(", to_expression(ops[2]), ", ", value_expr, ", ", to_name(id), ");"); + expr_type = data_type.basetype; + } + else + { + // RWByteAddress buffer is always uint in its underlying type. + expr_type = SPIRType::UInt; + statement(chain->base, ".", atomic_op, "(", chain->dynamic_index, chain->static_index, ", ", value_expr, + ", ", to_name(id), ");"); + } + + auto expr = bitcast_expression(type, expr_type, to_name(id)); + set(id, expr, result_type, true); + } + flush_all_atomic_capable_variables(); +} + +void CompilerHLSL::emit_subgroup_op(const Instruction &i) +{ + if (hlsl_options.shader_model < 60) + SPIRV_CROSS_THROW("Wave ops requires SM 6.0 or higher."); + + const uint32_t *ops = stream(i); + auto op = static_cast(i.op); + + uint32_t result_type = ops[0]; + uint32_t id = ops[1]; + + auto scope = static_cast(evaluate_constant_u32(ops[2])); + if (scope != ScopeSubgroup) + SPIRV_CROSS_THROW("Only subgroup scope is supported."); + + const auto make_inclusive_Sum = [&](const string &expr) -> string { + return join(expr, " + ", to_expression(ops[4])); + }; + + const auto make_inclusive_Product = [&](const string &expr) -> string { + return join(expr, " * ", to_expression(ops[4])); + }; + + // If we need to do implicit bitcasts, make sure we do it with the correct type. 
+ uint32_t integer_width = get_integer_width_for_instruction(i); + auto int_type = to_signed_basetype(integer_width); + auto uint_type = to_unsigned_basetype(integer_width); + +#define make_inclusive_BitAnd(expr) "" +#define make_inclusive_BitOr(expr) "" +#define make_inclusive_BitXor(expr) "" +#define make_inclusive_Min(expr) "" +#define make_inclusive_Max(expr) "" + + switch (op) + { + case OpGroupNonUniformElect: + emit_op(result_type, id, "WaveIsFirstLane()", true); + break; + + case OpGroupNonUniformBroadcast: + emit_binary_func_op(result_type, id, ops[3], ops[4], "WaveReadLaneAt"); + break; + + case OpGroupNonUniformBroadcastFirst: + emit_unary_func_op(result_type, id, ops[3], "WaveReadLaneFirst"); + break; + + case OpGroupNonUniformBallot: + emit_unary_func_op(result_type, id, ops[3], "WaveActiveBallot"); + break; + + case OpGroupNonUniformInverseBallot: + SPIRV_CROSS_THROW("Cannot trivially implement InverseBallot in HLSL."); + break; + + case OpGroupNonUniformBallotBitExtract: + SPIRV_CROSS_THROW("Cannot trivially implement BallotBitExtract in HLSL."); + break; + + case OpGroupNonUniformBallotFindLSB: + SPIRV_CROSS_THROW("Cannot trivially implement BallotFindLSB in HLSL."); + break; + + case OpGroupNonUniformBallotFindMSB: + SPIRV_CROSS_THROW("Cannot trivially implement BallotFindMSB in HLSL."); + break; + + case OpGroupNonUniformBallotBitCount: + { + auto operation = static_cast(ops[3]); + if (operation == GroupOperationReduce) + { + bool forward = should_forward(ops[4]); + auto left = join("countbits(", to_enclosed_expression(ops[4]), ".x) + countbits(", + to_enclosed_expression(ops[4]), ".y)"); + auto right = join("countbits(", to_enclosed_expression(ops[4]), ".z) + countbits(", + to_enclosed_expression(ops[4]), ".w)"); + emit_op(result_type, id, join(left, " + ", right), forward); + inherit_expression_dependencies(id, ops[4]); + } + else if (operation == GroupOperationInclusiveScan) + SPIRV_CROSS_THROW("Cannot trivially implement BallotBitCount 
Inclusive Scan in HLSL."); + else if (operation == GroupOperationExclusiveScan) + SPIRV_CROSS_THROW("Cannot trivially implement BallotBitCount Exclusive Scan in HLSL."); + else + SPIRV_CROSS_THROW("Invalid BitCount operation."); + break; + } + + case OpGroupNonUniformShuffle: + SPIRV_CROSS_THROW("Cannot trivially implement Shuffle in HLSL."); + case OpGroupNonUniformShuffleXor: + SPIRV_CROSS_THROW("Cannot trivially implement ShuffleXor in HLSL."); + case OpGroupNonUniformShuffleUp: + SPIRV_CROSS_THROW("Cannot trivially implement ShuffleUp in HLSL."); + case OpGroupNonUniformShuffleDown: + SPIRV_CROSS_THROW("Cannot trivially implement ShuffleDown in HLSL."); + + case OpGroupNonUniformAll: + emit_unary_func_op(result_type, id, ops[3], "WaveActiveAllTrue"); + break; + + case OpGroupNonUniformAny: + emit_unary_func_op(result_type, id, ops[3], "WaveActiveAnyTrue"); + break; + + case OpGroupNonUniformAllEqual: + { + auto &type = get(result_type); + emit_unary_func_op(result_type, id, ops[3], + type.basetype == SPIRType::Boolean ? 
"WaveActiveAllEqualBool" : "WaveActiveAllEqual"); + break; + } + + // clang-format off +#define HLSL_GROUP_OP(op, hlsl_op, supports_scan) \ +case OpGroupNonUniform##op: \ + { \ + auto operation = static_cast(ops[3]); \ + if (operation == GroupOperationReduce) \ + emit_unary_func_op(result_type, id, ops[4], "WaveActive" #hlsl_op); \ + else if (operation == GroupOperationInclusiveScan && supports_scan) \ + { \ + bool forward = should_forward(ops[4]); \ + emit_op(result_type, id, make_inclusive_##hlsl_op (join("WavePrefix" #hlsl_op, "(", to_expression(ops[4]), ")")), forward); \ + inherit_expression_dependencies(id, ops[4]); \ + } \ + else if (operation == GroupOperationExclusiveScan && supports_scan) \ + emit_unary_func_op(result_type, id, ops[4], "WavePrefix" #hlsl_op); \ + else if (operation == GroupOperationClusteredReduce) \ + SPIRV_CROSS_THROW("Cannot trivially implement ClusteredReduce in HLSL."); \ + else \ + SPIRV_CROSS_THROW("Invalid group operation."); \ + break; \ + } + +#define HLSL_GROUP_OP_CAST(op, hlsl_op, type) \ +case OpGroupNonUniform##op: \ + { \ + auto operation = static_cast(ops[3]); \ + if (operation == GroupOperationReduce) \ + emit_unary_func_op_cast(result_type, id, ops[4], "WaveActive" #hlsl_op, type, type); \ + else \ + SPIRV_CROSS_THROW("Invalid group operation."); \ + break; \ + } + + HLSL_GROUP_OP(FAdd, Sum, true) + HLSL_GROUP_OP(FMul, Product, true) + HLSL_GROUP_OP(FMin, Min, false) + HLSL_GROUP_OP(FMax, Max, false) + HLSL_GROUP_OP(IAdd, Sum, true) + HLSL_GROUP_OP(IMul, Product, true) + HLSL_GROUP_OP_CAST(SMin, Min, int_type) + HLSL_GROUP_OP_CAST(SMax, Max, int_type) + HLSL_GROUP_OP_CAST(UMin, Min, uint_type) + HLSL_GROUP_OP_CAST(UMax, Max, uint_type) + HLSL_GROUP_OP(BitwiseAnd, BitAnd, false) + HLSL_GROUP_OP(BitwiseOr, BitOr, false) + HLSL_GROUP_OP(BitwiseXor, BitXor, false) + +#undef HLSL_GROUP_OP +#undef HLSL_GROUP_OP_CAST + // clang-format on + + case OpGroupNonUniformQuadSwap: + { + uint32_t direction = 
evaluate_constant_u32(ops[4]); + if (direction == 0) + emit_unary_func_op(result_type, id, ops[3], "QuadReadAcrossX"); + else if (direction == 1) + emit_unary_func_op(result_type, id, ops[3], "QuadReadAcrossY"); + else if (direction == 2) + emit_unary_func_op(result_type, id, ops[3], "QuadReadAcrossDiagonal"); + else + SPIRV_CROSS_THROW("Invalid quad swap direction."); + break; + } + + case OpGroupNonUniformQuadBroadcast: + { + emit_binary_func_op(result_type, id, ops[3], ops[4], "QuadReadLaneAt"); + break; + } + + default: + SPIRV_CROSS_THROW("Invalid opcode for subgroup."); + } + + register_control_dependent_expression(id); +} + +void CompilerHLSL::emit_instruction(const Instruction &instruction) +{ + auto ops = stream(instruction); + auto opcode = static_cast(instruction.op); + +#define HLSL_BOP(op) emit_binary_op(ops[0], ops[1], ops[2], ops[3], #op) +#define HLSL_BOP_CAST(op, type) \ + emit_binary_op_cast(ops[0], ops[1], ops[2], ops[3], #op, type, opcode_is_sign_invariant(opcode)) +#define HLSL_UOP(op) emit_unary_op(ops[0], ops[1], ops[2], #op) +#define HLSL_QFOP(op) emit_quaternary_func_op(ops[0], ops[1], ops[2], ops[3], ops[4], ops[5], #op) +#define HLSL_TFOP(op) emit_trinary_func_op(ops[0], ops[1], ops[2], ops[3], ops[4], #op) +#define HLSL_BFOP(op) emit_binary_func_op(ops[0], ops[1], ops[2], ops[3], #op) +#define HLSL_BFOP_CAST(op, type) \ + emit_binary_func_op_cast(ops[0], ops[1], ops[2], ops[3], #op, type, opcode_is_sign_invariant(opcode)) +#define HLSL_BFOP(op) emit_binary_func_op(ops[0], ops[1], ops[2], ops[3], #op) +#define HLSL_UFOP(op) emit_unary_func_op(ops[0], ops[1], ops[2], #op) + + // If we need to do implicit bitcasts, make sure we do it with the correct type. 
+ uint32_t integer_width = get_integer_width_for_instruction(instruction); + auto int_type = to_signed_basetype(integer_width); + auto uint_type = to_unsigned_basetype(integer_width); + + switch (opcode) + { + case OpAccessChain: + case OpInBoundsAccessChain: + { + emit_access_chain(instruction); + break; + } + case OpBitcast: + { + auto bitcast_type = get_bitcast_type(ops[0], ops[2]); + if (bitcast_type == CompilerHLSL::TypeNormal) + CompilerGLSL::emit_instruction(instruction); + else + { + if (!requires_uint2_packing) + { + requires_uint2_packing = true; + force_recompile(); + } + + if (bitcast_type == CompilerHLSL::TypePackUint2x32) + emit_unary_func_op(ops[0], ops[1], ops[2], "spvPackUint2x32"); + else + emit_unary_func_op(ops[0], ops[1], ops[2], "spvUnpackUint2x32"); + } + + break; + } + + case OpStore: + { + emit_store(instruction); + break; + } + + case OpLoad: + { + emit_load(instruction); + break; + } + + case OpMatrixTimesVector: + { + // Matrices are kept in a transposed state all the time, flip multiplication order always. + emit_binary_func_op(ops[0], ops[1], ops[3], ops[2], "mul"); + break; + } + + case OpVectorTimesMatrix: + { + // Matrices are kept in a transposed state all the time, flip multiplication order always. + emit_binary_func_op(ops[0], ops[1], ops[3], ops[2], "mul"); + break; + } + + case OpMatrixTimesMatrix: + { + // Matrices are kept in a transposed state all the time, flip multiplication order always. 
+ emit_binary_func_op(ops[0], ops[1], ops[3], ops[2], "mul"); + break; + } + + case OpOuterProduct: + { + uint32_t result_type = ops[0]; + uint32_t id = ops[1]; + uint32_t a = ops[2]; + uint32_t b = ops[3]; + + auto &type = get(result_type); + string expr = type_to_glsl_constructor(type); + expr += "("; + for (uint32_t col = 0; col < type.columns; col++) + { + expr += to_enclosed_expression(a); + expr += " * "; + expr += to_extract_component_expression(b, col); + if (col + 1 < type.columns) + expr += ", "; + } + expr += ")"; + emit_op(result_type, id, expr, should_forward(a) && should_forward(b)); + inherit_expression_dependencies(id, a); + inherit_expression_dependencies(id, b); + break; + } + + case OpFMod: + { + if (!requires_op_fmod) + { + requires_op_fmod = true; + force_recompile(); + } + CompilerGLSL::emit_instruction(instruction); + break; + } + + case OpFRem: + emit_binary_func_op(ops[0], ops[1], ops[2], ops[3], "fmod"); + break; + + case OpImage: + { + uint32_t result_type = ops[0]; + uint32_t id = ops[1]; + auto *combined = maybe_get(ops[2]); + + if (combined) + { + auto &e = emit_op(result_type, id, to_expression(combined->image), true, true); + auto *var = maybe_get_backing_variable(combined->image); + if (var) + e.loaded_from = var->self; + } + else + { + auto &e = emit_op(result_type, id, to_expression(ops[2]), true, true); + auto *var = maybe_get_backing_variable(ops[2]); + if (var) + e.loaded_from = var->self; + } + break; + } + + case OpDPdx: + HLSL_UFOP(ddx); + register_control_dependent_expression(ops[1]); + break; + + case OpDPdy: + HLSL_UFOP(ddy); + register_control_dependent_expression(ops[1]); + break; + + case OpDPdxFine: + HLSL_UFOP(ddx_fine); + register_control_dependent_expression(ops[1]); + break; + + case OpDPdyFine: + HLSL_UFOP(ddy_fine); + register_control_dependent_expression(ops[1]); + break; + + case OpDPdxCoarse: + HLSL_UFOP(ddx_coarse); + register_control_dependent_expression(ops[1]); + break; + + case OpDPdyCoarse: + 
HLSL_UFOP(ddy_coarse); + register_control_dependent_expression(ops[1]); + break; + + case OpFwidth: + case OpFwidthCoarse: + case OpFwidthFine: + HLSL_UFOP(fwidth); + register_control_dependent_expression(ops[1]); + break; + + case OpLogicalNot: + { + auto result_type = ops[0]; + auto id = ops[1]; + auto &type = get(result_type); + + if (type.vecsize > 1) + emit_unrolled_unary_op(result_type, id, ops[2], "!"); + else + HLSL_UOP(!); + break; + } + + case OpIEqual: + { + auto result_type = ops[0]; + auto id = ops[1]; + + if (expression_type(ops[2]).vecsize > 1) + emit_unrolled_binary_op(result_type, id, ops[2], ops[3], "==", false, SPIRType::Unknown); + else + HLSL_BOP_CAST(==, int_type); + break; + } + + case OpLogicalEqual: + case OpFOrdEqual: + case OpFUnordEqual: + { + // HLSL != operator is unordered. + // https://docs.microsoft.com/en-us/windows/win32/direct3d10/d3d10-graphics-programming-guide-resources-float-rules. + // isnan() is apparently implemented as x != x as well. + // We cannot implement UnordEqual as !(OrdNotEqual), as HLSL cannot express OrdNotEqual. + // HACK: FUnordEqual will be implemented as FOrdEqual. + + auto result_type = ops[0]; + auto id = ops[1]; + + if (expression_type(ops[2]).vecsize > 1) + emit_unrolled_binary_op(result_type, id, ops[2], ops[3], "==", false, SPIRType::Unknown); + else + HLSL_BOP(==); + break; + } + + case OpINotEqual: + { + auto result_type = ops[0]; + auto id = ops[1]; + + if (expression_type(ops[2]).vecsize > 1) + emit_unrolled_binary_op(result_type, id, ops[2], ops[3], "!=", false, SPIRType::Unknown); + else + HLSL_BOP_CAST(!=, int_type); + break; + } + + case OpLogicalNotEqual: + case OpFOrdNotEqual: + case OpFUnordNotEqual: + { + // HLSL != operator is unordered. + // https://docs.microsoft.com/en-us/windows/win32/direct3d10/d3d10-graphics-programming-guide-resources-float-rules. + // isnan() is apparently implemented as x != x as well. 
+ + // FIXME: FOrdNotEqual cannot be implemented in a crisp and simple way here. + // We would need to do something like not(UnordEqual), but that cannot be expressed either. + // Adding a lot of NaN checks would be a breaking change from perspective of performance. + // SPIR-V will generally use isnan() checks when this even matters. + // HACK: FOrdNotEqual will be implemented as FUnordEqual. + + auto result_type = ops[0]; + auto id = ops[1]; + + if (expression_type(ops[2]).vecsize > 1) + emit_unrolled_binary_op(result_type, id, ops[2], ops[3], "!=", false, SPIRType::Unknown); + else + HLSL_BOP(!=); + break; + } + + case OpUGreaterThan: + case OpSGreaterThan: + { + auto result_type = ops[0]; + auto id = ops[1]; + auto type = opcode == OpUGreaterThan ? uint_type : int_type; + + if (expression_type(ops[2]).vecsize > 1) + emit_unrolled_binary_op(result_type, id, ops[2], ops[3], ">", false, type); + else + HLSL_BOP_CAST(>, type); + break; + } + + case OpFOrdGreaterThan: + { + auto result_type = ops[0]; + auto id = ops[1]; + + if (expression_type(ops[2]).vecsize > 1) + emit_unrolled_binary_op(result_type, id, ops[2], ops[3], ">", false, SPIRType::Unknown); + else + HLSL_BOP(>); + break; + } + + case OpFUnordGreaterThan: + { + auto result_type = ops[0]; + auto id = ops[1]; + + if (expression_type(ops[2]).vecsize > 1) + emit_unrolled_binary_op(result_type, id, ops[2], ops[3], "<=", true, SPIRType::Unknown); + else + CompilerGLSL::emit_instruction(instruction); + break; + } + + case OpUGreaterThanEqual: + case OpSGreaterThanEqual: + { + auto result_type = ops[0]; + auto id = ops[1]; + + auto type = opcode == OpUGreaterThanEqual ? 
uint_type : int_type; + if (expression_type(ops[2]).vecsize > 1) + emit_unrolled_binary_op(result_type, id, ops[2], ops[3], ">=", false, type); + else + HLSL_BOP_CAST(>=, type); + break; + } + + case OpFOrdGreaterThanEqual: + { + auto result_type = ops[0]; + auto id = ops[1]; + + if (expression_type(ops[2]).vecsize > 1) + emit_unrolled_binary_op(result_type, id, ops[2], ops[3], ">=", false, SPIRType::Unknown); + else + HLSL_BOP(>=); + break; + } + + case OpFUnordGreaterThanEqual: + { + auto result_type = ops[0]; + auto id = ops[1]; + + if (expression_type(ops[2]).vecsize > 1) + emit_unrolled_binary_op(result_type, id, ops[2], ops[3], "<", true, SPIRType::Unknown); + else + CompilerGLSL::emit_instruction(instruction); + break; + } + + case OpULessThan: + case OpSLessThan: + { + auto result_type = ops[0]; + auto id = ops[1]; + + auto type = opcode == OpULessThan ? uint_type : int_type; + if (expression_type(ops[2]).vecsize > 1) + emit_unrolled_binary_op(result_type, id, ops[2], ops[3], "<", false, type); + else + HLSL_BOP_CAST(<, type); + break; + } + + case OpFOrdLessThan: + { + auto result_type = ops[0]; + auto id = ops[1]; + + if (expression_type(ops[2]).vecsize > 1) + emit_unrolled_binary_op(result_type, id, ops[2], ops[3], "<", false, SPIRType::Unknown); + else + HLSL_BOP(<); + break; + } + + case OpFUnordLessThan: + { + auto result_type = ops[0]; + auto id = ops[1]; + + if (expression_type(ops[2]).vecsize > 1) + emit_unrolled_binary_op(result_type, id, ops[2], ops[3], ">=", true, SPIRType::Unknown); + else + CompilerGLSL::emit_instruction(instruction); + break; + } + + case OpULessThanEqual: + case OpSLessThanEqual: + { + auto result_type = ops[0]; + auto id = ops[1]; + + auto type = opcode == OpULessThanEqual ? 
uint_type : int_type; + if (expression_type(ops[2]).vecsize > 1) + emit_unrolled_binary_op(result_type, id, ops[2], ops[3], "<=", false, type); + else + HLSL_BOP_CAST(<=, type); + break; + } + + case OpFOrdLessThanEqual: + { + auto result_type = ops[0]; + auto id = ops[1]; + + if (expression_type(ops[2]).vecsize > 1) + emit_unrolled_binary_op(result_type, id, ops[2], ops[3], "<=", false, SPIRType::Unknown); + else + HLSL_BOP(<=); + break; + } + + case OpFUnordLessThanEqual: + { + auto result_type = ops[0]; + auto id = ops[1]; + + if (expression_type(ops[2]).vecsize > 1) + emit_unrolled_binary_op(result_type, id, ops[2], ops[3], ">", true, SPIRType::Unknown); + else + CompilerGLSL::emit_instruction(instruction); + break; + } + + case OpImageQueryLod: + emit_texture_op(instruction, false); + break; + + case OpImageQuerySizeLod: + { + auto result_type = ops[0]; + auto id = ops[1]; + + require_texture_query_variant(ops[2]); + auto dummy_samples_levels = join(get_fallback_name(id), "_dummy_parameter"); + statement("uint ", dummy_samples_levels, ";"); + + auto expr = join("spvTextureSize(", to_expression(ops[2]), ", ", + bitcast_expression(SPIRType::UInt, ops[3]), ", ", dummy_samples_levels, ")"); + + auto &restype = get(ops[0]); + expr = bitcast_expression(restype, SPIRType::UInt, expr); + emit_op(result_type, id, expr, true); + break; + } + + case OpImageQuerySize: + { + auto result_type = ops[0]; + auto id = ops[1]; + + require_texture_query_variant(ops[2]); + bool uav = expression_type(ops[2]).image.sampled == 2; + + if (const auto *var = maybe_get_backing_variable(ops[2])) + if (hlsl_options.nonwritable_uav_texture_as_srv && has_decoration(var->self, DecorationNonWritable)) + uav = false; + + auto dummy_samples_levels = join(get_fallback_name(id), "_dummy_parameter"); + statement("uint ", dummy_samples_levels, ";"); + + string expr; + if (uav) + expr = join("spvImageSize(", to_expression(ops[2]), ", ", dummy_samples_levels, ")"); + else + expr = 
join("spvTextureSize(", to_expression(ops[2]), ", 0u, ", dummy_samples_levels, ")"); + + auto &restype = get(ops[0]); + expr = bitcast_expression(restype, SPIRType::UInt, expr); + emit_op(result_type, id, expr, true); + break; + } + + case OpImageQuerySamples: + case OpImageQueryLevels: + { + auto result_type = ops[0]; + auto id = ops[1]; + + require_texture_query_variant(ops[2]); + bool uav = expression_type(ops[2]).image.sampled == 2; + if (opcode == OpImageQueryLevels && uav) + SPIRV_CROSS_THROW("Cannot query levels for UAV images."); + + if (const auto *var = maybe_get_backing_variable(ops[2])) + if (hlsl_options.nonwritable_uav_texture_as_srv && has_decoration(var->self, DecorationNonWritable)) + uav = false; + + // Keep it simple and do not emit special variants to make this look nicer ... + // This stuff is barely, if ever, used. + forced_temporaries.insert(id); + auto &type = get(result_type); + statement(variable_decl(type, to_name(id)), ";"); + + if (uav) + statement("spvImageSize(", to_expression(ops[2]), ", ", to_name(id), ");"); + else + statement("spvTextureSize(", to_expression(ops[2]), ", 0u, ", to_name(id), ");"); + + auto &restype = get(ops[0]); + auto expr = bitcast_expression(restype, SPIRType::UInt, to_name(id)); + set(id, expr, result_type, true); + break; + } + + case OpImageRead: + { + uint32_t result_type = ops[0]; + uint32_t id = ops[1]; + auto *var = maybe_get_backing_variable(ops[2]); + auto &type = expression_type(ops[2]); + bool subpass_data = type.image.dim == DimSubpassData; + bool pure = false; + + string imgexpr; + + if (subpass_data) + { + if (hlsl_options.shader_model < 40) + SPIRV_CROSS_THROW("Subpass loads are not supported in HLSL shader model 2/3."); + + // Similar to GLSL, implement subpass loads using texelFetch. 
+ if (type.image.ms) + { + uint32_t operands = ops[4]; + if (operands != ImageOperandsSampleMask || instruction.length != 6) + SPIRV_CROSS_THROW("Multisampled image used in OpImageRead, but unexpected operand mask was used."); + uint32_t sample = ops[5]; + imgexpr = join(to_expression(ops[2]), ".Load(int2(gl_FragCoord.xy), ", to_expression(sample), ")"); + } + else + imgexpr = join(to_expression(ops[2]), ".Load(int3(int2(gl_FragCoord.xy), 0))"); + + pure = true; + } + else + { + imgexpr = join(to_expression(ops[2]), "[", to_expression(ops[3]), "]"); + // The underlying image type in HLSL depends on the image format, unlike GLSL, where all images are "vec4", + // except that the underlying type changes how the data is interpreted. + + bool force_srv = + hlsl_options.nonwritable_uav_texture_as_srv && var && has_decoration(var->self, DecorationNonWritable); + pure = force_srv; + + if (var && !subpass_data && !force_srv) + imgexpr = remap_swizzle(get(result_type), + image_format_to_components(get(var->basetype).image.format), imgexpr); + } + + if (var && var->forwardable) + { + bool forward = forced_temporaries.find(id) == end(forced_temporaries); + auto &e = emit_op(result_type, id, imgexpr, forward); + + if (!pure) + { + e.loaded_from = var->self; + if (forward) + var->dependees.push_back(id); + } + } + else + emit_op(result_type, id, imgexpr, false); + + inherit_expression_dependencies(id, ops[2]); + if (type.image.ms) + inherit_expression_dependencies(id, ops[5]); + break; + } + + case OpImageWrite: + { + auto *var = maybe_get_backing_variable(ops[0]); + + // The underlying image type in HLSL depends on the image format, unlike GLSL, where all images are "vec4", + // except that the underlying type changes how the data is interpreted. 
+ auto value_expr = to_expression(ops[2]); + if (var) + { + auto &type = get(var->basetype); + auto narrowed_type = get(type.image.type); + narrowed_type.vecsize = image_format_to_components(type.image.format); + value_expr = remap_swizzle(narrowed_type, expression_type(ops[2]).vecsize, value_expr); + } + + statement(to_expression(ops[0]), "[", to_expression(ops[1]), "] = ", value_expr, ";"); + if (var && variable_storage_is_aliased(*var)) + flush_all_aliased_variables(); + break; + } + + case OpImageTexelPointer: + { + uint32_t result_type = ops[0]; + uint32_t id = ops[1]; + + auto expr = to_expression(ops[2]); + if (has_decoration(id, DecorationNonUniformEXT) || has_decoration(ops[2], DecorationNonUniformEXT)) + convert_non_uniform_expression(expression_type(ops[2]), expr); + expr += join("[", to_expression(ops[3]), "]"); + + auto &e = set(id, expr, result_type, true); + + // When using the pointer, we need to know which variable it is actually loaded from. + auto *var = maybe_get_backing_variable(ops[2]); + e.loaded_from = var ? var->self : ID(0); + inherit_expression_dependencies(id, ops[3]); + break; + } + + case OpAtomicCompareExchange: + case OpAtomicExchange: + case OpAtomicISub: + case OpAtomicSMin: + case OpAtomicUMin: + case OpAtomicSMax: + case OpAtomicUMax: + case OpAtomicAnd: + case OpAtomicOr: + case OpAtomicXor: + case OpAtomicIAdd: + case OpAtomicIIncrement: + case OpAtomicIDecrement: + case OpAtomicLoad: + case OpAtomicStore: + { + emit_atomic(ops, instruction.length, opcode); + break; + } + + case OpControlBarrier: + case OpMemoryBarrier: + { + uint32_t memory; + uint32_t semantics; + + if (opcode == OpMemoryBarrier) + { + memory = evaluate_constant_u32(ops[0]); + semantics = evaluate_constant_u32(ops[1]); + } + else + { + memory = evaluate_constant_u32(ops[1]); + semantics = evaluate_constant_u32(ops[2]); + } + + if (memory == ScopeSubgroup) + { + // No Wave-barriers in HLSL. 
+ break; + } + + // We only care about these flags, acquire/release and friends are not relevant to GLSL. + semantics = mask_relevant_memory_semantics(semantics); + + if (opcode == OpMemoryBarrier) + { + // If we are a memory barrier, and the next instruction is a control barrier, check if that memory barrier + // does what we need, so we avoid redundant barriers. + const Instruction *next = get_next_instruction_in_block(instruction); + if (next && next->op == OpControlBarrier) + { + auto *next_ops = stream(*next); + uint32_t next_memory = evaluate_constant_u32(next_ops[1]); + uint32_t next_semantics = evaluate_constant_u32(next_ops[2]); + next_semantics = mask_relevant_memory_semantics(next_semantics); + + // There is no "just execution barrier" in HLSL. + // If there are no memory semantics for next instruction, we will imply group shared memory is synced. + if (next_semantics == 0) + next_semantics = MemorySemanticsWorkgroupMemoryMask; + + bool memory_scope_covered = false; + if (next_memory == memory) + memory_scope_covered = true; + else if (next_semantics == MemorySemanticsWorkgroupMemoryMask) + { + // If we only care about workgroup memory, either Device or Workgroup scope is fine, + // scope does not have to match. + if ((next_memory == ScopeDevice || next_memory == ScopeWorkgroup) && + (memory == ScopeDevice || memory == ScopeWorkgroup)) + { + memory_scope_covered = true; + } + } + else if (memory == ScopeWorkgroup && next_memory == ScopeDevice) + { + // The control barrier has device scope, but the memory barrier just has workgroup scope. + memory_scope_covered = true; + } + + // If we have the same memory scope, and all memory types are covered, we're good. + if (memory_scope_covered && (semantics & next_semantics) == semantics) + break; + } + } + + // We are synchronizing some memory or syncing execution, + // so we cannot forward any loads beyond the memory barrier. 
+ if (semantics || opcode == OpControlBarrier) + { + assert(current_emitting_block); + flush_control_dependent_expressions(current_emitting_block->self); + flush_all_active_variables(); + } + + if (opcode == OpControlBarrier) + { + // We cannot emit just execution barrier, for no memory semantics pick the cheapest option. + if (semantics == MemorySemanticsWorkgroupMemoryMask || semantics == 0) + statement("GroupMemoryBarrierWithGroupSync();"); + else if (semantics != 0 && (semantics & MemorySemanticsWorkgroupMemoryMask) == 0) + statement("DeviceMemoryBarrierWithGroupSync();"); + else + statement("AllMemoryBarrierWithGroupSync();"); + } + else + { + if (semantics == MemorySemanticsWorkgroupMemoryMask) + statement("GroupMemoryBarrier();"); + else if (semantics != 0 && (semantics & MemorySemanticsWorkgroupMemoryMask) == 0) + statement("DeviceMemoryBarrier();"); + else + statement("AllMemoryBarrier();"); + } + break; + } + + case OpBitFieldInsert: + { + if (!requires_bitfield_insert) + { + requires_bitfield_insert = true; + force_recompile(); + } + + auto expr = join("spvBitfieldInsert(", to_expression(ops[2]), ", ", to_expression(ops[3]), ", ", + to_expression(ops[4]), ", ", to_expression(ops[5]), ")"); + + bool forward = + should_forward(ops[2]) && should_forward(ops[3]) && should_forward(ops[4]) && should_forward(ops[5]); + + auto &restype = get(ops[0]); + expr = bitcast_expression(restype, SPIRType::UInt, expr); + emit_op(ops[0], ops[1], expr, forward); + break; + } + + case OpBitFieldSExtract: + case OpBitFieldUExtract: + { + if (!requires_bitfield_extract) + { + requires_bitfield_extract = true; + force_recompile(); + } + + if (opcode == OpBitFieldSExtract) + HLSL_TFOP(spvBitfieldSExtract); + else + HLSL_TFOP(spvBitfieldUExtract); + break; + } + + case OpBitCount: + { + auto basetype = expression_type(ops[2]).basetype; + emit_unary_func_op_cast(ops[0], ops[1], ops[2], "countbits", basetype, basetype); + break; + } + + case OpBitReverse: + HLSL_UFOP(reversebits); 
+ break; + + case OpArrayLength: + { + auto *var = maybe_get(ops[2]); + if (!var) + SPIRV_CROSS_THROW("Array length must point directly to an SSBO block."); + + auto &type = get(var->basetype); + if (!has_decoration(type.self, DecorationBlock) && !has_decoration(type.self, DecorationBufferBlock)) + SPIRV_CROSS_THROW("Array length expression must point to a block type."); + + // This must be 32-bit uint, so we're good to go. + emit_uninitialized_temporary_expression(ops[0], ops[1]); + statement(to_expression(ops[2]), ".GetDimensions(", to_expression(ops[1]), ");"); + uint32_t offset = type_struct_member_offset(type, ops[3]); + uint32_t stride = type_struct_member_array_stride(type, ops[3]); + statement(to_expression(ops[1]), " = (", to_expression(ops[1]), " - ", offset, ") / ", stride, ";"); + break; + } + + case OpIsHelperInvocationEXT: + SPIRV_CROSS_THROW("helperInvocationEXT() is not supported in HLSL."); + + case OpBeginInvocationInterlockEXT: + case OpEndInvocationInterlockEXT: + if (hlsl_options.shader_model < 51) + SPIRV_CROSS_THROW("Rasterizer order views require Shader Model 5.1."); + break; // Nothing to do in the body + + default: + CompilerGLSL::emit_instruction(instruction); + break; + } +} + +void CompilerHLSL::require_texture_query_variant(uint32_t var_id) +{ + if (const auto *var = maybe_get_backing_variable(var_id)) + var_id = var->self; + + auto &type = expression_type(var_id); + bool uav = type.image.sampled == 2; + if (hlsl_options.nonwritable_uav_texture_as_srv && has_decoration(var_id, DecorationNonWritable)) + uav = false; + + uint32_t bit = 0; + switch (type.image.dim) + { + case Dim1D: + bit = type.image.arrayed ? Query1DArray : Query1D; + break; + + case Dim2D: + if (type.image.ms) + bit = type.image.arrayed ? Query2DMSArray : Query2DMS; + else + bit = type.image.arrayed ? Query2DArray : Query2D; + break; + + case Dim3D: + bit = Query3D; + break; + + case DimCube: + bit = type.image.arrayed ? 
QueryCubeArray : QueryCube; + break; + + case DimBuffer: + bit = QueryBuffer; + break; + + default: + SPIRV_CROSS_THROW("Unsupported query type."); + } + + switch (get(type.image.type).basetype) + { + case SPIRType::Float: + bit += QueryTypeFloat; + break; + + case SPIRType::Int: + bit += QueryTypeInt; + break; + + case SPIRType::UInt: + bit += QueryTypeUInt; + break; + + default: + SPIRV_CROSS_THROW("Unsupported query type."); + } + + auto norm_state = image_format_to_normalized_state(type.image.format); + auto &variant = uav ? required_texture_size_variants + .uav[uint32_t(norm_state)][image_format_to_components(type.image.format) - 1] : + required_texture_size_variants.srv; + + uint64_t mask = 1ull << bit; + if ((variant & mask) == 0) + { + force_recompile(); + variant |= mask; + } +} + +void CompilerHLSL::set_root_constant_layouts(std::vector layout) +{ + root_constants_layout = move(layout); +} + +void CompilerHLSL::add_vertex_attribute_remap(const HLSLVertexAttributeRemap &vertex_attributes) +{ + remap_vertex_attributes.push_back(vertex_attributes); +} + +VariableID CompilerHLSL::remap_num_workgroups_builtin() +{ + update_active_builtins(); + + if (!active_input_builtins.get(BuiltInNumWorkgroups)) + return 0; + + // Create a new, fake UBO. 
+ uint32_t offset = ir.increase_bound_by(4); + + uint32_t uint_type_id = offset; + uint32_t block_type_id = offset + 1; + uint32_t block_pointer_type_id = offset + 2; + uint32_t variable_id = offset + 3; + + SPIRType uint_type; + uint_type.basetype = SPIRType::UInt; + uint_type.width = 32; + uint_type.vecsize = 3; + uint_type.columns = 1; + set(uint_type_id, uint_type); + + SPIRType block_type; + block_type.basetype = SPIRType::Struct; + block_type.member_types.push_back(uint_type_id); + set(block_type_id, block_type); + set_decoration(block_type_id, DecorationBlock); + set_member_name(block_type_id, 0, "count"); + set_member_decoration(block_type_id, 0, DecorationOffset, 0); + + SPIRType block_pointer_type = block_type; + block_pointer_type.pointer = true; + block_pointer_type.storage = StorageClassUniform; + block_pointer_type.parent_type = block_type_id; + auto &ptr_type = set(block_pointer_type_id, block_pointer_type); + + // Preserve self. + ptr_type.self = block_type_id; + + set(variable_id, block_pointer_type_id, StorageClassUniform); + ir.meta[variable_id].decoration.alias = "SPIRV_Cross_NumWorkgroups"; + + num_workgroups_builtin = variable_id; + return variable_id; +} + +void CompilerHLSL::set_resource_binding_flags(HLSLBindingFlags flags) +{ + resource_binding_flags = flags; +} + +void CompilerHLSL::validate_shader_model() +{ + // Check for nonuniform qualifier. + // Instead of looping over all decorations to find this, just look at capabilities. 
+	for (auto &cap : ir.declared_capabilities)
+	{
+		switch (cap)
+		{
+		case CapabilityShaderNonUniformEXT:
+		case CapabilityRuntimeDescriptorArrayEXT:
+			if (hlsl_options.shader_model < 51)
+				SPIRV_CROSS_THROW(
+				    "Shader model 5.1 or higher is required to use bindless resources or NonUniformResourceIndex.");
+			break;
+
+		case CapabilityVariablePointers:
+		case CapabilityVariablePointersStorageBuffer:
+			SPIRV_CROSS_THROW("VariablePointers capability is not supported in HLSL.");
+
+		default:
+			break;
+		}
+	}
+
+	if (ir.addressing_model != AddressingModelLogical)
+		SPIRV_CROSS_THROW("Only Logical addressing model can be used with HLSL.");
+
+	if (hlsl_options.enable_16bit_types && hlsl_options.shader_model < 62)
+		SPIRV_CROSS_THROW("Need at least shader model 6.2 when enabling native 16-bit type support.");
+}
+// Translates the module to HLSL text; emission is repeated while a recompile is forced and throws on a 4th pass.
+string CompilerHLSL::compile()
+{
+	ir.fixup_reserved_names();
+
+	// Do not deal with ES-isms like precision, older extensions and such.
+	options.es = false;
+	options.version = 450;
+	options.vulkan_semantics = true;
+	backend.float_literal_suffix = true;
+	backend.double_literal_suffix = false;
+	backend.long_long_literal_suffix = true;
+	backend.uint32_t_literal_suffix = true;
+	backend.int16_t_literal_suffix = "";
+	backend.uint16_t_literal_suffix = "u";
+	backend.basic_int_type = "int";
+	backend.basic_uint_type = "uint";
+	backend.demote_literal = "discard";
+	backend.boolean_mix_function = "";
+	backend.swizzle_is_function = false;
+	backend.shared_is_implied = true;
+	backend.unsized_array_supported = true;
+	backend.explicit_struct_type = false;
+	backend.use_initializer_list = true;
+	backend.use_constructor_splatting = false;
+	backend.can_swizzle_scalar = true;
+	backend.can_declare_struct_inline = false;
+	backend.can_declare_arrays_inline = false;
+	backend.can_return_array = false;
+	backend.nonuniform_qualifier = "NonUniformResourceIndex";
+	backend.support_case_fallthrough = false;
+
+	fixup_type_alias();
+	reorder_type_alias();
+	build_function_control_flow_graphs_and_analyze();
+	validate_shader_model();
+	update_active_builtins();
+	analyze_image_and_sampler_usage();
+	analyze_interlocked_resource_usage();
+
+	// Subpass input needs SV_Position.
+	if (need_subpass_input)
+		active_input_builtins.set(BuiltInFragCoord);
+
+	uint32_t pass_count = 0;
+	do
+	{
+		if (pass_count >= 3)
+			SPIRV_CROSS_THROW("Over 3 compilation loops detected. Must be a bug!");
+
+		reset();
+
+		// Move constructor for this type is broken on GCC 4.9 ...
+		buffer.reset();
+
+		emit_header();
+		emit_resources();
+
+		emit_function(get(ir.default_entry_point), Bitset());
+		emit_hlsl_entry_point();
+
+		pass_count++;
+	} while (is_forcing_recompilation());
+
+	// Entry point in HLSL is always main() for the time being.
+	get_entry_point().name = "main";
+
+	return buffer.str();
+}
+// Emits the HLSL attribute ([flatten], [branch], [unroll] or [loop]) matching block.hint; other hints emit nothing.
+void CompilerHLSL::emit_block_hints(const SPIRBlock &block)
+{
+	switch (block.hint)
+	{
+	case SPIRBlock::HintFlatten:
+		statement("[flatten]");
+		break;
+	case SPIRBlock::HintDontFlatten:
+		statement("[branch]");
+		break;
+	case SPIRBlock::HintUnroll:
+		statement("[unroll]");
+		break;
+	case SPIRBlock::HintDontUnroll:
+		statement("[loop]");
+		break;
+	default:
+		break;
+	}
+}
+// Builds a name from "_", the current unique_identifier_count (post-incremented here) and "ident" via join().
+string CompilerHLSL::get_unique_identifier()
+{
+	return join("_", unique_identifier_count++, "ident");
+}
+// Stores binding under its (stage, desc_set, binding) key, replacing any previous entry; the paired flag starts false.
+void CompilerHLSL::add_hlsl_resource_binding(const HLSLResourceBinding &binding)
+{
+	StageSetBinding tuple = { binding.stage, binding.desc_set, binding.binding };
+	resource_bindings[tuple] = { binding, false };
+}
+// True only if an entry exists for (model, desc_set, binding) and its paired flag has been set to true.
+bool CompilerHLSL::is_hlsl_resource_binding_used(ExecutionModel model, uint32_t desc_set, uint32_t binding) const
+{
+	StageSetBinding tuple = { model, desc_set, binding };
+	auto itr = resource_bindings.find(tuple);
+	return itr != end(resource_bindings) && itr->second.second;
+}
+// Classifies a bitcast: 2-component UInt -> UInt64 is a pack, UInt64 -> 2-component UInt an unpack, else TypeNormal.
+CompilerHLSL::BitcastType CompilerHLSL::get_bitcast_type(uint32_t result_type, uint32_t op0)
+{
+	auto &rslt_type = get(result_type);
+	auto &expr_type = expression_type(op0);
+
+	if (rslt_type.basetype == SPIRType::BaseType::UInt64 && expr_type.basetype == SPIRType::BaseType::UInt &&
+	    expr_type.vecsize == 2)
+		return BitcastType::TypePackUint2x32;
+	else if (rslt_type.basetype == SPIRType::BaseType::UInt && rslt_type.vecsize == 2 &&
+	         expr_type.basetype == SPIRType::BaseType::UInt64)
+		return BitcastType::TypeUnpackUint64;
+
+	return BitcastType::TypeNormal;
+}
+// True if the global option is set, or if id's (DescriptorSet, Binding) pair is in force_uav_buffer_bindings.
+bool CompilerHLSL::is_hlsl_force_storage_buffer_as_uav(ID id) const
+{
+	if (hlsl_options.force_storage_buffer_as_uav)
+	{
+		return true;
+	}
+
+	const uint32_t desc_set = get_decoration(id, spv::DecorationDescriptorSet);
+	const uint32_t binding = get_decoration(id, spv::DecorationBinding);
+
+	return (force_uav_buffer_bindings.find({ desc_set, binding }) != force_uav_buffer_bindings.end());
+}
+// Adds (desc_set, binding) to force_uav_buffer_bindings, which is_hlsl_force_storage_buffer_as_uav() consults.
+void CompilerHLSL::set_hlsl_force_storage_buffer_as_uav(uint32_t desc_set, uint32_t binding)
+{
+	SetBindingPair pair = { desc_set, binding };
+	force_uav_buffer_bindings.insert(pair);
+}
+// Reports BuiltInSampleMask as the only builtin that translates to a non-array.
+bool CompilerHLSL::builtin_translates_to_nonarray(spv::BuiltIn builtin) const
+{
+	return (builtin == BuiltInSampleMask);
+}
diff --git a/dep/spirv-cross/spirv_hlsl.hpp b/dep/spirv-cross/spirv_hlsl.hpp
new file mode 100644
index 000000000..84e75e913
--- /dev/null
+++ b/dep/spirv-cross/spirv_hlsl.hpp
@@ -0,0 +1,374 @@
+/*
+ * Copyright 2016-2020 Robert Konrad
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * At your option, you may choose to accept this material under either:
+ *  1. The Apache License, Version 2.0, found at <http://www.apache.org/licenses/LICENSE-2.0>, or
+ *  2. The MIT License, found at <http://opensource.org/licenses/MIT>.
+ * SPDX-License-Identifier: Apache-2.0 OR MIT.
+ */
+
+#ifndef SPIRV_HLSL_HPP
+#define SPIRV_HLSL_HPP
+
+#include "spirv_glsl.hpp"
+#include // NOTE(review): angle-bracket text (this header name, template arguments below) is missing in this copy - restore from upstream.
+
+namespace SPIRV_CROSS_NAMESPACE
+{
+// Interface which remaps vertex inputs to a fixed semantic name to make linking easier.
+struct HLSLVertexAttributeRemap
+{
+	uint32_t location; // Vertex input location being remapped.
+	std::string semantic; // Semantic name to use for that location.
+};
+// Specifying a root constant (d3d12) or push constant range (vulkan).
+//
+// `start` and `end` denote the range of the root constant in bytes.
+// Both values need to be multiples of 4.
+struct RootConstants
+{
+	uint32_t start;
+	uint32_t end;
+
+	uint32_t binding; // NOTE(review): presumably the register index used for this range - confirm in the .cpp.
+	uint32_t space; // NOTE(review): presumably the register space used for this range - confirm in the .cpp.
+};
+
+// For finer control, decorations may be removed from specific resources instead with unset_decoration().
+enum HLSLBindingFlagBits
+{
+	HLSL_BINDING_AUTO_NONE_BIT = 0,
+
+	// Push constant (root constant) resources will be declared as CBVs (b-space) without a register() declaration.
+	// A register will be automatically assigned by the D3D compiler, but must therefore be reflected in D3D-land.
+	// Push constants do not normally have a DecorationBinding set, but if they do, this can be used to ignore it.
+	HLSL_BINDING_AUTO_PUSH_CONSTANT_BIT = 1 << 0,
+
+	// cbuffer resources will be declared as CBVs (b-space) without a register() declaration.
+	// A register will be automatically assigned, but must be reflected in D3D-land.
+	HLSL_BINDING_AUTO_CBV_BIT = 1 << 1,
+
+	// All SRVs (t-space) will be declared without a register() declaration.
+	HLSL_BINDING_AUTO_SRV_BIT = 1 << 2,
+
+	// All UAVs (u-space) will be declared without a register() declaration.
+	HLSL_BINDING_AUTO_UAV_BIT = 1 << 3,
+
+	// All samplers (s-space) will be declared without a register() declaration.
+	HLSL_BINDING_AUTO_SAMPLER_BIT = 1 << 4,
+
+	// No resources will be declared with register().
+	HLSL_BINDING_AUTO_ALL = 0x7fffffff
+};
+using HLSLBindingFlags = uint32_t; // Holds a combination of HLSLBindingFlagBits values.
+
+// By matching stage, desc_set and binding for a SPIR-V resource,
+// register bindings are set based on whether the HLSL resource is a
+// CBV, UAV, SRV or Sampler. A single binding in SPIR-V might contain multiple
+// resource types, e.g. COMBINED_IMAGE_SAMPLER, and SRV/Sampler bindings will be used respectively.
+// On SM 5.0 and lower, register_space is ignored.
+//
+// To remap a push constant block which does not have any desc_set/binding associated with it,
+// use ResourceBindingPushConstant{DescriptorSet,Binding} as values for desc_set/binding.
+// For deeper control of push constants, set_root_constant_layouts() can be used instead.
+struct HLSLResourceBinding
+{
+	spv::ExecutionModel stage = spv::ExecutionModelMax;
+	uint32_t desc_set = 0;
+	uint32_t binding = 0;
+
+	struct Binding
+	{
+		uint32_t register_space = 0;
+		uint32_t register_binding = 0;
+	} cbv, uav, srv, sampler;
+};
+
+class CompilerHLSL : public CompilerGLSL
+{
+public:
+	struct Options
+	{
+		uint32_t shader_model = 30; // TODO: map ps_4_0_level_9_0,... somehow
+
+		// Allows the PointSize builtin, and ignores it, as PointSize is not supported in HLSL.
+		bool point_size_compat = false;
+
+		// Allows the PointCoord builtin, returns float2(0.5, 0.5), as PointCoord is not supported in HLSL.
+		bool point_coord_compat = false;
+
+		// If true, the backend will assume that VertexIndex and InstanceIndex will need to apply
+		// a base offset, and you will need to fill in a cbuffer with offsets.
+		// Set to false if you know you will never use base instance or base vertex
+		// functionality as it might remove an internal cbuffer.
+		bool support_nonzero_base_vertex_base_instance = false;
+
+		// Forces a storage buffer to always be declared as UAV, even if the readonly decoration is used.
+		// By default, a readonly storage buffer will be declared as ByteAddressBuffer (SRV) instead.
+		// Alternatively, use set_hlsl_force_storage_buffer_as_uav to specify individually.
+		bool force_storage_buffer_as_uav = false;
+
+		// Forces any storage image type marked as NonWritable to be considered an SRV instead.
+		// For this to work with function call parameters, NonWritable must be considered to be part of the type system
+		// so that NonWritable image arguments are also translated to Texture rather than RWTexture.
+		bool nonwritable_uav_texture_as_srv = false;
+
+		// Enables native 16-bit types. Needs SM 6.2.
+		// Uses half/int16_t/uint16_t instead of min16* types.
+		// Also adds support for 16-bit load-store from (RW)ByteAddressBuffer.
+		bool enable_16bit_types = false;
+
+		// If matrices are used as IO variables, flatten the attribute declaration to use
+		// TEXCOORD{N,N+1,N+2,...} rather than TEXCOORDN_{0,1,2,3}.
+		// If add_vertex_attribute_remap is used and this feature is used,
+		// the semantic name will be queried once per active location.
+		bool flatten_matrix_vertex_input_semantics = false;
+	};
+	// Each constructor below forwards its arguments unchanged to the matching CompilerGLSL constructor.
+	explicit CompilerHLSL(std::vector spirv_)
+	    : CompilerGLSL(std::move(spirv_))
+	{
+	}
+
+	CompilerHLSL(const uint32_t *ir_, size_t size)
+	    : CompilerGLSL(ir_, size)
+	{
+	}
+
+	explicit CompilerHLSL(const ParsedIR &ir_)
+	    : CompilerGLSL(ir_)
+	{
+	}
+
+	explicit CompilerHLSL(ParsedIR &&ir_)
+	    : CompilerGLSL(std::move(ir_))
+	{
+	}
+	// Returns a reference to the currently stored HLSL options.
+	const Options &get_hlsl_options() const
+	{
+		return hlsl_options;
+	}
+	// Replaces the stored HLSL options with a copy of opts.
+	void set_hlsl_options(const Options &opts)
+	{
+		hlsl_options = opts;
+	}
+
+	// Optionally specify a custom root constant layout.
+	//
+	// Push constant ranges will be split up according to the
+	// layout specified.
+	void set_root_constant_layouts(std::vector layout);
+
+	// Compiles and remaps vertex attributes at specific locations to a fixed semantic.
+	// The default is TEXCOORD# where # denotes location.
+	// Matrices are unrolled to vectors with notation ${SEMANTIC}_#, where # denotes row.
+	// $SEMANTIC is either TEXCOORD# or a semantic name specified here.
+	void add_vertex_attribute_remap(const HLSLVertexAttributeRemap &vertex_attributes);
+	std::string compile() override;
+
+	// This is a special HLSL workaround for the NumWorkGroups builtin.
+	// This does not exist in HLSL, so the calling application must create a dummy cbuffer in
+	// which the application will store this builtin.
+	// The cbuffer layout will be:
+	// cbuffer SPIRV_Cross_NumWorkgroups : register(b#, space#) { uint3 SPIRV_Cross_NumWorkgroups_count; };
+	// This must be called before compile().
+	// The function returns 0 if NumWorkGroups builtin is not statically used in the shader from the current entry point.
+	// If non-zero, this returns the variable ID of a cbuffer which corresponds to
+	// the cbuffer declared above. By default, no binding or descriptor set decoration is set,
+	// so the calling application should declare explicit bindings on this ID before calling compile().
+	VariableID remap_num_workgroups_builtin();
+
+	// Controls how resource bindings are declared in the output HLSL.
+	void set_resource_binding_flags(HLSLBindingFlags flags);
+
+	// resource is a resource binding to indicate the HLSL CBV, SRV, UAV or sampler binding
+	// to use for a particular SPIR-V descriptor set
+	// and binding. If resource bindings are provided,
+	// is_hlsl_resource_binding_used() will return true after calling ::compile() if
+	// the set/binding combination was used by the HLSL code.
+	void add_hlsl_resource_binding(const HLSLResourceBinding &resource);
+	bool is_hlsl_resource_binding_used(spv::ExecutionModel model, uint32_t set, uint32_t binding) const;
+
+	// Controls which storage buffer bindings will be forced to be declared as UAVs.
+	void set_hlsl_force_storage_buffer_as_uav(uint32_t desc_set, uint32_t binding);
+
+private:
+	std::string type_to_glsl(const SPIRType &type, uint32_t id = 0) override;
+	std::string image_type_hlsl(const SPIRType &type, uint32_t id);
+	std::string image_type_hlsl_modern(const SPIRType &type, uint32_t id);
+	std::string image_type_hlsl_legacy(const SPIRType &type, uint32_t id);
+	void emit_function_prototype(SPIRFunction &func, const Bitset &return_flags) override;
+	void emit_hlsl_entry_point();
+	void emit_header() override;
+	void emit_resources();
+	void declare_undefined_values() override;
+	void emit_interface_block_globally(const SPIRVariable &type);
+	void emit_interface_block_in_struct(const SPIRVariable &type, std::unordered_set &active_locations);
+	void emit_builtin_inputs_in_struct();
+	void emit_builtin_outputs_in_struct();
+	void emit_texture_op(const Instruction &i, bool sparse) override;
+	void emit_instruction(const Instruction &instruction) override;
+	void emit_glsl_op(uint32_t result_type, uint32_t result_id, uint32_t op, const uint32_t *args,
+	                  uint32_t count) override;
+	void emit_buffer_block(const SPIRVariable &type) override;
+	void emit_push_constant_block(const SPIRVariable &var) override;
+	void emit_uniform(const SPIRVariable &var) override;
+	void emit_modern_uniform(const SPIRVariable &var);
+	void emit_legacy_uniform(const SPIRVariable &var);
+	void emit_specialization_constants_and_structs();
+	void emit_composite_constants();
+	void emit_fixup() override;
+	std::string builtin_to_glsl(spv::BuiltIn builtin, spv::StorageClass storage) override;
+	std::string layout_for_member(const SPIRType &type, uint32_t index) override;
+	std::string to_interpolation_qualifiers(const Bitset &flags) override;
+	std::string bitcast_glsl_op(const SPIRType &result_type, const SPIRType &argument_type) override;
+	bool emit_complex_bitcast(uint32_t result_type, uint32_t id, uint32_t op0) override;
+	std::string to_func_call_arg(const SPIRFunction::Parameter &arg, uint32_t id) override;
+	std::string to_sampler_expression(uint32_t id);
+	std::string to_resource_binding(const SPIRVariable &var);
+	std::string to_resource_binding_sampler(const SPIRVariable &var);
+	std::string to_resource_register(HLSLBindingFlagBits flag, char space, uint32_t binding, uint32_t set);
+	void emit_sampled_image_op(uint32_t result_type, uint32_t result_id, uint32_t image_id, uint32_t samp_id) override;
+	void emit_access_chain(const Instruction &instruction);
+	void emit_load(const Instruction &instruction);
+	void read_access_chain(std::string *expr, const std::string &lhs, const SPIRAccessChain &chain);
+	void read_access_chain_struct(const std::string &lhs, const SPIRAccessChain &chain);
+	void read_access_chain_array(const std::string &lhs, const SPIRAccessChain &chain);
+	void write_access_chain(const SPIRAccessChain &chain, uint32_t value, const SmallVector &composite_chain);
+	void write_access_chain_struct(const SPIRAccessChain &chain, uint32_t value,
+	                               const SmallVector &composite_chain);
+	void write_access_chain_array(const SPIRAccessChain &chain, uint32_t value,
+	                              const SmallVector &composite_chain);
+	std::string write_access_chain_value(uint32_t value, const SmallVector &composite_chain, bool enclose);
+	void emit_store(const Instruction &instruction);
+	void emit_atomic(const uint32_t *ops, uint32_t length, spv::Op op);
+	void emit_subgroup_op(const Instruction &i) override;
+	void emit_block_hints(const SPIRBlock &block) override;
+
+	void emit_struct_member(const SPIRType &type, uint32_t member_type_id, uint32_t index, const std::string &qualifier,
+	                        uint32_t base_offset = 0) override;
+
+	const char *to_storage_qualifiers_glsl(const SPIRVariable &var) override;
+	void replace_illegal_names() override;
+
+	bool is_hlsl_force_storage_buffer_as_uav(ID id) const;
+
+	Options hlsl_options;
+
+	// TODO: Refactor this to be more similar to MSL, maybe have some common system in place?
+	bool requires_op_fmod = false;
+	bool requires_fp16_packing = false;
+	bool requires_uint2_packing = false;
+	bool requires_explicit_fp16_packing = false;
+	bool requires_unorm8_packing = false;
+	bool requires_snorm8_packing = false;
+	bool requires_unorm16_packing = false;
+	bool requires_snorm16_packing = false;
+	bool requires_bitfield_insert = false;
+	bool requires_bitfield_extract = false;
+	bool requires_inverse_2x2 = false;
+	bool requires_inverse_3x3 = false;
+	bool requires_inverse_4x4 = false;
+	bool requires_scalar_reflect = false;
+	bool requires_scalar_refract = false;
+	bool requires_scalar_faceforward = false;
+
+	struct TextureSizeVariants
+	{
+		// MSVC 2013 workaround.
+		TextureSizeVariants()
+		{
+			srv = 0; // The constructor zeroes srv and every element of uav.
+			for (auto &unorm : uav)
+				for (auto &u : unorm)
+					u = 0;
+		}
+		uint64_t srv;
+		uint64_t uav[3][4];
+	} required_texture_size_variants;
+
+	void require_texture_query_variant(uint32_t var_id);
+	void emit_texture_size_variants(uint64_t variant_mask, const char *vecsize_qualifier, bool uav,
+	                                const char *type_qualifier);
+
+	enum TextureQueryVariantDim
+	{
+		Query1D = 0,
+		Query1DArray,
+		Query2D,
+		Query2DArray,
+		Query3D,
+		QueryBuffer,
+		QueryCube,
+		QueryCubeArray,
+		Query2DMS,
+		Query2DMSArray,
+		QueryDimCount
+	};
+	// NOTE(review): 0/16/32 look like bit offsets into the uint64_t variant masks - confirm in require_texture_query_variant().
+	enum TextureQueryVariantType
+	{
+		QueryTypeFloat = 0,
+		QueryTypeInt = 16,
+		QueryTypeUInt = 32,
+		QueryTypeCount = 3
+	};
+	// Return type of get_bitcast_type().
+	enum BitcastType
+	{
+		TypeNormal,
+		TypePackUint2x32,
+		TypeUnpackUint64
+	};
+
+	BitcastType get_bitcast_type(uint32_t result_type, uint32_t op0);
+
+	void emit_builtin_variables();
+	bool require_output = false;
+	bool require_input = false;
+	SmallVector remap_vertex_attributes;
+
+	uint32_t type_to_consumed_locations(const SPIRType &type) const;
+
+	void emit_io_block(const SPIRVariable &var);
+	std::string to_semantic(uint32_t location, spv::ExecutionModel em, spv::StorageClass sc);
+
+	uint32_t num_workgroups_builtin = 0;
+	HLSLBindingFlags resource_binding_flags = 0;
+
+	// Custom root constant layout, which should be emitted
+	// when translating push constant ranges.
+	std::vector root_constants_layout;
+
+	void validate_shader_model();
+
+	std::string get_unique_identifier();
+	uint32_t unique_identifier_count = 0;
+
+	std::unordered_map, InternalHasher> resource_bindings;
+	void remap_hlsl_resource_binding(HLSLBindingFlagBits type, uint32_t &desc_set, uint32_t &binding);
+
+	std::unordered_set force_uav_buffer_bindings;
+
+	// Returns true for BuiltInSampleMask because gl_SampleMask[] is an array in SPIR-V, but SV_Coverage is a scalar in HLSL.
+	bool builtin_translates_to_nonarray(spv::BuiltIn builtin) const override;
+};
+} // namespace SPIRV_CROSS_NAMESPACE
+
+#endif
diff --git a/dep/spirv-cross/spirv_msl.cpp b/dep/spirv-cross/spirv_msl.cpp
new file mode 100644
index 000000000..b0709d590
--- /dev/null
+++ b/dep/spirv-cross/spirv_msl.cpp
@@ -0,0 +1,14792 @@
+/*
+ * Copyright 2016-2020 The Brenwill Workshop Ltd.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * At your option, you may choose to accept this material under either:
+ *  1. The Apache License, Version 2.0, found at <http://www.apache.org/licenses/LICENSE-2.0>, or
+ *  2. The MIT License, found at <http://opensource.org/licenses/MIT>.
+ * SPDX-License-Identifier: Apache-2.0 OR MIT.
+ */
+
+#include "spirv_msl.hpp"
+#include "GLSL.std.450.h"
+
+#include // NOTE(review): the three header names here are missing in this copy (angle-bracket text stripped) - restore from upstream.
+#include
+#include
+
+using namespace spv;
+using namespace SPIRV_CROSS_NAMESPACE;
+using namespace std;
+
+static const uint32_t k_unknown_location = ~0u; // All-ones sentinel value.
+static const uint32_t k_unknown_component = ~0u; // All-ones sentinel value.
+static const char *force_inline = "static inline __attribute__((always_inline))";
+// Each constructor below forwards its arguments to the matching CompilerGLSL constructor.
+CompilerMSL::CompilerMSL(std::vector spirv_)
+    : CompilerGLSL(move(spirv_))
+{
+}
+
+CompilerMSL::CompilerMSL(const uint32_t *ir_, size_t word_count)
+    : CompilerGLSL(ir_, word_count)
+{
+}
+
+CompilerMSL::CompilerMSL(const ParsedIR &ir_)
+    : CompilerGLSL(ir_)
+{
+}
+
+CompilerMSL::CompilerMSL(ParsedIR &&ir_)
+    : CompilerGLSL(std::move(ir_))
+{
+}
+// Stores si by location (overwriting); stores it by builtin only if si.builtin != BuiltInMax and none is stored yet.
+void CompilerMSL::add_msl_shader_input(const MSLShaderInput &si)
+{
+	inputs_by_location[si.location] = si;
+	if (si.builtin != BuiltInMax && !inputs_by_builtin.count(si.builtin))
+		inputs_by_builtin[si.builtin] = si;
+}
+// Stores binding under its (stage, desc_set, binding) key, replacing any previous entry; the paired flag starts false.
+void CompilerMSL::add_msl_resource_binding(const MSLResourceBinding &binding)
+{
+	StageSetBinding tuple = { binding.stage, binding.desc_set, binding.binding };
+	resource_bindings[tuple] = { binding, false };
+}
+// Records { index, 0 } for (desc_set, binding) in buffers_requiring_dynamic_offset, replacing any previous entry.
+void CompilerMSL::add_dynamic_buffer(uint32_t desc_set, uint32_t binding, uint32_t index)
+{
+	SetBindingPair pair = { desc_set, binding };
+	buffers_requiring_dynamic_offset[pair] = { index, 0 };
+}
+// Adds (desc_set, binding) to inline_uniform_blocks.
+void CompilerMSL::add_inline_uniform_block(uint32_t desc_set, uint32_t binding)
+{
+	SetBindingPair pair = { desc_set, binding };
+	inline_uniform_blocks.insert(pair);
+}
+// Sets bit desc_set in argument_buffer_discrete_mask; values >= kMaxArgumentBuffers are silently ignored.
+void CompilerMSL::add_discrete_descriptor_set(uint32_t desc_set)
+{
+	if (desc_set < kMaxArgumentBuffers)
+		argument_buffer_discrete_mask |= 1u << desc_set;
+}
+// Sets (device_storage) or clears bit desc_set in argument_buffer_device_storage_mask; out-of-range sets are ignored.
+void CompilerMSL::set_argument_buffer_device_address_space(uint32_t desc_set, bool device_storage)
+{
+	if (desc_set < kMaxArgumentBuffers)
+	{
+		if (device_storage)
+			argument_buffer_device_storage_mask |= 1u << desc_set;
+		else
+			argument_buffer_device_storage_mask &= ~(1u << desc_set);
+	}
+}
+
+bool CompilerMSL::is_msl_shader_input_used(uint32_t location)
+{ // True if location is present in inputs_in_use.
+	return inputs_in_use.count(location) != 0;
+}
+// True only if an entry exists for (model, desc_set, binding) and its paired flag has been set to true.
+bool CompilerMSL::is_msl_resource_binding_used(ExecutionModel model, uint32_t desc_set, uint32_t binding) const
+{
+	StageSetBinding tuple = { model, desc_set, binding };
+	auto itr = resource_bindings.find(tuple);
+	return itr != end(resource_bindings) && itr->second.second;
+}
+
+// Returns the size of the array of resources used by the variable with the specified id.
+// The value is the count of the binding added using add_msl_resource_binding(), or 0 if none matches.
+uint32_t CompilerMSL::get_resource_array_size(uint32_t id) const
+{
+	StageSetBinding tuple = { get_entry_point().model, get_decoration(id, DecorationDescriptorSet),
+		                      get_decoration(id, DecorationBinding) };
+	auto itr = resource_bindings.find(tuple);
+	return itr != end(resource_bindings) ? itr->second.first.count : 0;
+}
+// The four getters below return id's ResourceIndex{Primary,Secondary,Tertiary,Quaternary} extended decoration.
+uint32_t CompilerMSL::get_automatic_msl_resource_binding(uint32_t id) const
+{
+	return get_extended_decoration(id, SPIRVCrossDecorationResourceIndexPrimary);
+}
+
+uint32_t CompilerMSL::get_automatic_msl_resource_binding_secondary(uint32_t id) const
+{
+	return get_extended_decoration(id, SPIRVCrossDecorationResourceIndexSecondary);
+}
+
+uint32_t CompilerMSL::get_automatic_msl_resource_binding_tertiary(uint32_t id) const
+{
+	return get_extended_decoration(id, SPIRVCrossDecorationResourceIndexTertiary);
+}
+
+uint32_t CompilerMSL::get_automatic_msl_resource_binding_quaternary(uint32_t id) const
+{
+	return get_extended_decoration(id, SPIRVCrossDecorationResourceIndexQuaternary);
+}
+// Records components for location in fragment_output_components, replacing any earlier value.
+void CompilerMSL::set_fragment_output_components(uint32_t location, uint32_t components)
+{
+	fragment_output_components[location] = components;
+}
+// Reports BuiltInSampleMask as the only builtin that translates to a non-array.
+bool CompilerMSL::builtin_translates_to_nonarray(spv::BuiltIn builtin) const
+{
+	return (builtin == BuiltInSampleMask);
+}
+
+void CompilerMSL::build_implicit_builtins()
+{
+	bool need_sample_pos =
active_input_builtins.get(BuiltInSamplePosition); + bool need_vertex_params = capture_output_to_buffer && get_execution_model() == ExecutionModelVertex && + !msl_options.vertex_for_tessellation; + bool need_tesc_params = get_execution_model() == ExecutionModelTessellationControl; + bool need_subgroup_mask = + active_input_builtins.get(BuiltInSubgroupEqMask) || active_input_builtins.get(BuiltInSubgroupGeMask) || + active_input_builtins.get(BuiltInSubgroupGtMask) || active_input_builtins.get(BuiltInSubgroupLeMask) || + active_input_builtins.get(BuiltInSubgroupLtMask); + bool need_subgroup_ge_mask = !msl_options.is_ios() && (active_input_builtins.get(BuiltInSubgroupGeMask) || + active_input_builtins.get(BuiltInSubgroupGtMask)); + bool need_multiview = get_execution_model() == ExecutionModelVertex && !msl_options.view_index_from_device_index && + msl_options.multiview_layered_rendering && + (msl_options.multiview || active_input_builtins.get(BuiltInViewIndex)); + bool need_dispatch_base = + msl_options.dispatch_base && get_execution_model() == ExecutionModelGLCompute && + (active_input_builtins.get(BuiltInWorkgroupId) || active_input_builtins.get(BuiltInGlobalInvocationId)); + bool need_grid_params = get_execution_model() == ExecutionModelVertex && msl_options.vertex_for_tessellation; + bool need_vertex_base_params = + need_grid_params && + (active_input_builtins.get(BuiltInVertexId) || active_input_builtins.get(BuiltInVertexIndex) || + active_input_builtins.get(BuiltInBaseVertex) || active_input_builtins.get(BuiltInInstanceId) || + active_input_builtins.get(BuiltInInstanceIndex) || active_input_builtins.get(BuiltInBaseInstance)); + bool need_sample_mask = msl_options.additional_fixed_sample_mask != 0xffffffff; + bool need_local_invocation_index = msl_options.emulate_subgroups && active_input_builtins.get(BuiltInSubgroupId); + bool need_workgroup_size = msl_options.emulate_subgroups && active_input_builtins.get(BuiltInNumSubgroups); + if (need_subpass_input || 
need_sample_pos || need_subgroup_mask || need_vertex_params || need_tesc_params || + need_multiview || need_dispatch_base || need_vertex_base_params || need_grid_params || needs_sample_id || + needs_subgroup_invocation_id || needs_subgroup_size || need_sample_mask || need_local_invocation_index || + need_workgroup_size) + { + bool has_frag_coord = false; + bool has_sample_id = false; + bool has_vertex_idx = false; + bool has_base_vertex = false; + bool has_instance_idx = false; + bool has_base_instance = false; + bool has_invocation_id = false; + bool has_primitive_id = false; + bool has_subgroup_invocation_id = false; + bool has_subgroup_size = false; + bool has_view_idx = false; + bool has_layer = false; + bool has_local_invocation_index = false; + bool has_workgroup_size = false; + uint32_t workgroup_id_type = 0; + + // FIXME: Investigate the fact that there are no checks for the entry point interface variables. + ir.for_each_typed_id([&](uint32_t, SPIRVariable &var) { + if (!ir.meta[var.self].decoration.builtin) + return; + + BuiltIn builtin = ir.meta[var.self].decoration.builtin_type; + + if (var.storage == StorageClassOutput) + { + if (need_sample_mask && builtin == BuiltInSampleMask) + { + builtin_sample_mask_id = var.self; + mark_implicit_builtin(StorageClassOutput, BuiltInSampleMask, var.self); + does_shader_write_sample_mask = true; + } + } + + if (var.storage != StorageClassInput) + return; + + // Use Metal's native frame-buffer fetch API for subpass inputs. 
+ if (need_subpass_input && (!msl_options.use_framebuffer_fetch_subpasses)) + { + switch (builtin) + { + case BuiltInFragCoord: + mark_implicit_builtin(StorageClassInput, BuiltInFragCoord, var.self); + builtin_frag_coord_id = var.self; + has_frag_coord = true; + break; + case BuiltInLayer: + if (!msl_options.arrayed_subpass_input || msl_options.multiview) + break; + mark_implicit_builtin(StorageClassInput, BuiltInLayer, var.self); + builtin_layer_id = var.self; + has_layer = true; + break; + case BuiltInViewIndex: + if (!msl_options.multiview) + break; + mark_implicit_builtin(StorageClassInput, BuiltInViewIndex, var.self); + builtin_view_idx_id = var.self; + has_view_idx = true; + break; + default: + break; + } + } + + if ((need_sample_pos || needs_sample_id) && builtin == BuiltInSampleId) + { + builtin_sample_id_id = var.self; + mark_implicit_builtin(StorageClassInput, BuiltInSampleId, var.self); + has_sample_id = true; + } + + if (need_vertex_params) + { + switch (builtin) + { + case BuiltInVertexIndex: + builtin_vertex_idx_id = var.self; + mark_implicit_builtin(StorageClassInput, BuiltInVertexIndex, var.self); + has_vertex_idx = true; + break; + case BuiltInBaseVertex: + builtin_base_vertex_id = var.self; + mark_implicit_builtin(StorageClassInput, BuiltInBaseVertex, var.self); + has_base_vertex = true; + break; + case BuiltInInstanceIndex: + builtin_instance_idx_id = var.self; + mark_implicit_builtin(StorageClassInput, BuiltInInstanceIndex, var.self); + has_instance_idx = true; + break; + case BuiltInBaseInstance: + builtin_base_instance_id = var.self; + mark_implicit_builtin(StorageClassInput, BuiltInBaseInstance, var.self); + has_base_instance = true; + break; + default: + break; + } + } + + if (need_tesc_params) + { + switch (builtin) + { + case BuiltInInvocationId: + builtin_invocation_id_id = var.self; + mark_implicit_builtin(StorageClassInput, BuiltInInvocationId, var.self); + has_invocation_id = true; + break; + case BuiltInPrimitiveId: + 
builtin_primitive_id_id = var.self; + mark_implicit_builtin(StorageClassInput, BuiltInPrimitiveId, var.self); + has_primitive_id = true; + break; + default: + break; + } + } + + if ((need_subgroup_mask || needs_subgroup_invocation_id) && builtin == BuiltInSubgroupLocalInvocationId) + { + builtin_subgroup_invocation_id_id = var.self; + mark_implicit_builtin(StorageClassInput, BuiltInSubgroupLocalInvocationId, var.self); + has_subgroup_invocation_id = true; + } + + if ((need_subgroup_ge_mask || needs_subgroup_size) && builtin == BuiltInSubgroupSize) + { + builtin_subgroup_size_id = var.self; + mark_implicit_builtin(StorageClassInput, BuiltInSubgroupSize, var.self); + has_subgroup_size = true; + } + + if (need_multiview) + { + switch (builtin) + { + case BuiltInInstanceIndex: + // The view index here is derived from the instance index. + builtin_instance_idx_id = var.self; + mark_implicit_builtin(StorageClassInput, BuiltInInstanceIndex, var.self); + has_instance_idx = true; + break; + case BuiltInBaseInstance: + // If a non-zero base instance is used, we need to adjust for it when calculating the view index. 
+ builtin_base_instance_id = var.self; + mark_implicit_builtin(StorageClassInput, BuiltInBaseInstance, var.self); + has_base_instance = true; + break; + case BuiltInViewIndex: + builtin_view_idx_id = var.self; + mark_implicit_builtin(StorageClassInput, BuiltInViewIndex, var.self); + has_view_idx = true; + break; + default: + break; + } + } + + if (need_local_invocation_index && builtin == BuiltInLocalInvocationIndex) + { + builtin_local_invocation_index_id = var.self; + mark_implicit_builtin(StorageClassInput, BuiltInLocalInvocationIndex, var.self); + has_local_invocation_index = true; + } + + if (need_workgroup_size && builtin == BuiltInLocalInvocationId) + { + builtin_workgroup_size_id = var.self; + mark_implicit_builtin(StorageClassInput, BuiltInWorkgroupSize, var.self); + has_workgroup_size = true; + } + + // The base workgroup needs to have the same type and vector size + // as the workgroup or invocation ID, so keep track of the type that + // was used. + if (need_dispatch_base && workgroup_id_type == 0 && + (builtin == BuiltInWorkgroupId || builtin == BuiltInGlobalInvocationId)) + workgroup_id_type = var.basetype; + }); + + // Use Metal's native frame-buffer fetch API for subpass inputs. + if ((!has_frag_coord || (msl_options.multiview && !has_view_idx) || + (msl_options.arrayed_subpass_input && !msl_options.multiview && !has_layer)) && + (!msl_options.use_framebuffer_fetch_subpasses) && need_subpass_input) + { + if (!has_frag_coord) + { + uint32_t offset = ir.increase_bound_by(3); + uint32_t type_id = offset; + uint32_t type_ptr_id = offset + 1; + uint32_t var_id = offset + 2; + + // Create gl_FragCoord. 
+ SPIRType vec4_type; + vec4_type.basetype = SPIRType::Float; + vec4_type.width = 32; + vec4_type.vecsize = 4; + set(type_id, vec4_type); + + SPIRType vec4_type_ptr; + vec4_type_ptr = vec4_type; + vec4_type_ptr.pointer = true; + vec4_type_ptr.parent_type = type_id; + vec4_type_ptr.storage = StorageClassInput; + auto &ptr_type = set(type_ptr_id, vec4_type_ptr); + ptr_type.self = type_id; + + set(var_id, type_ptr_id, StorageClassInput); + set_decoration(var_id, DecorationBuiltIn, BuiltInFragCoord); + builtin_frag_coord_id = var_id; + mark_implicit_builtin(StorageClassInput, BuiltInFragCoord, var_id); + } + + if (!has_layer && msl_options.arrayed_subpass_input && !msl_options.multiview) + { + uint32_t offset = ir.increase_bound_by(2); + uint32_t type_ptr_id = offset; + uint32_t var_id = offset + 1; + + // Create gl_Layer. + SPIRType uint_type_ptr; + uint_type_ptr = get_uint_type(); + uint_type_ptr.pointer = true; + uint_type_ptr.parent_type = get_uint_type_id(); + uint_type_ptr.storage = StorageClassInput; + auto &ptr_type = set(type_ptr_id, uint_type_ptr); + ptr_type.self = get_uint_type_id(); + + set(var_id, type_ptr_id, StorageClassInput); + set_decoration(var_id, DecorationBuiltIn, BuiltInLayer); + builtin_layer_id = var_id; + mark_implicit_builtin(StorageClassInput, BuiltInLayer, var_id); + } + + if (!has_view_idx && msl_options.multiview) + { + uint32_t offset = ir.increase_bound_by(2); + uint32_t type_ptr_id = offset; + uint32_t var_id = offset + 1; + + // Create gl_ViewIndex. 
+ SPIRType uint_type_ptr; + uint_type_ptr = get_uint_type(); + uint_type_ptr.pointer = true; + uint_type_ptr.parent_type = get_uint_type_id(); + uint_type_ptr.storage = StorageClassInput; + auto &ptr_type = set(type_ptr_id, uint_type_ptr); + ptr_type.self = get_uint_type_id(); + + set(var_id, type_ptr_id, StorageClassInput); + set_decoration(var_id, DecorationBuiltIn, BuiltInViewIndex); + builtin_view_idx_id = var_id; + mark_implicit_builtin(StorageClassInput, BuiltInViewIndex, var_id); + } + } + + if (!has_sample_id && (need_sample_pos || needs_sample_id)) + { + uint32_t offset = ir.increase_bound_by(2); + uint32_t type_ptr_id = offset; + uint32_t var_id = offset + 1; + + // Create gl_SampleID. + SPIRType uint_type_ptr; + uint_type_ptr = get_uint_type(); + uint_type_ptr.pointer = true; + uint_type_ptr.parent_type = get_uint_type_id(); + uint_type_ptr.storage = StorageClassInput; + auto &ptr_type = set(type_ptr_id, uint_type_ptr); + ptr_type.self = get_uint_type_id(); + + set(var_id, type_ptr_id, StorageClassInput); + set_decoration(var_id, DecorationBuiltIn, BuiltInSampleId); + builtin_sample_id_id = var_id; + mark_implicit_builtin(StorageClassInput, BuiltInSampleId, var_id); + } + + if ((need_vertex_params && (!has_vertex_idx || !has_base_vertex || !has_instance_idx || !has_base_instance)) || + (need_multiview && (!has_instance_idx || !has_base_instance || !has_view_idx))) + { + uint32_t type_ptr_id = ir.increase_bound_by(1); + + SPIRType uint_type_ptr; + uint_type_ptr = get_uint_type(); + uint_type_ptr.pointer = true; + uint_type_ptr.parent_type = get_uint_type_id(); + uint_type_ptr.storage = StorageClassInput; + auto &ptr_type = set(type_ptr_id, uint_type_ptr); + ptr_type.self = get_uint_type_id(); + + if (need_vertex_params && !has_vertex_idx) + { + uint32_t var_id = ir.increase_bound_by(1); + + // Create gl_VertexIndex. 
+ set(var_id, type_ptr_id, StorageClassInput); + set_decoration(var_id, DecorationBuiltIn, BuiltInVertexIndex); + builtin_vertex_idx_id = var_id; + mark_implicit_builtin(StorageClassInput, BuiltInVertexIndex, var_id); + } + + if (need_vertex_params && !has_base_vertex) + { + uint32_t var_id = ir.increase_bound_by(1); + + // Create gl_BaseVertex. + set(var_id, type_ptr_id, StorageClassInput); + set_decoration(var_id, DecorationBuiltIn, BuiltInBaseVertex); + builtin_base_vertex_id = var_id; + mark_implicit_builtin(StorageClassInput, BuiltInBaseVertex, var_id); + } + + if (!has_instance_idx) // Needed by both multiview and tessellation + { + uint32_t var_id = ir.increase_bound_by(1); + + // Create gl_InstanceIndex. + set(var_id, type_ptr_id, StorageClassInput); + set_decoration(var_id, DecorationBuiltIn, BuiltInInstanceIndex); + builtin_instance_idx_id = var_id; + mark_implicit_builtin(StorageClassInput, BuiltInInstanceIndex, var_id); + } + + if (!has_base_instance) // Needed by both multiview and tessellation + { + uint32_t var_id = ir.increase_bound_by(1); + + // Create gl_BaseInstance. + set(var_id, type_ptr_id, StorageClassInput); + set_decoration(var_id, DecorationBuiltIn, BuiltInBaseInstance); + builtin_base_instance_id = var_id; + mark_implicit_builtin(StorageClassInput, BuiltInBaseInstance, var_id); + } + + if (need_multiview) + { + // Multiview shaders are not allowed to write to gl_Layer, ostensibly because + // it is implicitly written from gl_ViewIndex, but we have to do that explicitly. + // Note that we can't just abuse gl_ViewIndex for this purpose: it's an input, but + // gl_Layer is an output in vertex-pipeline shaders. 
+ uint32_t type_ptr_out_id = ir.increase_bound_by(2); + SPIRType uint_type_ptr_out; + uint_type_ptr_out = get_uint_type(); + uint_type_ptr_out.pointer = true; + uint_type_ptr_out.parent_type = get_uint_type_id(); + uint_type_ptr_out.storage = StorageClassOutput; + auto &ptr_out_type = set(type_ptr_out_id, uint_type_ptr_out); + ptr_out_type.self = get_uint_type_id(); + uint32_t var_id = type_ptr_out_id + 1; + set(var_id, type_ptr_out_id, StorageClassOutput); + set_decoration(var_id, DecorationBuiltIn, BuiltInLayer); + builtin_layer_id = var_id; + mark_implicit_builtin(StorageClassOutput, BuiltInLayer, var_id); + } + + if (need_multiview && !has_view_idx) + { + uint32_t var_id = ir.increase_bound_by(1); + + // Create gl_ViewIndex. + set(var_id, type_ptr_id, StorageClassInput); + set_decoration(var_id, DecorationBuiltIn, BuiltInViewIndex); + builtin_view_idx_id = var_id; + mark_implicit_builtin(StorageClassInput, BuiltInViewIndex, var_id); + } + } + + if ((need_tesc_params && (msl_options.multi_patch_workgroup || !has_invocation_id || !has_primitive_id)) || + need_grid_params) + { + uint32_t type_ptr_id = ir.increase_bound_by(1); + + SPIRType uint_type_ptr; + uint_type_ptr = get_uint_type(); + uint_type_ptr.pointer = true; + uint_type_ptr.parent_type = get_uint_type_id(); + uint_type_ptr.storage = StorageClassInput; + auto &ptr_type = set(type_ptr_id, uint_type_ptr); + ptr_type.self = get_uint_type_id(); + + if (msl_options.multi_patch_workgroup || need_grid_params) + { + uint32_t var_id = ir.increase_bound_by(1); + + // Create gl_GlobalInvocationID. + set(var_id, type_ptr_id, StorageClassInput); + set_decoration(var_id, DecorationBuiltIn, BuiltInGlobalInvocationId); + builtin_invocation_id_id = var_id; + mark_implicit_builtin(StorageClassInput, BuiltInGlobalInvocationId, var_id); + } + else if (need_tesc_params && !has_invocation_id) + { + uint32_t var_id = ir.increase_bound_by(1); + + // Create gl_InvocationID. 
+ set(var_id, type_ptr_id, StorageClassInput); + set_decoration(var_id, DecorationBuiltIn, BuiltInInvocationId); + builtin_invocation_id_id = var_id; + mark_implicit_builtin(StorageClassInput, BuiltInInvocationId, var_id); + } + + if (need_tesc_params && !has_primitive_id) + { + uint32_t var_id = ir.increase_bound_by(1); + + // Create gl_PrimitiveID. + set(var_id, type_ptr_id, StorageClassInput); + set_decoration(var_id, DecorationBuiltIn, BuiltInPrimitiveId); + builtin_primitive_id_id = var_id; + mark_implicit_builtin(StorageClassInput, BuiltInPrimitiveId, var_id); + } + + if (need_grid_params) + { + uint32_t var_id = ir.increase_bound_by(1); + + set(var_id, build_extended_vector_type(get_uint_type_id(), 3), StorageClassInput); + set_extended_decoration(var_id, SPIRVCrossDecorationBuiltInStageInputSize); + get_entry_point().interface_variables.push_back(var_id); + set_name(var_id, "spvStageInputSize"); + builtin_stage_input_size_id = var_id; + } + } + + if (!has_subgroup_invocation_id && (need_subgroup_mask || needs_subgroup_invocation_id)) + { + uint32_t offset = ir.increase_bound_by(2); + uint32_t type_ptr_id = offset; + uint32_t var_id = offset + 1; + + // Create gl_SubgroupInvocationID. + SPIRType uint_type_ptr; + uint_type_ptr = get_uint_type(); + uint_type_ptr.pointer = true; + uint_type_ptr.parent_type = get_uint_type_id(); + uint_type_ptr.storage = StorageClassInput; + auto &ptr_type = set(type_ptr_id, uint_type_ptr); + ptr_type.self = get_uint_type_id(); + + set(var_id, type_ptr_id, StorageClassInput); + set_decoration(var_id, DecorationBuiltIn, BuiltInSubgroupLocalInvocationId); + builtin_subgroup_invocation_id_id = var_id; + mark_implicit_builtin(StorageClassInput, BuiltInSubgroupLocalInvocationId, var_id); + } + + if (!has_subgroup_size && (need_subgroup_ge_mask || needs_subgroup_size)) + { + uint32_t offset = ir.increase_bound_by(2); + uint32_t type_ptr_id = offset; + uint32_t var_id = offset + 1; + + // Create gl_SubgroupSize. 
+ SPIRType uint_type_ptr; + uint_type_ptr = get_uint_type(); + uint_type_ptr.pointer = true; + uint_type_ptr.parent_type = get_uint_type_id(); + uint_type_ptr.storage = StorageClassInput; + auto &ptr_type = set(type_ptr_id, uint_type_ptr); + ptr_type.self = get_uint_type_id(); + + set(var_id, type_ptr_id, StorageClassInput); + set_decoration(var_id, DecorationBuiltIn, BuiltInSubgroupSize); + builtin_subgroup_size_id = var_id; + mark_implicit_builtin(StorageClassInput, BuiltInSubgroupSize, var_id); + } + + if (need_dispatch_base || need_vertex_base_params) + { + if (workgroup_id_type == 0) + workgroup_id_type = build_extended_vector_type(get_uint_type_id(), 3); + uint32_t var_id; + if (msl_options.supports_msl_version(1, 2)) + { + // If we have MSL 1.2, we can (ab)use the [[grid_origin]] builtin + // to convey this information and save a buffer slot. + uint32_t offset = ir.increase_bound_by(1); + var_id = offset; + + set(var_id, workgroup_id_type, StorageClassInput); + set_extended_decoration(var_id, SPIRVCrossDecorationBuiltInDispatchBase); + get_entry_point().interface_variables.push_back(var_id); + } + else + { + // Otherwise, we need to fall back to a good ol' fashioned buffer. + uint32_t offset = ir.increase_bound_by(2); + var_id = offset; + uint32_t type_id = offset + 1; + + SPIRType var_type = get(workgroup_id_type); + var_type.storage = StorageClassUniform; + set(type_id, var_type); + + set(var_id, type_id, StorageClassUniform); + // This should never match anything. 
+ set_decoration(var_id, DecorationDescriptorSet, ~(5u)); + set_decoration(var_id, DecorationBinding, msl_options.indirect_params_buffer_index); + set_extended_decoration(var_id, SPIRVCrossDecorationResourceIndexPrimary, + msl_options.indirect_params_buffer_index); + } + set_name(var_id, "spvDispatchBase"); + builtin_dispatch_base_id = var_id; + } + + if (need_sample_mask && !does_shader_write_sample_mask) + { + uint32_t offset = ir.increase_bound_by(2); + uint32_t var_id = offset + 1; + + // Create gl_SampleMask. + SPIRType uint_type_ptr_out; + uint_type_ptr_out = get_uint_type(); + uint_type_ptr_out.pointer = true; + uint_type_ptr_out.parent_type = get_uint_type_id(); + uint_type_ptr_out.storage = StorageClassOutput; + + auto &ptr_out_type = set(offset, uint_type_ptr_out); + ptr_out_type.self = get_uint_type_id(); + set(var_id, offset, StorageClassOutput); + set_decoration(var_id, DecorationBuiltIn, BuiltInSampleMask); + builtin_sample_mask_id = var_id; + mark_implicit_builtin(StorageClassOutput, BuiltInSampleMask, var_id); + } + + if (need_local_invocation_index && !has_local_invocation_index) + { + uint32_t offset = ir.increase_bound_by(2); + uint32_t type_ptr_id = offset; + uint32_t var_id = offset + 1; + + // Create gl_LocalInvocationIndex. + SPIRType uint_type_ptr; + uint_type_ptr = get_uint_type(); + uint_type_ptr.pointer = true; + uint_type_ptr.parent_type = get_uint_type_id(); + uint_type_ptr.storage = StorageClassInput; + + auto &ptr_type = set(type_ptr_id, uint_type_ptr); + ptr_type.self = get_uint_type_id(); + set(var_id, type_ptr_id, StorageClassInput); + set_decoration(var_id, DecorationBuiltIn, BuiltInLocalInvocationIndex); + builtin_local_invocation_index_id = var_id; + mark_implicit_builtin(StorageClassInput, BuiltInLocalInvocationIndex, var_id); + } + + if (need_workgroup_size && !has_workgroup_size) + { + uint32_t offset = ir.increase_bound_by(2); + uint32_t type_ptr_id = offset; + uint32_t var_id = offset + 1; + + // Create gl_WorkgroupSize. 
+ uint32_t type_id = build_extended_vector_type(get_uint_type_id(), 3); + SPIRType uint_type_ptr = get(type_id); + uint_type_ptr.pointer = true; + uint_type_ptr.parent_type = type_id; + uint_type_ptr.storage = StorageClassInput; + + auto &ptr_type = set(type_ptr_id, uint_type_ptr); + ptr_type.self = type_id; + set(var_id, type_ptr_id, StorageClassInput); + set_decoration(var_id, DecorationBuiltIn, BuiltInWorkgroupSize); + builtin_workgroup_size_id = var_id; + mark_implicit_builtin(StorageClassInput, BuiltInWorkgroupSize, var_id); + } + } + + if (needs_swizzle_buffer_def) + { + uint32_t var_id = build_constant_uint_array_pointer(); + set_name(var_id, "spvSwizzleConstants"); + // This should never match anything. + set_decoration(var_id, DecorationDescriptorSet, kSwizzleBufferBinding); + set_decoration(var_id, DecorationBinding, msl_options.swizzle_buffer_index); + set_extended_decoration(var_id, SPIRVCrossDecorationResourceIndexPrimary, msl_options.swizzle_buffer_index); + swizzle_buffer_id = var_id; + } + + if (!buffers_requiring_array_length.empty()) + { + uint32_t var_id = build_constant_uint_array_pointer(); + set_name(var_id, "spvBufferSizeConstants"); + // This should never match anything. + set_decoration(var_id, DecorationDescriptorSet, kBufferSizeBufferBinding); + set_decoration(var_id, DecorationBinding, msl_options.buffer_size_buffer_index); + set_extended_decoration(var_id, SPIRVCrossDecorationResourceIndexPrimary, msl_options.buffer_size_buffer_index); + buffer_size_buffer_id = var_id; + } + + if (needs_view_mask_buffer()) + { + uint32_t var_id = build_constant_uint_array_pointer(); + set_name(var_id, "spvViewMask"); + // This should never match anything. 
+ set_decoration(var_id, DecorationDescriptorSet, ~(4u)); + set_decoration(var_id, DecorationBinding, msl_options.view_mask_buffer_index); + set_extended_decoration(var_id, SPIRVCrossDecorationResourceIndexPrimary, msl_options.view_mask_buffer_index); + view_mask_buffer_id = var_id; + } + + if (!buffers_requiring_dynamic_offset.empty()) + { + uint32_t var_id = build_constant_uint_array_pointer(); + set_name(var_id, "spvDynamicOffsets"); + // This should never match anything. + set_decoration(var_id, DecorationDescriptorSet, ~(5u)); + set_decoration(var_id, DecorationBinding, msl_options.dynamic_offsets_buffer_index); + set_extended_decoration(var_id, SPIRVCrossDecorationResourceIndexPrimary, + msl_options.dynamic_offsets_buffer_index); + dynamic_offsets_buffer_id = var_id; + } +} + +// Checks if the specified builtin variable (e.g. gl_InstanceIndex) is marked as active. +// If not, it marks it as active and forces a recompilation. +// This might be used when the optimization of inactive builtins was too optimistic (e.g. when "spvOut" is emitted). +void CompilerMSL::ensure_builtin(spv::StorageClass storage, spv::BuiltIn builtin) +{ + Bitset *active_builtins = nullptr; + switch (storage) + { + case StorageClassInput: + active_builtins = &active_input_builtins; + break; + + case StorageClassOutput: + active_builtins = &active_output_builtins; + break; + + default: + break; + } + + // At this point, the specified builtin variable must have already been declared in the entry point. + // If not, mark as active and force recompile. 
+ if (active_builtins != nullptr && !active_builtins->get(builtin)) + { + active_builtins->set(builtin); + force_recompile(); + } +} +
+// Flags `builtin` as active in the builtin set that matches `storage` (input or output)
+// and makes sure `id` appears in the entry point's interface variable list exactly once.
+void CompilerMSL::mark_implicit_builtin(StorageClass storage, BuiltIn builtin, uint32_t id)
+{
+	Bitset *target_set = nullptr;
+	if (storage == StorageClassInput)
+		target_set = &active_input_builtins;
+	else if (storage == StorageClassOutput)
+		target_set = &active_output_builtins;
+
+	// Any storage class other than Input/Output leaves the pointer null and trips the assert.
+	assert(target_set != nullptr);
+	target_set->set(builtin);
+
+	auto &iface = get_entry_point().interface_variables;
+	const bool already_listed = find(begin(iface), end(iface), VariableID(id)) != end(iface);
+	if (!already_listed)
+		iface.push_back(id);
+}
+
+// Reserves three fresh IDs and fills them with: a uint pointer type in Uniform storage
+// (decorated with ArrayStride 4), a second pointer level whose parent is that type, and a
+// UniformConstant variable of the two-level pointer type. Returns the variable's ID.
+uint32_t CompilerMSL::build_constant_uint_array_pointer()
+{
+	const uint32_t first_id = ir.increase_bound_by(3);
+	const uint32_t inner_ptr_type_id = first_id;
+	const uint32_t outer_ptr_type_id = first_id + 1;
+	const uint32_t buffer_var_id = first_id + 2;
+
+	// First pointer level: this is the buffer that carries extra data such as the swizzle constants.
+	SPIRType inner_ptr_type = get_uint_type();
+	inner_ptr_type.pointer = true;
+	inner_ptr_type.pointer_depth = 1;
+	inner_ptr_type.parent_type = get_uint_type_id();
+	inner_ptr_type.storage = StorageClassUniform;
+	set(inner_ptr_type_id, inner_ptr_type);
+	set_decoration(inner_ptr_type_id, DecorationArrayStride, 4);
+
+	// Second pointer level, derived from a copy of the first.
+	SPIRType outer_ptr_type = inner_ptr_type;
+	outer_ptr_type.pointer_depth += 1;
+	outer_ptr_type.parent_type = inner_ptr_type_id;
+	set(outer_ptr_type_id, outer_ptr_type);
+
+	set(buffer_var_id, outer_ptr_type_id, StorageClassUniformConstant);
+	return buffer_var_id;
+}
+
+static string create_sampler_address(const char *prefix, MSLSamplerAddress addr) +{ + switch (addr) + { + case MSL_SAMPLER_ADDRESS_CLAMP_TO_EDGE: + return join(prefix, "address::clamp_to_edge"); + case MSL_SAMPLER_ADDRESS_CLAMP_TO_ZERO: + return join(prefix, "address::clamp_to_zero"); + case MSL_SAMPLER_ADDRESS_CLAMP_TO_BORDER: + return join(prefix, "address::clamp_to_border"); + case MSL_SAMPLER_ADDRESS_REPEAT: + 
return join(prefix, "address::repeat"); + case MSL_SAMPLER_ADDRESS_MIRRORED_REPEAT: + return join(prefix, "address::mirrored_repeat"); + default: + SPIRV_CROSS_THROW("Invalid sampler addressing mode."); + } +} +
+// Data type of the variable identified by stage_in_var_id.
+SPIRType &CompilerMSL::get_stage_in_struct_type()
+{
+	return get_variable_data_type(get(stage_in_var_id));
+}
+
+// Data type of the variable identified by stage_out_var_id.
+SPIRType &CompilerMSL::get_stage_out_struct_type()
+{
+	return get_variable_data_type(get(stage_out_var_id));
+}
+
+// Data type of the variable identified by patch_stage_in_var_id.
+SPIRType &CompilerMSL::get_patch_stage_in_struct_type()
+{
+	return get_variable_data_type(get(patch_stage_in_var_id));
+}
+
+// Data type of the variable identified by patch_stage_out_var_id.
+SPIRType &CompilerMSL::get_patch_stage_out_struct_type()
+{
+	return get_variable_data_type(get(patch_stage_out_var_id));
+}
+
+// Name of the tessellation factor struct: the triangle variant when the entry point
+// has ExecutionModeTriangles set, the quad variant otherwise.
+std::string CompilerMSL::get_tess_factor_struct_name()
+{
+	const bool is_triangles = get_entry_point().flags.get(ExecutionModeTriangles);
+	return is_triangles ? "MTLTriangleTessellationFactorsHalf" : "MTLQuadTessellationFactorsHalf";
+}
+
+// Reference to the shared 32-bit uint type; get_uint_type_id() creates it on first use.
+SPIRType &CompilerMSL::get_uint_type()
+{
+	auto &uint_type = get(get_uint_type_id());
+	return uint_type;
+}
+
+// Returns the ID of the shared 32-bit uint type. The first call reserves a new ID,
+// registers the type under it and caches the ID in uint_type_id; later calls reuse it.
+uint32_t CompilerMSL::get_uint_type_id()
+{
+	if (uint_type_id == 0)
+	{
+		uint_type_id = ir.increase_bound_by(1);
+
+		SPIRType uint_desc;
+		uint_desc.basetype = SPIRType::UInt;
+		uint_desc.width = 32;
+		set(uint_type_id, uint_desc);
+	}
+
+	return uint_type_id;
+}
+
+void CompilerMSL::emit_entry_point_declarations() +{ + // FIXME: Get test coverage here ... + // Constant arrays of non-primitive types (i.e. matrices) won't link properly into Metal libraries + declare_complex_constant_arrays(); + + // Emit constexpr samplers here. 
+ for (auto &samp : constexpr_samplers_by_id) + { + auto &var = get(samp.first); + auto &type = get(var.basetype); + if (type.basetype == SPIRType::Sampler) + add_resource_name(samp.first); + + SmallVector args; + auto &s = samp.second; + + if (s.coord != MSL_SAMPLER_COORD_NORMALIZED) + args.push_back("coord::pixel"); + + if (s.min_filter == s.mag_filter) + { + if (s.min_filter != MSL_SAMPLER_FILTER_NEAREST) + args.push_back("filter::linear"); + } + else + { + if (s.min_filter != MSL_SAMPLER_FILTER_NEAREST) + args.push_back("min_filter::linear"); + if (s.mag_filter != MSL_SAMPLER_FILTER_NEAREST) + args.push_back("mag_filter::linear"); + } + + switch (s.mip_filter) + { + case MSL_SAMPLER_MIP_FILTER_NONE: + // Default + break; + case MSL_SAMPLER_MIP_FILTER_NEAREST: + args.push_back("mip_filter::nearest"); + break; + case MSL_SAMPLER_MIP_FILTER_LINEAR: + args.push_back("mip_filter::linear"); + break; + default: + SPIRV_CROSS_THROW("Invalid mip filter."); + } + + if (s.s_address == s.t_address && s.s_address == s.r_address) + { + if (s.s_address != MSL_SAMPLER_ADDRESS_CLAMP_TO_EDGE) + args.push_back(create_sampler_address("", s.s_address)); + } + else + { + if (s.s_address != MSL_SAMPLER_ADDRESS_CLAMP_TO_EDGE) + args.push_back(create_sampler_address("s_", s.s_address)); + if (s.t_address != MSL_SAMPLER_ADDRESS_CLAMP_TO_EDGE) + args.push_back(create_sampler_address("t_", s.t_address)); + if (s.r_address != MSL_SAMPLER_ADDRESS_CLAMP_TO_EDGE) + args.push_back(create_sampler_address("r_", s.r_address)); + } + + if (s.compare_enable) + { + switch (s.compare_func) + { + case MSL_SAMPLER_COMPARE_FUNC_ALWAYS: + args.push_back("compare_func::always"); + break; + case MSL_SAMPLER_COMPARE_FUNC_NEVER: + args.push_back("compare_func::never"); + break; + case MSL_SAMPLER_COMPARE_FUNC_EQUAL: + args.push_back("compare_func::equal"); + break; + case MSL_SAMPLER_COMPARE_FUNC_NOT_EQUAL: + args.push_back("compare_func::not_equal"); + break; + case MSL_SAMPLER_COMPARE_FUNC_LESS: + 
args.push_back("compare_func::less"); + break; + case MSL_SAMPLER_COMPARE_FUNC_LESS_EQUAL: + args.push_back("compare_func::less_equal"); + break; + case MSL_SAMPLER_COMPARE_FUNC_GREATER: + args.push_back("compare_func::greater"); + break; + case MSL_SAMPLER_COMPARE_FUNC_GREATER_EQUAL: + args.push_back("compare_func::greater_equal"); + break; + default: + SPIRV_CROSS_THROW("Invalid sampler compare function."); + } + } + + if (s.s_address == MSL_SAMPLER_ADDRESS_CLAMP_TO_BORDER || s.t_address == MSL_SAMPLER_ADDRESS_CLAMP_TO_BORDER || + s.r_address == MSL_SAMPLER_ADDRESS_CLAMP_TO_BORDER) + { + switch (s.border_color) + { + case MSL_SAMPLER_BORDER_COLOR_OPAQUE_BLACK: + args.push_back("border_color::opaque_black"); + break; + case MSL_SAMPLER_BORDER_COLOR_OPAQUE_WHITE: + args.push_back("border_color::opaque_white"); + break; + case MSL_SAMPLER_BORDER_COLOR_TRANSPARENT_BLACK: + args.push_back("border_color::transparent_black"); + break; + default: + SPIRV_CROSS_THROW("Invalid sampler border color."); + } + } + + if (s.anisotropy_enable) + args.push_back(join("max_anisotropy(", s.max_anisotropy, ")")); + if (s.lod_clamp_enable) + { + args.push_back(join("lod_clamp(", convert_to_string(s.lod_clamp_min, current_locale_radix_character), ", ", + convert_to_string(s.lod_clamp_max, current_locale_radix_character), ")")); + } + + // If we would emit no arguments, then omit the parentheses entirely. Otherwise, + // we'll wind up with a "most vexing parse" situation. + if (args.empty()) + statement("constexpr sampler ", + type.basetype == SPIRType::SampledImage ? to_sampler_expression(samp.first) : to_name(samp.first), + ";"); + else + statement("constexpr sampler ", + type.basetype == SPIRType::SampledImage ? to_sampler_expression(samp.first) : to_name(samp.first), + "(", merge(args), ");"); + } + + // Emit dynamic buffers here. 
+ for (auto &dynamic_buffer : buffers_requiring_dynamic_offset) + { + if (!dynamic_buffer.second.second) + { + // Could happen if no buffer was used at requested binding point. + continue; + } + + const auto &var = get(dynamic_buffer.second.second); + uint32_t var_id = var.self; + const auto &type = get_variable_data_type(var); + string name = to_name(var.self); + uint32_t desc_set = get_decoration(var.self, DecorationDescriptorSet); + uint32_t arg_id = argument_buffer_ids[desc_set]; + uint32_t base_index = dynamic_buffer.second.first; + + if (!type.array.empty()) + { + // This is complicated, because we need to support arrays of arrays. + // And it's even worse if the outermost dimension is a runtime array, because now + // all this complicated goop has to go into the shader itself. (FIXME) + if (!type.array[type.array.size() - 1]) + SPIRV_CROSS_THROW("Runtime arrays with dynamic offsets are not supported yet."); + else + { + is_using_builtin_array = true; + statement(get_argument_address_space(var), " ", type_to_glsl(type), "* ", to_restrict(var_id), name, + type_to_array_glsl(type), " ="); + + uint32_t dim = uint32_t(type.array.size()); + uint32_t j = 0; + for (SmallVector indices(type.array.size()); + indices[type.array.size() - 1] < to_array_size_literal(type); j++) + { + while (dim > 0) + { + begin_scope(); + --dim; + } + + string arrays; + for (uint32_t i = uint32_t(type.array.size()); i; --i) + arrays += join("[", indices[i - 1], "]"); + statement("(", get_argument_address_space(var), " ", type_to_glsl(type), "* ", + to_restrict(var_id, false), ")((", get_argument_address_space(var), " char* ", + to_restrict(var_id, false), ")", to_name(arg_id), ".", ensure_valid_name(name, "m"), + arrays, " + ", to_name(dynamic_offsets_buffer_id), "[", base_index + j, "]),"); + + while (++indices[dim] >= to_array_size_literal(type, dim) && dim < type.array.size() - 1) + { + end_scope(","); + indices[dim++] = 0; + } + } + end_scope_decl(); + statement_no_indent(""); + 
is_using_builtin_array = false; + } + } + else + { + statement(get_argument_address_space(var), " auto& ", to_restrict(var_id), name, " = *(", + get_argument_address_space(var), " ", type_to_glsl(type), "* ", to_restrict(var_id, false), ")((", + get_argument_address_space(var), " char* ", to_restrict(var_id, false), ")", to_name(arg_id), ".", + ensure_valid_name(name, "m"), " + ", to_name(dynamic_offsets_buffer_id), "[", base_index, "]);"); + } + } + + // Emit buffer arrays here. + for (uint32_t array_id : buffer_arrays) + { + const auto &var = get(array_id); + const auto &type = get_variable_data_type(var); + const auto &buffer_type = get_variable_element_type(var); + string name = to_name(array_id); + statement(get_argument_address_space(var), " ", type_to_glsl(buffer_type), "* ", to_restrict(array_id), name, + "[] ="); + begin_scope(); + for (uint32_t i = 0; i < to_array_size_literal(type); ++i) + statement(name, "_", i, ","); + end_scope_decl(); + statement_no_indent(""); + } + // For some reason, without this, we end up emitting the arrays twice. + buffer_arrays.clear(); + + // Emit disabled fragment outputs. + std::sort(disabled_frag_outputs.begin(), disabled_frag_outputs.end()); + for (uint32_t var_id : disabled_frag_outputs) + { + auto &var = get(var_id); + add_local_variable_name(var_id); + statement(variable_decl(var), ";"); + var.deferred_declaration = false; + } +} + +string CompilerMSL::compile() +{ + ir.fixup_reserved_names(); + + // Do not deal with GLES-isms like precision, older extensions and such. 
+ options.vulkan_semantics = true; + options.es = false; + options.version = 450; + backend.null_pointer_literal = "nullptr"; + backend.float_literal_suffix = false; + backend.uint32_t_literal_suffix = true; + backend.int16_t_literal_suffix = ""; + backend.uint16_t_literal_suffix = ""; + backend.basic_int_type = "int"; + backend.basic_uint_type = "uint"; + backend.basic_int8_type = "char"; + backend.basic_uint8_type = "uchar"; + backend.basic_int16_type = "short"; + backend.basic_uint16_type = "ushort"; + backend.discard_literal = "discard_fragment()"; + backend.demote_literal = "discard_fragment()"; + backend.boolean_mix_function = "select"; + backend.swizzle_is_function = false; + backend.shared_is_implied = false; + backend.use_initializer_list = true; + backend.use_typed_initializer_list = true; + backend.native_row_major_matrix = false; + backend.unsized_array_supported = false; + backend.can_declare_arrays_inline = false; + backend.allow_truncated_access_chain = true; + backend.comparison_image_samples_scalar = true; + backend.native_pointers = true; + backend.nonuniform_qualifier = ""; + backend.support_small_type_sampling_result = true; + backend.supports_empty_struct = true; + + // Allow Metal to use the array template unless we force it off. + backend.can_return_array = !msl_options.force_native_arrays; + backend.array_is_value_type = !msl_options.force_native_arrays; + // Arrays which are part of buffer objects are never considered to be native arrays. + backend.buffer_offset_array_is_value_type = false; + + capture_output_to_buffer = msl_options.capture_output_to_buffer; + is_rasterization_disabled = msl_options.disable_rasterization || capture_output_to_buffer; + + // Initialize array here rather than constructor, MSVC 2013 workaround. 
+ for (auto &id : next_metal_resource_ids) + id = 0; + + fixup_type_alias(); + replace_illegal_names(); + + build_function_control_flow_graphs_and_analyze(); + update_active_builtins(); + analyze_image_and_sampler_usage(); + analyze_sampled_image_usage(); + analyze_interlocked_resource_usage(); + preprocess_op_codes(); + build_implicit_builtins(); + + fixup_image_load_store_access(); + + set_enabled_interface_variables(get_active_interface_variables()); + if (msl_options.force_active_argument_buffer_resources) + activate_argument_buffer_resources(); + + if (swizzle_buffer_id) + active_interface_variables.insert(swizzle_buffer_id); + if (buffer_size_buffer_id) + active_interface_variables.insert(buffer_size_buffer_id); + if (view_mask_buffer_id) + active_interface_variables.insert(view_mask_buffer_id); + if (dynamic_offsets_buffer_id) + active_interface_variables.insert(dynamic_offsets_buffer_id); + if (builtin_layer_id) + active_interface_variables.insert(builtin_layer_id); + if (builtin_dispatch_base_id && !msl_options.supports_msl_version(1, 2)) + active_interface_variables.insert(builtin_dispatch_base_id); + if (builtin_sample_mask_id) + active_interface_variables.insert(builtin_sample_mask_id); + + // Create structs to hold input, output and uniform variables. + // Do output first to ensure out. is declared at top of entry function. 
+ qual_pos_var_name = ""; + stage_out_var_id = add_interface_block(StorageClassOutput); + patch_stage_out_var_id = add_interface_block(StorageClassOutput, true); + stage_in_var_id = add_interface_block(StorageClassInput); + if (get_execution_model() == ExecutionModelTessellationEvaluation) + patch_stage_in_var_id = add_interface_block(StorageClassInput, true); + + if (get_execution_model() == ExecutionModelTessellationControl) + stage_out_ptr_var_id = add_interface_block_pointer(stage_out_var_id, StorageClassOutput); + if (is_tessellation_shader()) + stage_in_ptr_var_id = add_interface_block_pointer(stage_in_var_id, StorageClassInput); + + // Metal vertex functions that define no output must disable rasterization and return void. + if (!stage_out_var_id) + is_rasterization_disabled = true; + + // Convert the use of global variables to recursively-passed function parameters + localize_global_variables(); + extract_global_variables_from_functions(); + + // Mark any non-stage-in structs to be tightly packed. + mark_packable_structs(); + reorder_type_alias(); + + // Add fixup hooks required by shader inputs and outputs. This needs to happen before + // the loop, so the hooks aren't added multiple times. + fix_up_shader_inputs_outputs(); + + // If we are using argument buffers, we create argument buffer structures for them here. + // These buffers will be used in the entry point, not the individual resources. + if (msl_options.argument_buffers) + { + if (!msl_options.supports_msl_version(2, 0)) + SPIRV_CROSS_THROW("Argument buffers can only be used with MSL 2.0 and up."); + analyze_argument_buffers(); + } + + uint32_t pass_count = 0; + do + { + if (pass_count >= 3) + SPIRV_CROSS_THROW("Over 3 compilation loops detected. Must be a bug!"); + + reset(); + + // Start bindings at zero. 
+ next_metal_resource_index_buffer = 0; + next_metal_resource_index_texture = 0; + next_metal_resource_index_sampler = 0; + for (auto &id : next_metal_resource_ids) + id = 0; + + // Move constructor for this type is broken on GCC 4.9 ... + buffer.reset(); + + emit_header(); + emit_custom_templates(); + emit_specialization_constants_and_structs(); + emit_resources(); + emit_custom_functions(); + emit_function(get(ir.default_entry_point), Bitset()); + + pass_count++; + } while (is_forcing_recompilation()); + + return buffer.str(); +} + +// Register the need to output any custom functions. +void CompilerMSL::preprocess_op_codes() +{ + OpCodePreprocessor preproc(*this); + traverse_all_reachable_opcodes(get(ir.default_entry_point), preproc); + + suppress_missing_prototypes = preproc.suppress_missing_prototypes; + + if (preproc.uses_atomics) + { + add_header_line("#include "); + add_pragma_line("#pragma clang diagnostic ignored \"-Wunused-variable\""); + } + + // Before MSL 2.1 (2.2 for textures), Metal vertex functions that write to + // resources must disable rasterization and return void. + if (preproc.uses_resource_write) + is_rasterization_disabled = true; + + // Tessellation control shaders are run as compute functions in Metal, and so + // must capture their output to a buffer. + if (get_execution_model() == ExecutionModelTessellationControl || + (get_execution_model() == ExecutionModelVertex && msl_options.vertex_for_tessellation)) + { + is_rasterization_disabled = true; + capture_output_to_buffer = true; + } + + if (preproc.needs_subgroup_invocation_id) + needs_subgroup_invocation_id = true; + if (preproc.needs_subgroup_size) + needs_subgroup_size = true; + // build_implicit_builtins() hasn't run yet, and in fact, this needs to execute + // before then so that gl_SampleID will get added; so we also need to check if + // that function would add gl_FragCoord. 
+ if (preproc.needs_sample_id || msl_options.force_sample_rate_shading || + (is_sample_rate() && (active_input_builtins.get(BuiltInFragCoord) || + (need_subpass_input && !msl_options.use_framebuffer_fetch_subpasses)))) + needs_sample_id = true; +} +
+// Relocates Private and Workgroup variables from global_variables into the entry function,
+// since Metal does not allow non-constant variables at global scope. Variables for which
+// variable_is_lut() is true are removed from global_variables without becoming locals.
+void CompilerMSL::localize_global_variables()
+{
+	auto &entry_point_func = get(ir.default_entry_point);
+	for (auto it = global_variables.begin(); it != global_variables.end();)
+	{
+		uint32_t candidate_id = *it;
+		auto &candidate = get(candidate_id);
+		const bool movable =
+		    candidate.storage == StorageClassPrivate || candidate.storage == StorageClassWorkgroup;
+		if (!movable)
+		{
+			it++;
+			continue;
+		}
+
+		if (!variable_is_lut(candidate))
+			entry_point_func.add_local_variable(candidate_id);
+		it = global_variables.erase(it);
+	}
+}
+
+// Gathers the IDs of all variables that functions may touch directly as globals, then
+// walks the call graph from the default entry point so each one is turned into a
+// function argument where it is used.
+void CompilerMSL::extract_global_variables_from_functions()
+{
+	// Variables in the interface and resource storage classes.
+	unordered_set global_var_ids;
+	ir.for_each_typed_id([&](uint32_t, SPIRVariable &candidate) {
+		switch (candidate.storage)
+		{
+		case StorageClassInput:
+		case StorageClassOutput:
+		case StorageClassUniform:
+		case StorageClassUniformConstant:
+		case StorageClassPushConstant:
+		case StorageClassStorageBuffer:
+			global_var_ids.insert(candidate.self);
+			break;
+
+		default:
+			break;
+		}
+	});
+
+	// Entry-function locals whose storage class is not Function are treated the same way.
+	auto &entry_point_func = get(ir.default_entry_point);
+	for (auto &local_id : entry_point_func.local_variables)
+	{
+		const bool is_plain_local = get(local_id).storage == StorageClassFunction;
+		if (!is_plain_local)
+			global_var_ids.insert(local_id);
+	}
+
+	std::set added_arg_ids;
+	unordered_set processed_func_ids;
+	extract_global_variables_from_function(ir.default_entry_point, added_arg_ids, global_var_ids, processed_func_ids);
+}
+
+// MSL does not support the use of global variables for shader input content.
+// For any global variable accessed directly by the specified function, extract that variable, +// add it as an argument to that function, and the arg to the added_arg_ids collection. +void CompilerMSL::extract_global_variables_from_function(uint32_t func_id, std::set &added_arg_ids, + unordered_set &global_var_ids, + unordered_set &processed_func_ids) +{ + // Avoid processing a function more than once + if (processed_func_ids.find(func_id) != processed_func_ids.end()) + { + // Return function global variables + added_arg_ids = function_global_vars[func_id]; + return; + } + + processed_func_ids.insert(func_id); + + auto &func = get(func_id); + + // Recursively establish global args added to functions on which we depend. + for (auto block : func.blocks) + { + auto &b = get(block); + for (auto &i : b.ops) + { + auto ops = stream(i); + auto op = static_cast(i.op); + + switch (op) + { + case OpLoad: + case OpInBoundsAccessChain: + case OpAccessChain: + case OpPtrAccessChain: + case OpArrayLength: + { + uint32_t base_id = ops[2]; + if (global_var_ids.find(base_id) != global_var_ids.end()) + added_arg_ids.insert(base_id); + + // Use Metal's native frame-buffer fetch API for subpass inputs. + auto &type = get(ops[0]); + if (type.basetype == SPIRType::Image && type.image.dim == DimSubpassData && + (!msl_options.use_framebuffer_fetch_subpasses)) + { + // Implicitly reads gl_FragCoord. + assert(builtin_frag_coord_id != 0); + added_arg_ids.insert(builtin_frag_coord_id); + if (msl_options.multiview) + { + // Implicitly reads gl_ViewIndex. + assert(builtin_view_idx_id != 0); + added_arg_ids.insert(builtin_view_idx_id); + } + else if (msl_options.arrayed_subpass_input) + { + // Implicitly reads gl_Layer. 
+ assert(builtin_layer_id != 0); + added_arg_ids.insert(builtin_layer_id); + } + } + + break; + } + + case OpFunctionCall: + { + // First see if any of the function call args are globals + for (uint32_t arg_idx = 3; arg_idx < i.length; arg_idx++) + { + uint32_t arg_id = ops[arg_idx]; + if (global_var_ids.find(arg_id) != global_var_ids.end()) + added_arg_ids.insert(arg_id); + } + + // Then recurse into the function itself to extract globals used internally in the function + uint32_t inner_func_id = ops[2]; + std::set inner_func_args; + extract_global_variables_from_function(inner_func_id, inner_func_args, global_var_ids, + processed_func_ids); + added_arg_ids.insert(inner_func_args.begin(), inner_func_args.end()); + break; + } + + case OpStore: + { + uint32_t base_id = ops[0]; + if (global_var_ids.find(base_id) != global_var_ids.end()) + added_arg_ids.insert(base_id); + + uint32_t rvalue_id = ops[1]; + if (global_var_ids.find(rvalue_id) != global_var_ids.end()) + added_arg_ids.insert(rvalue_id); + + break; + } + + case OpSelect: + { + uint32_t base_id = ops[3]; + if (global_var_ids.find(base_id) != global_var_ids.end()) + added_arg_ids.insert(base_id); + base_id = ops[4]; + if (global_var_ids.find(base_id) != global_var_ids.end()) + added_arg_ids.insert(base_id); + break; + } + + // Emulate texture2D atomic operations + case OpImageTexelPointer: + { + // When using the pointer, we need to know which variable it is actually loaded from. 
+ uint32_t base_id = ops[2]; + auto *var = maybe_get_backing_variable(base_id); + if (var && atomic_image_vars.count(var->self)) + { + if (global_var_ids.find(base_id) != global_var_ids.end()) + added_arg_ids.insert(base_id); + } + break; + } + + case OpExtInst: + { + uint32_t extension_set = ops[2]; + if (get(extension_set).ext == SPIRExtension::GLSL) + { + auto op_450 = static_cast(ops[3]); + switch (op_450) + { + case GLSLstd450InterpolateAtCentroid: + case GLSLstd450InterpolateAtSample: + case GLSLstd450InterpolateAtOffset: + { + // For these, we really need the stage-in block. It is theoretically possible to pass the + // interpolant object, but a) doing so would require us to create an entirely new variable + // with Interpolant type, and b) if we have a struct or array, handling all the members and + // elements could get unwieldy fast. + added_arg_ids.insert(stage_in_var_id); + break; + } + default: + break; + } + } + break; + } + + case OpGroupNonUniformInverseBallot: + { + added_arg_ids.insert(builtin_subgroup_invocation_id_id); + break; + } + + case OpGroupNonUniformBallotFindLSB: + case OpGroupNonUniformBallotFindMSB: + { + added_arg_ids.insert(builtin_subgroup_size_id); + break; + } + + case OpGroupNonUniformBallotBitCount: + { + auto operation = static_cast(ops[3]); + switch (operation) + { + case GroupOperationReduce: + added_arg_ids.insert(builtin_subgroup_size_id); + break; + case GroupOperationInclusiveScan: + case GroupOperationExclusiveScan: + added_arg_ids.insert(builtin_subgroup_invocation_id_id); + break; + default: + break; + } + break; + } + + default: + break; + } + + // TODO: Add all other operations which can affect memory. + // We should consider a more unified system here to reduce boiler-plate. + // This kind of analysis is done in several places ... 
+ } + } + + function_global_vars[func_id] = added_arg_ids; + + // Add the global variables as arguments to the function + if (func_id != ir.default_entry_point) + { + bool added_in = false; + bool added_out = false; + for (uint32_t arg_id : added_arg_ids) + { + auto &var = get(arg_id); + uint32_t type_id = var.basetype; + auto *p_type = &get(type_id); + BuiltIn bi_type = BuiltIn(get_decoration(arg_id, DecorationBuiltIn)); + + if (((is_tessellation_shader() && var.storage == StorageClassInput) || + (get_execution_model() == ExecutionModelTessellationControl && var.storage == StorageClassOutput)) && + !(has_decoration(arg_id, DecorationPatch) || is_patch_block(*p_type)) && + (!is_builtin_variable(var) || bi_type == BuiltInPosition || bi_type == BuiltInPointSize || + bi_type == BuiltInClipDistance || bi_type == BuiltInCullDistance || + p_type->basetype == SPIRType::Struct)) + { + // Tessellation control shaders see inputs and per-vertex outputs as arrays. + // Similarly, tessellation evaluation shaders see per-vertex inputs as arrays. + // We collected them into a structure; we must pass the array of this + // structure to the function. 
+ std::string name; + if (var.storage == StorageClassInput) + { + if (added_in) + continue; + name = "gl_in"; + arg_id = stage_in_ptr_var_id; + added_in = true; + } + else if (var.storage == StorageClassOutput) + { + if (added_out) + continue; + name = "gl_out"; + arg_id = stage_out_ptr_var_id; + added_out = true; + } + type_id = get(arg_id).basetype; + uint32_t next_id = ir.increase_bound_by(1); + func.add_parameter(type_id, next_id, true); + set(next_id, type_id, StorageClassFunction, 0, arg_id); + + set_name(next_id, name); + } + else if (is_builtin_variable(var) && p_type->basetype == SPIRType::Struct) + { + // Get the pointee type + type_id = get_pointee_type_id(type_id); + p_type = &get(type_id); + + uint32_t mbr_idx = 0; + for (auto &mbr_type_id : p_type->member_types) + { + BuiltIn builtin = BuiltInMax; + bool is_builtin = is_member_builtin(*p_type, mbr_idx, &builtin); + if (is_builtin && has_active_builtin(builtin, var.storage)) + { + // Add a arg variable with the same type and decorations as the member + uint32_t next_ids = ir.increase_bound_by(2); + uint32_t ptr_type_id = next_ids + 0; + uint32_t var_id = next_ids + 1; + + // Make sure we have an actual pointer type, + // so that we will get the appropriate address space when declaring these builtins. 
+ auto &ptr = set(ptr_type_id, get(mbr_type_id)); + ptr.self = mbr_type_id; + ptr.storage = var.storage; + ptr.pointer = true; + ptr.parent_type = mbr_type_id; + + func.add_parameter(mbr_type_id, var_id, true); + set(var_id, ptr_type_id, StorageClassFunction); + ir.meta[var_id].decoration = ir.meta[type_id].members[mbr_idx]; + } + mbr_idx++; + } + } + else + { + uint32_t next_id = ir.increase_bound_by(1); + func.add_parameter(type_id, next_id, true); + set(next_id, type_id, StorageClassFunction, 0, arg_id); + + // Ensure the existing variable has a valid name and the new variable has all the same meta info + set_name(arg_id, ensure_valid_name(to_name(arg_id), "v")); + ir.meta[next_id] = ir.meta[arg_id]; + } + } + } +} + +// For all variables that are some form of non-input-output interface block, mark that all the structs +// that are recursively contained within the type referenced by that variable should be packed tightly. +void CompilerMSL::mark_packable_structs() +{ + ir.for_each_typed_id([&](uint32_t, SPIRVariable &var) { + if (var.storage != StorageClassFunction && !is_hidden_variable(var)) + { + auto &type = this->get(var.basetype); + if (type.pointer && + (type.storage == StorageClassUniform || type.storage == StorageClassUniformConstant || + type.storage == StorageClassPushConstant || type.storage == StorageClassStorageBuffer) && + (has_decoration(type.self, DecorationBlock) || has_decoration(type.self, DecorationBufferBlock))) + mark_as_packable(type); + } + }); +} + +// If the specified type is a struct, it and any nested structs +// are marked as packable with the SPIRVCrossDecorationBufferBlockRepacked decoration, +void CompilerMSL::mark_as_packable(SPIRType &type) +{ + // If this is not the base type (eg. 
it's a pointer or array), tunnel down + if (type.parent_type) + { + mark_as_packable(get(type.parent_type)); + return; + } + + if (type.basetype == SPIRType::Struct) + { + set_extended_decoration(type.self, SPIRVCrossDecorationBufferBlockRepacked); + + // Recurse + uint32_t mbr_cnt = uint32_t(type.member_types.size()); + for (uint32_t mbr_idx = 0; mbr_idx < mbr_cnt; mbr_idx++) + { + uint32_t mbr_type_id = type.member_types[mbr_idx]; + auto &mbr_type = get(mbr_type_id); + mark_as_packable(mbr_type); + if (mbr_type.type_alias) + { + auto &mbr_type_alias = get(mbr_type.type_alias); + mark_as_packable(mbr_type_alias); + } + } + } +} + +// If a shader input exists at the location, it is marked as being used by this shader +void CompilerMSL::mark_location_as_used_by_shader(uint32_t location, const SPIRType &type, StorageClass storage) +{ + if (storage != StorageClassInput) + return; + if (is_array(type)) + { + uint32_t dim = 1; + for (uint32_t i = 0; i < type.array.size(); i++) + dim *= to_array_size_literal(type, i); + for (uint32_t i = 0; i < dim; i++) + { + if (is_matrix(type)) + { + for (uint32_t j = 0; j < type.columns; j++) + inputs_in_use.insert(location++); + } + else + inputs_in_use.insert(location++); + } + } + else if (is_matrix(type)) + { + for (uint32_t i = 0; i < type.columns; i++) + inputs_in_use.insert(location + i); + } + else + inputs_in_use.insert(location); +} + +uint32_t CompilerMSL::get_target_components_for_fragment_location(uint32_t location) const +{ + auto itr = fragment_output_components.find(location); + if (itr == end(fragment_output_components)) + return 4; + else + return itr->second; +} + +uint32_t CompilerMSL::build_extended_vector_type(uint32_t type_id, uint32_t components, SPIRType::BaseType basetype) +{ + uint32_t new_type_id = ir.increase_bound_by(1); + auto &old_type = get(type_id); + auto *type = &set(new_type_id, old_type); + type->vecsize = components; + if (basetype != SPIRType::Unknown) + type->basetype = basetype; + type->self 
= new_type_id; + type->parent_type = type_id; + type->array.clear(); + type->array_size_literal.clear(); + type->pointer = false; + + if (is_array(old_type)) + { + uint32_t array_type_id = ir.increase_bound_by(1); + type = &set(array_type_id, *type); + type->parent_type = new_type_id; + type->array = old_type.array; + type->array_size_literal = old_type.array_size_literal; + new_type_id = array_type_id; + } + + if (old_type.pointer) + { + uint32_t ptr_type_id = ir.increase_bound_by(1); + type = &set(ptr_type_id, *type); + type->self = new_type_id; + type->parent_type = new_type_id; + type->storage = old_type.storage; + type->pointer = true; + new_type_id = ptr_type_id; + } + + return new_type_id; +} + +uint32_t CompilerMSL::build_msl_interpolant_type(uint32_t type_id, bool is_noperspective) +{ + uint32_t new_type_id = ir.increase_bound_by(1); + SPIRType &type = set(new_type_id, get(type_id)); + type.basetype = SPIRType::Interpolant; + type.parent_type = type_id; + // In Metal, the pull-model interpolant type encodes perspective-vs-no-perspective in the type itself. + // Add this decoration so we know which argument to pass to the template. + if (is_noperspective) + set_decoration(new_type_id, DecorationNoPerspective); + return new_type_id; +} + +void CompilerMSL::add_plain_variable_to_interface_block(StorageClass storage, const string &ib_var_ref, + SPIRType &ib_type, SPIRVariable &var, InterfaceBlockMeta &meta) +{ + bool is_builtin = is_builtin_variable(var); + BuiltIn builtin = BuiltIn(get_decoration(var.self, DecorationBuiltIn)); + bool is_flat = has_decoration(var.self, DecorationFlat); + bool is_noperspective = has_decoration(var.self, DecorationNoPerspective); + bool is_centroid = has_decoration(var.self, DecorationCentroid); + bool is_sample = has_decoration(var.self, DecorationSample); + + // Add a reference to the variable type to the interface struct. 
+ uint32_t ib_mbr_idx = uint32_t(ib_type.member_types.size()); + uint32_t type_id = ensure_correct_builtin_type(var.basetype, builtin); + var.basetype = type_id; + + type_id = get_pointee_type_id(var.basetype); + if (meta.strip_array && is_array(get(type_id))) + type_id = get(type_id).parent_type; + auto &type = get(type_id); + uint32_t target_components = 0; + uint32_t type_components = type.vecsize; + + bool padded_output = false; + bool padded_input = false; + uint32_t start_component = 0; + + auto &entry_func = get(ir.default_entry_point); + + // Deal with Component decorations. + InterfaceBlockMeta::LocationMeta *location_meta = nullptr; + if (has_decoration(var.self, DecorationLocation)) + { + auto location_meta_itr = meta.location_meta.find(get_decoration(var.self, DecorationLocation)); + if (location_meta_itr != end(meta.location_meta)) + location_meta = &location_meta_itr->second; + } + + bool pad_fragment_output = has_decoration(var.self, DecorationLocation) && + msl_options.pad_fragment_output_components && + get_entry_point().model == ExecutionModelFragment && storage == StorageClassOutput; + + // Check if we need to pad fragment output to match a certain number of components. + if (location_meta) + { + start_component = get_decoration(var.self, DecorationComponent); + uint32_t num_components = location_meta->num_components; + if (pad_fragment_output) + { + uint32_t locn = get_decoration(var.self, DecorationLocation); + num_components = std::max(num_components, get_target_components_for_fragment_location(locn)); + } + + if (location_meta->ib_index != ~0u) + { + // We have already declared the variable. Just emit an early-declared variable and fixup as needed. 
+ entry_func.add_local_variable(var.self); + vars_needing_early_declaration.push_back(var.self); + + if (var.storage == StorageClassInput) + { + uint32_t ib_index = location_meta->ib_index; + entry_func.fixup_hooks_in.push_back([=, &var]() { + statement(to_name(var.self), " = ", ib_var_ref, ".", to_member_name(ib_type, ib_index), + vector_swizzle(type_components, start_component), ";"); + }); + } + else + { + uint32_t ib_index = location_meta->ib_index; + entry_func.fixup_hooks_out.push_back([=, &var]() { + statement(ib_var_ref, ".", to_member_name(ib_type, ib_index), + vector_swizzle(type_components, start_component), " = ", to_name(var.self), ";"); + }); + } + return; + } + else + { + location_meta->ib_index = uint32_t(ib_type.member_types.size()); + type_id = build_extended_vector_type(type_id, num_components); + if (var.storage == StorageClassInput) + padded_input = true; + else + padded_output = true; + } + } + else if (pad_fragment_output) + { + uint32_t locn = get_decoration(var.self, DecorationLocation); + target_components = get_target_components_for_fragment_location(locn); + if (type_components < target_components) + { + // Make a new type here. + type_id = build_extended_vector_type(type_id, target_components); + padded_output = true; + } + } + + if (storage == StorageClassInput && pull_model_inputs.count(var.self)) + ib_type.member_types.push_back(build_msl_interpolant_type(type_id, is_noperspective)); + else + ib_type.member_types.push_back(type_id); + + // Give the member a name + string mbr_name = ensure_valid_name(to_expression(var.self), "m"); + set_member_name(ib_type.self, ib_mbr_idx, mbr_name); + + // Update the original variable reference to include the structure reference + string qual_var_name = ib_var_ref + "." + mbr_name; + // If using pull-model interpolation, need to add a call to the correct interpolation method. 
+ if (storage == StorageClassInput && pull_model_inputs.count(var.self)) + { + if (is_centroid) + qual_var_name += ".interpolate_at_centroid()"; + else if (is_sample) + qual_var_name += join(".interpolate_at_sample(", to_expression(builtin_sample_id_id), ")"); + else + qual_var_name += ".interpolate_at_center()"; + } + + if (padded_output || padded_input) + { + entry_func.add_local_variable(var.self); + vars_needing_early_declaration.push_back(var.self); + + if (padded_output) + { + entry_func.fixup_hooks_out.push_back([=, &var]() { + statement(qual_var_name, vector_swizzle(type_components, start_component), " = ", to_name(var.self), + ";"); + }); + } + else + { + entry_func.fixup_hooks_in.push_back([=, &var]() { + statement(to_name(var.self), " = ", qual_var_name, vector_swizzle(type_components, start_component), + ";"); + }); + } + } + else if (!meta.strip_array) + ir.meta[var.self].decoration.qualified_alias = qual_var_name; + + if (var.storage == StorageClassOutput && var.initializer != ID(0)) + { + if (padded_output || padded_input) + { + entry_func.fixup_hooks_in.push_back( + [=, &var]() { statement(to_name(var.self), " = ", to_expression(var.initializer), ";"); }); + } + else + { + entry_func.fixup_hooks_in.push_back( + [=, &var]() { statement(qual_var_name, " = ", to_expression(var.initializer), ";"); }); + } + } + + // Copy the variable location from the original variable to the member + if (get_decoration_bitset(var.self).get(DecorationLocation)) + { + uint32_t locn = get_decoration(var.self, DecorationLocation); + if (storage == StorageClassInput) + { + type_id = ensure_correct_input_type(var.basetype, locn, location_meta ? 
location_meta->num_components : 0); + if (!location_meta) + var.basetype = type_id; + + type_id = get_pointee_type_id(type_id); + if (meta.strip_array && is_array(get(type_id))) + type_id = get(type_id).parent_type; + if (pull_model_inputs.count(var.self)) + ib_type.member_types[ib_mbr_idx] = build_msl_interpolant_type(type_id, is_noperspective); + else + ib_type.member_types[ib_mbr_idx] = type_id; + } + set_member_decoration(ib_type.self, ib_mbr_idx, DecorationLocation, locn); + mark_location_as_used_by_shader(locn, get(type_id), storage); + } + else if (is_builtin && is_tessellation_shader() && inputs_by_builtin.count(builtin)) + { + uint32_t locn = inputs_by_builtin[builtin].location; + set_member_decoration(ib_type.self, ib_mbr_idx, DecorationLocation, locn); + mark_location_as_used_by_shader(locn, type, storage); + } + + if (!location_meta) + { + if (get_decoration_bitset(var.self).get(DecorationComponent)) + { + uint32_t component = get_decoration(var.self, DecorationComponent); + set_member_decoration(ib_type.self, ib_mbr_idx, DecorationComponent, component); + } + } + + if (get_decoration_bitset(var.self).get(DecorationIndex)) + { + uint32_t index = get_decoration(var.self, DecorationIndex); + set_member_decoration(ib_type.self, ib_mbr_idx, DecorationIndex, index); + } + + // Mark the member as builtin if needed + if (is_builtin) + { + set_member_decoration(ib_type.self, ib_mbr_idx, DecorationBuiltIn, builtin); + if (builtin == BuiltInPosition && storage == StorageClassOutput) + qual_pos_var_name = qual_var_name; + } + + // Copy interpolation decorations if needed + if (storage != StorageClassInput || !pull_model_inputs.count(var.self)) + { + if (is_flat) + set_member_decoration(ib_type.self, ib_mbr_idx, DecorationFlat); + if (is_noperspective) + set_member_decoration(ib_type.self, ib_mbr_idx, DecorationNoPerspective); + if (is_centroid) + set_member_decoration(ib_type.self, ib_mbr_idx, DecorationCentroid); + if (is_sample) + 
set_member_decoration(ib_type.self, ib_mbr_idx, DecorationSample); + } + + // If we have location meta, there is no unique OrigID. We won't need it, since we flatten/unflatten + // the variable to stack anyways here. + if (!location_meta) + set_extended_member_decoration(ib_type.self, ib_mbr_idx, SPIRVCrossDecorationInterfaceOrigID, var.self); +} + +void CompilerMSL::add_composite_variable_to_interface_block(StorageClass storage, const string &ib_var_ref, + SPIRType &ib_type, SPIRVariable &var, + InterfaceBlockMeta &meta) +{ + auto &entry_func = get(ir.default_entry_point); + auto &var_type = meta.strip_array ? get_variable_element_type(var) : get_variable_data_type(var); + uint32_t elem_cnt = 0; + + if (is_matrix(var_type)) + { + if (is_array(var_type)) + SPIRV_CROSS_THROW("MSL cannot emit arrays-of-matrices in input and output variables."); + + elem_cnt = var_type.columns; + } + else if (is_array(var_type)) + { + if (var_type.array.size() != 1) + SPIRV_CROSS_THROW("MSL cannot emit arrays-of-arrays in input and output variables."); + + elem_cnt = to_array_size_literal(var_type); + } + + bool is_builtin = is_builtin_variable(var); + BuiltIn builtin = BuiltIn(get_decoration(var.self, DecorationBuiltIn)); + bool is_flat = has_decoration(var.self, DecorationFlat); + bool is_noperspective = has_decoration(var.self, DecorationNoPerspective); + bool is_centroid = has_decoration(var.self, DecorationCentroid); + bool is_sample = has_decoration(var.self, DecorationSample); + + auto *usable_type = &var_type; + if (usable_type->pointer) + usable_type = &get(usable_type->parent_type); + while (is_array(*usable_type) || is_matrix(*usable_type)) + usable_type = &get(usable_type->parent_type); + + // If a builtin, force it to have the proper name. 
+ if (is_builtin) + set_name(var.self, builtin_to_glsl(builtin, StorageClassFunction)); + + bool flatten_from_ib_var = false; + string flatten_from_ib_mbr_name; + + if (storage == StorageClassOutput && is_builtin && builtin == BuiltInClipDistance) + { + // Also declare [[clip_distance]] attribute here. + uint32_t clip_array_mbr_idx = uint32_t(ib_type.member_types.size()); + ib_type.member_types.push_back(get_variable_data_type_id(var)); + set_member_decoration(ib_type.self, clip_array_mbr_idx, DecorationBuiltIn, BuiltInClipDistance); + + flatten_from_ib_mbr_name = builtin_to_glsl(BuiltInClipDistance, StorageClassOutput); + set_member_name(ib_type.self, clip_array_mbr_idx, flatten_from_ib_mbr_name); + + // When we flatten, we flatten directly from the "out" struct, + // not from a function variable. + flatten_from_ib_var = true; + + if (!msl_options.enable_clip_distance_user_varying) + return; + } + else if (!meta.strip_array) + { + // Only flatten/unflatten IO composites for non-tessellation cases where arrays are not stripped. + entry_func.add_local_variable(var.self); + // We need to declare the variable early and at entry-point scope. + vars_needing_early_declaration.push_back(var.self); + } + + for (uint32_t i = 0; i < elem_cnt; i++) + { + // Add a reference to the variable type to the interface struct. + uint32_t ib_mbr_idx = uint32_t(ib_type.member_types.size()); + + uint32_t target_components = 0; + bool padded_output = false; + uint32_t type_id = usable_type->self; + + // Check if we need to pad fragment output to match a certain number of components. 
+ if (get_decoration_bitset(var.self).get(DecorationLocation) && msl_options.pad_fragment_output_components && + get_entry_point().model == ExecutionModelFragment && storage == StorageClassOutput) + { + uint32_t locn = get_decoration(var.self, DecorationLocation) + i; + target_components = get_target_components_for_fragment_location(locn); + if (usable_type->vecsize < target_components) + { + // Make a new type here. + type_id = build_extended_vector_type(usable_type->self, target_components); + padded_output = true; + } + } + + if (storage == StorageClassInput && pull_model_inputs.count(var.self)) + ib_type.member_types.push_back(build_msl_interpolant_type(get_pointee_type_id(type_id), is_noperspective)); + else + ib_type.member_types.push_back(get_pointee_type_id(type_id)); + + // Give the member a name + string mbr_name = ensure_valid_name(join(to_expression(var.self), "_", i), "m"); + set_member_name(ib_type.self, ib_mbr_idx, mbr_name); + + // There is no qualified alias since we need to flatten the internal array on return. 
+ if (get_decoration_bitset(var.self).get(DecorationLocation)) + { + uint32_t locn = get_decoration(var.self, DecorationLocation) + i; + if (storage == StorageClassInput) + { + var.basetype = ensure_correct_input_type(var.basetype, locn); + uint32_t mbr_type_id = ensure_correct_input_type(usable_type->self, locn); + if (storage == StorageClassInput && pull_model_inputs.count(var.self)) + ib_type.member_types[ib_mbr_idx] = build_msl_interpolant_type(mbr_type_id, is_noperspective); + else + ib_type.member_types[ib_mbr_idx] = mbr_type_id; + } + set_member_decoration(ib_type.self, ib_mbr_idx, DecorationLocation, locn); + mark_location_as_used_by_shader(locn, *usable_type, storage); + } + else if (is_builtin && is_tessellation_shader() && inputs_by_builtin.count(builtin)) + { + uint32_t locn = inputs_by_builtin[builtin].location + i; + set_member_decoration(ib_type.self, ib_mbr_idx, DecorationLocation, locn); + mark_location_as_used_by_shader(locn, *usable_type, storage); + } + else if (is_builtin && builtin == BuiltInClipDistance) + { + // Declare the ClipDistance as [[user(clipN)]]. 
+ set_member_decoration(ib_type.self, ib_mbr_idx, DecorationBuiltIn, BuiltInClipDistance); + set_member_decoration(ib_type.self, ib_mbr_idx, DecorationLocation, i); + } + + if (get_decoration_bitset(var.self).get(DecorationIndex)) + { + uint32_t index = get_decoration(var.self, DecorationIndex); + set_member_decoration(ib_type.self, ib_mbr_idx, DecorationIndex, index); + } + + if (storage != StorageClassInput || !pull_model_inputs.count(var.self)) + { + // Copy interpolation decorations if needed + if (is_flat) + set_member_decoration(ib_type.self, ib_mbr_idx, DecorationFlat); + if (is_noperspective) + set_member_decoration(ib_type.self, ib_mbr_idx, DecorationNoPerspective); + if (is_centroid) + set_member_decoration(ib_type.self, ib_mbr_idx, DecorationCentroid); + if (is_sample) + set_member_decoration(ib_type.self, ib_mbr_idx, DecorationSample); + } + + set_extended_member_decoration(ib_type.self, ib_mbr_idx, SPIRVCrossDecorationInterfaceOrigID, var.self); + + // Only flatten/unflatten IO composites for non-tessellation cases where arrays are not stripped. 
+ if (!meta.strip_array) + { + switch (storage) + { + case StorageClassInput: + entry_func.fixup_hooks_in.push_back([=, &var]() { + if (pull_model_inputs.count(var.self)) + { + string lerp_call; + if (is_centroid) + lerp_call = ".interpolate_at_centroid()"; + else if (is_sample) + lerp_call = join(".interpolate_at_sample(", to_expression(builtin_sample_id_id), ")"); + else + lerp_call = ".interpolate_at_center()"; + statement(to_name(var.self), "[", i, "] = ", ib_var_ref, ".", mbr_name, lerp_call, ";"); + } + else + { + statement(to_name(var.self), "[", i, "] = ", ib_var_ref, ".", mbr_name, ";"); + } + }); + break; + + case StorageClassOutput: + entry_func.fixup_hooks_out.push_back([=, &var]() { + if (padded_output) + { + auto &padded_type = this->get(type_id); + statement( + ib_var_ref, ".", mbr_name, " = ", + remap_swizzle(padded_type, usable_type->vecsize, join(to_name(var.self), "[", i, "]")), + ";"); + } + else if (flatten_from_ib_var) + statement(ib_var_ref, ".", mbr_name, " = ", ib_var_ref, ".", flatten_from_ib_mbr_name, "[", i, + "];"); + else + statement(ib_var_ref, ".", mbr_name, " = ", to_name(var.self), "[", i, "];"); + }); + break; + + default: + break; + } + } + } +} + +uint32_t CompilerMSL::get_accumulated_member_location(const SPIRVariable &var, uint32_t mbr_idx, bool strip_array) +{ + auto &type = strip_array ? get_variable_element_type(var) : get_variable_data_type(var); + uint32_t location = get_decoration(var.self, DecorationLocation); + + for (uint32_t i = 0; i < mbr_idx; i++) + { + auto &mbr_type = get(type.member_types[i]); + + // Walk the members that precede mbr_idx, starting from the variable's own Location: restart counting at any preceding member (index i) that carries its own Location decoration, then advance by the locations that member occupies (its column count when it has more than one column, multiplied by every array dimension).
+ if (has_member_decoration(type.self, i, DecorationLocation)) + location = get_member_decoration(type.self, i, DecorationLocation); + + uint32_t location_count = 1; + + if (mbr_type.columns > 1) + location_count = mbr_type.columns; + + if (!mbr_type.array.empty()) + for (uint32_t j = 0; j < uint32_t(mbr_type.array.size()); j++) + location_count *= to_array_size_literal(mbr_type, j); + + location += location_count; + } + + return location; +} + +void CompilerMSL::add_composite_member_variable_to_interface_block(StorageClass storage, const string &ib_var_ref, + SPIRType &ib_type, SPIRVariable &var, + uint32_t mbr_idx, InterfaceBlockMeta &meta) +{ + auto &entry_func = get(ir.default_entry_point); + auto &var_type = meta.strip_array ? get_variable_element_type(var) : get_variable_data_type(var); + + BuiltIn builtin = BuiltInMax; + bool is_builtin = is_member_builtin(var_type, mbr_idx, &builtin); + bool is_flat = + has_member_decoration(var_type.self, mbr_idx, DecorationFlat) || has_decoration(var.self, DecorationFlat); + bool is_noperspective = has_member_decoration(var_type.self, mbr_idx, DecorationNoPerspective) || + has_decoration(var.self, DecorationNoPerspective); + bool is_centroid = has_member_decoration(var_type.self, mbr_idx, DecorationCentroid) || + has_decoration(var.self, DecorationCentroid); + bool is_sample = + has_member_decoration(var_type.self, mbr_idx, DecorationSample) || has_decoration(var.self, DecorationSample); + + uint32_t mbr_type_id = var_type.member_types[mbr_idx]; + auto &mbr_type = get(mbr_type_id); + uint32_t elem_cnt = 0; + + if (is_matrix(mbr_type)) + { + if (is_array(mbr_type)) + SPIRV_CROSS_THROW("MSL cannot emit arrays-of-matrices in input and output variables."); + + elem_cnt = mbr_type.columns; + } + else if (is_array(mbr_type)) + { + if (mbr_type.array.size() != 1) + SPIRV_CROSS_THROW("MSL cannot emit arrays-of-arrays in input and output variables."); + + elem_cnt = to_array_size_literal(mbr_type); + } + + auto
*usable_type = &mbr_type; + if (usable_type->pointer) + usable_type = &get(usable_type->parent_type); + while (is_array(*usable_type) || is_matrix(*usable_type)) + usable_type = &get(usable_type->parent_type); + + bool flatten_from_ib_var = false; + string flatten_from_ib_mbr_name; + + if (storage == StorageClassOutput && is_builtin && builtin == BuiltInClipDistance) + { + // Also declare [[clip_distance]] attribute here. + uint32_t clip_array_mbr_idx = uint32_t(ib_type.member_types.size()); + ib_type.member_types.push_back(mbr_type_id); + set_member_decoration(ib_type.self, clip_array_mbr_idx, DecorationBuiltIn, BuiltInClipDistance); + + flatten_from_ib_mbr_name = builtin_to_glsl(BuiltInClipDistance, StorageClassOutput); + set_member_name(ib_type.self, clip_array_mbr_idx, flatten_from_ib_mbr_name); + + // When we flatten, we flatten directly from the "out" struct, + // not from a function variable. + flatten_from_ib_var = true; + + if (!msl_options.enable_clip_distance_user_varying) + return; + } + + for (uint32_t i = 0; i < elem_cnt; i++) + { + // Add a reference to the variable type to the interface struct. 
+ uint32_t ib_mbr_idx = uint32_t(ib_type.member_types.size()); + if (storage == StorageClassInput && pull_model_inputs.count(var.self)) + ib_type.member_types.push_back(build_msl_interpolant_type(usable_type->self, is_noperspective)); + else + ib_type.member_types.push_back(usable_type->self); + + // Give the member a name + string mbr_name = ensure_valid_name(join(to_qualified_member_name(var_type, mbr_idx), "_", i), "m"); + set_member_name(ib_type.self, ib_mbr_idx, mbr_name); + + if (has_member_decoration(var_type.self, mbr_idx, DecorationLocation)) + { + uint32_t locn = get_member_decoration(var_type.self, mbr_idx, DecorationLocation) + i; + set_member_decoration(ib_type.self, ib_mbr_idx, DecorationLocation, locn); + mark_location_as_used_by_shader(locn, *usable_type, storage); + } + else if (has_decoration(var.self, DecorationLocation)) + { + uint32_t locn = get_accumulated_member_location(var, mbr_idx, meta.strip_array) + i; + set_member_decoration(ib_type.self, ib_mbr_idx, DecorationLocation, locn); + mark_location_as_used_by_shader(locn, *usable_type, storage); + } + else if (is_builtin && is_tessellation_shader() && inputs_by_builtin.count(builtin)) + { + uint32_t locn = inputs_by_builtin[builtin].location + i; + set_member_decoration(ib_type.self, ib_mbr_idx, DecorationLocation, locn); + mark_location_as_used_by_shader(locn, *usable_type, storage); + } + else if (is_builtin && builtin == BuiltInClipDistance) + { + // Declare the ClipDistance as [[user(clipN)]]. 
+ set_member_decoration(ib_type.self, ib_mbr_idx, DecorationBuiltIn, BuiltInClipDistance); + set_member_decoration(ib_type.self, ib_mbr_idx, DecorationLocation, i); + } + + if (has_member_decoration(var_type.self, mbr_idx, DecorationComponent)) + SPIRV_CROSS_THROW("DecorationComponent on matrices and arrays make little sense."); + + if (storage != StorageClassInput || !pull_model_inputs.count(var.self)) + { + // Copy interpolation decorations if needed + if (is_flat) + set_member_decoration(ib_type.self, ib_mbr_idx, DecorationFlat); + if (is_noperspective) + set_member_decoration(ib_type.self, ib_mbr_idx, DecorationNoPerspective); + if (is_centroid) + set_member_decoration(ib_type.self, ib_mbr_idx, DecorationCentroid); + if (is_sample) + set_member_decoration(ib_type.self, ib_mbr_idx, DecorationSample); + } + + set_extended_member_decoration(ib_type.self, ib_mbr_idx, SPIRVCrossDecorationInterfaceOrigID, var.self); + set_extended_member_decoration(ib_type.self, ib_mbr_idx, SPIRVCrossDecorationInterfaceMemberIndex, mbr_idx); + + // Unflatten or flatten from [[stage_in]] or [[stage_out]] as appropriate. 
+ if (!meta.strip_array) + { + switch (storage) + { + case StorageClassInput: + entry_func.fixup_hooks_in.push_back([=, &var, &var_type]() { + if (pull_model_inputs.count(var.self)) + { + string lerp_call; + if (is_centroid) + lerp_call = ".interpolate_at_centroid()"; + else if (is_sample) + lerp_call = join(".interpolate_at_sample(", to_expression(builtin_sample_id_id), ")"); + else + lerp_call = ".interpolate_at_center()"; + statement(to_name(var.self), ".", to_member_name(var_type, mbr_idx), "[", i, "] = ", ib_var_ref, + ".", mbr_name, lerp_call, ";"); + } + else + { + statement(to_name(var.self), ".", to_member_name(var_type, mbr_idx), "[", i, "] = ", ib_var_ref, + ".", mbr_name, ";"); + } + }); + break; + + case StorageClassOutput: + entry_func.fixup_hooks_out.push_back([=, &var, &var_type]() { + if (flatten_from_ib_var) + { + statement(ib_var_ref, ".", mbr_name, " = ", ib_var_ref, ".", flatten_from_ib_mbr_name, "[", i, + "];"); + } + else + { + statement(ib_var_ref, ".", mbr_name, " = ", to_name(var.self), ".", + to_member_name(var_type, mbr_idx), "[", i, "];"); + } + }); + break; + + default: + break; + } + } + } +} + +void CompilerMSL::add_plain_member_variable_to_interface_block(StorageClass storage, const string &ib_var_ref, + SPIRType &ib_type, SPIRVariable &var, uint32_t mbr_idx, + InterfaceBlockMeta &meta) +{ + auto &var_type = meta.strip_array ? 
get_variable_element_type(var) : get_variable_data_type(var); + auto &entry_func = get(ir.default_entry_point); + + BuiltIn builtin = BuiltInMax; + bool is_builtin = is_member_builtin(var_type, mbr_idx, &builtin); + bool is_flat = + has_member_decoration(var_type.self, mbr_idx, DecorationFlat) || has_decoration(var.self, DecorationFlat); + bool is_noperspective = has_member_decoration(var_type.self, mbr_idx, DecorationNoPerspective) || + has_decoration(var.self, DecorationNoPerspective); + bool is_centroid = has_member_decoration(var_type.self, mbr_idx, DecorationCentroid) || + has_decoration(var.self, DecorationCentroid); + bool is_sample = + has_member_decoration(var_type.self, mbr_idx, DecorationSample) || has_decoration(var.self, DecorationSample); + + // Add a reference to the member to the interface struct. + uint32_t mbr_type_id = var_type.member_types[mbr_idx]; + uint32_t ib_mbr_idx = uint32_t(ib_type.member_types.size()); + mbr_type_id = ensure_correct_builtin_type(mbr_type_id, builtin); + var_type.member_types[mbr_idx] = mbr_type_id; + if (storage == StorageClassInput && pull_model_inputs.count(var.self)) + ib_type.member_types.push_back(build_msl_interpolant_type(mbr_type_id, is_noperspective)); + else + ib_type.member_types.push_back(mbr_type_id); + + // Give the member a name + string mbr_name = ensure_valid_name(to_qualified_member_name(var_type, mbr_idx), "m"); + set_member_name(ib_type.self, ib_mbr_idx, mbr_name); + + // Update the original variable reference to include the structure reference + string qual_var_name = ib_var_ref + "." + mbr_name; + // If using pull-model interpolation, need to add a call to the correct interpolation method. 
+ if (storage == StorageClassInput && pull_model_inputs.count(var.self)) + { + if (is_centroid) + qual_var_name += ".interpolate_at_centroid()"; + else if (is_sample) + qual_var_name += join(".interpolate_at_sample(", to_expression(builtin_sample_id_id), ")"); + else + qual_var_name += ".interpolate_at_center()"; + } + + if (is_builtin && !meta.strip_array) + { + // For the builtin gl_PerVertex, we cannot treat it as a block anyways, + // so redirect to qualified name. + set_member_qualified_name(var_type.self, mbr_idx, qual_var_name); + } + else if (!meta.strip_array) + { + // Unflatten or flatten from [[stage_in]] or [[stage_out]] as appropriate. + switch (storage) + { + case StorageClassInput: + entry_func.fixup_hooks_in.push_back([=, &var, &var_type]() { + statement(to_name(var.self), ".", to_member_name(var_type, mbr_idx), " = ", qual_var_name, ";"); + }); + break; + + case StorageClassOutput: + entry_func.fixup_hooks_out.push_back([=, &var, &var_type]() { + statement(qual_var_name, " = ", to_name(var.self), ".", to_member_name(var_type, mbr_idx), ";"); + }); + break; + + default: + break; + } + } + + // Copy the variable location from the original variable to the member + if (has_member_decoration(var_type.self, mbr_idx, DecorationLocation)) + { + uint32_t locn = get_member_decoration(var_type.self, mbr_idx, DecorationLocation); + if (storage == StorageClassInput) + { + mbr_type_id = ensure_correct_input_type(mbr_type_id, locn); + var_type.member_types[mbr_idx] = mbr_type_id; + if (storage == StorageClassInput && pull_model_inputs.count(var.self)) + ib_type.member_types[ib_mbr_idx] = build_msl_interpolant_type(mbr_type_id, is_noperspective); + else + ib_type.member_types[ib_mbr_idx] = mbr_type_id; + } + set_member_decoration(ib_type.self, ib_mbr_idx, DecorationLocation, locn); + mark_location_as_used_by_shader(locn, get(mbr_type_id), storage); + } + else if (has_decoration(var.self, DecorationLocation)) + { + // The block itself might have a location and in 
this case, all members of the block + // receive incrementing locations. + uint32_t locn = get_accumulated_member_location(var, mbr_idx, meta.strip_array); + if (storage == StorageClassInput) + { + mbr_type_id = ensure_correct_input_type(mbr_type_id, locn); + var_type.member_types[mbr_idx] = mbr_type_id; + if (storage == StorageClassInput && pull_model_inputs.count(var.self)) + ib_type.member_types[ib_mbr_idx] = build_msl_interpolant_type(mbr_type_id, is_noperspective); + else + ib_type.member_types[ib_mbr_idx] = mbr_type_id; + } + set_member_decoration(ib_type.self, ib_mbr_idx, DecorationLocation, locn); + mark_location_as_used_by_shader(locn, get(mbr_type_id), storage); + } + else if (is_builtin && is_tessellation_shader() && inputs_by_builtin.count(builtin)) + { + uint32_t locn = 0; + auto builtin_itr = inputs_by_builtin.find(builtin); + if (builtin_itr != end(inputs_by_builtin)) + locn = builtin_itr->second.location; + set_member_decoration(ib_type.self, ib_mbr_idx, DecorationLocation, locn); + mark_location_as_used_by_shader(locn, get(mbr_type_id), storage); + } + + // Copy the component location, if present. 
+ if (has_member_decoration(var_type.self, mbr_idx, DecorationComponent)) + { + uint32_t comp = get_member_decoration(var_type.self, mbr_idx, DecorationComponent); + set_member_decoration(ib_type.self, ib_mbr_idx, DecorationComponent, comp); + } + + // Mark the member as builtin if needed + if (is_builtin) + { + set_member_decoration(ib_type.self, ib_mbr_idx, DecorationBuiltIn, builtin); + if (builtin == BuiltInPosition && storage == StorageClassOutput) + qual_pos_var_name = qual_var_name; + } + + if (storage != StorageClassInput || !pull_model_inputs.count(var.self)) + { + // Copy interpolation decorations if needed + if (is_flat) + set_member_decoration(ib_type.self, ib_mbr_idx, DecorationFlat); + if (is_noperspective) + set_member_decoration(ib_type.self, ib_mbr_idx, DecorationNoPerspective); + if (is_centroid) + set_member_decoration(ib_type.self, ib_mbr_idx, DecorationCentroid); + if (is_sample) + set_member_decoration(ib_type.self, ib_mbr_idx, DecorationSample); + } + + set_extended_member_decoration(ib_type.self, ib_mbr_idx, SPIRVCrossDecorationInterfaceOrigID, var.self); + set_extended_member_decoration(ib_type.self, ib_mbr_idx, SPIRVCrossDecorationInterfaceMemberIndex, mbr_idx); +} + +// In Metal, the tessellation levels are stored as tightly packed half-precision floating point values. +// But, stage-in attribute offsets and strides must be multiples of four, so we can't pass the levels +// individually. Therefore, we must pass them as vectors. Triangles get a single float4, with the outer +// levels in 'xyz' and the inner level in 'w'. Quads get a float4 containing the outer levels and a +// float2 containing the inner levels. 
+void CompilerMSL::add_tess_level_input_to_interface_block(const std::string &ib_var_ref, SPIRType &ib_type, + SPIRVariable &var) +{ + auto &entry_func = get(ir.default_entry_point); + auto &var_type = get_variable_element_type(var); + + BuiltIn builtin = BuiltIn(get_decoration(var.self, DecorationBuiltIn)); + + // Force the variable to have the proper name. + set_name(var.self, builtin_to_glsl(builtin, StorageClassFunction)); + + if (get_entry_point().flags.get(ExecutionModeTriangles)) + { + // Triangles are tricky, because we want only one member in the struct. + + // We need to declare the variable early and at entry-point scope. + entry_func.add_local_variable(var.self); + vars_needing_early_declaration.push_back(var.self); + + string mbr_name = "gl_TessLevel"; + + // If we already added the other one, we can skip this step. + if (!added_builtin_tess_level) + { + // Add a reference to the variable type to the interface struct. + uint32_t ib_mbr_idx = uint32_t(ib_type.member_types.size()); + + uint32_t type_id = build_extended_vector_type(var_type.self, 4); + + ib_type.member_types.push_back(type_id); + + // Give the member a name + set_member_name(ib_type.self, ib_mbr_idx, mbr_name); + + // There is no qualified alias since we need to flatten the internal array on return. 
+ if (get_decoration_bitset(var.self).get(DecorationLocation)) + { + uint32_t locn = get_decoration(var.self, DecorationLocation); + set_member_decoration(ib_type.self, ib_mbr_idx, DecorationLocation, locn); + mark_location_as_used_by_shader(locn, var_type, StorageClassInput); + } + else if (inputs_by_builtin.count(builtin)) + { + uint32_t locn = inputs_by_builtin[builtin].location; + set_member_decoration(ib_type.self, ib_mbr_idx, DecorationLocation, locn); + mark_location_as_used_by_shader(locn, var_type, StorageClassInput); + } + + added_builtin_tess_level = true; + } + + switch (builtin) + { + case BuiltInTessLevelOuter: + entry_func.fixup_hooks_in.push_back([=, &var]() { + statement(to_name(var.self), "[0] = ", ib_var_ref, ".", mbr_name, ".x;"); + statement(to_name(var.self), "[1] = ", ib_var_ref, ".", mbr_name, ".y;"); + statement(to_name(var.self), "[2] = ", ib_var_ref, ".", mbr_name, ".z;"); + }); + break; + + case BuiltInTessLevelInner: + entry_func.fixup_hooks_in.push_back( + [=, &var]() { statement(to_name(var.self), "[0] = ", ib_var_ref, ".", mbr_name, ".w;"); }); + break; + + default: + assert(false); + break; + } + } + else + { + // Add a reference to the variable type to the interface struct. + uint32_t ib_mbr_idx = uint32_t(ib_type.member_types.size()); + + uint32_t type_id = build_extended_vector_type(var_type.self, builtin == BuiltInTessLevelOuter ? 4 : 2); + // Change the type of the variable, too. + uint32_t ptr_type_id = ir.increase_bound_by(1); + auto &new_var_type = set(ptr_type_id, get(type_id)); + new_var_type.pointer = true; + new_var_type.storage = StorageClassInput; + new_var_type.parent_type = type_id; + var.basetype = ptr_type_id; + + ib_type.member_types.push_back(type_id); + + // Give the member a name + string mbr_name = to_expression(var.self); + set_member_name(ib_type.self, ib_mbr_idx, mbr_name); + + // Since vectors can be indexed like arrays, there is no need to unpack this. We can + // just refer to the vector directly. 
So give it a qualified alias. + string qual_var_name = ib_var_ref + "." + mbr_name; + ir.meta[var.self].decoration.qualified_alias = qual_var_name; + + if (get_decoration_bitset(var.self).get(DecorationLocation)) + { + uint32_t locn = get_decoration(var.self, DecorationLocation); + set_member_decoration(ib_type.self, ib_mbr_idx, DecorationLocation, locn); + mark_location_as_used_by_shader(locn, new_var_type, StorageClassInput); + } + else if (inputs_by_builtin.count(builtin)) + { + uint32_t locn = inputs_by_builtin[builtin].location; + set_member_decoration(ib_type.self, ib_mbr_idx, DecorationLocation, locn); + mark_location_as_used_by_shader(locn, new_var_type, StorageClassInput); + } + } +} + +void CompilerMSL::add_variable_to_interface_block(StorageClass storage, const string &ib_var_ref, SPIRType &ib_type, + SPIRVariable &var, InterfaceBlockMeta &meta) +{ + auto &entry_func = get(ir.default_entry_point); + // Tessellation control I/O variables and tessellation evaluation per-point inputs are + // usually declared as arrays. In these cases, we want to add the element type to the + // interface block, since in Metal it's the interface block itself which is arrayed. + auto &var_type = meta.strip_array ? get_variable_element_type(var) : get_variable_data_type(var); + bool is_builtin = is_builtin_variable(var); + auto builtin = BuiltIn(get_decoration(var.self, DecorationBuiltIn)); + + if (var_type.basetype == SPIRType::Struct) + { + if (!is_builtin_type(var_type) && (!capture_output_to_buffer || storage == StorageClassInput) && + !meta.strip_array) + { + // For I/O blocks or structs, we will need to pass the block itself around + // to functions if they are used globally in leaf functions. + // Rather than passing down member by member, + // we unflatten I/O blocks while running the shader, + // and pass the actual struct type down to leaf functions. + // We then unflatten inputs, and flatten outputs in the "fixup" stages. 
+ entry_func.add_local_variable(var.self); + vars_needing_early_declaration.push_back(var.self); + } + + if (capture_output_to_buffer && storage != StorageClassInput && !has_decoration(var_type.self, DecorationBlock)) + { + // In Metal tessellation shaders, the interface block itself is arrayed. This makes things + // very complicated, since stage-in structures in MSL don't support nested structures. + // Luckily, for stage-out when capturing output, we can avoid this and just add + // composite members directly, because the stage-out structure is stored to a buffer, + // not returned. + add_plain_variable_to_interface_block(storage, ib_var_ref, ib_type, var, meta); + } + else + { + // Flatten the struct members into the interface struct + for (uint32_t mbr_idx = 0; mbr_idx < uint32_t(var_type.member_types.size()); mbr_idx++) + { + builtin = BuiltInMax; + is_builtin = is_member_builtin(var_type, mbr_idx, &builtin); + auto &mbr_type = get(var_type.member_types[mbr_idx]); + + if (!is_builtin || has_active_builtin(builtin, storage)) + { + bool is_composite_type = is_matrix(mbr_type) || is_array(mbr_type); + bool attribute_load_store = + storage == StorageClassInput && get_execution_model() != ExecutionModelFragment; + bool storage_is_stage_io = + (storage == StorageClassInput && !(get_execution_model() == ExecutionModelTessellationControl && + msl_options.multi_patch_workgroup)) || + storage == StorageClassOutput; + + // ClipDistance always needs to be declared as user attributes. 
+ if (builtin == BuiltInClipDistance) + is_builtin = false; + + if ((!is_builtin || attribute_load_store) && storage_is_stage_io && is_composite_type) + { + add_composite_member_variable_to_interface_block(storage, ib_var_ref, ib_type, var, mbr_idx, + meta); + } + else + { + add_plain_member_variable_to_interface_block(storage, ib_var_ref, ib_type, var, mbr_idx, meta); + } + } + } + } + } + else if (get_execution_model() == ExecutionModelTessellationEvaluation && storage == StorageClassInput && + !meta.strip_array && is_builtin && (builtin == BuiltInTessLevelOuter || builtin == BuiltInTessLevelInner)) + { + add_tess_level_input_to_interface_block(ib_var_ref, ib_type, var); + } + else if (var_type.basetype == SPIRType::Boolean || var_type.basetype == SPIRType::Char || + type_is_integral(var_type) || type_is_floating_point(var_type) || var_type.basetype == SPIRType::Boolean) + { + if (!is_builtin || has_active_builtin(builtin, storage)) + { + bool is_composite_type = is_matrix(var_type) || is_array(var_type); + bool storage_is_stage_io = + (storage == StorageClassInput && + !(get_execution_model() == ExecutionModelTessellationControl && msl_options.multi_patch_workgroup)) || + (storage == StorageClassOutput && !capture_output_to_buffer); + bool attribute_load_store = storage == StorageClassInput && get_execution_model() != ExecutionModelFragment; + + // ClipDistance always needs to be declared as user attributes. + if (builtin == BuiltInClipDistance) + is_builtin = false; + + // MSL does not allow matrices or arrays in input or output variables, so need to handle it specially. 
+ if ((!is_builtin || attribute_load_store) && storage_is_stage_io && is_composite_type) + { + add_composite_variable_to_interface_block(storage, ib_var_ref, ib_type, var, meta); + } + else + { + add_plain_variable_to_interface_block(storage, ib_var_ref, ib_type, var, meta); + } + } + } +} + +// Fix up the mapping of variables to interface member indices, which is used to compile access chains +// for per-vertex variables in a tessellation control shader. +void CompilerMSL::fix_up_interface_member_indices(StorageClass storage, uint32_t ib_type_id) +{ + // Only needed for tessellation shaders and pull-model interpolants. + // Need to redirect interface indices back to variables themselves. + // For structs, each member of the struct need a separate instance. + if (get_execution_model() != ExecutionModelTessellationControl && + !(get_execution_model() == ExecutionModelTessellationEvaluation && storage == StorageClassInput) && + !(get_execution_model() == ExecutionModelFragment && storage == StorageClassInput && + !pull_model_inputs.empty())) + return; + + auto mbr_cnt = uint32_t(ir.meta[ib_type_id].members.size()); + for (uint32_t i = 0; i < mbr_cnt; i++) + { + uint32_t var_id = get_extended_member_decoration(ib_type_id, i, SPIRVCrossDecorationInterfaceOrigID); + if (!var_id) + continue; + auto &var = get(var_id); + + auto &type = get_variable_element_type(var); + if (storage == StorageClassInput && type.basetype == SPIRType::Struct) + { + uint32_t mbr_idx = get_extended_member_decoration(ib_type_id, i, SPIRVCrossDecorationInterfaceMemberIndex); + + // Only set the lowest InterfaceMemberIndex for each variable member. + // IB struct members will be emitted in-order w.r.t. interface member index. + if (!has_extended_member_decoration(var_id, mbr_idx, SPIRVCrossDecorationInterfaceMemberIndex)) + set_extended_member_decoration(var_id, mbr_idx, SPIRVCrossDecorationInterfaceMemberIndex, i); + } + else + { + // Only set the lowest InterfaceMemberIndex for each variable. 
+ // IB struct members will be emitted in-order w.r.t. interface member index. + if (!has_extended_decoration(var_id, SPIRVCrossDecorationInterfaceMemberIndex)) + set_extended_decoration(var_id, SPIRVCrossDecorationInterfaceMemberIndex, i); + } + } +} + +// Add an interface structure for the type of storage, which is either StorageClassInput or StorageClassOutput. +// Returns the ID of the newly added variable, or zero if no variable was added. +uint32_t CompilerMSL::add_interface_block(StorageClass storage, bool patch) +{ + // Accumulate the variables that should appear in the interface struct. + SmallVector vars; + bool incl_builtins = storage == StorageClassOutput || is_tessellation_shader(); + bool has_seen_barycentric = false; + + InterfaceBlockMeta meta; + + // Varying interfaces between stages which use "user()" attribute can be dealt with + // without explicit packing and unpacking of components. For any variables which link against the runtime + // in some way (vertex attributes, fragment output, etc), we'll need to deal with it somehow. + bool pack_components = + (storage == StorageClassInput && get_execution_model() == ExecutionModelVertex) || + (storage == StorageClassOutput && get_execution_model() == ExecutionModelFragment) || + (storage == StorageClassOutput && get_execution_model() == ExecutionModelVertex && capture_output_to_buffer); + + ir.for_each_typed_id([&](uint32_t var_id, SPIRVariable &var) { + if (var.storage != storage) + return; + + auto &type = this->get(var.basetype); + + bool is_builtin = is_builtin_variable(var); + auto bi_type = BuiltIn(get_decoration(var_id, DecorationBuiltIn)); + uint32_t location = get_decoration(var_id, DecorationLocation); + + // These builtins are part of the stage in/out structs. 
+ bool is_interface_block_builtin = + (bi_type == BuiltInPosition || bi_type == BuiltInPointSize || bi_type == BuiltInClipDistance || + bi_type == BuiltInCullDistance || bi_type == BuiltInLayer || bi_type == BuiltInViewportIndex || + bi_type == BuiltInBaryCoordNV || bi_type == BuiltInBaryCoordNoPerspNV || bi_type == BuiltInFragDepth || + bi_type == BuiltInFragStencilRefEXT || bi_type == BuiltInSampleMask) || + (get_execution_model() == ExecutionModelTessellationEvaluation && + (bi_type == BuiltInTessLevelOuter || bi_type == BuiltInTessLevelInner)); + + bool is_active = interface_variable_exists_in_entry_point(var.self); + if (is_builtin && is_active) + { + // Only emit the builtin if it's active in this entry point. Interface variable list might lie. + is_active = has_active_builtin(bi_type, storage); + } + + bool filter_patch_decoration = (has_decoration(var_id, DecorationPatch) || is_patch_block(type)) == patch; + + bool hidden = is_hidden_variable(var, incl_builtins); + + // ClipDistance is never hidden, we need to emulate it when used as an input. + if (bi_type == BuiltInClipDistance) + hidden = false; + + // It's not enough to simply avoid marking fragment outputs if the pipeline won't + // accept them. We can't put them in the struct at all, or otherwise the compiler + // complains that the outputs weren't explicitly marked. + if (get_execution_model() == ExecutionModelFragment && storage == StorageClassOutput && !patch && + ((is_builtin && ((bi_type == BuiltInFragDepth && !msl_options.enable_frag_depth_builtin) || + (bi_type == BuiltInFragStencilRefEXT && !msl_options.enable_frag_stencil_ref_builtin))) || + (!is_builtin && !(msl_options.enable_frag_output_mask & (1 << location))))) + { + hidden = true; + disabled_frag_outputs.push_back(var_id); + // If a builtin, force it to have the proper name. 
+ if (is_builtin) + set_name(var_id, builtin_to_glsl(bi_type, StorageClassFunction)); + } + + // Barycentric inputs must be emitted in stage-in, because they can have interpolation arguments. + if (is_active && (bi_type == BuiltInBaryCoordNV || bi_type == BuiltInBaryCoordNoPerspNV)) + { + if (has_seen_barycentric) + SPIRV_CROSS_THROW("Cannot declare both BaryCoordNV and BaryCoordNoPerspNV in same shader in MSL."); + has_seen_barycentric = true; + hidden = false; + } + + if (is_active && !hidden && type.pointer && filter_patch_decoration && + (!is_builtin || is_interface_block_builtin)) + { + vars.push_back(&var); + + if (!is_builtin) + { + // Need to deal specially with DecorationComponent. + // Multiple variables can alias the same Location, and try to make sure each location is declared only once. + // We will swizzle data in and out to make this work. + // We only need to consider plain variables here, not composites. + // This is only relevant for vertex inputs and fragment outputs. + // Technically tessellation as well, but it is too complicated to support. + uint32_t component = get_decoration(var_id, DecorationComponent); + if (component != 0) + { + if (is_tessellation_shader()) + SPIRV_CROSS_THROW("Component decoration is not supported in tessellation shaders."); + else if (pack_components) + { + auto &location_meta = meta.location_meta[location]; + location_meta.num_components = std::max(location_meta.num_components, component + type.vecsize); + } + } + } + } + }); + + // If no variables qualify, leave. + // For patch input in a tessellation evaluation shader, the per-vertex stage inputs + // are included in a special patch control point array. + if (vars.empty() && !(storage == StorageClassInput && patch && stage_in_var_id)) + return 0; + + // Add a new typed variable for this interface structure. 
+ // The initializer expression is allocated here, but populated when the function + // declaraion is emitted, because it is cleared after each compilation pass. + uint32_t next_id = ir.increase_bound_by(3); + uint32_t ib_type_id = next_id++; + auto &ib_type = set(ib_type_id); + ib_type.basetype = SPIRType::Struct; + ib_type.storage = storage; + set_decoration(ib_type_id, DecorationBlock); + + uint32_t ib_var_id = next_id++; + auto &var = set(ib_var_id, ib_type_id, storage, 0); + var.initializer = next_id++; + + string ib_var_ref; + auto &entry_func = get(ir.default_entry_point); + switch (storage) + { + case StorageClassInput: + ib_var_ref = patch ? patch_stage_in_var_name : stage_in_var_name; + if (get_execution_model() == ExecutionModelTessellationControl) + { + // Add a hook to populate the shared workgroup memory containing the gl_in array. + entry_func.fixup_hooks_in.push_back([=]() { + // Can't use PatchVertices, PrimitiveId, or InvocationId yet; the hooks for those may not have run yet. + if (msl_options.multi_patch_workgroup) + { + // n.b. builtin_invocation_id_id here is the dispatch global invocation ID, + // not the TC invocation ID. + statement("device ", to_name(ir.default_entry_point), "_", ib_var_ref, "* gl_in = &", + input_buffer_var_name, "[min(", to_expression(builtin_invocation_id_id), ".x / ", + get_entry_point().output_vertices, + ", spvIndirectParams[1] - 1) * spvIndirectParams[0]];"); + } + else + { + // It's safe to use InvocationId here because it's directly mapped to a + // Metal builtin, and therefore doesn't need a hook. 
+ statement("if (", to_expression(builtin_invocation_id_id), " < spvIndirectParams[0])"); + statement(" ", input_wg_var_name, "[", to_expression(builtin_invocation_id_id), + "] = ", ib_var_ref, ";"); + statement("threadgroup_barrier(mem_flags::mem_threadgroup);"); + statement("if (", to_expression(builtin_invocation_id_id), + " >= ", get_entry_point().output_vertices, ")"); + statement(" return;"); + } + }); + } + break; + + case StorageClassOutput: + { + ib_var_ref = patch ? patch_stage_out_var_name : stage_out_var_name; + + // Add the output interface struct as a local variable to the entry function. + // If the entry point should return the output struct, set the entry function + // to return the output interface struct, otherwise to return nothing. + // Indicate the output var requires early initialization. + bool ep_should_return_output = !get_is_rasterization_disabled(); + uint32_t rtn_id = ep_should_return_output ? ib_var_id : 0; + if (!capture_output_to_buffer) + { + entry_func.add_local_variable(ib_var_id); + for (auto &blk_id : entry_func.blocks) + { + auto &blk = get(blk_id); + if (blk.terminator == SPIRBlock::Return) + blk.return_value = rtn_id; + } + vars_needing_early_declaration.push_back(ib_var_id); + } + else + { + switch (get_execution_model()) + { + case ExecutionModelVertex: + case ExecutionModelTessellationEvaluation: + // Instead of declaring a struct variable to hold the output and then + // copying that to the output buffer, we'll declare the output variable + // as a reference to the final output element in the buffer. Then we can + // avoid the extra copy. + entry_func.fixup_hooks_in.push_back([=]() { + if (stage_out_var_id) + { + // The first member of the indirect buffer is always the number of vertices + // to draw. 
+ // We zero-base the InstanceID & VertexID variables for HLSL emulation elsewhere, so don't do it twice + if (get_execution_model() == ExecutionModelVertex && msl_options.vertex_for_tessellation) + { + statement("device ", to_name(ir.default_entry_point), "_", ib_var_ref, "& ", ib_var_ref, + " = ", output_buffer_var_name, "[", to_expression(builtin_invocation_id_id), + ".y * ", to_expression(builtin_stage_input_size_id), ".x + ", + to_expression(builtin_invocation_id_id), ".x];"); + } + else if (msl_options.enable_base_index_zero) + { + statement("device ", to_name(ir.default_entry_point), "_", ib_var_ref, "& ", ib_var_ref, + " = ", output_buffer_var_name, "[", to_expression(builtin_instance_idx_id), + " * spvIndirectParams[0] + ", to_expression(builtin_vertex_idx_id), "];"); + } + else + { + statement("device ", to_name(ir.default_entry_point), "_", ib_var_ref, "& ", ib_var_ref, + " = ", output_buffer_var_name, "[(", to_expression(builtin_instance_idx_id), + " - ", to_expression(builtin_base_instance_id), ") * spvIndirectParams[0] + ", + to_expression(builtin_vertex_idx_id), " - ", + to_expression(builtin_base_vertex_id), "];"); + } + } + }); + break; + case ExecutionModelTessellationControl: + if (msl_options.multi_patch_workgroup) + { + // We cannot use PrimitiveId here, because the hook may not have run yet. 
+ if (patch) + { + entry_func.fixup_hooks_in.push_back([=]() { + statement("device ", to_name(ir.default_entry_point), "_", ib_var_ref, "& ", ib_var_ref, + " = ", patch_output_buffer_var_name, "[", to_expression(builtin_invocation_id_id), + ".x / ", get_entry_point().output_vertices, "];"); + }); + } + else + { + entry_func.fixup_hooks_in.push_back([=]() { + statement("device ", to_name(ir.default_entry_point), "_", ib_var_ref, "* gl_out = &", + output_buffer_var_name, "[", to_expression(builtin_invocation_id_id), ".x - ", + to_expression(builtin_invocation_id_id), ".x % ", + get_entry_point().output_vertices, "];"); + }); + } + } + else + { + if (patch) + { + entry_func.fixup_hooks_in.push_back([=]() { + statement("device ", to_name(ir.default_entry_point), "_", ib_var_ref, "& ", ib_var_ref, + " = ", patch_output_buffer_var_name, "[", to_expression(builtin_primitive_id_id), + "];"); + }); + } + else + { + entry_func.fixup_hooks_in.push_back([=]() { + statement("device ", to_name(ir.default_entry_point), "_", ib_var_ref, "* gl_out = &", + output_buffer_var_name, "[", to_expression(builtin_primitive_id_id), " * ", + get_entry_point().output_vertices, "];"); + }); + } + } + break; + default: + break; + } + } + break; + } + + default: + break; + } + + set_name(ib_type_id, to_name(ir.default_entry_point) + "_" + ib_var_ref); + set_name(ib_var_id, ib_var_ref); + + for (auto *p_var : vars) + { + bool strip_array = + (get_execution_model() == ExecutionModelTessellationControl || + (get_execution_model() == ExecutionModelTessellationEvaluation && storage == StorageClassInput)) && + !patch; + + meta.strip_array = strip_array; + add_variable_to_interface_block(storage, ib_var_ref, ib_type, *p_var, meta); + } + + if (get_execution_model() == ExecutionModelTessellationControl && msl_options.multi_patch_workgroup && + storage == StorageClassInput) + { + // For tessellation control inputs, add all outputs from the vertex shader to ensure + // the struct containing them is the 
correct size and layout. + for (auto &input : inputs_by_location) + { + if (is_msl_shader_input_used(input.first)) + continue; + + // Create a fake variable to put at the location. + uint32_t offset = ir.increase_bound_by(4); + uint32_t type_id = offset; + uint32_t array_type_id = offset + 1; + uint32_t ptr_type_id = offset + 2; + uint32_t var_id = offset + 3; + + SPIRType type; + switch (input.second.format) + { + case MSL_SHADER_INPUT_FORMAT_UINT16: + case MSL_SHADER_INPUT_FORMAT_ANY16: + type.basetype = SPIRType::UShort; + type.width = 16; + break; + case MSL_SHADER_INPUT_FORMAT_ANY32: + default: + type.basetype = SPIRType::UInt; + type.width = 32; + break; + } + type.vecsize = input.second.vecsize; + set(type_id, type); + + type.array.push_back(0); + type.array_size_literal.push_back(true); + type.parent_type = type_id; + set(array_type_id, type); + + type.pointer = true; + type.parent_type = array_type_id; + type.storage = storage; + auto &ptr_type = set(ptr_type_id, type); + ptr_type.self = array_type_id; + + auto &fake_var = set(var_id, ptr_type_id, storage); + set_decoration(var_id, DecorationLocation, input.first); + meta.strip_array = true; + add_variable_to_interface_block(storage, ib_var_ref, ib_type, fake_var, meta); + } + } + + // Sort the members of the structure by their locations. + MemberSorter member_sorter(ib_type, ir.meta[ib_type_id], MemberSorter::Location); + member_sorter.sort(); + + // The member indices were saved to the original variables, but after the members + // were sorted, those indices are now likely incorrect. Fix those up now. + if (!patch) + fix_up_interface_member_indices(storage, ib_type_id); + + // For patch inputs, add one more member, holding the array of control point data. 
+ if (get_execution_model() == ExecutionModelTessellationEvaluation && storage == StorageClassInput && patch && + stage_in_var_id) + { + uint32_t pcp_type_id = ir.increase_bound_by(1); + auto &pcp_type = set(pcp_type_id, ib_type); + pcp_type.basetype = SPIRType::ControlPointArray; + pcp_type.parent_type = pcp_type.type_alias = get_stage_in_struct_type().self; + pcp_type.storage = storage; + ir.meta[pcp_type_id] = ir.meta[ib_type.self]; + uint32_t mbr_idx = uint32_t(ib_type.member_types.size()); + ib_type.member_types.push_back(pcp_type_id); + set_member_name(ib_type.self, mbr_idx, "gl_in"); + } + + return ib_var_id; +}
+
+// Creates the array-like variable through which the per-vertex data of the interface
+// block `ib_var_id` is accessed, and returns the ID of that new variable.
+// Returns 0 when `ib_var_id` is 0.
+//
+// In a tessellation control shader the variable is named "gl_in" for StorageClassInput
+// and "gl_out" for any other storage class, and is typed as a pointer to a pointer to the
+// interface struct. For every other execution model a ControlPointArray-typed "gl_in"
+// variable is created and aliased to "<patch_stage_in_var_name>.gl_in".
+uint32_t CompilerMSL::add_interface_block_pointer(uint32_t ib_var_id, StorageClass storage)
+{
+	if (ib_var_id == 0)
+		return 0;
+
+	// Three IDs are always reserved, although only the tessellation control path uses all of them.
+	const uint32_t first_id = ir.increase_bound_by(3);
+	auto &ib_type = expression_type(ib_var_id);
+
+	if (get_execution_model() != ExecutionModelTessellationControl)
+	{
+		// Tessellation evaluation per-vertex inputs are also presented as arrays.
+		// But, in Metal, this array uses a very special type, 'patch_control_point',
+		// which is a container that can be used to access the control point data.
+		// To represent this, a special 'ControlPointArray' type has been added to the
+		// SPIRV-Cross type system. It should only be generated by and seen in the MSL
+		// backend (i.e. this one).
+		const uint32_t pcp_type_id = first_id;
+		const uint32_t pcp_var_id = first_id + 1;
+
+		auto &pcp_type = set(pcp_type_id, ib_type);
+		pcp_type.basetype = SPIRType::ControlPointArray;
+		pcp_type.type_alias = ib_type.self;
+		pcp_type.parent_type = pcp_type.type_alias;
+		pcp_type.storage = storage;
+		ir.meta[pcp_type_id] = ir.meta[ib_type.self];
+
+		set(pcp_var_id, pcp_type_id, storage, 0);
+		set_name(pcp_var_id, "gl_in");
+		ir.meta[pcp_var_id].decoration.qualified_alias = join(patch_stage_in_var_name, ".gl_in");
+		return pcp_var_id;
+	}
+
+	// Tessellation control per-vertex I/O is presented as an array, so we must
+	// do the same with our struct here.
+	const bool is_input = storage == StorageClassInput;
+	const uint32_t elem_ptr_type_id = first_id;
+	const uint32_t outer_ptr_type_id = first_id + 1;
+	const uint32_t array_var_id = first_id + 2;
+
+	// Only inputs of a non-multi-patch workgroup live in workgroup storage;
+	// everything else is reached through a storage buffer.
+	StorageClass elem_storage = StorageClassStorageBuffer;
+	if (is_input && !msl_options.multi_patch_workgroup)
+		elem_storage = StorageClassWorkgroup;
+
+	auto &elem_ptr_type = set(elem_ptr_type_id, ib_type);
+	elem_ptr_type.type_alias = ib_type.self;
+	elem_ptr_type.parent_type = elem_ptr_type.type_alias;
+	elem_ptr_type.pointer = true;
+	elem_ptr_type.storage = elem_storage;
+	ir.meta[elem_ptr_type_id] = ir.meta[ib_type.self];
+
+	// To ensure that get_variable_data_type() doesn't strip off the pointer,
+	// which we need, use another pointer.
+	auto &outer_ptr_type = set(outer_ptr_type_id, elem_ptr_type);
+	outer_ptr_type.parent_type = elem_ptr_type_id;
+	outer_ptr_type.type_alias = ib_type.self;
+	outer_ptr_type.storage = StorageClassFunction;
+	ir.meta[outer_ptr_type_id] = ir.meta[ib_type.self];
+
+	set(array_var_id, outer_ptr_type_id, StorageClassFunction, 0);
+	set_name(array_var_id, is_input ? "gl_in" : "gl_out");
+	return array_var_id;
+}
+
+// Ensure that the type is compatible with the builtin. +// If it is, simply return the given type ID. +// Otherwise, create a new type, and return it's ID. +uint32_t CompilerMSL::ensure_correct_builtin_type(uint32_t type_id, BuiltIn builtin) +{ + auto &type = get(type_id); + + if ((builtin == BuiltInSampleMask && is_array(type)) || + ((builtin == BuiltInLayer || builtin == BuiltInViewportIndex || builtin == BuiltInFragStencilRefEXT) && + type.basetype != SPIRType::UInt)) + { + uint32_t next_id = ir.increase_bound_by(type.pointer ?
2 : 1); + uint32_t base_type_id = next_id++; + auto &base_type = set(base_type_id); + base_type.basetype = SPIRType::UInt; + base_type.width = 32; + + if (!type.pointer) + return base_type_id; + + uint32_t ptr_type_id = next_id++; + auto &ptr_type = set(ptr_type_id); + ptr_type = base_type; + ptr_type.pointer = true; + ptr_type.storage = type.storage; + ptr_type.parent_type = base_type_id; + return ptr_type_id; + } + + return type_id; +}
+
+// Reconciles the type of a shader input with the host-side description registered for
+// its location in inputs_by_location.
+// Returns `type_id` itself when it is already suitable; otherwise returns the ID of a
+// type produced by build_extended_vector_type().
+//
+//  - No host entry for `location`: the type is only widened to `num_components`.
+//  - `num_components == 0` with a host entry: the host entry's vecsize is used instead.
+//  - Host format UINT8:  UByte/UShort/UInt are only widened; Short becomes UShort, Int becomes UInt.
+//  - Host format UINT16: UShort/UInt are only widened; Int becomes UInt.
+//  - Any other host format: the type is only widened.
+// Throws for every other base type under the UINT8 and UINT16 formats.
+uint32_t CompilerMSL::ensure_correct_input_type(uint32_t type_id, uint32_t location, uint32_t num_components)
+{
+	auto &type = get(type_id);
+
+	// Keeps the base type and grows the vector only when more components were requested.
+	auto widen_only = [&]() -> uint32_t {
+		return num_components > type.vecsize ? build_extended_vector_type(type_id, num_components) : type_id;
+	};
+
+	// Always builds a new type with the given base type, never narrower than the shader's vector.
+	auto rebase_as = [&](SPIRType::BaseType basetype) -> uint32_t {
+		return build_extended_vector_type(type_id, num_components > type.vecsize ? num_components : type.vecsize,
+		                                  basetype);
+	};
+
+	auto host_input = inputs_by_location.find(location);
+	if (host_input == end(inputs_by_location))
+		return widen_only();
+
+	if (num_components == 0)
+		num_components = host_input->second.vecsize;
+
+	switch (host_input->second.format)
+	{
+	case MSL_SHADER_INPUT_FORMAT_UINT8:
+	{
+		switch (type.basetype)
+		{
+		case SPIRType::UByte:
+		case SPIRType::UShort:
+		case SPIRType::UInt:
+			return widen_only();
+
+		case SPIRType::Short:
+			return rebase_as(SPIRType::UShort);
+		case SPIRType::Int:
+			return rebase_as(SPIRType::UInt);
+
+		default:
+			SPIRV_CROSS_THROW("Vertex attribute type mismatch between host and shader");
+		}
+	}
+
+	case MSL_SHADER_INPUT_FORMAT_UINT16:
+	{
+		switch (type.basetype)
+		{
+		case SPIRType::UShort:
+		case SPIRType::UInt:
+			return widen_only();
+
+		case SPIRType::Int:
+			return rebase_as(SPIRType::UInt);
+
+		default:
+			SPIRV_CROSS_THROW("Vertex attribute type mismatch between host and shader");
+		}
+	}
+
+	default:
+		break;
+	}
+
+	return widen_only();
+}
+
+void CompilerMSL::mark_struct_members_packed(const SPIRType &type) +{ + set_extended_decoration(type.self, SPIRVCrossDecorationPhysicalTypePacked); + + // Problem case! Struct needs to be placed at an awkward alignment. + // Mark every member of the child struct as packed. + uint32_t mbr_cnt = uint32_t(type.member_types.size()); + for (uint32_t i = 0; i < mbr_cnt; i++) + { + auto &mbr_type = get(type.member_types[i]); + if (mbr_type.basetype == SPIRType::Struct) + { + // Recursively mark structs as packed.
+ auto *struct_type = &mbr_type; + while (!struct_type->array.empty()) + struct_type = &get(struct_type->parent_type); + mark_struct_members_packed(*struct_type); + } + else if (!is_scalar(mbr_type)) + set_extended_member_decoration(type.self, i, SPIRVCrossDecorationPhysicalTypePacked); + } +}
+
+// Walks the members of `type` and prepares every nested struct for scalar block layout.
+// For each struct-typed member (arrays of structs are reduced to their element struct by
+// following parent_type) that does not already carry SPIRVCrossDecorationPhysicalTypePacked:
+//  - the nested struct is marked packed (mark_struct_members_packed) when the member's SPIR-V
+//    offset is not a multiple of its MSL alignment, or when its MSL size overruns the next
+//    member's offset or its own per-element ArrayStride;
+//  - the nested struct is then visited recursively;
+//  - for array members, the per-element ArrayStride is recorded on the nested struct as
+//    SPIRVCrossDecorationPaddingTarget.
+// Throws when the stride is smaller than the struct's MSL size, or when the same struct
+// already has a different padding target.
+void CompilerMSL::mark_scalar_layout_structs(const SPIRType &type)
+{
+	uint32_t mbr_cnt = uint32_t(type.member_types.size());
+	for (uint32_t i = 0; i < mbr_cnt; i++)
+	{
+		auto &mbr_type = get(type.member_types[i]);
+		if (mbr_type.basetype == SPIRType::Struct)
+		{
+			// Peel array levels until we reach the non-array struct type.
+			auto *struct_type = &mbr_type;
+			while (!struct_type->array.empty())
+				struct_type = &get(struct_type->parent_type);
+
+			// Already packed structs are skipped entirely (no recursion, no padding target).
+			if (has_extended_decoration(struct_type->self, SPIRVCrossDecorationPhysicalTypePacked))
+				continue;
+
+			uint32_t msl_alignment = get_declared_struct_member_alignment_msl(type, i);
+			uint32_t msl_size = get_declared_struct_member_size_msl(type, i);
+			uint32_t spirv_offset = type_struct_member_offset(type, i);
+			// For the last member there is no following offset, so the member's own end is used,
+			// which makes the "too large" test below false for it.
+			uint32_t spirv_offset_next;
+			if (i + 1 < mbr_cnt)
+				spirv_offset_next = type_struct_member_offset(type, i + 1);
+			else
+				spirv_offset_next = spirv_offset + msl_size;
+
+			// Both are complicated cases. In scalar layout, a struct of float3 might just consume 12 bytes,
+			// and the next member will be placed at offset 12.
+			bool struct_is_misaligned = (spirv_offset % msl_alignment) != 0;
+			bool struct_is_too_large = spirv_offset + msl_size > spirv_offset_next;
+			uint32_t array_stride = 0;
+			bool struct_needs_explicit_padding = false;
+
+			// Verify that if a struct is used as an array that ArrayStride matches the effective size of the struct.
+			if (!mbr_type.array.empty())
+			{
+				// Reduce the member's ArrayStride to a per-element stride by dividing out the
+				// first size() - 1 array dimensions (a size of 0 is treated as 1).
+				array_stride = type_struct_member_array_stride(type, i);
+				uint32_t dimensions = uint32_t(mbr_type.array.size() - 1);
+				for (uint32_t dim = 0; dim < dimensions; dim++)
+				{
+					uint32_t array_size = to_array_size_literal(mbr_type, dim);
+					array_stride /= max(array_size, 1u);
+				}
+
+				// Set expected struct size based on ArrayStride.
+				struct_needs_explicit_padding = true;
+
+				// If struct size is larger than array stride, we might be able to fit, if we tightly pack.
+				if (get_declared_struct_size_msl(*struct_type) > array_stride)
+					struct_is_too_large = true;
+			}
+
+			// Packing must be decided before recursing and before the size is re-queried below.
+			if (struct_is_misaligned || struct_is_too_large)
+				mark_struct_members_packed(*struct_type);
+			mark_scalar_layout_structs(*struct_type);
+
+			if (struct_needs_explicit_padding)
+			{
+				// NOTE(review): the two `true` arguments presumably select a size that ignores
+				// alignment and padding - confirm against get_declared_struct_size_msl().
+				msl_size = get_declared_struct_size_msl(*struct_type, true, true);
+				if (array_stride < msl_size)
+				{
+					SPIRV_CROSS_THROW("Cannot express an array stride smaller than size of struct type.");
+				}
+				else
+				{
+					// A struct type can only have one padding target, so every use of it as an
+					// array element must agree on the stride.
+					if (has_extended_decoration(struct_type->self, SPIRVCrossDecorationPaddingTarget))
+					{
+						if (array_stride !=
+						    get_extended_decoration(struct_type->self, SPIRVCrossDecorationPaddingTarget))
+							SPIRV_CROSS_THROW(
+							    "A struct is used with different array strides. Cannot express this in MSL.");
+					}
+					else
+						set_extended_decoration(struct_type->self, SPIRVCrossDecorationPaddingTarget, array_stride);
+				}
+			}
+		}
+	}
+}
+
+// Sort the members of the struct type by offset, and pack and then pad members where needed +// to align MSL members with SPIR-V offsets. The struct members are iterated twice. Packing +// occurs first, followed by padding, because packing a member reduces both its size and its +// natural alignment, possibly requiring a padding member to be added ahead of it. +void CompilerMSL::align_struct(SPIRType &ib_type, unordered_set &aligned_structs) +{ + // We align structs recursively, so stop any redundant work.
+ ID &ib_type_id = ib_type.self; + if (aligned_structs.count(ib_type_id)) + return; + aligned_structs.insert(ib_type_id); + + // Sort the members of the interface structure by their offset. + // They should already be sorted per SPIR-V spec anyway. + MemberSorter member_sorter(ib_type, ir.meta[ib_type_id], MemberSorter::Offset); + member_sorter.sort(); + + auto mbr_cnt = uint32_t(ib_type.member_types.size()); + + for (uint32_t mbr_idx = 0; mbr_idx < mbr_cnt; mbr_idx++) + { + // Pack any dependent struct types before we pack a parent struct. + auto &mbr_type = get(ib_type.member_types[mbr_idx]); + if (mbr_type.basetype == SPIRType::Struct) + align_struct(mbr_type, aligned_structs); + } + + // Test the alignment of each member, and if a member should be closer to the previous + // member than the default spacing expects, it is likely that the previous member is in + // a packed format. If so, and the previous member is packable, pack it. + // For example ... this applies to any 3-element vector that is followed by a scalar. + uint32_t msl_offset = 0; + for (uint32_t mbr_idx = 0; mbr_idx < mbr_cnt; mbr_idx++) + { + // This checks the member in isolation, if the member needs some kind of type remapping to conform to SPIR-V + // offsets, array strides and matrix strides. + ensure_member_packing_rules_msl(ib_type, mbr_idx); + + // Align current offset to the current member's default alignment. If the member was packed, it will observe + // the updated alignment here. + uint32_t msl_align_mask = get_declared_struct_member_alignment_msl(ib_type, mbr_idx) - 1; + uint32_t aligned_msl_offset = (msl_offset + msl_align_mask) & ~msl_align_mask; + + // Fetch the member offset as declared in the SPIRV. 
+ uint32_t spirv_mbr_offset = get_member_decoration(ib_type_id, mbr_idx, DecorationOffset); + if (spirv_mbr_offset > aligned_msl_offset) + { + // Since MSL and SPIR-V have slightly different struct member alignment and + // size rules, we'll pad to standard C-packing rules with a char[] array. If the member is farther + // away than C-packing, expects, add an inert padding member before the the member. + uint32_t padding_bytes = spirv_mbr_offset - aligned_msl_offset; + set_extended_member_decoration(ib_type_id, mbr_idx, SPIRVCrossDecorationPaddingTarget, padding_bytes); + + // Re-align as a sanity check that aligning post-padding matches up. + msl_offset += padding_bytes; + aligned_msl_offset = (msl_offset + msl_align_mask) & ~msl_align_mask; + } + else if (spirv_mbr_offset < aligned_msl_offset) + { + // This should not happen, but deal with unexpected scenarios. + // It *might* happen if a sub-struct has a larger alignment requirement in MSL than SPIR-V. + SPIRV_CROSS_THROW("Cannot represent buffer block correctly in MSL."); + } + + assert(aligned_msl_offset == spirv_mbr_offset); + + // Increment the current offset to be positioned immediately after the current member. + // Don't do this for the last member since it can be unsized, and it is not relevant for padding purposes here. + if (mbr_idx + 1 < mbr_cnt) + msl_offset = aligned_msl_offset + get_declared_struct_member_size_msl(ib_type, mbr_idx); + } +}
+
+// Checks whether member `index` of `type`, as it would currently be declared in MSL, matches
+// the layout that the SPIR-V decorations prescribe. Returns false as soon as one of these fails:
+//  - the MSL size fits in the gap up to the next member's Offset (not checked for the last member);
+//  - the MSL array stride equals ArrayStride (not checked when the last array dimension is a literal 1);
+//  - the MSL matrix stride equals MatrixStride (matrices only);
+//  - the SPIR-V Offset is a multiple of the MSL alignment.
+bool CompilerMSL::validate_member_packing_rules_msl(const SPIRType &type, uint32_t index) const
+{
+	auto &member_type = get(type.member_types[index]);
+	const uint32_t offset = get_member_decoration(type.self, index, DecorationOffset);
+
+	const bool has_next_member = index + 1 < type.member_types.size();
+	if (has_next_member)
+	{
+		// If SPIR-V offset + MSL size runs past the SPIR-V offset of the next member, some kind of
+		// remapping is unavoidable. Falling short is fine, since padding can always be added after
+		// this member.
+		const uint32_t next_offset = get_member_decoration(type.self, index + 1, DecorationOffset);
+		assert(next_offset >= offset);
+		const uint32_t available_bytes = next_offset - offset;
+		const uint32_t declared_bytes = get_declared_struct_member_size_msl(type, index);
+		if (declared_bytes > available_bytes)
+			return false;
+	}
+
+	if (!member_type.array.empty())
+	{
+		// Array strides must match SPIR-V exactly, except for an array with a single element.
+		// That exception comes from the DX scalar layout workaround: out-of-bounds access will not
+		// work, but in-bounds access chains are all SPIR-V requires under logical memory models.
+		const bool single_literal_element = member_type.array.back() == 1 && member_type.array_size_literal.back();
+		if (!single_literal_element)
+		{
+			const uint32_t expected_stride = type_struct_member_array_stride(type, index);
+			const uint32_t declared_stride = get_declared_struct_member_array_stride_msl(type, index);
+			if (expected_stride != declared_stride)
+				return false;
+		}
+	}
+
+	if (is_matrix(member_type))
+	{
+		// MatrixStride has to agree as well.
+		const uint32_t expected_stride = type_struct_member_matrix_stride(type, index);
+		const uint32_t declared_stride = get_declared_struct_member_matrix_stride_msl(type, index);
+		if (expected_stride != declared_stride)
+			return false;
+	}
+
+	// Last check: the member is valid only if its offset honours the MSL alignment.
+	const uint32_t alignment = get_declared_struct_member_alignment_msl(type, index);
+	return (offset % alignment) == 0;
+}
+
+// Here we need to verify that the member type we declare conforms to Offset, ArrayStride or MatrixStride restrictions. +// If there is a mismatch, we need to emit remapped types, either normal types, or "packed_X" types. +// In odd cases we need to emit packed and remapped types, for e.g.
weird matrices or arrays with weird array strides.
+//
+// The fallbacks are tried in order, re-running validate_member_packing_rules_msl() after each:
+//  1. declare the member as-is;
+//  2. mark the member SPIRVCrossDecorationPhysicalTypePacked;
+//  3. attach a remapped physical type (SPIRVCrossDecorationPhysicalTypeID) whose vector size or
+//     column count is derived from ArrayStride / MatrixStride, and drop the packed decoration;
+//  4. shrink that physical type by one array element / column / row (or, for a one-element
+//     array, drop the remap and go back to packed).
+// Throws when the member is a struct, when a stride cannot be expressed, or when nothing validates.
+void CompilerMSL::ensure_member_packing_rules_msl(SPIRType &ib_type, uint32_t index)
+{
+	if (validate_member_packing_rules_msl(ib_type, index))
+		return;
+
+	// We failed validation.
+	// This case will be nightmare-ish to deal with. This could possibly happen if struct alignment does not quite
+	// match up with what we want. Scalar block layout comes to mind here where we might have to work around the rule
+	// that struct alignment == max alignment of all members and struct size depends on this alignment.
+	auto &mbr_type = get(ib_type.member_types[index]);
+	if (mbr_type.basetype == SPIRType::Struct)
+		SPIRV_CROSS_THROW("Cannot perform any repacking for structs when it is used as a member of another struct.");
+
+	// Perform remapping here.
+	// There is nothing to be gained by using packed scalars, so don't attempt it.
+	// NOTE(review): the test below is applied to the enclosing type `ib_type`, not to `mbr_type`,
+	// although the comment above talks about scalar members - confirm which one is intended.
+	if (!is_scalar(ib_type))
+		set_extended_member_decoration(ib_type.self, index, SPIRVCrossDecorationPhysicalTypePacked);
+
+	// Try validating again, now with packed.
+	if (validate_member_packing_rules_msl(ib_type, index))
+		return;
+
+	// We're in deep trouble, and we need to create a new PhysicalType which matches up with what we expect.
+	// A lot of work goes here ...
+	// We will need remapping on Load and Store to translate the types between Logical and Physical.
+
+	// First, we check if we have small vector std140 array.
+	// We detect this if we have an array of vectors, and array stride is greater than number of elements.
+	if (!mbr_type.array.empty() && !is_matrix(mbr_type))
+	{
+		uint32_t array_stride = type_struct_member_array_stride(ib_type, index);
+
+		// Hack off array-of-arrays until we find the array stride per element we must have to make it work.
+		uint32_t dimensions = uint32_t(mbr_type.array.size() - 1);
+		for (uint32_t dim = 0; dim < dimensions; dim++)
+			array_stride /= max(to_array_size_literal(mbr_type, dim), 1u);
+
+		// Number of scalar components (width / 8 bytes each) that fit in one element stride.
+		uint32_t elems_per_stride = array_stride / (mbr_type.width / 8);
+
+		if (elems_per_stride == 3)
+			SPIRV_CROSS_THROW("Cannot use ArrayStride of 3 elements in remapping scenarios.");
+		else if (elems_per_stride > 4)
+			SPIRV_CROSS_THROW("Cannot represent vectors with more than 4 elements in MSL.");
+
+		// The physical type is a copy of the member type widened to fill the stride; it gets a
+		// fresh ID and is attached to the member through SPIRVCrossDecorationPhysicalTypeID.
+		auto physical_type = mbr_type;
+		physical_type.vecsize = elems_per_stride;
+		physical_type.parent_type = 0;
+		uint32_t type_id = ir.increase_bound_by(1);
+		set(type_id, physical_type);
+		set_extended_member_decoration(ib_type.self, index, SPIRVCrossDecorationPhysicalTypeID, type_id);
+		set_decoration(type_id, DecorationArrayStride, array_stride);
+
+		// Remove packed_ for vectors of size 1, 2 and 4.
+		// The check reads the decoration on the struct itself; the unset acts on the member.
+		if (has_extended_decoration(ib_type.self, SPIRVCrossDecorationPhysicalTypePacked))
+			SPIRV_CROSS_THROW("Unable to remove packed decoration as entire struct must be fully packed. Do not mix "
+			                  "scalar and std140 layout rules.");
+		else
+			unset_extended_member_decoration(ib_type.self, index, SPIRVCrossDecorationPhysicalTypePacked);
+	}
+	else if (is_matrix(mbr_type))
+	{
+		// MatrixStride might be std140-esque.
+		uint32_t matrix_stride = type_struct_member_matrix_stride(ib_type, index);
+
+		uint32_t elems_per_stride = matrix_stride / (mbr_type.width / 8);
+
+		// NOTE(review): this message says "ArrayStride" although the value checked here is derived
+		// from MatrixStride - looks like a copy of the array branch above; confirm before rewording.
+		if (elems_per_stride == 3)
+			SPIRV_CROSS_THROW("Cannot use ArrayStride of 3 elements in remapping scenarios.");
+		else if (elems_per_stride > 4)
+			SPIRV_CROSS_THROW("Cannot represent vectors with more than 4 elements in MSL.");
+
+		bool row_major = has_member_decoration(ib_type.self, index, DecorationRowMajor);
+
+		// For row-major members the stride widens the column count, otherwise the vector size.
+		auto physical_type = mbr_type;
+		physical_type.parent_type = 0;
+		if (row_major)
+			physical_type.columns = elems_per_stride;
+		else
+			physical_type.vecsize = elems_per_stride;
+		uint32_t type_id = ir.increase_bound_by(1);
+		set(type_id, physical_type);
+		set_extended_member_decoration(ib_type.self, index, SPIRVCrossDecorationPhysicalTypeID, type_id);
+
+		// Remove packed_ for vectors of size 1, 2 and 4.
+		if (has_extended_decoration(ib_type.self, SPIRVCrossDecorationPhysicalTypePacked))
+			SPIRV_CROSS_THROW("Unable to remove packed decoration as entire struct must be fully packed. Do not mix "
+			                  "scalar and std140 layout rules.");
+		else
+			unset_extended_member_decoration(ib_type.self, index, SPIRVCrossDecorationPhysicalTypePacked);
+	}
+	else
+		SPIRV_CROSS_THROW("Found a buffer packing case which we cannot represent in MSL.");
+
+	// Try validating again, now with physical type remapping.
+	if (validate_member_packing_rules_msl(ib_type, index))
+		return;
+
+	// We might have a particular odd scalar layout case where the last element of an array
+	// does not take up as much space as the ArrayStride or MatrixStride. This can happen with DX cbuffers.
+	// The "proper" workaround for this is extremely painful and essentially impossible in the edge case of float3[],
+	// so we hack around it by declaring the offending array or matrix with one less array size/col/row,
+	// and rely on padding to get the correct value. We will technically access arrays out of bounds into the padding region,
+	// but it should spill over gracefully without too much trouble. We rely on behavior like this for unsized arrays anyways.
+
+	// E.g. we might observe a physical layout of:
+	// { float2 a[2]; float b; } in cbuffer layout where ArrayStride of a is 16, but offset of b is 24, packed right after a[1] ...
+	uint32_t type_id = get_extended_member_decoration(ib_type.self, index, SPIRVCrossDecorationPhysicalTypeID);
+	auto &type = get(type_id);
+
+	// Modify the physical type in-place. This is safe since each physical type workaround is a copy.
+	if (is_array(type))
+	{
+		if (type.array.back() > 1)
+		{
+			// Only a literal size can be decremented here.
+			if (!type.array_size_literal.back())
+				SPIRV_CROSS_THROW("Cannot apply scalar layout workaround with spec constant array size.");
+			type.array.back() -= 1;
+		}
+		else
+		{
+			// We have an array of size 1, so we cannot decrement that. Our only option now is to
+			// force a packed layout instead, and drop the physical type remap since ArrayStride is meaningless now.
+			unset_extended_member_decoration(ib_type.self, index, SPIRVCrossDecorationPhysicalTypeID);
+			set_extended_member_decoration(ib_type.self, index, SPIRVCrossDecorationPhysicalTypePacked);
+		}
+	}
+	else if (is_matrix(type))
+	{
+		bool row_major = has_member_decoration(ib_type.self, index, DecorationRowMajor);
+		if (!row_major)
+		{
+			// Slice off one column. If we only have 2 columns, this might turn the matrix into a vector with one array element instead.
+			if (type.columns > 2)
+			{
+				type.columns--;
+			}
+			else if (type.columns == 2)
+			{
+				type.columns = 1;
+				assert(type.array.empty());
+				type.array.push_back(1);
+				type.array_size_literal.push_back(true);
+			}
+		}
+		else
+		{
+			// Slice off one row. If we only have 2 rows, this might turn the matrix into a vector with one array element instead.
+			if (type.vecsize > 2)
+			{
+				type.vecsize--;
+			}
+			else if (type.vecsize == 2)
+			{
+				// The remaining single row becomes a vector as wide as the column count.
+				type.vecsize = type.columns;
+				type.columns = 1;
+				assert(type.array.empty());
+				type.array.push_back(1);
+				type.array_size_literal.push_back(true);
+			}
+		}
+	}
+
+	// This better validate now, or we must fail gracefully.
+	if (!validate_member_packing_rules_msl(ib_type, index))
+		SPIRV_CROSS_THROW("Found a buffer packing case which we cannot represent in MSL.");
+}
+
+void CompilerMSL::emit_store_statement(uint32_t lhs_expression, uint32_t rhs_expression) +{ + auto &type = expression_type(rhs_expression); + + bool lhs_remapped_type = has_extended_decoration(lhs_expression, SPIRVCrossDecorationPhysicalTypeID); + bool lhs_packed_type = has_extended_decoration(lhs_expression, SPIRVCrossDecorationPhysicalTypePacked); + auto *lhs_e = maybe_get(lhs_expression); + auto *rhs_e = maybe_get(rhs_expression); + + bool transpose = lhs_e && lhs_e->need_transpose; + + // No physical type remapping, and no packed type, so can just emit a store directly. + if (!lhs_remapped_type && !lhs_packed_type) + { + // We might not be dealing with remapped physical types or packed types, + // but we might be doing a clean store to a row-major matrix. + // In this case, we just flip transpose states, and emit the store, a transpose must be in the RHS expression, if any. + if (is_matrix(type) && lhs_e && lhs_e->need_transpose) + { + if (!rhs_e) + SPIRV_CROSS_THROW("Need to transpose right-side expression of a store to row-major matrix, but it is " + "not a SPIRExpression."); + lhs_e->need_transpose = false; + + if (rhs_e && rhs_e->need_transpose) + { + // Direct copy, but might need to unpack RHS. + // Skip the transpose, as we will transpose when writing to LHS and transpose(transpose(T)) == T.
+ rhs_e->need_transpose = false; + statement(to_expression(lhs_expression), " = ", to_unpacked_row_major_matrix_expression(rhs_expression), + ";"); + rhs_e->need_transpose = true; + } + else + statement(to_expression(lhs_expression), " = transpose(", to_unpacked_expression(rhs_expression), ");"); + + lhs_e->need_transpose = true; + register_write(lhs_expression); + } + else if (lhs_e && lhs_e->need_transpose) + { + lhs_e->need_transpose = false; + + // Storing a column to a row-major matrix. Unroll the write. + for (uint32_t c = 0; c < type.vecsize; c++) + { + auto lhs_expr = to_dereferenced_expression(lhs_expression); + auto column_index = lhs_expr.find_last_of('['); + if (column_index != string::npos) + { + statement(lhs_expr.insert(column_index, join('[', c, ']')), " = ", + to_extract_component_expression(rhs_expression, c), ";"); + } + } + lhs_e->need_transpose = true; + register_write(lhs_expression); + } + else + CompilerGLSL::emit_store_statement(lhs_expression, rhs_expression); + } + else if (!lhs_remapped_type && !is_matrix(type) && !transpose) + { + // Even if the target type is packed, we can directly store to it. We cannot store to packed matrices directly, + // since they are declared as array of vectors instead, and we need the fallback path below. + CompilerGLSL::emit_store_statement(lhs_expression, rhs_expression); + } + else + { + // Special handling when storing to a remapped physical type. + // This is mostly to deal with std140 padded matrices or vectors. + + TypeID physical_type_id = lhs_remapped_type ? + ID(get_extended_decoration(lhs_expression, SPIRVCrossDecorationPhysicalTypeID)) : + type.self; + + auto &physical_type = get(physical_type_id); + + if (is_matrix(type)) + { + const char *packed_pfx = lhs_packed_type ? "packed_" : ""; + + // Packed matrices are stored as arrays of packed vectors, so we need + // to assign the vectors one at a time. 
+ // For row-major matrices, we need to transpose the *right-hand* side, + // not the left-hand side. + + // Lots of cases to cover here ... + + bool rhs_transpose = rhs_e && rhs_e->need_transpose; + SPIRType write_type = type; + string cast_expr; + + // We're dealing with transpose manually. + if (rhs_transpose) + rhs_e->need_transpose = false; + + if (transpose) + { + // We're dealing with transpose manually. + lhs_e->need_transpose = false; + write_type.vecsize = type.columns; + write_type.columns = 1; + + if (physical_type.columns != type.columns) + cast_expr = join("(device ", packed_pfx, type_to_glsl(write_type), "&)"); + + if (rhs_transpose) + { + // If RHS is also transposed, we can just copy row by row. + for (uint32_t i = 0; i < type.vecsize; i++) + { + statement(cast_expr, to_enclosed_expression(lhs_expression), "[", i, "]", " = ", + to_unpacked_row_major_matrix_expression(rhs_expression), "[", i, "];"); + } + } + else + { + auto vector_type = expression_type(rhs_expression); + vector_type.vecsize = vector_type.columns; + vector_type.columns = 1; + + // Transpose on the fly. Emitting a lot of full transpose() ops and extracting lanes seems very bad, + // so pick out individual components instead. + for (uint32_t i = 0; i < type.vecsize; i++) + { + string rhs_row = type_to_glsl_constructor(vector_type) + "("; + for (uint32_t j = 0; j < vector_type.vecsize; j++) + { + rhs_row += join(to_enclosed_unpacked_expression(rhs_expression), "[", j, "][", i, "]"); + if (j + 1 < vector_type.vecsize) + rhs_row += ", "; + } + rhs_row += ")"; + + statement(cast_expr, to_enclosed_expression(lhs_expression), "[", i, "]", " = ", rhs_row, ";"); + } + } + + // We're dealing with transpose manually. 
+ lhs_e->need_transpose = true; + } + else + { + write_type.columns = 1; + + if (physical_type.vecsize != type.vecsize) + cast_expr = join("(device ", packed_pfx, type_to_glsl(write_type), "&)"); + + if (rhs_transpose) + { + auto vector_type = expression_type(rhs_expression); + vector_type.columns = 1; + + // Transpose on the fly. Emitting a lot of full transpose() ops and extracting lanes seems very bad, + // so pick out individual components instead. + for (uint32_t i = 0; i < type.columns; i++) + { + string rhs_row = type_to_glsl_constructor(vector_type) + "("; + for (uint32_t j = 0; j < vector_type.vecsize; j++) + { + // Need to explicitly unpack expression since we've mucked with transpose state. + auto unpacked_expr = to_unpacked_row_major_matrix_expression(rhs_expression); + rhs_row += join(unpacked_expr, "[", j, "][", i, "]"); + if (j + 1 < vector_type.vecsize) + rhs_row += ", "; + } + rhs_row += ")"; + + statement(cast_expr, to_enclosed_expression(lhs_expression), "[", i, "]", " = ", rhs_row, ";"); + } + } + else + { + // Copy column-by-column. + for (uint32_t i = 0; i < type.columns; i++) + { + statement(cast_expr, to_enclosed_expression(lhs_expression), "[", i, "]", " = ", + to_enclosed_unpacked_expression(rhs_expression), "[", i, "];"); + } + } + } + + // We're dealing with transpose manually. + if (rhs_transpose) + rhs_e->need_transpose = true; + } + else if (transpose) + { + lhs_e->need_transpose = false; + + SPIRType write_type = type; + write_type.vecsize = 1; + write_type.columns = 1; + + // Storing a column to a row-major matrix. Unroll the write. 
+ for (uint32_t c = 0; c < type.vecsize; c++) + { + auto lhs_expr = to_enclosed_expression(lhs_expression); + auto column_index = lhs_expr.find_last_of('['); + if (column_index != string::npos) + { + statement("((device ", type_to_glsl(write_type), "*)&", + lhs_expr.insert(column_index, join('[', c, ']', ")")), " = ", + to_extract_component_expression(rhs_expression, c), ";"); + } + } + + lhs_e->need_transpose = true; + } + else if ((is_matrix(physical_type) || is_array(physical_type)) && physical_type.vecsize > type.vecsize) + { + assert(type.vecsize >= 1 && type.vecsize <= 3); + + // If we have packed types, we cannot use swizzled stores. + // We could technically unroll the store for each element if needed. + // When remapping to a std140 physical type, we always get float4, + // and the packed decoration should always be removed. + assert(!lhs_packed_type); + + string lhs = to_dereferenced_expression(lhs_expression); + string rhs = to_pointer_expression(rhs_expression); + + // Unpack the expression so we can store to it with a float or float2. + // It's still an l-value, so it's fine. Most other unpacking of expressions turn them into r-values instead. 
+ lhs = join("(device ", type_to_glsl(type), "&)", enclose_expression(lhs)); + if (!optimize_read_modify_write(expression_type(rhs_expression), lhs, rhs)) + statement(lhs, " = ", rhs, ";"); + } + else if (!is_matrix(type)) + { + string lhs = to_dereferenced_expression(lhs_expression); + string rhs = to_pointer_expression(rhs_expression); + if (!optimize_read_modify_write(expression_type(rhs_expression), lhs, rhs)) + statement(lhs, " = ", rhs, ";"); + } + + register_write(lhs_expression); + } +}
+
+// Returns true when `expr_str` ends with `ending`.
+static bool expression_ends_with(const string &expr_str, const std::string &ending)
+{
+	if (expr_str.length() >= ending.length())
+		return (expr_str.compare(expr_str.length() - ending.length(), ending.length(), ending) == 0);
+	else
+		return false;
+}
+
+// Converts the format of the current expression from packed to unpacked,
+// by wrapping the expression in a constructor of the appropriate type.
+// Also, handle special physical ID remapping scenarios, similar to emit_store_statement().
+//
+// expr_str:         text of the expression to unpack.
+// type:             logical type of the expression.
+// physical_type_id: ID of the remapped physical type, or 0 when there is none.
+// packed:           whether the expression has a packed type.
+// row_major:        for matrices, swaps the logical row/column counts and reads the physical
+//                   column count as the physical vector size.
+// Returns `expr_str` unchanged when there is neither a physical type nor packing; otherwise
+// the expression with a swizzle appended or wrapped in a constructor.
+string CompilerMSL::unpack_expression_type(string expr_str, const SPIRType &type, uint32_t physical_type_id,
+                                           bool packed, bool row_major)
+{
+	// Trivial case, nothing to do.
+	if (physical_type_id == 0 && !packed)
+		return expr_str;
+
+	const SPIRType *physical_type = nullptr;
+	if (physical_type_id)
+		physical_type = &get(physical_type_id);
+
+	// Swizzle that keeps the first N components, indexed by N - 1.
+	// The last entry covers N == 4, where all components are kept and no swizzle is needed.
+	// It keeps the lookup in the matrix path below in bounds: that path indexes the table whenever
+	// the physical and logical vector sizes differ, which does not exclude a 4-wide logical vector.
+	static const char *swizzle_lut[] = {
+		".x",
+		".xy",
+		".xyz",
+		"",
+	};
+
+	if (physical_type && is_vector(*physical_type) && is_array(*physical_type) &&
+	    physical_type->vecsize > type.vecsize && !expression_ends_with(expr_str, swizzle_lut[type.vecsize - 1]))
+	{
+		// std140 array cases for vectors.
+		assert(type.vecsize >= 1 && type.vecsize <= 3);
+		return enclose_expression(expr_str) + swizzle_lut[type.vecsize - 1];
+	}
+	else if (physical_type && is_matrix(*physical_type) && is_vector(type) && physical_type->vecsize > type.vecsize)
+	{
+		// Extract column from padded matrix.
+		assert(type.vecsize >= 1 && type.vecsize <= 3);
+		return enclose_expression(expr_str) + swizzle_lut[type.vecsize - 1];
+	}
+	else if (is_matrix(type))
+	{
+		// Packed matrices are stored as arrays of packed vectors. Unfortunately,
+		// we can't just pass the array straight to the matrix constructor. We have to
+		// pass each vector individually, so that they can be unpacked to normal vectors.
+		if (!physical_type)
+			physical_type = &type;
+
+		uint32_t vecsize = type.vecsize;
+		uint32_t columns = type.columns;
+		if (row_major)
+			swap(vecsize, columns);
+
+		uint32_t physical_vecsize = row_major ? physical_type->columns : physical_type->vecsize;
+
+		const char *base_type = type.width == 16 ? "half" : "float";
+		string unpack_expr = join(base_type, columns, "x", vecsize, "(");
+
+		const char *load_swiz = "";
+
+		// Each physical column is cut down to the logical vector size.
+		if (physical_vecsize != vecsize)
+			load_swiz = swizzle_lut[vecsize - 1];
+
+		for (uint32_t i = 0; i < columns; i++)
+		{
+			if (i > 0)
+				unpack_expr += ", ";
+
+			// Packed columns are first converted to a regular vector of the physical size.
+			if (packed)
+				unpack_expr += join(base_type, physical_vecsize, "(", expr_str, "[", i, "]", ")", load_swiz);
+			else
+				unpack_expr += join(expr_str, "[", i, "]", load_swiz);
+		}
+
+		unpack_expr += ")";
+		return unpack_expr;
+	}
+	else
+	{
+		return join(type_to_glsl(type), "(", expr_str, ")");
+	}
+}
+
+// Emits the file header info +void CompilerMSL::emit_header() +{ + // This particular line can be overridden during compilation, so make it a flag and not a pragma line.
+ if (suppress_missing_prototypes) + statement("#pragma clang diagnostic ignored \"-Wmissing-prototypes\""); + + // Disable warning about missing braces for array template to make arrays a value type + if (spv_function_implementations.count(SPVFuncImplUnsafeArray) != 0) + statement("#pragma clang diagnostic ignored \"-Wmissing-braces\""); + + for (auto &pragma : pragma_lines) + statement(pragma); + + if (!pragma_lines.empty() || suppress_missing_prototypes) + statement(""); + + statement("#include "); + statement("#include "); + + for (auto &header : header_lines) + statement(header); + + statement(""); + statement("using namespace metal;"); + statement(""); + + for (auto &td : typedef_lines) + statement(td); + + if (!typedef_lines.empty()) + statement(""); +} + +void CompilerMSL::add_pragma_line(const string &line) +{ + auto rslt = pragma_lines.insert(line); + if (rslt.second) + force_recompile(); +} + +void CompilerMSL::add_typedef_line(const string &line) +{ + auto rslt = typedef_lines.insert(line); + if (rslt.second) + force_recompile(); +} + +// Template struct like spvUnsafeArray<> need to be declared *before* any resources are declared +void CompilerMSL::emit_custom_templates() +{ + for (const auto &spv_func : spv_function_implementations) + { + switch (spv_func) + { + case SPVFuncImplUnsafeArray: + statement("template"); + statement("struct spvUnsafeArray"); + begin_scope(); + statement("T elements[Num ? 
Num : 1];"); + statement(""); + statement("thread T& operator [] (size_t pos) thread"); + begin_scope(); + statement("return elements[pos];"); + end_scope(); + statement("constexpr const thread T& operator [] (size_t pos) const thread"); + begin_scope(); + statement("return elements[pos];"); + end_scope(); + statement(""); + statement("device T& operator [] (size_t pos) device"); + begin_scope(); + statement("return elements[pos];"); + end_scope(); + statement("constexpr const device T& operator [] (size_t pos) const device"); + begin_scope(); + statement("return elements[pos];"); + end_scope(); + statement(""); + statement("constexpr const constant T& operator [] (size_t pos) const constant"); + begin_scope(); + statement("return elements[pos];"); + end_scope(); + statement(""); + statement("threadgroup T& operator [] (size_t pos) threadgroup"); + begin_scope(); + statement("return elements[pos];"); + end_scope(); + statement("constexpr const threadgroup T& operator [] (size_t pos) const threadgroup"); + begin_scope(); + statement("return elements[pos];"); + end_scope(); + end_scope_decl(); + statement(""); + break; + + default: + break; + } + } +} + +// Emits any needed custom function bodies. +// Metal helper functions must be static force-inline, i.e. static inline __attribute__((always_inline)) +// otherwise they will cause problems when linked together in a single Metallib. +void CompilerMSL::emit_custom_functions() +{ + for (uint32_t i = kArrayCopyMultidimMax; i >= 2; i--) + if (spv_function_implementations.count(static_cast(SPVFuncImplArrayCopyMultidimBase + i))) + spv_function_implementations.insert(static_cast(SPVFuncImplArrayCopyMultidimBase + i - 1)); + + if (spv_function_implementations.count(SPVFuncImplDynamicImageSampler)) + { + // Unfortunately, this one needs a lot of the other functions to compile OK. 
+ if (!msl_options.supports_msl_version(2)) + SPIRV_CROSS_THROW( + "spvDynamicImageSampler requires default-constructible texture objects, which require MSL 2.0."); + spv_function_implementations.insert(SPVFuncImplForwardArgs); + spv_function_implementations.insert(SPVFuncImplTextureSwizzle); + if (msl_options.swizzle_texture_samples) + spv_function_implementations.insert(SPVFuncImplGatherSwizzle); + for (uint32_t i = SPVFuncImplChromaReconstructNearest2Plane; + i <= SPVFuncImplChromaReconstructLinear420XMidpointYMidpoint3Plane; i++) + spv_function_implementations.insert(static_cast(i)); + spv_function_implementations.insert(SPVFuncImplExpandITUFullRange); + spv_function_implementations.insert(SPVFuncImplExpandITUNarrowRange); + spv_function_implementations.insert(SPVFuncImplConvertYCbCrBT709); + spv_function_implementations.insert(SPVFuncImplConvertYCbCrBT601); + spv_function_implementations.insert(SPVFuncImplConvertYCbCrBT2020); + } + + for (uint32_t i = SPVFuncImplChromaReconstructNearest2Plane; + i <= SPVFuncImplChromaReconstructLinear420XMidpointYMidpoint3Plane; i++) + if (spv_function_implementations.count(static_cast(i))) + spv_function_implementations.insert(SPVFuncImplForwardArgs); + + if (spv_function_implementations.count(SPVFuncImplTextureSwizzle) || + spv_function_implementations.count(SPVFuncImplGatherSwizzle) || + spv_function_implementations.count(SPVFuncImplGatherCompareSwizzle)) + { + spv_function_implementations.insert(SPVFuncImplForwardArgs); + spv_function_implementations.insert(SPVFuncImplGetSwizzle); + } + + for (const auto &spv_func : spv_function_implementations) + { + switch (spv_func) + { + case SPVFuncImplMod: + statement("// Implementation of the GLSL mod() function, which is slightly different than Metal fmod()"); + statement("template"); + statement("inline Tx mod(Tx x, Ty y)"); + begin_scope(); + statement("return x - y * floor(x / y);"); + end_scope(); + statement(""); + break; + + case SPVFuncImplRadians: + statement("// 
Implementation of the GLSL radians() function"); + statement("template"); + statement("inline T radians(T d)"); + begin_scope(); + statement("return d * T(0.01745329251);"); + end_scope(); + statement(""); + break; + + case SPVFuncImplDegrees: + statement("// Implementation of the GLSL degrees() function"); + statement("template"); + statement("inline T degrees(T r)"); + begin_scope(); + statement("return r * T(57.2957795131);"); + end_scope(); + statement(""); + break; + + case SPVFuncImplFindILsb: + statement("// Implementation of the GLSL findLSB() function"); + statement("template"); + statement("inline T spvFindLSB(T x)"); + begin_scope(); + statement("return select(ctz(x), T(-1), x == T(0));"); + end_scope(); + statement(""); + break; + + case SPVFuncImplFindUMsb: + statement("// Implementation of the unsigned GLSL findMSB() function"); + statement("template"); + statement("inline T spvFindUMSB(T x)"); + begin_scope(); + statement("return select(clz(T(0)) - (clz(x) + T(1)), T(-1), x == T(0));"); + end_scope(); + statement(""); + break; + + case SPVFuncImplFindSMsb: + statement("// Implementation of the signed GLSL findMSB() function"); + statement("template"); + statement("inline T spvFindSMSB(T x)"); + begin_scope(); + statement("T v = select(x, T(-1) - x, x < T(0));"); + statement("return select(clz(T(0)) - (clz(v) + T(1)), T(-1), v == T(0));"); + end_scope(); + statement(""); + break; + + case SPVFuncImplSSign: + statement("// Implementation of the GLSL sign() function for integer types"); + statement("template::value>::type>"); + statement("inline T sign(T x)"); + begin_scope(); + statement("return select(select(select(x, T(0), x == T(0)), T(1), x > T(0)), T(-1), x < T(0));"); + end_scope(); + statement(""); + break; + + case SPVFuncImplArrayCopy: + case SPVFuncImplArrayOfArrayCopy2Dim: + case SPVFuncImplArrayOfArrayCopy3Dim: + case SPVFuncImplArrayOfArrayCopy4Dim: + case SPVFuncImplArrayOfArrayCopy5Dim: + case SPVFuncImplArrayOfArrayCopy6Dim: + { + // 
Unfortunately we cannot template on the address space, so combinatorial explosion it is. + static const char *function_name_tags[] = { + "FromConstantToStack", "FromConstantToThreadGroup", "FromStackToStack", + "FromStackToThreadGroup", "FromThreadGroupToStack", "FromThreadGroupToThreadGroup", + "FromDeviceToDevice", "FromConstantToDevice", "FromStackToDevice", + "FromThreadGroupToDevice", "FromDeviceToStack", "FromDeviceToThreadGroup", + }; + + static const char *src_address_space[] = { + "constant", "constant", "thread const", "thread const", + "threadgroup const", "threadgroup const", "device const", "constant", + "thread const", "threadgroup const", "device const", "device const", + }; + + static const char *dst_address_space[] = { + "thread", "threadgroup", "thread", "threadgroup", "thread", "threadgroup", + "device", "device", "device", "device", "thread", "threadgroup", + }; + + for (uint32_t variant = 0; variant < 12; variant++) + { + uint32_t dimensions = spv_func - SPVFuncImplArrayCopyMultidimBase; + string tmp = "template 0) + { + string tex_width_str = convert_to_string(msl_options.texel_buffer_texture_width); + statement("// Returns 2D texture coords corresponding to 1D texel buffer coords"); + statement(force_inline); + statement("uint2 spvTexelBufferCoord(uint tc)"); + begin_scope(); + statement(join("return uint2(tc % ", tex_width_str, ", tc / ", tex_width_str, ");")); + end_scope(); + statement(""); + } + else + { + statement("// Returns 2D texture coords corresponding to 1D texel buffer coords"); + statement( + "#define spvTexelBufferCoord(tc, tex) uint2((tc) % (tex).get_width(), (tc) / (tex).get_width())"); + statement(""); + } + break; + } + + // Emulate texture2D atomic operations + case SPVFuncImplImage2DAtomicCoords: + { + if (msl_options.supports_msl_version(1, 2)) + { + statement("// The required alignment of a linear texture of R32Uint format."); + statement("constant uint spvLinearTextureAlignmentOverride [[function_constant(", + 
msl_options.r32ui_alignment_constant_id, ")]];"); + statement("constant uint spvLinearTextureAlignment = ", + "is_function_constant_defined(spvLinearTextureAlignmentOverride) ? ", + "spvLinearTextureAlignmentOverride : ", msl_options.r32ui_linear_texture_alignment, ";"); + } + else + { + statement("// The required alignment of a linear texture of R32Uint format."); + statement("constant uint spvLinearTextureAlignment = ", msl_options.r32ui_linear_texture_alignment, + ";"); + } + statement("// Returns buffer coords corresponding to 2D texture coords for emulating 2D texture atomics"); + statement("#define spvImage2DAtomicCoord(tc, tex) (((((tex).get_width() + ", + " spvLinearTextureAlignment / 4 - 1) & ~(", + " spvLinearTextureAlignment / 4 - 1)) * (tc).y) + (tc).x)"); + statement(""); + break; + } + + // "fadd" intrinsic support + case SPVFuncImplFAdd: + statement("template"); + statement("T spvFAdd(T l, T r)"); + begin_scope(); + statement("return fma(T(1), l, r);"); + end_scope(); + statement(""); + break; + + // "fmul' intrinsic support + case SPVFuncImplFMul: + statement("template"); + statement("T spvFMul(T l, T r)"); + begin_scope(); + statement("return fma(l, r, T(0));"); + end_scope(); + statement(""); + + statement("template"); + statement("vec spvFMulVectorMatrix(vec v, matrix m)"); + begin_scope(); + statement("vec res = vec(0);"); + statement("for (uint i = Rows; i > 0; --i)"); + begin_scope(); + statement("vec tmp(0);"); + statement("for (uint j = 0; j < Cols; ++j)"); + begin_scope(); + statement("tmp[j] = m[j][i - 1];"); + end_scope(); + statement("res = fma(tmp, vec(v[i - 1]), res);"); + end_scope(); + statement("return res;"); + end_scope(); + statement(""); + + statement("template"); + statement("vec spvFMulMatrixVector(matrix m, vec v)"); + begin_scope(); + statement("vec res = vec(0);"); + statement("for (uint i = Cols; i > 0; --i)"); + begin_scope(); + statement("res = fma(m[i - 1], vec(v[i - 1]), res);"); + end_scope(); + statement("return 
res;"); + end_scope(); + statement(""); + + statement("template"); + statement( + "matrix spvFMulMatrixMatrix(matrix l, matrix r)"); + begin_scope(); + statement("matrix res;"); + statement("for (uint i = 0; i < RCols; i++)"); + begin_scope(); + statement("vec tmp(0);"); + statement("for (uint j = 0; j < LCols; j++)"); + begin_scope(); + statement("tmp = fma(vec(r[i][j]), l[j], tmp);"); + end_scope(); + statement("res[i] = tmp;"); + end_scope(); + statement("return res;"); + end_scope(); + statement(""); + break; + + // Emulate texturecube_array with texture2d_array for iOS where this type is not available + case SPVFuncImplCubemapTo2DArrayFace: + statement(force_inline); + statement("float3 spvCubemapTo2DArrayFace(float3 P)"); + begin_scope(); + statement("float3 Coords = abs(P.xyz);"); + statement("float CubeFace = 0;"); + statement("float ProjectionAxis = 0;"); + statement("float u = 0;"); + statement("float v = 0;"); + statement("if (Coords.x >= Coords.y && Coords.x >= Coords.z)"); + begin_scope(); + statement("CubeFace = P.x >= 0 ? 0 : 1;"); + statement("ProjectionAxis = Coords.x;"); + statement("u = P.x >= 0 ? -P.z : P.z;"); + statement("v = -P.y;"); + end_scope(); + statement("else if (Coords.y >= Coords.x && Coords.y >= Coords.z)"); + begin_scope(); + statement("CubeFace = P.y >= 0 ? 2 : 3;"); + statement("ProjectionAxis = Coords.y;"); + statement("u = P.x;"); + statement("v = P.y >= 0 ? P.z : -P.z;"); + end_scope(); + statement("else"); + begin_scope(); + statement("CubeFace = P.z >= 0 ? 4 : 5;"); + statement("ProjectionAxis = Coords.z;"); + statement("u = P.z >= 0 ? 
P.x : -P.x;"); + statement("v = -P.y;"); + end_scope(); + statement("u = 0.5 * (u/ProjectionAxis + 1);"); + statement("v = 0.5 * (v/ProjectionAxis + 1);"); + statement("return float3(u, v, CubeFace);"); + end_scope(); + statement(""); + break; + + case SPVFuncImplInverse4x4: + statement("// Returns the determinant of a 2x2 matrix."); + statement(force_inline); + statement("float spvDet2x2(float a1, float a2, float b1, float b2)"); + begin_scope(); + statement("return a1 * b2 - b1 * a2;"); + end_scope(); + statement(""); + + statement("// Returns the determinant of a 3x3 matrix."); + statement(force_inline); + statement("float spvDet3x3(float a1, float a2, float a3, float b1, float b2, float b3, float c1, " + "float c2, float c3)"); + begin_scope(); + statement("return a1 * spvDet2x2(b2, b3, c2, c3) - b1 * spvDet2x2(a2, a3, c2, c3) + c1 * spvDet2x2(a2, a3, " + "b2, b3);"); + end_scope(); + statement(""); + statement("// Returns the inverse of a matrix, by using the algorithm of calculating the classical"); + statement("// adjoint and dividing by the determinant. 
The contents of the matrix are changed."); + statement(force_inline); + statement("float4x4 spvInverse4x4(float4x4 m)"); + begin_scope(); + statement("float4x4 adj; // The adjoint matrix (inverse after dividing by determinant)"); + statement_no_indent(""); + statement("// Create the transpose of the cofactors, as the classical adjoint of the matrix."); + statement("adj[0][0] = spvDet3x3(m[1][1], m[1][2], m[1][3], m[2][1], m[2][2], m[2][3], m[3][1], m[3][2], " + "m[3][3]);"); + statement("adj[0][1] = -spvDet3x3(m[0][1], m[0][2], m[0][3], m[2][1], m[2][2], m[2][3], m[3][1], m[3][2], " + "m[3][3]);"); + statement("adj[0][2] = spvDet3x3(m[0][1], m[0][2], m[0][3], m[1][1], m[1][2], m[1][3], m[3][1], m[3][2], " + "m[3][3]);"); + statement("adj[0][3] = -spvDet3x3(m[0][1], m[0][2], m[0][3], m[1][1], m[1][2], m[1][3], m[2][1], m[2][2], " + "m[2][3]);"); + statement_no_indent(""); + statement("adj[1][0] = -spvDet3x3(m[1][0], m[1][2], m[1][3], m[2][0], m[2][2], m[2][3], m[3][0], m[3][2], " + "m[3][3]);"); + statement("adj[1][1] = spvDet3x3(m[0][0], m[0][2], m[0][3], m[2][0], m[2][2], m[2][3], m[3][0], m[3][2], " + "m[3][3]);"); + statement("adj[1][2] = -spvDet3x3(m[0][0], m[0][2], m[0][3], m[1][0], m[1][2], m[1][3], m[3][0], m[3][2], " + "m[3][3]);"); + statement("adj[1][3] = spvDet3x3(m[0][0], m[0][2], m[0][3], m[1][0], m[1][2], m[1][3], m[2][0], m[2][2], " + "m[2][3]);"); + statement_no_indent(""); + statement("adj[2][0] = spvDet3x3(m[1][0], m[1][1], m[1][3], m[2][0], m[2][1], m[2][3], m[3][0], m[3][1], " + "m[3][3]);"); + statement("adj[2][1] = -spvDet3x3(m[0][0], m[0][1], m[0][3], m[2][0], m[2][1], m[2][3], m[3][0], m[3][1], " + "m[3][3]);"); + statement("adj[2][2] = spvDet3x3(m[0][0], m[0][1], m[0][3], m[1][0], m[1][1], m[1][3], m[3][0], m[3][1], " + "m[3][3]);"); + statement("adj[2][3] = -spvDet3x3(m[0][0], m[0][1], m[0][3], m[1][0], m[1][1], m[1][3], m[2][0], m[2][1], " + "m[2][3]);"); + statement_no_indent(""); + statement("adj[3][0] = -spvDet3x3(m[1][0], m[1][1], 
m[1][2], m[2][0], m[2][1], m[2][2], m[3][0], m[3][1], " + "m[3][2]);"); + statement("adj[3][1] = spvDet3x3(m[0][0], m[0][1], m[0][2], m[2][0], m[2][1], m[2][2], m[3][0], m[3][1], " + "m[3][2]);"); + statement("adj[3][2] = -spvDet3x3(m[0][0], m[0][1], m[0][2], m[1][0], m[1][1], m[1][2], m[3][0], m[3][1], " + "m[3][2]);"); + statement("adj[3][3] = spvDet3x3(m[0][0], m[0][1], m[0][2], m[1][0], m[1][1], m[1][2], m[2][0], m[2][1], " + "m[2][2]);"); + statement_no_indent(""); + statement("// Calculate the determinant as a combination of the cofactors of the first row."); + statement("float det = (adj[0][0] * m[0][0]) + (adj[0][1] * m[1][0]) + (adj[0][2] * m[2][0]) + (adj[0][3] " + "* m[3][0]);"); + statement_no_indent(""); + statement("// Divide the classical adjoint matrix by the determinant."); + statement("// If determinant is zero, matrix is not invertable, so leave it unchanged."); + statement("return (det != 0.0f) ? (adj * (1.0f / det)) : m;"); + end_scope(); + statement(""); + break; + + case SPVFuncImplInverse3x3: + if (spv_function_implementations.count(SPVFuncImplInverse4x4) == 0) + { + statement("// Returns the determinant of a 2x2 matrix."); + statement(force_inline); + statement("float spvDet2x2(float a1, float a2, float b1, float b2)"); + begin_scope(); + statement("return a1 * b2 - b1 * a2;"); + end_scope(); + statement(""); + } + + statement("// Returns the inverse of a matrix, by using the algorithm of calculating the classical"); + statement("// adjoint and dividing by the determinant. 
The contents of the matrix are changed."); + statement(force_inline); + statement("float3x3 spvInverse3x3(float3x3 m)"); + begin_scope(); + statement("float3x3 adj; // The adjoint matrix (inverse after dividing by determinant)"); + statement_no_indent(""); + statement("// Create the transpose of the cofactors, as the classical adjoint of the matrix."); + statement("adj[0][0] = spvDet2x2(m[1][1], m[1][2], m[2][1], m[2][2]);"); + statement("adj[0][1] = -spvDet2x2(m[0][1], m[0][2], m[2][1], m[2][2]);"); + statement("adj[0][2] = spvDet2x2(m[0][1], m[0][2], m[1][1], m[1][2]);"); + statement_no_indent(""); + statement("adj[1][0] = -spvDet2x2(m[1][0], m[1][2], m[2][0], m[2][2]);"); + statement("adj[1][1] = spvDet2x2(m[0][0], m[0][2], m[2][0], m[2][2]);"); + statement("adj[1][2] = -spvDet2x2(m[0][0], m[0][2], m[1][0], m[1][2]);"); + statement_no_indent(""); + statement("adj[2][0] = spvDet2x2(m[1][0], m[1][1], m[2][0], m[2][1]);"); + statement("adj[2][1] = -spvDet2x2(m[0][0], m[0][1], m[2][0], m[2][1]);"); + statement("adj[2][2] = spvDet2x2(m[0][0], m[0][1], m[1][0], m[1][1]);"); + statement_no_indent(""); + statement("// Calculate the determinant as a combination of the cofactors of the first row."); + statement("float det = (adj[0][0] * m[0][0]) + (adj[0][1] * m[1][0]) + (adj[0][2] * m[2][0]);"); + statement_no_indent(""); + statement("// Divide the classical adjoint matrix by the determinant."); + statement("// If determinant is zero, matrix is not invertable, so leave it unchanged."); + statement("return (det != 0.0f) ? (adj * (1.0f / det)) : m;"); + end_scope(); + statement(""); + break; + + case SPVFuncImplInverse2x2: + statement("// Returns the inverse of a matrix, by using the algorithm of calculating the classical"); + statement("// adjoint and dividing by the determinant. 
The contents of the matrix are changed."); + statement(force_inline); + statement("float2x2 spvInverse2x2(float2x2 m)"); + begin_scope(); + statement("float2x2 adj; // The adjoint matrix (inverse after dividing by determinant)"); + statement_no_indent(""); + statement("// Create the transpose of the cofactors, as the classical adjoint of the matrix."); + statement("adj[0][0] = m[1][1];"); + statement("adj[0][1] = -m[0][1];"); + statement_no_indent(""); + statement("adj[1][0] = -m[1][0];"); + statement("adj[1][1] = m[0][0];"); + statement_no_indent(""); + statement("// Calculate the determinant as a combination of the cofactors of the first row."); + statement("float det = (adj[0][0] * m[0][0]) + (adj[0][1] * m[1][0]);"); + statement_no_indent(""); + statement("// Divide the classical adjoint matrix by the determinant."); + statement("// If determinant is zero, matrix is not invertable, so leave it unchanged."); + statement("return (det != 0.0f) ? (adj * (1.0f / det)) : m;"); + end_scope(); + statement(""); + break; + + case SPVFuncImplForwardArgs: + statement("template struct spvRemoveReference { typedef T type; };"); + statement("template struct spvRemoveReference { typedef T type; };"); + statement("template struct spvRemoveReference { typedef T type; };"); + statement("template inline constexpr thread T&& spvForward(thread typename " + "spvRemoveReference::type& x)"); + begin_scope(); + statement("return static_cast(x);"); + end_scope(); + statement("template inline constexpr thread T&& spvForward(thread typename " + "spvRemoveReference::type&& x)"); + begin_scope(); + statement("return static_cast(x);"); + end_scope(); + statement(""); + break; + + case SPVFuncImplGetSwizzle: + statement("enum class spvSwizzle : uint"); + begin_scope(); + statement("none = 0,"); + statement("zero,"); + statement("one,"); + statement("red,"); + statement("green,"); + statement("blue,"); + statement("alpha"); + end_scope_decl(); + statement(""); + statement("template"); + 
statement("inline T spvGetSwizzle(vec x, T c, spvSwizzle s)"); + begin_scope(); + statement("switch (s)"); + begin_scope(); + statement("case spvSwizzle::none:"); + statement(" return c;"); + statement("case spvSwizzle::zero:"); + statement(" return 0;"); + statement("case spvSwizzle::one:"); + statement(" return 1;"); + statement("case spvSwizzle::red:"); + statement(" return x.r;"); + statement("case spvSwizzle::green:"); + statement(" return x.g;"); + statement("case spvSwizzle::blue:"); + statement(" return x.b;"); + statement("case spvSwizzle::alpha:"); + statement(" return x.a;"); + end_scope(); + end_scope(); + statement(""); + break; + + case SPVFuncImplTextureSwizzle: + statement("// Wrapper function that swizzles texture samples and fetches."); + statement("template"); + statement("inline vec spvTextureSwizzle(vec x, uint s)"); + begin_scope(); + statement("if (!s)"); + statement(" return x;"); + statement("return vec(spvGetSwizzle(x, x.r, spvSwizzle((s >> 0) & 0xFF)), " + "spvGetSwizzle(x, x.g, spvSwizzle((s >> 8) & 0xFF)), spvGetSwizzle(x, x.b, spvSwizzle((s >> 16) " + "& 0xFF)), " + "spvGetSwizzle(x, x.a, spvSwizzle((s >> 24) & 0xFF)));"); + end_scope(); + statement(""); + statement("template"); + statement("inline T spvTextureSwizzle(T x, uint s)"); + begin_scope(); + statement("return spvTextureSwizzle(vec(x, 0, 0, 1), s).x;"); + end_scope(); + statement(""); + break; + + case SPVFuncImplGatherSwizzle: + statement("// Wrapper function that swizzles texture gathers."); + statement("template class Tex, " + "typename... Ts>"); + statement("inline vec spvGatherSwizzle(const thread Tex& t, sampler s, " + "uint sw, component c, Ts... 
params) METAL_CONST_ARG(c)"); + begin_scope(); + statement("if (sw)"); + begin_scope(); + statement("switch (spvSwizzle((sw >> (uint(c) * 8)) & 0xFF))"); + begin_scope(); + statement("case spvSwizzle::none:"); + statement(" break;"); + statement("case spvSwizzle::zero:"); + statement(" return vec(0, 0, 0, 0);"); + statement("case spvSwizzle::one:"); + statement(" return vec(1, 1, 1, 1);"); + statement("case spvSwizzle::red:"); + statement(" return t.gather(s, spvForward(params)..., component::x);"); + statement("case spvSwizzle::green:"); + statement(" return t.gather(s, spvForward(params)..., component::y);"); + statement("case spvSwizzle::blue:"); + statement(" return t.gather(s, spvForward(params)..., component::z);"); + statement("case spvSwizzle::alpha:"); + statement(" return t.gather(s, spvForward(params)..., component::w);"); + end_scope(); + end_scope(); + // texture::gather insists on its component parameter being a constant + // expression, so we need this silly workaround just to compile the shader. + statement("switch (c)"); + begin_scope(); + statement("case component::x:"); + statement(" return t.gather(s, spvForward(params)..., component::x);"); + statement("case component::y:"); + statement(" return t.gather(s, spvForward(params)..., component::y);"); + statement("case component::z:"); + statement(" return t.gather(s, spvForward(params)..., component::z);"); + statement("case component::w:"); + statement(" return t.gather(s, spvForward(params)..., component::w);"); + end_scope(); + end_scope(); + statement(""); + break; + + case SPVFuncImplGatherCompareSwizzle: + statement("// Wrapper function that swizzles depth texture gathers."); + statement("template class Tex, " + "typename... Ts>"); + statement("inline vec spvGatherCompareSwizzle(const thread Tex& t, sampler " + "s, uint sw, Ts... 
params) "); + begin_scope(); + statement("if (sw)"); + begin_scope(); + statement("switch (spvSwizzle(sw & 0xFF))"); + begin_scope(); + statement("case spvSwizzle::none:"); + statement("case spvSwizzle::red:"); + statement(" break;"); + statement("case spvSwizzle::zero:"); + statement("case spvSwizzle::green:"); + statement("case spvSwizzle::blue:"); + statement("case spvSwizzle::alpha:"); + statement(" return vec(0, 0, 0, 0);"); + statement("case spvSwizzle::one:"); + statement(" return vec(1, 1, 1, 1);"); + end_scope(); + end_scope(); + statement("return t.gather_compare(s, spvForward(params)...);"); + end_scope(); + statement(""); + break; + + case SPVFuncImplSubgroupBroadcast: + // Metal doesn't allow broadcasting boolean values directly, but we can work around that by broadcasting + // them as integers. + statement("template"); + statement("inline T spvSubgroupBroadcast(T value, ushort lane)"); + begin_scope(); + if (msl_options.is_ios() && !msl_options.ios_use_simdgroup_functions) + statement("return quad_broadcast(value, lane);"); + else + statement("return simd_broadcast(value, lane);"); + end_scope(); + statement(""); + statement("template<>"); + statement("inline bool spvSubgroupBroadcast(bool value, ushort lane)"); + begin_scope(); + if (msl_options.is_ios() && !msl_options.ios_use_simdgroup_functions) + statement("return !!quad_broadcast((ushort)value, lane);"); + else + statement("return !!simd_broadcast((ushort)value, lane);"); + end_scope(); + statement(""); + statement("template"); + statement("inline vec spvSubgroupBroadcast(vec value, ushort lane)"); + begin_scope(); + if (msl_options.is_ios() && !msl_options.ios_use_simdgroup_functions) + statement("return (vec)quad_broadcast((vec)value, lane);"); + else + statement("return (vec)simd_broadcast((vec)value, lane);"); + end_scope(); + statement(""); + break; + + case SPVFuncImplSubgroupBroadcastFirst: + statement("template"); + statement("inline T spvSubgroupBroadcastFirst(T value)"); + 
begin_scope(); + if (msl_options.is_ios() && !msl_options.ios_use_simdgroup_functions) + statement("return quad_broadcast_first(value);"); + else + statement("return simd_broadcast_first(value);"); + end_scope(); + statement(""); + statement("template<>"); + statement("inline bool spvSubgroupBroadcastFirst(bool value)"); + begin_scope(); + if (msl_options.is_ios() && !msl_options.ios_use_simdgroup_functions) + statement("return !!quad_broadcast_first((ushort)value);"); + else + statement("return !!simd_broadcast_first((ushort)value);"); + end_scope(); + statement(""); + statement("template"); + statement("inline vec spvSubgroupBroadcastFirst(vec value)"); + begin_scope(); + if (msl_options.is_ios() && !msl_options.ios_use_simdgroup_functions) + statement("return (vec)quad_broadcast_first((vec)value);"); + else + statement("return (vec)simd_broadcast_first((vec)value);"); + end_scope(); + statement(""); + break; + + case SPVFuncImplSubgroupBallot: + statement("inline uint4 spvSubgroupBallot(bool value)"); + begin_scope(); + if (msl_options.is_ios() && !msl_options.ios_use_simdgroup_functions) + { + statement("return uint4((quad_vote::vote_t)quad_ballot(value), 0, 0, 0);"); + } + else if (msl_options.is_ios()) + { + // The current simd_vote on iOS uses a 32-bit integer-like object. + statement("return uint4((simd_vote::vote_t)simd_ballot(value), 0, 0, 0);"); + } + else + { + statement("simd_vote vote = simd_ballot(value);"); + statement("// simd_ballot() returns a 64-bit integer-like object, but"); + statement("// SPIR-V callers expect a uint4. 
We must convert."); + statement("// FIXME: This won't include higher bits if Apple ever supports"); + statement("// 128 lanes in an SIMD-group."); + statement( + "return uint4((uint)((simd_vote::vote_t)vote & 0xFFFFFFFF), (uint)(((simd_vote::vote_t)vote >> " + "32) & 0xFFFFFFFF), 0, 0);"); + } + end_scope(); + statement(""); + break; + + case SPVFuncImplSubgroupBallotBitExtract: + statement("inline bool spvSubgroupBallotBitExtract(uint4 ballot, uint bit)"); + begin_scope(); + statement("return !!extract_bits(ballot[bit / 32], bit % 32, 1);"); + end_scope(); + statement(""); + break; + + case SPVFuncImplSubgroupBallotFindLSB: + statement("inline uint spvSubgroupBallotFindLSB(uint4 ballot, uint gl_SubgroupSize)"); + begin_scope(); + if (msl_options.is_ios()) + { + statement("uint4 mask = uint4(extract_bits(0xFFFFFFFF, 0, gl_SubgroupSize), uint3(0));"); + } + else + { + statement("uint4 mask = uint4(extract_bits(0xFFFFFFFF, 0, min(gl_SubgroupSize, 32u)), " + "extract_bits(0xFFFFFFFF, 0, (uint)max((int)gl_SubgroupSize - 32, 0)), uint2(0));"); + } + statement("ballot &= mask;"); + statement("return select(ctz(ballot.x), select(32 + ctz(ballot.y), select(64 + ctz(ballot.z), select(96 + " + "ctz(ballot.w), uint(-1), ballot.w == 0), ballot.z == 0), ballot.y == 0), ballot.x == 0);"); + end_scope(); + statement(""); + break; + + case SPVFuncImplSubgroupBallotFindMSB: + statement("inline uint spvSubgroupBallotFindMSB(uint4 ballot, uint gl_SubgroupSize)"); + begin_scope(); + if (msl_options.is_ios()) + { + statement("uint4 mask = uint4(extract_bits(0xFFFFFFFF, 0, gl_SubgroupSize), uint3(0));"); + } + else + { + statement("uint4 mask = uint4(extract_bits(0xFFFFFFFF, 0, min(gl_SubgroupSize, 32u)), " + "extract_bits(0xFFFFFFFF, 0, (uint)max((int)gl_SubgroupSize - 32, 0)), uint2(0));"); + } + statement("ballot &= mask;"); + statement("return select(128 - (clz(ballot.w) + 1), select(96 - (clz(ballot.z) + 1), select(64 - " + "(clz(ballot.y) + 1), select(32 - (clz(ballot.x) + 1), 
uint(-1), ballot.x == 0), ballot.y == 0), " + "ballot.z == 0), ballot.w == 0);"); + end_scope(); + statement(""); + break; + + case SPVFuncImplSubgroupBallotBitCount: + statement("inline uint spvPopCount4(uint4 ballot)"); + begin_scope(); + statement("return popcount(ballot.x) + popcount(ballot.y) + popcount(ballot.z) + popcount(ballot.w);"); + end_scope(); + statement(""); + statement("inline uint spvSubgroupBallotBitCount(uint4 ballot, uint gl_SubgroupSize)"); + begin_scope(); + if (msl_options.is_ios()) + { + statement("uint4 mask = uint4(extract_bits(0xFFFFFFFF, 0, gl_SubgroupSize), uint3(0));"); + } + else + { + statement("uint4 mask = uint4(extract_bits(0xFFFFFFFF, 0, min(gl_SubgroupSize, 32u)), " + "extract_bits(0xFFFFFFFF, 0, (uint)max((int)gl_SubgroupSize - 32, 0)), uint2(0));"); + } + statement("return spvPopCount4(ballot & mask);"); + end_scope(); + statement(""); + statement("inline uint spvSubgroupBallotInclusiveBitCount(uint4 ballot, uint gl_SubgroupInvocationID)"); + begin_scope(); + if (msl_options.is_ios()) + { + statement("uint4 mask = uint4(extract_bits(0xFFFFFFFF, 0, gl_SubgroupInvocationID + 1), uint3(0));"); + } + else + { + statement("uint4 mask = uint4(extract_bits(0xFFFFFFFF, 0, min(gl_SubgroupInvocationID + 1, 32u)), " + "extract_bits(0xFFFFFFFF, 0, (uint)max((int)gl_SubgroupInvocationID + 1 - 32, 0)), " + "uint2(0));"); + } + statement("return spvPopCount4(ballot & mask);"); + end_scope(); + statement(""); + statement("inline uint spvSubgroupBallotExclusiveBitCount(uint4 ballot, uint gl_SubgroupInvocationID)"); + begin_scope(); + if (msl_options.is_ios()) + { + statement("uint4 mask = uint4(extract_bits(0xFFFFFFFF, 0, gl_SubgroupInvocationID), uint2(0));"); + } + else + { + statement("uint4 mask = uint4(extract_bits(0xFFFFFFFF, 0, min(gl_SubgroupInvocationID, 32u)), " + "extract_bits(0xFFFFFFFF, 0, (uint)max((int)gl_SubgroupInvocationID - 32, 0)), uint2(0));"); + } + statement("return spvPopCount4(ballot & mask);"); + end_scope(); + 
statement(""); + break; + + case SPVFuncImplSubgroupAllEqual: + // Metal doesn't provide a function to evaluate this directly. But, we can + // implement this by comparing every thread's value to one thread's value + // (in this case, the value of the first active thread). Then, by the transitive + // property of equality, if all comparisons return true, then they are all equal. + statement("template"); + statement("inline bool spvSubgroupAllEqual(T value)"); + begin_scope(); + if (msl_options.is_ios() && !msl_options.ios_use_simdgroup_functions) + statement("return quad_all(all(value == quad_broadcast_first(value)));"); + else + statement("return simd_all(all(value == simd_broadcast_first(value)));"); + end_scope(); + statement(""); + statement("template<>"); + statement("inline bool spvSubgroupAllEqual(bool value)"); + begin_scope(); + if (msl_options.is_ios() && !msl_options.ios_use_simdgroup_functions) + statement("return quad_all(value) || !quad_any(value);"); + else + statement("return simd_all(value) || !simd_any(value);"); + end_scope(); + statement(""); + statement("template"); + statement("inline bool spvSubgroupAllEqual(vec value)"); + begin_scope(); + if (msl_options.is_ios() && !msl_options.ios_use_simdgroup_functions) + statement("return quad_all(all(value == (vec)quad_broadcast_first((vec)value)));"); + else + statement("return simd_all(all(value == (vec)simd_broadcast_first((vec)value)));"); + end_scope(); + statement(""); + break; + + case SPVFuncImplSubgroupShuffle: + statement("template"); + statement("inline T spvSubgroupShuffle(T value, ushort lane)"); + begin_scope(); + if (msl_options.is_ios() && !msl_options.ios_use_simdgroup_functions) + statement("return quad_shuffle(value, lane);"); + else + statement("return simd_shuffle(value, lane);"); + end_scope(); + statement(""); + statement("template<>"); + statement("inline bool spvSubgroupShuffle(bool value, ushort lane)"); + begin_scope(); + if (msl_options.is_ios() && 
!msl_options.ios_use_simdgroup_functions) + statement("return !!quad_shuffle((ushort)value, lane);"); + else + statement("return !!simd_shuffle((ushort)value, lane);"); + end_scope(); + statement(""); + statement("template"); + statement("inline vec spvSubgroupShuffle(vec value, ushort lane)"); + begin_scope(); + if (msl_options.is_ios() && !msl_options.ios_use_simdgroup_functions) + statement("return (vec)quad_shuffle((vec)value, lane);"); + else + statement("return (vec)simd_shuffle((vec)value, lane);"); + end_scope(); + statement(""); + break; + + case SPVFuncImplSubgroupShuffleXor: + statement("template"); + statement("inline T spvSubgroupShuffleXor(T value, ushort mask)"); + begin_scope(); + if (msl_options.is_ios() && !msl_options.ios_use_simdgroup_functions) + statement("return quad_shuffle_xor(value, mask);"); + else + statement("return simd_shuffle_xor(value, mask);"); + end_scope(); + statement(""); + statement("template<>"); + statement("inline bool spvSubgroupShuffleXor(bool value, ushort mask)"); + begin_scope(); + if (msl_options.is_ios() && !msl_options.ios_use_simdgroup_functions) + statement("return !!quad_shuffle_xor((ushort)value, mask);"); + else + statement("return !!simd_shuffle_xor((ushort)value, mask);"); + end_scope(); + statement(""); + statement("template"); + statement("inline vec spvSubgroupShuffleXor(vec value, ushort mask)"); + begin_scope(); + if (msl_options.is_ios() && !msl_options.ios_use_simdgroup_functions) + statement("return (vec)quad_shuffle_xor((vec)value, mask);"); + else + statement("return (vec)simd_shuffle_xor((vec)value, mask);"); + end_scope(); + statement(""); + break; + + case SPVFuncImplSubgroupShuffleUp: + statement("template"); + statement("inline T spvSubgroupShuffleUp(T value, ushort delta)"); + begin_scope(); + if (msl_options.is_ios() && !msl_options.ios_use_simdgroup_functions) + statement("return quad_shuffle_up(value, delta);"); + else + statement("return simd_shuffle_up(value, delta);"); + end_scope(); + 
statement(""); + statement("template<>"); + statement("inline bool spvSubgroupShuffleUp(bool value, ushort delta)"); + begin_scope(); + if (msl_options.is_ios() && !msl_options.ios_use_simdgroup_functions) + statement("return !!quad_shuffle_up((ushort)value, delta);"); + else + statement("return !!simd_shuffle_up((ushort)value, delta);"); + end_scope(); + statement(""); + statement("template"); + statement("inline vec spvSubgroupShuffleUp(vec value, ushort delta)"); + begin_scope(); + if (msl_options.is_ios() && !msl_options.ios_use_simdgroup_functions) + statement("return (vec)quad_shuffle_up((vec)value, delta);"); + else + statement("return (vec)simd_shuffle_up((vec)value, delta);"); + end_scope(); + statement(""); + break; + + case SPVFuncImplSubgroupShuffleDown: + statement("template"); + statement("inline T spvSubgroupShuffleDown(T value, ushort delta)"); + begin_scope(); + if (msl_options.is_ios() && !msl_options.ios_use_simdgroup_functions) + statement("return quad_shuffle_down(value, delta);"); + else + statement("return simd_shuffle_down(value, delta);"); + end_scope(); + statement(""); + statement("template<>"); + statement("inline bool spvSubgroupShuffleDown(bool value, ushort delta)"); + begin_scope(); + if (msl_options.is_ios() && !msl_options.ios_use_simdgroup_functions) + statement("return !!quad_shuffle_down((ushort)value, delta);"); + else + statement("return !!simd_shuffle_down((ushort)value, delta);"); + end_scope(); + statement(""); + statement("template"); + statement("inline vec spvSubgroupShuffleDown(vec value, ushort delta)"); + begin_scope(); + if (msl_options.is_ios() && !msl_options.ios_use_simdgroup_functions) + statement("return (vec)quad_shuffle_down((vec)value, delta);"); + else + statement("return (vec)simd_shuffle_down((vec)value, delta);"); + end_scope(); + statement(""); + break; + + case SPVFuncImplQuadBroadcast: + statement("template"); + statement("inline T spvQuadBroadcast(T value, uint lane)"); + begin_scope(); + 
statement("return quad_broadcast(value, lane);"); + end_scope(); + statement(""); + statement("template<>"); + statement("inline bool spvQuadBroadcast(bool value, uint lane)"); + begin_scope(); + statement("return !!quad_broadcast((ushort)value, lane);"); + end_scope(); + statement(""); + statement("template"); + statement("inline vec spvQuadBroadcast(vec value, uint lane)"); + begin_scope(); + statement("return (vec)quad_broadcast((vec)value, lane);"); + end_scope(); + statement(""); + break; + + case SPVFuncImplQuadSwap: + // We can implement this easily based on the following table giving + // the target lane ID from the direction and current lane ID: + // Direction + // | 0 | 1 | 2 | + // ---+---+---+---+ + // L 0 | 1 2 3 + // a 1 | 0 3 2 + // n 2 | 3 0 1 + // e 3 | 2 1 0 + // Notice that target = source ^ (direction + 1). + statement("template"); + statement("inline T spvQuadSwap(T value, uint dir)"); + begin_scope(); + statement("return quad_shuffle_xor(value, dir + 1);"); + end_scope(); + statement(""); + statement("template<>"); + statement("inline bool spvQuadSwap(bool value, uint dir)"); + begin_scope(); + statement("return !!quad_shuffle_xor((ushort)value, dir + 1);"); + end_scope(); + statement(""); + statement("template"); + statement("inline vec spvQuadSwap(vec value, uint dir)"); + begin_scope(); + statement("return (vec)quad_shuffle_xor((vec)value, dir + 1);"); + end_scope(); + statement(""); + break; + + case SPVFuncImplReflectScalar: + // Metal does not support scalar versions of these functions. + statement("template"); + statement("inline T spvReflect(T i, T n)"); + begin_scope(); + statement("return i - T(2) * i * n * n;"); + end_scope(); + statement(""); + break; + + case SPVFuncImplRefractScalar: + // Metal does not support scalar versions of these functions. 
+ statement("template"); + statement("inline T spvRefract(T i, T n, T eta)"); + begin_scope(); + statement("T NoI = n * i;"); + statement("T NoI2 = NoI * NoI;"); + statement("T k = T(1) - eta * eta * (T(1) - NoI2);"); + statement("if (k < T(0))"); + begin_scope(); + statement("return T(0);"); + end_scope(); + statement("else"); + begin_scope(); + statement("return eta * i - (eta * NoI + sqrt(k)) * n;"); + end_scope(); + end_scope(); + statement(""); + break; + + case SPVFuncImplFaceForwardScalar: + // Metal does not support scalar versions of these functions. + statement("template"); + statement("inline T spvFaceForward(T n, T i, T nref)"); + begin_scope(); + statement("return i * nref < T(0) ? n : -n;"); + end_scope(); + statement(""); + break; + + case SPVFuncImplChromaReconstructNearest2Plane: + statement("template"); + statement("inline vec spvChromaReconstructNearest(texture2d plane0, texture2d plane1, sampler " + "samp, float2 coord, LodOptions... options)"); + begin_scope(); + statement("vec ycbcr = vec(0, 0, 0, 1);"); + statement("ycbcr.g = plane0.sample(samp, coord, spvForward(options)...).r;"); + statement("ycbcr.br = plane1.sample(samp, coord, spvForward(options)...).rg;"); + statement("return ycbcr;"); + end_scope(); + statement(""); + break; + + case SPVFuncImplChromaReconstructNearest3Plane: + statement("template"); + statement("inline vec spvChromaReconstructNearest(texture2d plane0, texture2d plane1, " + "texture2d plane2, sampler samp, float2 coord, LodOptions... 
options)"); + begin_scope(); + statement("vec ycbcr = vec(0, 0, 0, 1);"); + statement("ycbcr.g = plane0.sample(samp, coord, spvForward(options)...).r;"); + statement("ycbcr.b = plane1.sample(samp, coord, spvForward(options)...).r;"); + statement("ycbcr.r = plane2.sample(samp, coord, spvForward(options)...).r;"); + statement("return ycbcr;"); + end_scope(); + statement(""); + break; + + case SPVFuncImplChromaReconstructLinear422CositedEven2Plane: + statement("template"); + statement("inline vec spvChromaReconstructLinear422CositedEven(texture2d plane0, texture2d " + "plane1, sampler samp, float2 coord, LodOptions... options)"); + begin_scope(); + statement("vec ycbcr = vec(0, 0, 0, 1);"); + statement("ycbcr.g = plane0.sample(samp, coord, spvForward(options)...).r;"); + statement("if (fract(coord.x * plane1.get_width()) != 0.0)"); + begin_scope(); + statement("ycbcr.br = vec(mix(plane1.sample(samp, coord, spvForward(options)...), " + "plane1.sample(samp, coord, spvForward(options)..., int2(1, 0)), 0.5).rg);"); + end_scope(); + statement("else"); + begin_scope(); + statement("ycbcr.br = plane1.sample(samp, coord, spvForward(options)...).rg;"); + end_scope(); + statement("return ycbcr;"); + end_scope(); + statement(""); + break; + + case SPVFuncImplChromaReconstructLinear422CositedEven3Plane: + statement("template"); + statement("inline vec spvChromaReconstructLinear422CositedEven(texture2d plane0, texture2d " + "plane1, texture2d plane2, sampler samp, float2 coord, LodOptions... 
options)"); + begin_scope(); + statement("vec ycbcr = vec(0, 0, 0, 1);"); + statement("ycbcr.g = plane0.sample(samp, coord, spvForward(options)...).r;"); + statement("if (fract(coord.x * plane1.get_width()) != 0.0)"); + begin_scope(); + statement("ycbcr.b = T(mix(plane1.sample(samp, coord, spvForward(options)...), " + "plane1.sample(samp, coord, spvForward(options)..., int2(1, 0)), 0.5).r);"); + statement("ycbcr.r = T(mix(plane2.sample(samp, coord, spvForward(options)...), " + "plane2.sample(samp, coord, spvForward(options)..., int2(1, 0)), 0.5).r);"); + end_scope(); + statement("else"); + begin_scope(); + statement("ycbcr.b = plane1.sample(samp, coord, spvForward(options)...).r;"); + statement("ycbcr.r = plane2.sample(samp, coord, spvForward(options)...).r;"); + end_scope(); + statement("return ycbcr;"); + end_scope(); + statement(""); + break; + + case SPVFuncImplChromaReconstructLinear422Midpoint2Plane: + statement("template"); + statement("inline vec spvChromaReconstructLinear422Midpoint(texture2d plane0, texture2d " + "plane1, sampler samp, float2 coord, LodOptions... options)"); + begin_scope(); + statement("vec ycbcr = vec(0, 0, 0, 1);"); + statement("ycbcr.g = plane0.sample(samp, coord, spvForward(options)...).r;"); + statement("int2 offs = int2(fract(coord.x * plane1.get_width()) != 0.0 ? 1 : -1, 0);"); + statement("ycbcr.br = vec(mix(plane1.sample(samp, coord, spvForward(options)...), " + "plane1.sample(samp, coord, spvForward(options)..., offs), 0.25).rg);"); + statement("return ycbcr;"); + end_scope(); + statement(""); + break; + + case SPVFuncImplChromaReconstructLinear422Midpoint3Plane: + statement("template"); + statement("inline vec spvChromaReconstructLinear422Midpoint(texture2d plane0, texture2d " + "plane1, texture2d plane2, sampler samp, float2 coord, LodOptions... 
options)"); + begin_scope(); + statement("vec ycbcr = vec(0, 0, 0, 1);"); + statement("ycbcr.g = plane0.sample(samp, coord, spvForward(options)...).r;"); + statement("int2 offs = int2(fract(coord.x * plane1.get_width()) != 0.0 ? 1 : -1, 0);"); + statement("ycbcr.b = T(mix(plane1.sample(samp, coord, spvForward(options)...), " + "plane1.sample(samp, coord, spvForward(options)..., offs), 0.25).r);"); + statement("ycbcr.r = T(mix(plane2.sample(samp, coord, spvForward(options)...), " + "plane2.sample(samp, coord, spvForward(options)..., offs), 0.25).r);"); + statement("return ycbcr;"); + end_scope(); + statement(""); + break; + + case SPVFuncImplChromaReconstructLinear420XCositedEvenYCositedEven2Plane: + statement("template"); + statement("inline vec spvChromaReconstructLinear420XCositedEvenYCositedEven(texture2d plane0, " + "texture2d plane1, sampler samp, float2 coord, LodOptions... options)"); + begin_scope(); + statement("vec ycbcr = vec(0, 0, 0, 1);"); + statement("ycbcr.g = plane0.sample(samp, coord, spvForward(options)...).r;"); + statement("float2 ab = fract(round(coord * float2(plane0.get_width(), plane0.get_height())) * 0.5);"); + statement("ycbcr.br = vec(mix(mix(plane1.sample(samp, coord, spvForward(options)...), " + "plane1.sample(samp, coord, spvForward(options)..., int2(1, 0)), ab.x), " + "mix(plane1.sample(samp, coord, spvForward(options)..., int2(0, 1)), " + "plane1.sample(samp, coord, spvForward(options)..., int2(1, 1)), ab.x), ab.y).rg);"); + statement("return ycbcr;"); + end_scope(); + statement(""); + break; + + case SPVFuncImplChromaReconstructLinear420XCositedEvenYCositedEven3Plane: + statement("template"); + statement("inline vec spvChromaReconstructLinear420XCositedEvenYCositedEven(texture2d plane0, " + "texture2d plane1, texture2d plane2, sampler samp, float2 coord, LodOptions... 
options)"); + begin_scope(); + statement("vec ycbcr = vec(0, 0, 0, 1);"); + statement("ycbcr.g = plane0.sample(samp, coord, spvForward(options)...).r;"); + statement("float2 ab = fract(round(coord * float2(plane0.get_width(), plane0.get_height())) * 0.5);"); + statement("ycbcr.b = T(mix(mix(plane1.sample(samp, coord, spvForward(options)...), " + "plane1.sample(samp, coord, spvForward(options)..., int2(1, 0)), ab.x), " + "mix(plane1.sample(samp, coord, spvForward(options)..., int2(0, 1)), " + "plane1.sample(samp, coord, spvForward(options)..., int2(1, 1)), ab.x), ab.y).r);"); + statement("ycbcr.r = T(mix(mix(plane2.sample(samp, coord, spvForward(options)...), " + "plane2.sample(samp, coord, spvForward(options)..., int2(1, 0)), ab.x), " + "mix(plane2.sample(samp, coord, spvForward(options)..., int2(0, 1)), " + "plane2.sample(samp, coord, spvForward(options)..., int2(1, 1)), ab.x), ab.y).r);"); + statement("return ycbcr;"); + end_scope(); + statement(""); + break; + + case SPVFuncImplChromaReconstructLinear420XMidpointYCositedEven2Plane: + statement("template"); + statement("inline vec spvChromaReconstructLinear420XMidpointYCositedEven(texture2d plane0, " + "texture2d plane1, sampler samp, float2 coord, LodOptions... 
options)"); + begin_scope(); + statement("vec ycbcr = vec(0, 0, 0, 1);"); + statement("ycbcr.g = plane0.sample(samp, coord, spvForward(options)...).r;"); + statement("float2 ab = fract((round(coord * float2(plane0.get_width(), plane0.get_height())) - float2(0.5, " + "0)) * 0.5);"); + statement("ycbcr.br = vec(mix(mix(plane1.sample(samp, coord, spvForward(options)...), " + "plane1.sample(samp, coord, spvForward(options)..., int2(1, 0)), ab.x), " + "mix(plane1.sample(samp, coord, spvForward(options)..., int2(0, 1)), " + "plane1.sample(samp, coord, spvForward(options)..., int2(1, 1)), ab.x), ab.y).rg);"); + statement("return ycbcr;"); + end_scope(); + statement(""); + break; + + case SPVFuncImplChromaReconstructLinear420XMidpointYCositedEven3Plane: + statement("template"); + statement("inline vec spvChromaReconstructLinear420XMidpointYCositedEven(texture2d plane0, " + "texture2d plane1, texture2d plane2, sampler samp, float2 coord, LodOptions... options)"); + begin_scope(); + statement("vec ycbcr = vec(0, 0, 0, 1);"); + statement("ycbcr.g = plane0.sample(samp, coord, spvForward(options)...).r;"); + statement("float2 ab = fract((round(coord * float2(plane0.get_width(), plane0.get_height())) - float2(0.5, " + "0)) * 0.5);"); + statement("ycbcr.b = T(mix(mix(plane1.sample(samp, coord, spvForward(options)...), " + "plane1.sample(samp, coord, spvForward(options)..., int2(1, 0)), ab.x), " + "mix(plane1.sample(samp, coord, spvForward(options)..., int2(0, 1)), " + "plane1.sample(samp, coord, spvForward(options)..., int2(1, 1)), ab.x), ab.y).r);"); + statement("ycbcr.r = T(mix(mix(plane2.sample(samp, coord, spvForward(options)...), " + "plane2.sample(samp, coord, spvForward(options)..., int2(1, 0)), ab.x), " + "mix(plane2.sample(samp, coord, spvForward(options)..., int2(0, 1)), " + "plane2.sample(samp, coord, spvForward(options)..., int2(1, 1)), ab.x), ab.y).r);"); + statement("return ycbcr;"); + end_scope(); + statement(""); + break; + + case 
SPVFuncImplChromaReconstructLinear420XCositedEvenYMidpoint2Plane: + statement("template"); + statement("inline vec spvChromaReconstructLinear420XCositedEvenYMidpoint(texture2d plane0, " + "texture2d plane1, sampler samp, float2 coord, LodOptions... options)"); + begin_scope(); + statement("vec ycbcr = vec(0, 0, 0, 1);"); + statement("ycbcr.g = plane0.sample(samp, coord, spvForward(options)...).r;"); + statement("float2 ab = fract((round(coord * float2(plane0.get_width(), plane0.get_height())) - float2(0, " + "0.5)) * 0.5);"); + statement("ycbcr.br = vec(mix(mix(plane1.sample(samp, coord, spvForward(options)...), " + "plane1.sample(samp, coord, spvForward(options)..., int2(1, 0)), ab.x), " + "mix(plane1.sample(samp, coord, spvForward(options)..., int2(0, 1)), " + "plane1.sample(samp, coord, spvForward(options)..., int2(1, 1)), ab.x), ab.y).rg);"); + statement("return ycbcr;"); + end_scope(); + statement(""); + break; + + case SPVFuncImplChromaReconstructLinear420XCositedEvenYMidpoint3Plane: + statement("template"); + statement("inline vec spvChromaReconstructLinear420XCositedEvenYMidpoint(texture2d plane0, " + "texture2d plane1, texture2d plane2, sampler samp, float2 coord, LodOptions... 
options)"); + begin_scope(); + statement("vec ycbcr = vec(0, 0, 0, 1);"); + statement("ycbcr.g = plane0.sample(samp, coord, spvForward(options)...).r;"); + statement("float2 ab = fract((round(coord * float2(plane0.get_width(), plane0.get_height())) - float2(0, " + "0.5)) * 0.5);"); + statement("ycbcr.b = T(mix(mix(plane1.sample(samp, coord, spvForward(options)...), " + "plane1.sample(samp, coord, spvForward(options)..., int2(1, 0)), ab.x), " + "mix(plane1.sample(samp, coord, spvForward(options)..., int2(0, 1)), " + "plane1.sample(samp, coord, spvForward(options)..., int2(1, 1)), ab.x), ab.y).r);"); + statement("ycbcr.r = T(mix(mix(plane2.sample(samp, coord, spvForward(options)...), " + "plane2.sample(samp, coord, spvForward(options)..., int2(1, 0)), ab.x), " + "mix(plane2.sample(samp, coord, spvForward(options)..., int2(0, 1)), " + "plane2.sample(samp, coord, spvForward(options)..., int2(1, 1)), ab.x), ab.y).r);"); + statement("return ycbcr;"); + end_scope(); + statement(""); + break; + + case SPVFuncImplChromaReconstructLinear420XMidpointYMidpoint2Plane: + statement("template"); + statement("inline vec spvChromaReconstructLinear420XMidpointYMidpoint(texture2d plane0, " + "texture2d plane1, sampler samp, float2 coord, LodOptions... 
options)"); + begin_scope(); + statement("vec ycbcr = vec(0, 0, 0, 1);"); + statement("ycbcr.g = plane0.sample(samp, coord, spvForward(options)...).r;"); + statement("float2 ab = fract((round(coord * float2(plane0.get_width(), plane0.get_height())) - float2(0.5, " + "0.5)) * 0.5);"); + statement("ycbcr.br = vec(mix(mix(plane1.sample(samp, coord, spvForward(options)...), " + "plane1.sample(samp, coord, spvForward(options)..., int2(1, 0)), ab.x), " + "mix(plane1.sample(samp, coord, spvForward(options)..., int2(0, 1)), " + "plane1.sample(samp, coord, spvForward(options)..., int2(1, 1)), ab.x), ab.y).rg);"); + statement("return ycbcr;"); + end_scope(); + statement(""); + break; + + case SPVFuncImplChromaReconstructLinear420XMidpointYMidpoint3Plane: + statement("template"); + statement("inline vec spvChromaReconstructLinear420XMidpointYMidpoint(texture2d plane0, " + "texture2d plane1, texture2d plane2, sampler samp, float2 coord, LodOptions... options)"); + begin_scope(); + statement("vec ycbcr = vec(0, 0, 0, 1);"); + statement("ycbcr.g = plane0.sample(samp, coord, spvForward(options)...).r;"); + statement("float2 ab = fract((round(coord * float2(plane0.get_width(), plane0.get_height())) - float2(0.5, " + "0.5)) * 0.5);"); + statement("ycbcr.b = T(mix(mix(plane1.sample(samp, coord, spvForward(options)...), " + "plane1.sample(samp, coord, spvForward(options)..., int2(1, 0)), ab.x), " + "mix(plane1.sample(samp, coord, spvForward(options)..., int2(0, 1)), " + "plane1.sample(samp, coord, spvForward(options)..., int2(1, 1)), ab.x), ab.y).r);"); + statement("ycbcr.r = T(mix(mix(plane2.sample(samp, coord, spvForward(options)...), " + "plane2.sample(samp, coord, spvForward(options)..., int2(1, 0)), ab.x), " + "mix(plane2.sample(samp, coord, spvForward(options)..., int2(0, 1)), " + "plane2.sample(samp, coord, spvForward(options)..., int2(1, 1)), ab.x), ab.y).r);"); + statement("return ycbcr;"); + end_scope(); + statement(""); + break; + + case SPVFuncImplExpandITUFullRange: + 
statement("template"); + statement("inline vec spvExpandITUFullRange(vec ycbcr, int n)"); + begin_scope(); + statement("ycbcr.br -= exp2(T(n-1))/(exp2(T(n))-1);"); + statement("return ycbcr;"); + end_scope(); + statement(""); + break; + + case SPVFuncImplExpandITUNarrowRange: + statement("template"); + statement("inline vec spvExpandITUNarrowRange(vec ycbcr, int n)"); + begin_scope(); + statement("ycbcr.g = (ycbcr.g * (exp2(T(n)) - 1) - ldexp(T(16), n - 8))/ldexp(T(219), n - 8);"); + statement("ycbcr.br = (ycbcr.br * (exp2(T(n)) - 1) - ldexp(T(128), n - 8))/ldexp(T(224), n - 8);"); + statement("return ycbcr;"); + end_scope(); + statement(""); + break; + + case SPVFuncImplConvertYCbCrBT709: + statement("// cf. Khronos Data Format Specification, section 15.1.1"); + statement("constant float3x3 spvBT709Factors = {{1, 1, 1}, {0, -0.13397432/0.7152, 1.8556}, {1.5748, " + "-0.33480248/0.7152, 0}};"); + statement(""); + statement("template"); + statement("inline vec spvConvertYCbCrBT709(vec ycbcr)"); + begin_scope(); + statement("vec rgba;"); + statement("rgba.rgb = vec(spvBT709Factors * ycbcr.gbr);"); + statement("rgba.a = ycbcr.a;"); + statement("return rgba;"); + end_scope(); + statement(""); + break; + + case SPVFuncImplConvertYCbCrBT601: + statement("// cf. Khronos Data Format Specification, section 15.1.2"); + statement("constant float3x3 spvBT601Factors = {{1, 1, 1}, {0, -0.202008/0.587, 1.772}, {1.402, " + "-0.419198/0.587, 0}};"); + statement(""); + statement("template"); + statement("inline vec spvConvertYCbCrBT601(vec ycbcr)"); + begin_scope(); + statement("vec rgba;"); + statement("rgba.rgb = vec(spvBT601Factors * ycbcr.gbr);"); + statement("rgba.a = ycbcr.a;"); + statement("return rgba;"); + end_scope(); + statement(""); + break; + + case SPVFuncImplConvertYCbCrBT2020: + statement("// cf. 
Khronos Data Format Specification, section 15.1.3"); + statement("constant float3x3 spvBT2020Factors = {{1, 1, 1}, {0, -0.11156702/0.6780, 1.8814}, {1.4746, " + "-0.38737742/0.6780, 0}};"); + statement(""); + statement("template"); + statement("inline vec spvConvertYCbCrBT2020(vec ycbcr)"); + begin_scope(); + statement("vec rgba;"); + statement("rgba.rgb = vec(spvBT2020Factors * ycbcr.gbr);"); + statement("rgba.a = ycbcr.a;"); + statement("return rgba;"); + end_scope(); + statement(""); + break; + + case SPVFuncImplDynamicImageSampler: + statement("enum class spvFormatResolution"); + begin_scope(); + statement("_444 = 0,"); + statement("_422,"); + statement("_420"); + end_scope_decl(); + statement(""); + statement("enum class spvChromaFilter"); + begin_scope(); + statement("nearest = 0,"); + statement("linear"); + end_scope_decl(); + statement(""); + statement("enum class spvXChromaLocation"); + begin_scope(); + statement("cosited_even = 0,"); + statement("midpoint"); + end_scope_decl(); + statement(""); + statement("enum class spvYChromaLocation"); + begin_scope(); + statement("cosited_even = 0,"); + statement("midpoint"); + end_scope_decl(); + statement(""); + statement("enum class spvYCbCrModelConversion"); + begin_scope(); + statement("rgb_identity = 0,"); + statement("ycbcr_identity,"); + statement("ycbcr_bt_709,"); + statement("ycbcr_bt_601,"); + statement("ycbcr_bt_2020"); + end_scope_decl(); + statement(""); + statement("enum class spvYCbCrRange"); + begin_scope(); + statement("itu_full = 0,"); + statement("itu_narrow"); + end_scope_decl(); + statement(""); + statement("struct spvComponentBits"); + begin_scope(); + statement("constexpr explicit spvComponentBits(int v) thread : value(v) {}"); + statement("uchar value : 6;"); + end_scope_decl(); + statement("// A class corresponding to metal::sampler which holds sampler"); + statement("// Y'CbCr conversion info."); + statement("struct spvYCbCrSampler"); + begin_scope(); + statement("constexpr 
spvYCbCrSampler() thread : val(build()) {}"); + statement("template"); + statement("constexpr spvYCbCrSampler(Ts... t) thread : val(build(t...)) {}"); + statement("constexpr spvYCbCrSampler(const thread spvYCbCrSampler& s) thread = default;"); + statement(""); + statement("spvFormatResolution get_resolution() const thread"); + begin_scope(); + statement("return spvFormatResolution((val & resolution_mask) >> resolution_base);"); + end_scope(); + statement("spvChromaFilter get_chroma_filter() const thread"); + begin_scope(); + statement("return spvChromaFilter((val & chroma_filter_mask) >> chroma_filter_base);"); + end_scope(); + statement("spvXChromaLocation get_x_chroma_offset() const thread"); + begin_scope(); + statement("return spvXChromaLocation((val & x_chroma_off_mask) >> x_chroma_off_base);"); + end_scope(); + statement("spvYChromaLocation get_y_chroma_offset() const thread"); + begin_scope(); + statement("return spvYChromaLocation((val & y_chroma_off_mask) >> y_chroma_off_base);"); + end_scope(); + statement("spvYCbCrModelConversion get_ycbcr_model() const thread"); + begin_scope(); + statement("return spvYCbCrModelConversion((val & ycbcr_model_mask) >> ycbcr_model_base);"); + end_scope(); + statement("spvYCbCrRange get_ycbcr_range() const thread"); + begin_scope(); + statement("return spvYCbCrRange((val & ycbcr_range_mask) >> ycbcr_range_base);"); + end_scope(); + statement("int get_bpc() const thread { return (val & bpc_mask) >> bpc_base; }"); + statement(""); + statement("private:"); + statement("ushort val;"); + statement(""); + statement("constexpr static constant ushort resolution_bits = 2;"); + statement("constexpr static constant ushort chroma_filter_bits = 2;"); + statement("constexpr static constant ushort x_chroma_off_bit = 1;"); + statement("constexpr static constant ushort y_chroma_off_bit = 1;"); + statement("constexpr static constant ushort ycbcr_model_bits = 3;"); + statement("constexpr static constant ushort ycbcr_range_bit = 1;"); + 
statement("constexpr static constant ushort bpc_bits = 6;"); + statement(""); + statement("constexpr static constant ushort resolution_base = 0;"); + statement("constexpr static constant ushort chroma_filter_base = 2;"); + statement("constexpr static constant ushort x_chroma_off_base = 4;"); + statement("constexpr static constant ushort y_chroma_off_base = 5;"); + statement("constexpr static constant ushort ycbcr_model_base = 6;"); + statement("constexpr static constant ushort ycbcr_range_base = 9;"); + statement("constexpr static constant ushort bpc_base = 10;"); + statement(""); + statement( + "constexpr static constant ushort resolution_mask = ((1 << resolution_bits) - 1) << resolution_base;"); + statement("constexpr static constant ushort chroma_filter_mask = ((1 << chroma_filter_bits) - 1) << " + "chroma_filter_base;"); + statement("constexpr static constant ushort x_chroma_off_mask = ((1 << x_chroma_off_bit) - 1) << " + "x_chroma_off_base;"); + statement("constexpr static constant ushort y_chroma_off_mask = ((1 << y_chroma_off_bit) - 1) << " + "y_chroma_off_base;"); + statement("constexpr static constant ushort ycbcr_model_mask = ((1 << ycbcr_model_bits) - 1) << " + "ycbcr_model_base;"); + statement("constexpr static constant ushort ycbcr_range_mask = ((1 << ycbcr_range_bit) - 1) << " + "ycbcr_range_base;"); + statement("constexpr static constant ushort bpc_mask = ((1 << bpc_bits) - 1) << bpc_base;"); + statement(""); + statement("static constexpr ushort build()"); + begin_scope(); + statement("return 0;"); + end_scope(); + statement(""); + statement("template"); + statement("static constexpr ushort build(spvFormatResolution res, Ts... t)"); + begin_scope(); + statement("return (ushort(res) << resolution_base) | (build(t...) & ~resolution_mask);"); + end_scope(); + statement(""); + statement("template"); + statement("static constexpr ushort build(spvChromaFilter filt, Ts... 
t)"); + begin_scope(); + statement("return (ushort(filt) << chroma_filter_base) | (build(t...) & ~chroma_filter_mask);"); + end_scope(); + statement(""); + statement("template"); + statement("static constexpr ushort build(spvXChromaLocation loc, Ts... t)"); + begin_scope(); + statement("return (ushort(loc) << x_chroma_off_base) | (build(t...) & ~x_chroma_off_mask);"); + end_scope(); + statement(""); + statement("template"); + statement("static constexpr ushort build(spvYChromaLocation loc, Ts... t)"); + begin_scope(); + statement("return (ushort(loc) << y_chroma_off_base) | (build(t...) & ~y_chroma_off_mask);"); + end_scope(); + statement(""); + statement("template"); + statement("static constexpr ushort build(spvYCbCrModelConversion model, Ts... t)"); + begin_scope(); + statement("return (ushort(model) << ycbcr_model_base) | (build(t...) & ~ycbcr_model_mask);"); + end_scope(); + statement(""); + statement("template"); + statement("static constexpr ushort build(spvYCbCrRange range, Ts... t)"); + begin_scope(); + statement("return (ushort(range) << ycbcr_range_base) | (build(t...) & ~ycbcr_range_mask);"); + end_scope(); + statement(""); + statement("template"); + statement("static constexpr ushort build(spvComponentBits bpc, Ts... t)"); + begin_scope(); + statement("return (ushort(bpc.value) << bpc_base) | (build(t...) 
& ~bpc_mask);"); + end_scope(); + end_scope_decl(); + statement(""); + statement("// A class which can hold up to three textures and a sampler, including"); + statement("// Y'CbCr conversion info, used to pass combined image-samplers"); + statement("// dynamically to functions."); + statement("template"); + statement("struct spvDynamicImageSampler"); + begin_scope(); + statement("texture2d plane0;"); + statement("texture2d plane1;"); + statement("texture2d plane2;"); + statement("sampler samp;"); + statement("spvYCbCrSampler ycbcr_samp;"); + statement("uint swizzle = 0;"); + statement(""); + if (msl_options.swizzle_texture_samples) + { + statement("constexpr spvDynamicImageSampler(texture2d tex, sampler samp, uint sw) thread :"); + statement(" plane0(tex), samp(samp), swizzle(sw) {}"); + } + else + { + statement("constexpr spvDynamicImageSampler(texture2d tex, sampler samp) thread :"); + statement(" plane0(tex), samp(samp) {}"); + } + statement("constexpr spvDynamicImageSampler(texture2d tex, sampler samp, spvYCbCrSampler ycbcr_samp, " + "uint sw) thread :"); + statement(" plane0(tex), samp(samp), ycbcr_samp(ycbcr_samp), swizzle(sw) {}"); + statement("constexpr spvDynamicImageSampler(texture2d plane0, texture2d plane1,"); + statement(" sampler samp, spvYCbCrSampler ycbcr_samp, uint sw) thread :"); + statement(" plane0(plane0), plane1(plane1), samp(samp), ycbcr_samp(ycbcr_samp), swizzle(sw) {}"); + statement( + "constexpr spvDynamicImageSampler(texture2d plane0, texture2d plane1, texture2d plane2,"); + statement(" sampler samp, spvYCbCrSampler ycbcr_samp, uint sw) thread :"); + statement(" plane0(plane0), plane1(plane1), plane2(plane2), samp(samp), ycbcr_samp(ycbcr_samp), " + "swizzle(sw) {}"); + statement(""); + // XXX This is really hard to follow... I've left comments to make it a bit easier. + statement("template"); + statement("vec do_sample(float2 coord, LodOptions... 
options) const thread"); + begin_scope(); + statement("if (!is_null_texture(plane1))"); + begin_scope(); + statement("if (ycbcr_samp.get_resolution() == spvFormatResolution::_444 ||"); + statement(" ycbcr_samp.get_chroma_filter() == spvChromaFilter::nearest)"); + begin_scope(); + statement("if (!is_null_texture(plane2))"); + statement(" return spvChromaReconstructNearest(plane0, plane1, plane2, samp, coord,"); + statement(" spvForward(options)...);"); + statement( + "return spvChromaReconstructNearest(plane0, plane1, samp, coord, spvForward(options)...);"); + end_scope(); // if (resolution == 422 || chroma_filter == nearest) + statement("switch (ycbcr_samp.get_resolution())"); + begin_scope(); + statement("case spvFormatResolution::_444: break;"); + statement("case spvFormatResolution::_422:"); + begin_scope(); + statement("switch (ycbcr_samp.get_x_chroma_offset())"); + begin_scope(); + statement("case spvXChromaLocation::cosited_even:"); + statement(" if (!is_null_texture(plane2))"); + statement(" return spvChromaReconstructLinear422CositedEven("); + statement(" plane0, plane1, plane2, samp,"); + statement(" coord, spvForward(options)...);"); + statement(" return spvChromaReconstructLinear422CositedEven("); + statement(" plane0, plane1, samp, coord,"); + statement(" spvForward(options)...);"); + statement("case spvXChromaLocation::midpoint:"); + statement(" if (!is_null_texture(plane2))"); + statement(" return spvChromaReconstructLinear422Midpoint("); + statement(" plane0, plane1, plane2, samp,"); + statement(" coord, spvForward(options)...);"); + statement(" return spvChromaReconstructLinear422Midpoint("); + statement(" plane0, plane1, samp, coord,"); + statement(" spvForward(options)...);"); + end_scope(); // switch (x_chroma_offset) + end_scope(); // case 422: + statement("case spvFormatResolution::_420:"); + begin_scope(); + statement("switch (ycbcr_samp.get_x_chroma_offset())"); + begin_scope(); + statement("case spvXChromaLocation::cosited_even:"); + 
begin_scope(); + statement("switch (ycbcr_samp.get_y_chroma_offset())"); + begin_scope(); + statement("case spvYChromaLocation::cosited_even:"); + statement(" if (!is_null_texture(plane2))"); + statement(" return spvChromaReconstructLinear420XCositedEvenYCositedEven("); + statement(" plane0, plane1, plane2, samp,"); + statement(" coord, spvForward(options)...);"); + statement(" return spvChromaReconstructLinear420XCositedEvenYCositedEven("); + statement(" plane0, plane1, samp, coord,"); + statement(" spvForward(options)...);"); + statement("case spvYChromaLocation::midpoint:"); + statement(" if (!is_null_texture(plane2))"); + statement(" return spvChromaReconstructLinear420XCositedEvenYMidpoint("); + statement(" plane0, plane1, plane2, samp,"); + statement(" coord, spvForward(options)...);"); + statement(" return spvChromaReconstructLinear420XCositedEvenYMidpoint("); + statement(" plane0, plane1, samp, coord,"); + statement(" spvForward(options)...);"); + end_scope(); // switch (y_chroma_offset) + end_scope(); // case x::cosited_even: + statement("case spvXChromaLocation::midpoint:"); + begin_scope(); + statement("switch (ycbcr_samp.get_y_chroma_offset())"); + begin_scope(); + statement("case spvYChromaLocation::cosited_even:"); + statement(" if (!is_null_texture(plane2))"); + statement(" return spvChromaReconstructLinear420XMidpointYCositedEven("); + statement(" plane0, plane1, plane2, samp,"); + statement(" coord, spvForward(options)...);"); + statement(" return spvChromaReconstructLinear420XMidpointYCositedEven("); + statement(" plane0, plane1, samp, coord,"); + statement(" spvForward(options)...);"); + statement("case spvYChromaLocation::midpoint:"); + statement(" if (!is_null_texture(plane2))"); + statement(" return spvChromaReconstructLinear420XMidpointYMidpoint("); + statement(" plane0, plane1, plane2, samp,"); + statement(" coord, spvForward(options)...);"); + statement(" return spvChromaReconstructLinear420XMidpointYMidpoint("); + statement(" plane0, 
plane1, samp, coord,"); + statement(" spvForward(options)...);"); + end_scope(); // switch (y_chroma_offset) + end_scope(); // case x::midpoint + end_scope(); // switch (x_chroma_offset) + end_scope(); // case 420: + end_scope(); // switch (resolution) + end_scope(); // if (multiplanar) + statement("return plane0.sample(samp, coord, spvForward(options)...);"); + end_scope(); // do_sample() + statement("template "); + statement("vec sample(float2 coord, LodOptions... options) const thread"); + begin_scope(); + statement( + "vec s = spvTextureSwizzle(do_sample(coord, spvForward(options)...), swizzle);"); + statement("if (ycbcr_samp.get_ycbcr_model() == spvYCbCrModelConversion::rgb_identity)"); + statement(" return s;"); + statement(""); + statement("switch (ycbcr_samp.get_ycbcr_range())"); + begin_scope(); + statement("case spvYCbCrRange::itu_full:"); + statement(" s = spvExpandITUFullRange(s, ycbcr_samp.get_bpc());"); + statement(" break;"); + statement("case spvYCbCrRange::itu_narrow:"); + statement(" s = spvExpandITUNarrowRange(s, ycbcr_samp.get_bpc());"); + statement(" break;"); + end_scope(); + statement(""); + statement("switch (ycbcr_samp.get_ycbcr_model())"); + begin_scope(); + statement("case spvYCbCrModelConversion::rgb_identity:"); // Silence Clang warning + statement("case spvYCbCrModelConversion::ycbcr_identity:"); + statement(" return s;"); + statement("case spvYCbCrModelConversion::ycbcr_bt_709:"); + statement(" return spvConvertYCbCrBT709(s);"); + statement("case spvYCbCrModelConversion::ycbcr_bt_601:"); + statement(" return spvConvertYCbCrBT601(s);"); + statement("case spvYCbCrModelConversion::ycbcr_bt_2020:"); + statement(" return spvConvertYCbCrBT2020(s);"); + end_scope(); + end_scope(); + statement(""); + // Sampler Y'CbCr conversion forbids offsets. 
+ statement("vec sample(float2 coord, int2 offset) const thread"); + begin_scope(); + if (msl_options.swizzle_texture_samples) + statement("return spvTextureSwizzle(plane0.sample(samp, coord, offset), swizzle);"); + else + statement("return plane0.sample(samp, coord, offset);"); + end_scope(); + statement("template"); + statement("vec sample(float2 coord, lod_options options, int2 offset) const thread"); + begin_scope(); + if (msl_options.swizzle_texture_samples) + statement("return spvTextureSwizzle(plane0.sample(samp, coord, options, offset), swizzle);"); + else + statement("return plane0.sample(samp, coord, options, offset);"); + end_scope(); + statement("#if __HAVE_MIN_LOD_CLAMP__"); + statement("vec sample(float2 coord, bias b, min_lod_clamp min_lod, int2 offset) const thread"); + begin_scope(); + statement("return plane0.sample(samp, coord, b, min_lod, offset);"); + end_scope(); + statement( + "vec sample(float2 coord, gradient2d grad, min_lod_clamp min_lod, int2 offset) const thread"); + begin_scope(); + statement("return plane0.sample(samp, coord, grad, min_lod, offset);"); + end_scope(); + statement("#endif"); + statement(""); + // Y'CbCr conversion forbids all operations but sampling. + statement("vec read(uint2 coord, uint lod = 0) const thread"); + begin_scope(); + statement("return plane0.read(coord, lod);"); + end_scope(); + statement(""); + statement("vec gather(float2 coord, int2 offset = int2(0), component c = component::x) const thread"); + begin_scope(); + if (msl_options.swizzle_texture_samples) + statement("return spvGatherSwizzle(plane0, samp, swizzle, c, coord, offset);"); + else + statement("return plane0.gather(samp, coord, offset, c);"); + end_scope(); + end_scope_decl(); + statement(""); + + default: + break; + } + } +} + +// Undefined global memory is not allowed in MSL. +// Declare constant and init to zeros. Use {}, as global constructors can break Metal. 
+void CompilerMSL::declare_undefined_values() +{ + bool emitted = false; + ir.for_each_typed_id([&](uint32_t, SPIRUndef &undef) { + auto &type = this->get(undef.basetype); + // OpUndef can be void for some reason ... + if (type.basetype == SPIRType::Void) + return; + + statement("constant ", variable_decl(type, to_name(undef.self), undef.self), " = {};"); + emitted = true; + }); + + if (emitted) + statement(""); +} + +void CompilerMSL::declare_constant_arrays() +{ + bool fully_inlined = ir.ids_for_type[TypeFunction].size() == 1; + + // MSL cannot declare arrays inline (except when declaring a variable), so we must move them out to + // global constants directly, so we are able to use constants as variable expressions. + bool emitted = false; + + ir.for_each_typed_id([&](uint32_t, SPIRConstant &c) { + if (c.specialization) + return; + + auto &type = this->get(c.constant_type); + // Constant arrays of non-primitive types (i.e. matrices) won't link properly into Metal libraries. + // FIXME: However, hoisting constants to main() means we need to pass down constant arrays to leaf functions if they are used there. + // If there are multiple functions in the module, drop this case to avoid breaking use cases which do not need to + // link into Metal libraries. This is hacky. + if (!type.array.empty() && (!fully_inlined || is_scalar(type) || is_vector(type))) + { + auto name = to_name(c.self); + statement("constant ", variable_decl(type, name), " = ", constant_expression(c), ";"); + emitted = true; + } + }); + + if (emitted) + statement(""); +} + +// Constant arrays of non-primitive types (i.e. matrices) won't link properly into Metal libraries +void CompilerMSL::declare_complex_constant_arrays() +{ + // If we do not have a fully inlined module, we did not opt in to + // declaring constant arrays of complex types. See CompilerMSL::declare_constant_arrays(). 
+ bool fully_inlined = ir.ids_for_type[TypeFunction].size() == 1; + if (!fully_inlined) + return; + + // MSL cannot declare arrays inline (except when declaring a variable), so we must move them out to + // global constants directly, so we are able to use constants as variable expressions. + bool emitted = false; + + ir.for_each_typed_id([&](uint32_t, SPIRConstant &c) { + if (c.specialization) + return; + + auto &type = this->get(c.constant_type); + if (!type.array.empty() && !(is_scalar(type) || is_vector(type))) + { + auto name = to_name(c.self); + statement("", variable_decl(type, name), " = ", constant_expression(c), ";"); + emitted = true; + } + }); + + if (emitted) + statement(""); +} + +void CompilerMSL::emit_resources() +{ + declare_constant_arrays(); + declare_undefined_values(); + + // Emit the special [[stage_in]] and [[stage_out]] interface blocks which we created. + emit_interface_block(stage_out_var_id); + emit_interface_block(patch_stage_out_var_id); + emit_interface_block(stage_in_var_id); + emit_interface_block(patch_stage_in_var_id); +} + +// Emit declarations for the specialization Metal function constants +void CompilerMSL::emit_specialization_constants_and_structs() +{ + SpecializationConstant wg_x, wg_y, wg_z; + ID workgroup_size_id = get_work_group_size_specialization_constants(wg_x, wg_y, wg_z); + bool emitted = false; + + unordered_set declared_structs; + unordered_set aligned_structs; + + // First, we need to deal with scalar block layout. + // It is possible that a struct may have to be placed at an alignment which does not match the innate alignment of the struct itself. + // In that case, if such a case exists for a struct, we must force that all elements of the struct become packed_ types. + // This makes the struct alignment as small as physically possible. + // When we actually align the struct later, we can insert padding as necessary to make the packed members behave like normally aligned types. 
+ ir.for_each_typed_id([&](uint32_t type_id, const SPIRType &type) { + if (type.basetype == SPIRType::Struct && + has_extended_decoration(type_id, SPIRVCrossDecorationBufferBlockRepacked)) + mark_scalar_layout_structs(type); + }); + + // Very particular use of the soft loop lock. + // align_struct may need to create custom types on the fly, but we don't care about + // these types for purpose of iterating over them in ir.ids_for_type and friends. + auto loop_lock = ir.create_loop_soft_lock(); + + for (auto &id_ : ir.ids_for_constant_or_type) + { + auto &id = ir.ids[id_]; + + if (id.get_type() == TypeConstant) + { + auto &c = id.get(); + + if (c.self == workgroup_size_id) + { + // TODO: This can be expressed as a [[threads_per_threadgroup]] input semantic, but we need to know + // the work group size at compile time in SPIR-V, and [[threads_per_threadgroup]] would need to be passed around as a global. + // The work group size may be a specialization constant. + statement("constant uint3 ", builtin_to_glsl(BuiltInWorkgroupSize, StorageClassWorkgroup), + " [[maybe_unused]] = ", constant_expression(get(workgroup_size_id)), ";"); + emitted = true; + } + else if (c.specialization) + { + auto &type = get(c.constant_type); + string sc_type_name = type_to_glsl(type); + string sc_name = to_name(c.self); + string sc_tmp_name = sc_name + "_tmp"; + + // Function constants are only supported in MSL 1.2 and later. + // If we don't support it just declare the "default" directly. + // This "default" value can be overridden to the true specialization constant by the API user. + // Specialization constants which are used as array length expressions cannot be function constants in MSL, + // so just fall back to macros. + if (msl_options.supports_msl_version(1, 2) && has_decoration(c.self, DecorationSpecId) && + !c.is_used_as_array_length) + { + uint32_t constant_id = get_decoration(c.self, DecorationSpecId); + // Only scalar, non-composite values can be function constants. 
+ statement("constant ", sc_type_name, " ", sc_tmp_name, " [[function_constant(", constant_id, + ")]];"); + statement("constant ", sc_type_name, " ", sc_name, " = is_function_constant_defined(", sc_tmp_name, + ") ? ", sc_tmp_name, " : ", constant_expression(c), ";"); + } + else if (has_decoration(c.self, DecorationSpecId)) + { + // Fallback to macro overrides. + c.specialization_constant_macro_name = + constant_value_macro_name(get_decoration(c.self, DecorationSpecId)); + + statement("#ifndef ", c.specialization_constant_macro_name); + statement("#define ", c.specialization_constant_macro_name, " ", constant_expression(c)); + statement("#endif"); + statement("constant ", sc_type_name, " ", sc_name, " = ", c.specialization_constant_macro_name, + ";"); + } + else + { + // Composite specialization constants must be built from other specialization constants. + statement("constant ", sc_type_name, " ", sc_name, " = ", constant_expression(c), ";"); + } + emitted = true; + } + } + else if (id.get_type() == TypeConstantOp) + { + auto &c = id.get(); + auto &type = get(c.basetype); + auto name = to_name(c.self); + statement("constant ", variable_decl(type, name), " = ", constant_op_expression(c), ";"); + emitted = true; + } + else if (id.get_type() == TypeType) + { + // Output non-builtin interface structs. These include local function structs + // and structs nested within uniform and read-write buffers. + auto &type = id.get(); + TypeID type_id = type.self; + + bool is_struct = (type.basetype == SPIRType::Struct) && type.array.empty() && !type.pointer; + bool is_block = + has_decoration(type.self, DecorationBlock) || has_decoration(type.self, DecorationBufferBlock); + + bool is_builtin_block = is_block && is_builtin_type(type); + bool is_declarable_struct = is_struct && !is_builtin_block; + + // We'll declare this later. 
+ if (stage_out_var_id && get_stage_out_struct_type().self == type_id) + is_declarable_struct = false; + if (patch_stage_out_var_id && get_patch_stage_out_struct_type().self == type_id) + is_declarable_struct = false; + if (stage_in_var_id && get_stage_in_struct_type().self == type_id) + is_declarable_struct = false; + if (patch_stage_in_var_id && get_patch_stage_in_struct_type().self == type_id) + is_declarable_struct = false; + + // Align and emit declarable structs...but avoid declaring each more than once. + if (is_declarable_struct && declared_structs.count(type_id) == 0) + { + if (emitted) + statement(""); + emitted = false; + + declared_structs.insert(type_id); + + if (has_extended_decoration(type_id, SPIRVCrossDecorationBufferBlockRepacked)) + align_struct(type, aligned_structs); + + // Make sure we declare the underlying struct type, and not the "decorated" type with pointers, etc. + emit_struct(get(type_id)); + } + } + } + + if (emitted) + statement(""); +} + +void CompilerMSL::emit_binary_unord_op(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1, + const char *op) +{ + bool forward = should_forward(op0) && should_forward(op1); + emit_op(result_type, result_id, + join("(isunordered(", to_enclosed_unpacked_expression(op0), ", ", to_enclosed_unpacked_expression(op1), + ") || ", to_enclosed_unpacked_expression(op0), " ", op, " ", to_enclosed_unpacked_expression(op1), + ")"), + forward); + + inherit_expression_dependencies(result_id, op0); + inherit_expression_dependencies(result_id, op1); +} + +bool CompilerMSL::emit_tessellation_io_load(uint32_t result_type_id, uint32_t id, uint32_t ptr) +{ + auto &ptr_type = expression_type(ptr); + auto &result_type = get(result_type_id); + if (ptr_type.storage != StorageClassInput && ptr_type.storage != StorageClassOutput) + return false; + if (ptr_type.storage == StorageClassOutput && get_execution_model() == ExecutionModelTessellationEvaluation) + return false; + + bool multi_patch_tess_ctl = 
get_execution_model() == ExecutionModelTessellationControl && + msl_options.multi_patch_workgroup && ptr_type.storage == StorageClassInput; + bool flat_matrix = is_matrix(result_type) && ptr_type.storage == StorageClassInput && !multi_patch_tess_ctl; + bool flat_struct = result_type.basetype == SPIRType::Struct && ptr_type.storage == StorageClassInput; + bool flat_data_type = flat_matrix || is_array(result_type) || flat_struct; + if (!flat_data_type) + return false; + + if (has_decoration(ptr, DecorationPatch)) + return false; + + // Now, we must unflatten a composite type and take care of interleaving array access with gl_in/gl_out. + // Lots of painful code duplication since we *really* should not unroll these kinds of loads in entry point fixup + // unless we're forced to do this when the code is emitting inoptimal OpLoads. + string expr; + + uint32_t interface_index = get_extended_decoration(ptr, SPIRVCrossDecorationInterfaceMemberIndex); + auto *var = maybe_get_backing_variable(ptr); + bool ptr_is_io_variable = ir.ids[ptr].get_type() == TypeVariable; + auto &expr_type = get_pointee_type(ptr_type.self); + + const auto &iface_type = expression_type(stage_in_ptr_var_id); + + if (result_type.array.size() > 2) + { + SPIRV_CROSS_THROW("Cannot load tessellation IO variables with more than 2 dimensions."); + } + else if (result_type.array.size() == 2) + { + if (!ptr_is_io_variable) + SPIRV_CROSS_THROW("Loading an array-of-array must be loaded directly from an IO variable."); + if (interface_index == uint32_t(-1)) + SPIRV_CROSS_THROW("Interface index is unknown. 
Cannot continue."); + if (result_type.basetype == SPIRType::Struct || flat_matrix) + SPIRV_CROSS_THROW("Cannot load array-of-array of composite type in tessellation IO."); + + expr += type_to_glsl(result_type) + "({ "; + uint32_t num_control_points = to_array_size_literal(result_type, 1); + uint32_t base_interface_index = interface_index; + + auto &sub_type = get(result_type.parent_type); + + for (uint32_t i = 0; i < num_control_points; i++) + { + expr += type_to_glsl(sub_type) + "({ "; + interface_index = base_interface_index; + uint32_t array_size = to_array_size_literal(result_type, 0); + if (multi_patch_tess_ctl) + { + for (uint32_t j = 0; j < array_size; j++) + { + const uint32_t indices[3] = { i, interface_index, j }; + + AccessChainMeta meta; + expr += + access_chain_internal(stage_in_ptr_var_id, indices, 3, + ACCESS_CHAIN_INDEX_IS_LITERAL_BIT | ACCESS_CHAIN_PTR_CHAIN_BIT, &meta); + // If the expression has more vector components than the result type, insert + // a swizzle. This shouldn't happen normally on valid SPIR-V, but it might + // happen if we replace the type of an input variable. 
+ if (!is_matrix(sub_type) && sub_type.basetype != SPIRType::Struct && + expr_type.vecsize > sub_type.vecsize) + expr += vector_swizzle(sub_type.vecsize, 0); + + if (j + 1 < array_size) + expr += ", "; + } + } + else + { + for (uint32_t j = 0; j < array_size; j++, interface_index++) + { + const uint32_t indices[2] = { i, interface_index }; + + AccessChainMeta meta; + expr += + access_chain_internal(stage_in_ptr_var_id, indices, 2, + ACCESS_CHAIN_INDEX_IS_LITERAL_BIT | ACCESS_CHAIN_PTR_CHAIN_BIT, &meta); + if (!is_matrix(sub_type) && sub_type.basetype != SPIRType::Struct && + expr_type.vecsize > sub_type.vecsize) + expr += vector_swizzle(sub_type.vecsize, 0); + + if (j + 1 < array_size) + expr += ", "; + } + } + expr += " })"; + if (i + 1 < num_control_points) + expr += ", "; + } + expr += " })"; + } + else if (flat_struct) + { + bool is_array_of_struct = is_array(result_type); + if (is_array_of_struct && !ptr_is_io_variable) + SPIRV_CROSS_THROW("Loading array of struct from IO variable must come directly from IO variable."); + + uint32_t num_control_points = 1; + if (is_array_of_struct) + { + num_control_points = to_array_size_literal(result_type, 0); + expr += type_to_glsl(result_type) + "({ "; + } + + auto &struct_type = is_array_of_struct ? get(result_type.parent_type) : result_type; + assert(struct_type.array.empty()); + + for (uint32_t i = 0; i < num_control_points; i++) + { + expr += type_to_glsl(struct_type) + "{ "; + for (uint32_t j = 0; j < uint32_t(struct_type.member_types.size()); j++) + { + // The base interface index is stored per variable for structs. + if (var) + { + interface_index = + get_extended_member_decoration(var->self, j, SPIRVCrossDecorationInterfaceMemberIndex); + } + + if (interface_index == uint32_t(-1)) + SPIRV_CROSS_THROW("Interface index is unknown. 
Cannot continue."); + + const auto &mbr_type = get(struct_type.member_types[j]); + const auto &expr_mbr_type = get(expr_type.member_types[j]); + if (is_matrix(mbr_type) && ptr_type.storage == StorageClassInput && !multi_patch_tess_ctl) + { + expr += type_to_glsl(mbr_type) + "("; + for (uint32_t k = 0; k < mbr_type.columns; k++, interface_index++) + { + if (is_array_of_struct) + { + const uint32_t indices[2] = { i, interface_index }; + AccessChainMeta meta; + expr += access_chain_internal( + stage_in_ptr_var_id, indices, 2, + ACCESS_CHAIN_INDEX_IS_LITERAL_BIT | ACCESS_CHAIN_PTR_CHAIN_BIT, &meta); + } + else + expr += to_expression(ptr) + "." + to_member_name(iface_type, interface_index); + if (expr_mbr_type.vecsize > mbr_type.vecsize) + expr += vector_swizzle(mbr_type.vecsize, 0); + + if (k + 1 < mbr_type.columns) + expr += ", "; + } + expr += ")"; + } + else if (is_array(mbr_type)) + { + expr += type_to_glsl(mbr_type) + "({ "; + uint32_t array_size = to_array_size_literal(mbr_type, 0); + if (multi_patch_tess_ctl) + { + for (uint32_t k = 0; k < array_size; k++) + { + if (is_array_of_struct) + { + const uint32_t indices[3] = { i, interface_index, k }; + AccessChainMeta meta; + expr += access_chain_internal( + stage_in_ptr_var_id, indices, 3, + ACCESS_CHAIN_INDEX_IS_LITERAL_BIT | ACCESS_CHAIN_PTR_CHAIN_BIT, &meta); + } + else + expr += join(to_expression(ptr), ".", to_member_name(iface_type, interface_index), "[", + k, "]"); + if (expr_mbr_type.vecsize > mbr_type.vecsize) + expr += vector_swizzle(mbr_type.vecsize, 0); + + if (k + 1 < array_size) + expr += ", "; + } + } + else + { + for (uint32_t k = 0; k < array_size; k++, interface_index++) + { + if (is_array_of_struct) + { + const uint32_t indices[2] = { i, interface_index }; + AccessChainMeta meta; + expr += access_chain_internal( + stage_in_ptr_var_id, indices, 2, + ACCESS_CHAIN_INDEX_IS_LITERAL_BIT | ACCESS_CHAIN_PTR_CHAIN_BIT, &meta); + } + else + expr += to_expression(ptr) + "." 
+ to_member_name(iface_type, interface_index); + if (expr_mbr_type.vecsize > mbr_type.vecsize) + expr += vector_swizzle(mbr_type.vecsize, 0); + + if (k + 1 < array_size) + expr += ", "; + } + } + expr += " })"; + } + else + { + if (is_array_of_struct) + { + const uint32_t indices[2] = { i, interface_index }; + AccessChainMeta meta; + expr += access_chain_internal(stage_in_ptr_var_id, indices, 2, + ACCESS_CHAIN_INDEX_IS_LITERAL_BIT | ACCESS_CHAIN_PTR_CHAIN_BIT, + &meta); + } + else + expr += to_expression(ptr) + "." + to_member_name(iface_type, interface_index); + if (expr_mbr_type.vecsize > mbr_type.vecsize) + expr += vector_swizzle(mbr_type.vecsize, 0); + } + + if (j + 1 < struct_type.member_types.size()) + expr += ", "; + } + expr += " }"; + if (i + 1 < num_control_points) + expr += ", "; + } + if (is_array_of_struct) + expr += " })"; + } + else if (flat_matrix) + { + bool is_array_of_matrix = is_array(result_type); + if (is_array_of_matrix && !ptr_is_io_variable) + SPIRV_CROSS_THROW("Loading array of matrix from IO variable must come directly from IO variable."); + if (interface_index == uint32_t(-1)) + SPIRV_CROSS_THROW("Interface index is unknown. Cannot continue."); + + if (is_array_of_matrix) + { + // Loading a matrix from each control point. 
+ uint32_t base_interface_index = interface_index; + uint32_t num_control_points = to_array_size_literal(result_type, 0); + expr += type_to_glsl(result_type) + "({ "; + + auto &matrix_type = get_variable_element_type(get(ptr)); + + for (uint32_t i = 0; i < num_control_points; i++) + { + interface_index = base_interface_index; + expr += type_to_glsl(matrix_type) + "("; + for (uint32_t j = 0; j < result_type.columns; j++, interface_index++) + { + const uint32_t indices[2] = { i, interface_index }; + + AccessChainMeta meta; + expr += + access_chain_internal(stage_in_ptr_var_id, indices, 2, + ACCESS_CHAIN_INDEX_IS_LITERAL_BIT | ACCESS_CHAIN_PTR_CHAIN_BIT, &meta); + if (expr_type.vecsize > result_type.vecsize) + expr += vector_swizzle(result_type.vecsize, 0); + if (j + 1 < result_type.columns) + expr += ", "; + } + expr += ")"; + if (i + 1 < num_control_points) + expr += ", "; + } + + expr += " })"; + } + else + { + expr += type_to_glsl(result_type) + "("; + for (uint32_t i = 0; i < result_type.columns; i++, interface_index++) + { + expr += to_expression(ptr) + "." + to_member_name(iface_type, interface_index); + if (expr_type.vecsize > result_type.vecsize) + expr += vector_swizzle(result_type.vecsize, 0); + if (i + 1 < result_type.columns) + expr += ", "; + } + expr += ")"; + } + } + else if (ptr_is_io_variable) + { + assert(is_array(result_type)); + assert(result_type.array.size() == 1); + if (interface_index == uint32_t(-1)) + SPIRV_CROSS_THROW("Interface index is unknown. Cannot continue."); + + // We're loading an array directly from a global variable. + // This means we're loading one member from each control point. 
+ expr += type_to_glsl(result_type) + "({ "; + uint32_t num_control_points = to_array_size_literal(result_type, 0); + + for (uint32_t i = 0; i < num_control_points; i++) + { + const uint32_t indices[2] = { i, interface_index }; + + AccessChainMeta meta; + expr += access_chain_internal(stage_in_ptr_var_id, indices, 2, + ACCESS_CHAIN_INDEX_IS_LITERAL_BIT | ACCESS_CHAIN_PTR_CHAIN_BIT, &meta); + if (expr_type.vecsize > result_type.vecsize) + expr += vector_swizzle(result_type.vecsize, 0); + + if (i + 1 < num_control_points) + expr += ", "; + } + expr += " })"; + } + else + { + // We're loading an array from a concrete control point. + assert(is_array(result_type)); + assert(result_type.array.size() == 1); + if (interface_index == uint32_t(-1)) + SPIRV_CROSS_THROW("Interface index is unknown. Cannot continue."); + + expr += type_to_glsl(result_type) + "({ "; + uint32_t array_size = to_array_size_literal(result_type, 0); + for (uint32_t i = 0; i < array_size; i++, interface_index++) + { + expr += to_expression(ptr) + "." + to_member_name(iface_type, interface_index); + if (expr_type.vecsize > result_type.vecsize) + expr += vector_swizzle(result_type.vecsize, 0); + if (i + 1 < array_size) + expr += ", "; + } + expr += " })"; + } + + emit_op(result_type_id, id, expr, false); + register_read(id, ptr, false); + return true; +} + +bool CompilerMSL::emit_tessellation_access_chain(const uint32_t *ops, uint32_t length) +{ + // If this is a per-vertex output, remap it to the I/O array buffer. + + // Any object which did not go through IO flattening shenanigans will go there instead. + // We will unflatten on-demand instead as needed, but not all possible cases can be supported, especially with arrays. 
+ + auto *var = maybe_get_backing_variable(ops[2]); + bool patch = false; + bool flat_data = false; + bool ptr_is_chain = false; + bool multi_patch = get_execution_model() == ExecutionModelTessellationControl && msl_options.multi_patch_workgroup; + + if (var) + { + patch = has_decoration(ops[2], DecorationPatch) || is_patch_block(get_variable_data_type(*var)); + + // Should match strip_array in add_interface_block. + flat_data = var->storage == StorageClassInput || + (var->storage == StorageClassOutput && get_execution_model() == ExecutionModelTessellationControl); + + // We might have a chained access chain, where + // we first take the access chain to the control point, and then we chain into a member or something similar. + // In this case, we need to skip gl_in/gl_out remapping. + ptr_is_chain = var->self != ID(ops[2]); + } + + BuiltIn bi_type = BuiltIn(get_decoration(ops[2], DecorationBuiltIn)); + if (var && flat_data && !patch && + (!is_builtin_variable(*var) || bi_type == BuiltInPosition || bi_type == BuiltInPointSize || + bi_type == BuiltInClipDistance || bi_type == BuiltInCullDistance || + get_variable_data_type(*var).basetype == SPIRType::Struct)) + { + AccessChainMeta meta; + SmallVector indices; + uint32_t next_id = ir.increase_bound_by(1); + + indices.reserve(length - 3 + 1); + + uint32_t first_non_array_index = ptr_is_chain ? 3 : 4; + VariableID stage_var_id = var->storage == StorageClassInput ? stage_in_ptr_var_id : stage_out_ptr_var_id; + VariableID ptr = ptr_is_chain ? VariableID(ops[2]) : stage_var_id; + if (!ptr_is_chain) + { + // Index into gl_in/gl_out with first array index. 
+ indices.push_back(ops[3]); + } + + auto &result_ptr_type = get(ops[0]); + + uint32_t const_mbr_id = next_id++; + uint32_t index = get_extended_decoration(var->self, SPIRVCrossDecorationInterfaceMemberIndex); + if (var->storage == StorageClassInput || has_decoration(get_variable_element_type(*var).self, DecorationBlock)) + { + uint32_t i = first_non_array_index; + auto *type = &get_variable_element_type(*var); + if (index == uint32_t(-1) && length >= (first_non_array_index + 1)) + { + // Maybe this is a struct type in the input class, in which case + // we put it as a decoration on the corresponding member. + index = get_extended_member_decoration(var->self, get_constant(ops[first_non_array_index]).scalar(), + SPIRVCrossDecorationInterfaceMemberIndex); + assert(index != uint32_t(-1)); + i++; + type = &get(type->member_types[get_constant(ops[first_non_array_index]).scalar()]); + } + + // In this case, we're poking into flattened structures and arrays, so now we have to + // combine the following indices. If we encounter a non-constant index, + // we're hosed. + for (; i < length; ++i) + { + if ((multi_patch || (!is_array(*type) && !is_matrix(*type))) && type->basetype != SPIRType::Struct) + break; + + auto *c = maybe_get(ops[i]); + if (!c || c->specialization) + SPIRV_CROSS_THROW("Trying to dynamically index into an array interface variable in tessellation. " + "This is currently unsupported."); + + // We're in flattened space, so just increment the member index into IO block. + // We can only do this once in the current implementation, so either: + // Struct, Matrix or 1-dimensional array for a control point. 
+ index += c->scalar(); + + if (type->parent_type) + type = &get(type->parent_type); + else if (type->basetype == SPIRType::Struct) + type = &get(type->member_types[c->scalar()]); + } + + if ((!multi_patch && (is_matrix(result_ptr_type) || is_array(result_ptr_type))) || + result_ptr_type.basetype == SPIRType::Struct) + { + // We're not going to emit the actual member name, we let any further OpLoad take care of that. + // Tag the access chain with the member index we're referencing. + set_extended_decoration(ops[1], SPIRVCrossDecorationInterfaceMemberIndex, index); + } + else + { + // Access the appropriate member of gl_in/gl_out. + set(const_mbr_id, get_uint_type_id(), index, false); + indices.push_back(const_mbr_id); + + // Append any straggling access chain indices. + if (i < length) + indices.insert(indices.end(), ops + i, ops + length); + } + } + else + { + assert(index != uint32_t(-1)); + set(const_mbr_id, get_uint_type_id(), index, false); + indices.push_back(const_mbr_id); + + indices.insert(indices.end(), ops + 4, ops + length); + } + + // We use the pointer to the base of the input/output array here, + // so this is always a pointer chain. + string e; + + if (!ptr_is_chain) + { + // This is the start of an access chain, use ptr_chain to index into control point array. + e = access_chain(ptr, indices.data(), uint32_t(indices.size()), result_ptr_type, &meta, true); + } + else + { + // If we're accessing a struct, we need to use member indices which are based on the IO block, + // not actual struct type, so we have to use a split access chain here where + // first path resolves the control point index, i.e. gl_in[index], and second half deals with + // looking up flattened member name. + + // However, it is possible that we partially accessed a struct, + // by taking pointer to member inside the control-point array. + // For this case, we fall back to a natural access chain since we have already dealt with remapping struct members. 
+ // One way to check this here is if we have 2 implied read expressions. + // First one is the gl_in/gl_out struct itself, then an index into that array. + // If we have traversed further, we use a normal access chain formulation. + auto *ptr_expr = maybe_get(ptr); + if (ptr_expr && ptr_expr->implied_read_expressions.size() == 2) + { + e = join(to_expression(ptr), + access_chain_internal(stage_var_id, indices.data(), uint32_t(indices.size()), + ACCESS_CHAIN_CHAIN_ONLY_BIT, &meta)); + } + else + { + e = access_chain_internal(ptr, indices.data(), uint32_t(indices.size()), 0, &meta); + } + } + + // Get the actual type of the object that was accessed. If it's a vector type and we changed it, + // then we'll need to add a swizzle. + // For this, we can't necessarily rely on the type of the base expression, because it might be + // another access chain, and it will therefore already have the "correct" type. + auto *expr_type = &get_variable_data_type(*var); + if (has_extended_decoration(ops[2], SPIRVCrossDecorationTessIOOriginalInputTypeID)) + expr_type = &get(get_extended_decoration(ops[2], SPIRVCrossDecorationTessIOOriginalInputTypeID)); + for (uint32_t i = 3; i < length; i++) + { + if (!is_array(*expr_type) && expr_type->basetype == SPIRType::Struct) + expr_type = &get(expr_type->member_types[get(ops[i]).scalar()]); + else + expr_type = &get(expr_type->parent_type); + } + if (!is_array(*expr_type) && !is_matrix(*expr_type) && expr_type->basetype != SPIRType::Struct && + expr_type->vecsize > result_ptr_type.vecsize) + e += vector_swizzle(result_ptr_type.vecsize, 0); + + auto &expr = set(ops[1], move(e), ops[0], should_forward(ops[2])); + expr.loaded_from = var->self; + expr.need_transpose = meta.need_transpose; + expr.access_chain = true; + + // Mark the result as being packed if necessary. 
+ if (meta.storage_is_packed) + set_extended_decoration(ops[1], SPIRVCrossDecorationPhysicalTypePacked); + if (meta.storage_physical_type != 0) + set_extended_decoration(ops[1], SPIRVCrossDecorationPhysicalTypeID, meta.storage_physical_type); + if (meta.storage_is_invariant) + set_decoration(ops[1], DecorationInvariant); + // Save the type we found in case the result is used in another access chain. + set_extended_decoration(ops[1], SPIRVCrossDecorationTessIOOriginalInputTypeID, expr_type->self); + + // If we have some expression dependencies in our access chain, this access chain is technically a forwarded + // temporary which could be subject to invalidation. + // Need to assume we're forwarded while calling inherit_expression_depdendencies. + forwarded_temporaries.insert(ops[1]); + // The access chain itself is never forced to a temporary, but its dependencies might. + suppressed_usage_tracking.insert(ops[1]); + + for (uint32_t i = 2; i < length; i++) + { + inherit_expression_dependencies(ops[1], ops[i]); + add_implied_read_expression(expr, ops[i]); + } + + // If we have no dependencies after all, i.e., all indices in the access chain are immutable temporaries, + // we're not forwarded after all. + if (expr.expression_dependencies.empty()) + forwarded_temporaries.erase(ops[1]); + + return true; + } + + // If this is the inner tessellation level, and we're tessellating triangles, + // drop the last index. It isn't an array in this case, so we can't have an + // array reference here. We need to make this ID a variable instead of an + // expression so we don't try to dereference it as a variable pointer. + // Don't do this if the index is a constant 1, though. We need to drop stores + // to that one. + auto *m = ir.find_meta(var ? 
var->self : ID(0));
+	if (get_execution_model() == ExecutionModelTessellationControl && var && m &&
+	    m->decoration.builtin_type == BuiltInTessLevelInner && get_entry_point().flags.get(ExecutionModeTriangles))
+	{
+		auto *c = maybe_get(ops[3]);
+		if (c && c->scalar() == 1)
+			return false;
+		auto &dest_var = set(ops[1], *var);
+		dest_var.basetype = ops[0];
+		ir.meta[ops[1]] = ir.meta[ops[2]];
+		inherit_expression_dependencies(ops[1], ops[2]);
+		return true;
+	}
+
+	return false;
+}
+
+// Reports whether id_lhs addresses a tessellation level element that must be dropped
+// in Triangles mode: element 1 of TessLevelInner or element 3 of TessLevelOuter.
+// Returns false when the entry point is not in Triangles mode, when id_lhs is not an
+// access-chain expression, when that expression is not loaded from one of the two
+// tessellation level builtins, or when the tested index is not a constant.
+bool CompilerMSL::is_out_of_bounds_tessellation_level(uint32_t id_lhs)
+{
+	if (!get_entry_point().flags.get(ExecutionModeTriangles))
+		return false;
+
+	// In SPIR-V, TessLevelInner always has two elements and TessLevelOuter always has
+	// four. This is true even if we are tessellating triangles. This allows clients
+	// to use a single tessellation control shader with multiple tessellation evaluation
+	// shaders.
+	// In Metal, however, only the first element of TessLevelInner and the first three
+	// of TessLevelOuter are accessible. This stems from how in Metal, the tessellation
+	// levels must be stored to a dedicated buffer in a particular format that depends
+	// on the patch type. Therefore, in Triangles mode, any access to the second
+	// inner level or the fourth outer level must be dropped.
+	const auto *e = maybe_get(id_lhs);
+	if (!e || !e->access_chain)
+		return false;
+	BuiltIn builtin = BuiltIn(get_decoration(e->loaded_from, DecorationBuiltIn));
+	if (builtin != BuiltInTessLevelInner && builtin != BuiltInTessLevelOuter)
+		return false;
+	// The index tested below is the second implied read expression. A chain that
+	// recorded fewer than two has no index to inspect, so do not read past the end.
+	if (e->implied_read_expressions.size() < 2)
+		return false;
+	auto *c = maybe_get(e->implied_read_expressions[1]);
+	if (!c)
+		return false;
+	return (builtin == BuiltInTessLevelInner && c->scalar() == 1) ||
+	       (builtin == BuiltInTessLevelOuter && c->scalar() == 3);
+}
+
+// Rewrites expr in place so that further indexing addresses individual scalars.
+// When the chain is not already packed and storage is StorageBuffer or Workgroup,
+// expr becomes a cast of its own address to a pointer to `type` in the matching
+// address space ("device" or "threadgroup"), and is_packed is set to true.
+// In every other case both expr and is_packed are left untouched.
+void CompilerMSL::prepare_access_chain_for_scalar_access(std::string &expr, const SPIRType &type,
+                                                         spv::StorageClass storage, bool &is_packed)
+{
+	// If there is any risk of writes happening with the access chain in question,
+	// and there is a risk of concurrent write access to other components,
+	// we must cast the access chain to a plain pointer to ensure we only access the exact scalars we expect.
+	// The MSL compiler refuses to allow component-level access for any non-packed vector types.
+	if (!is_packed && (storage == StorageClassStorageBuffer || storage == StorageClassWorkgroup))
+	{
+		const char *addr_space = storage == StorageClassWorkgroup ? "threadgroup" : "device";
+		expr = join("((", addr_space, " ", type_to_glsl(type), "*)&", enclose_expression(expr), ")");
+
+		// Further indexing should happen with packed rules (array index, not swizzle).
+		is_packed = true;
+	}
+}
+
+// Sets the interface member index for an access chain to a pull-model interpolant.
+// ops/length are the operands of the access chain instruction: ops[0] is the result type,
+// ops[1] the result ID that receives the decoration, ops[2] the base, ops[3..] the indices.
+// Does nothing unless the backing variable of ops[2] is listed in pull_model_inputs.
+// Throws if an index that has to be folded in is not a plain (non-specialization) constant.
+void CompilerMSL::fix_up_interpolant_access_chain(const uint32_t *ops, uint32_t length)
+{
+	auto *var = maybe_get_backing_variable(ops[2]);
+	if (!var || !pull_model_inputs.count(var->self))
+		return;
+	// Get the base index.
+	uint32_t interface_index;
+	auto &var_type = get_variable_data_type(*var);
+	auto &result_type = get(ops[0]);
+	auto *type = &var_type;
+	if (has_extended_decoration(ops[2], SPIRVCrossDecorationInterfaceMemberIndex))
+	{
+		// The base is itself a tagged access chain; continue from the index it recorded.
+		interface_index = get_extended_decoration(ops[2], SPIRVCrossDecorationInterfaceMemberIndex);
+	}
+	else
+	{
+		// Assume an access chain into a struct variable.
+		assert(var_type.basetype == SPIRType::Struct);
+		// The struct member index follows one index per array dimension of the variable.
+		auto &c = get(ops[3 + var_type.array.size()]);
+		interface_index =
+		    get_extended_member_decoration(var->self, c.scalar(), SPIRVCrossDecorationInterfaceMemberIndex);
+	}
+	// Accumulate indices. We'll have to skip over the one for the struct, if present, because we already accounted
+	// for that getting the base index.
+	for (uint32_t i = 3; i < length; ++i)
+	{
+		if (is_vector(*type) && is_scalar(result_type))
+		{
+			// We don't want to combine the next index. Actually, we need to save it
+			// so we know to apply a swizzle to the result of the interpolation.
+			set_extended_decoration(ops[1], SPIRVCrossDecorationInterpolantComponentExpr, ops[i]);
+			break;
+		}
+
+		auto *c = maybe_get(ops[i]);
+		if (!c || c->specialization)
+			SPIRV_CROSS_THROW("Trying to dynamically index into an array interface variable using pull-model "
+			                  "interpolation. This is currently unsupported.");
+
+		// Step the tracked type down one level so the vector/scalar test above sees the current element type.
+		if (type->parent_type)
+			type = &get(type->parent_type);
+		else if (type->basetype == SPIRType::Struct)
+			type = &get(type->member_types[c->scalar()]);
+
+		// This is the struct member index that was already folded into the base index above.
+		if (!has_extended_decoration(ops[2], SPIRVCrossDecorationInterfaceMemberIndex) &&
+		    i - 3 == var_type.array.size())
+			continue;
+
+		interface_index += c->scalar();
+	}
+	// Save this to the access chain itself so we can recover it later when calling an interpolation function.
+	set_extended_decoration(ops[1], SPIRVCrossDecorationInterfaceMemberIndex, interface_index);
+}
+
+// Override for MSL-specific syntax instructions
+void CompilerMSL::emit_instruction(const Instruction &instruction)
+{
+#define MSL_BOP(op) emit_binary_op(ops[0], ops[1], ops[2], ops[3], #op)
+#define MSL_BOP_CAST(op, type) \
+	emit_binary_op_cast(ops[0], ops[1], ops[2], ops[3], #op, type, opcode_is_sign_invariant(opcode))
+#define MSL_UOP(op) emit_unary_op(ops[0], ops[1], ops[2], #op)
+#define MSL_QFOP(op) emit_quaternary_func_op(ops[0], ops[1], ops[2], ops[3], ops[4], ops[5], #op)
+#define MSL_TFOP(op) emit_trinary_func_op(ops[0], ops[1], ops[2], ops[3], ops[4], #op)
+#define MSL_BFOP(op) emit_binary_func_op(ops[0], ops[1], ops[2], ops[3], #op)
+#define MSL_BFOP_CAST(op, type) \
+	emit_binary_func_op_cast(ops[0], ops[1], ops[2], ops[3], #op, type, opcode_is_sign_invariant(opcode))
+#define MSL_UFOP(op) emit_unary_func_op(ops[0], ops[1], ops[2], #op)
+#define MSL_UNORD_BOP(op) emit_binary_unord_op(ops[0], ops[1], ops[2], ops[3], #op)
+
+	auto ops = stream(instruction);
+	auto opcode = static_cast(instruction.op);
+
+	// If we need to do implicit bitcasts, make sure we do it with the correct type.
+ uint32_t integer_width = get_integer_width_for_instruction(instruction); + auto int_type = to_signed_basetype(integer_width); + auto uint_type = to_unsigned_basetype(integer_width); + + switch (opcode) + { + case OpLoad: + { + uint32_t id = ops[1]; + uint32_t ptr = ops[2]; + if (is_tessellation_shader()) + { + if (!emit_tessellation_io_load(ops[0], id, ptr)) + CompilerGLSL::emit_instruction(instruction); + } + else + { + // Sample mask input for Metal is not an array + if (BuiltIn(get_decoration(ptr, DecorationBuiltIn)) == BuiltInSampleMask) + set_decoration(id, DecorationBuiltIn, BuiltInSampleMask); + CompilerGLSL::emit_instruction(instruction); + } + break; + } + + // Comparisons + case OpIEqual: + MSL_BOP_CAST(==, int_type); + break; + + case OpLogicalEqual: + case OpFOrdEqual: + MSL_BOP(==); + break; + + case OpINotEqual: + MSL_BOP_CAST(!=, int_type); + break; + + case OpLogicalNotEqual: + case OpFOrdNotEqual: + MSL_BOP(!=); + break; + + case OpUGreaterThan: + MSL_BOP_CAST(>, uint_type); + break; + + case OpSGreaterThan: + MSL_BOP_CAST(>, int_type); + break; + + case OpFOrdGreaterThan: + MSL_BOP(>); + break; + + case OpUGreaterThanEqual: + MSL_BOP_CAST(>=, uint_type); + break; + + case OpSGreaterThanEqual: + MSL_BOP_CAST(>=, int_type); + break; + + case OpFOrdGreaterThanEqual: + MSL_BOP(>=); + break; + + case OpULessThan: + MSL_BOP_CAST(<, uint_type); + break; + + case OpSLessThan: + MSL_BOP_CAST(<, int_type); + break; + + case OpFOrdLessThan: + MSL_BOP(<); + break; + + case OpULessThanEqual: + MSL_BOP_CAST(<=, uint_type); + break; + + case OpSLessThanEqual: + MSL_BOP_CAST(<=, int_type); + break; + + case OpFOrdLessThanEqual: + MSL_BOP(<=); + break; + + case OpFUnordEqual: + MSL_UNORD_BOP(==); + break; + + case OpFUnordNotEqual: + MSL_UNORD_BOP(!=); + break; + + case OpFUnordGreaterThan: + MSL_UNORD_BOP(>); + break; + + case OpFUnordGreaterThanEqual: + MSL_UNORD_BOP(>=); + break; + + case OpFUnordLessThan: + MSL_UNORD_BOP(<); + break; + + case 
OpFUnordLessThanEqual: + MSL_UNORD_BOP(<=); + break; + + // Derivatives + case OpDPdx: + case OpDPdxFine: + case OpDPdxCoarse: + MSL_UFOP(dfdx); + register_control_dependent_expression(ops[1]); + break; + + case OpDPdy: + case OpDPdyFine: + case OpDPdyCoarse: + MSL_UFOP(dfdy); + register_control_dependent_expression(ops[1]); + break; + + case OpFwidth: + case OpFwidthCoarse: + case OpFwidthFine: + MSL_UFOP(fwidth); + register_control_dependent_expression(ops[1]); + break; + + // Bitfield + case OpBitFieldInsert: + { + emit_bitfield_insert_op(ops[0], ops[1], ops[2], ops[3], ops[4], ops[5], "insert_bits", SPIRType::UInt); + break; + } + + case OpBitFieldSExtract: + { + emit_trinary_func_op_bitextract(ops[0], ops[1], ops[2], ops[3], ops[4], "extract_bits", int_type, int_type, + SPIRType::UInt, SPIRType::UInt); + break; + } + + case OpBitFieldUExtract: + { + emit_trinary_func_op_bitextract(ops[0], ops[1], ops[2], ops[3], ops[4], "extract_bits", uint_type, uint_type, + SPIRType::UInt, SPIRType::UInt); + break; + } + + case OpBitReverse: + // BitReverse does not have issues with sign since result type must match input type. 
+ MSL_UFOP(reverse_bits); + break; + + case OpBitCount: + { + auto basetype = expression_type(ops[2]).basetype; + emit_unary_func_op_cast(ops[0], ops[1], ops[2], "popcount", basetype, basetype); + break; + } + + case OpFRem: + MSL_BFOP(fmod); + break; + + case OpFMul: + if (msl_options.invariant_float_math) + MSL_BFOP(spvFMul); + else + MSL_BOP(*); + break; + + case OpFAdd: + if (msl_options.invariant_float_math) + MSL_BFOP(spvFAdd); + else + MSL_BOP(+); + break; + + // Atomics + case OpAtomicExchange: + { + uint32_t result_type = ops[0]; + uint32_t id = ops[1]; + uint32_t ptr = ops[2]; + uint32_t mem_sem = ops[4]; + uint32_t val = ops[5]; + emit_atomic_func_op(result_type, id, "atomic_exchange_explicit", mem_sem, mem_sem, false, ptr, val); + break; + } + + case OpAtomicCompareExchange: + { + uint32_t result_type = ops[0]; + uint32_t id = ops[1]; + uint32_t ptr = ops[2]; + uint32_t mem_sem_pass = ops[4]; + uint32_t mem_sem_fail = ops[5]; + uint32_t val = ops[6]; + uint32_t comp = ops[7]; + emit_atomic_func_op(result_type, id, "atomic_compare_exchange_weak_explicit", mem_sem_pass, mem_sem_fail, true, + ptr, comp, true, false, val); + break; + } + + case OpAtomicCompareExchangeWeak: + SPIRV_CROSS_THROW("OpAtomicCompareExchangeWeak is only supported in kernel profile."); + + case OpAtomicLoad: + { + uint32_t result_type = ops[0]; + uint32_t id = ops[1]; + uint32_t ptr = ops[2]; + uint32_t mem_sem = ops[4]; + emit_atomic_func_op(result_type, id, "atomic_load_explicit", mem_sem, mem_sem, false, ptr, 0); + break; + } + + case OpAtomicStore: + { + uint32_t result_type = expression_type(ops[0]).self; + uint32_t id = ops[0]; + uint32_t ptr = ops[0]; + uint32_t mem_sem = ops[2]; + uint32_t val = ops[3]; + emit_atomic_func_op(result_type, id, "atomic_store_explicit", mem_sem, mem_sem, false, ptr, val); + break; + } + +#define MSL_AFMO_IMPL(op, valsrc, valconst) \ + do \ + { \ + uint32_t result_type = ops[0]; \ + uint32_t id = ops[1]; \ + uint32_t ptr = ops[2]; \ + uint32_t 
mem_sem = ops[4]; \ + uint32_t val = valsrc; \ + emit_atomic_func_op(result_type, id, "atomic_fetch_" #op "_explicit", mem_sem, mem_sem, false, ptr, val, \ + false, valconst); \ + } while (false) + +#define MSL_AFMO(op) MSL_AFMO_IMPL(op, ops[5], false) +#define MSL_AFMIO(op) MSL_AFMO_IMPL(op, 1, true) + + case OpAtomicIIncrement: + MSL_AFMIO(add); + break; + + case OpAtomicIDecrement: + MSL_AFMIO(sub); + break; + + case OpAtomicIAdd: + MSL_AFMO(add); + break; + + case OpAtomicISub: + MSL_AFMO(sub); + break; + + case OpAtomicSMin: + case OpAtomicUMin: + MSL_AFMO(min); + break; + + case OpAtomicSMax: + case OpAtomicUMax: + MSL_AFMO(max); + break; + + case OpAtomicAnd: + MSL_AFMO(and); + break; + + case OpAtomicOr: + MSL_AFMO(or); + break; + + case OpAtomicXor: + MSL_AFMO(xor); + break; + + // Images + + // Reads == Fetches in Metal + case OpImageRead: + { + // Mark that this shader reads from this image + uint32_t img_id = ops[2]; + auto &type = expression_type(img_id); + if (type.image.dim != DimSubpassData) + { + auto *p_var = maybe_get_backing_variable(img_id); + if (p_var && has_decoration(p_var->self, DecorationNonReadable)) + { + unset_decoration(p_var->self, DecorationNonReadable); + force_recompile(); + } + } + + emit_texture_op(instruction, false); + break; + } + + // Emulate texture2D atomic operations + case OpImageTexelPointer: + { + // When using the pointer, we need to know which variable it is actually loaded from. + auto *var = maybe_get_backing_variable(ops[2]); + if (var && atomic_image_vars.count(var->self)) + { + uint32_t result_type = ops[0]; + uint32_t id = ops[1]; + + std::string coord = to_expression(ops[3]); + auto &type = expression_type(ops[2]); + if (type.image.dim == Dim2D) + { + coord = join("spvImage2DAtomicCoord(", coord, ", ", to_expression(ops[2]), ")"); + } + + auto &e = set(id, join(to_expression(ops[2]), "_atomic[", coord, "]"), result_type, true); + e.loaded_from = var ? 
var->self : ID(0); + inherit_expression_dependencies(id, ops[3]); + } + else + { + uint32_t result_type = ops[0]; + uint32_t id = ops[1]; + auto &e = + set(id, join(to_expression(ops[2]), ", ", to_expression(ops[3])), result_type, true); + + // When using the pointer, we need to know which variable it is actually loaded from. + e.loaded_from = var ? var->self : ID(0); + inherit_expression_dependencies(id, ops[3]); + } + break; + } + + case OpImageWrite: + { + uint32_t img_id = ops[0]; + uint32_t coord_id = ops[1]; + uint32_t texel_id = ops[2]; + const uint32_t *opt = &ops[3]; + uint32_t length = instruction.length - 3; + + // Bypass pointers because we need the real image struct + auto &type = expression_type(img_id); + auto &img_type = get(type.self); + + // Ensure this image has been marked as being written to and force a + // recommpile so that the image type output will include write access + auto *p_var = maybe_get_backing_variable(img_id); + if (p_var && has_decoration(p_var->self, DecorationNonWritable)) + { + unset_decoration(p_var->self, DecorationNonWritable); + force_recompile(); + } + + bool forward = false; + uint32_t bias = 0; + uint32_t lod = 0; + uint32_t flags = 0; + + if (length) + { + flags = *opt++; + length--; + } + + auto test = [&](uint32_t &v, uint32_t flag) { + if (length && (flags & flag)) + { + v = *opt++; + length--; + } + }; + + test(bias, ImageOperandsBiasMask); + test(lod, ImageOperandsLodMask); + + auto &texel_type = expression_type(texel_id); + auto store_type = texel_type; + store_type.vecsize = 4; + + TextureFunctionArguments args = {}; + args.base.img = img_id; + args.base.imgtype = &img_type; + args.base.is_fetch = true; + args.coord = coord_id; + args.lod = lod; + statement(join(to_expression(img_id), ".write(", + remap_swizzle(store_type, texel_type.vecsize, to_expression(texel_id)), ", ", + CompilerMSL::to_function_args(args, &forward), ");")); + + if (p_var && variable_storage_is_aliased(*p_var)) + 
flush_all_aliased_variables(); + + break; + } + + case OpImageQuerySize: + case OpImageQuerySizeLod: + { + uint32_t rslt_type_id = ops[0]; + auto &rslt_type = get(rslt_type_id); + + uint32_t id = ops[1]; + + uint32_t img_id = ops[2]; + string img_exp = to_expression(img_id); + auto &img_type = expression_type(img_id); + Dim img_dim = img_type.image.dim; + bool img_is_array = img_type.image.arrayed; + + if (img_type.basetype != SPIRType::Image) + SPIRV_CROSS_THROW("Invalid type for OpImageQuerySize."); + + string lod; + if (opcode == OpImageQuerySizeLod) + { + // LOD index defaults to zero, so don't bother outputing level zero index + string decl_lod = to_expression(ops[3]); + if (decl_lod != "0") + lod = decl_lod; + } + + string expr = type_to_glsl(rslt_type) + "("; + expr += img_exp + ".get_width(" + lod + ")"; + + if (img_dim == Dim2D || img_dim == DimCube || img_dim == Dim3D) + expr += ", " + img_exp + ".get_height(" + lod + ")"; + + if (img_dim == Dim3D) + expr += ", " + img_exp + ".get_depth(" + lod + ")"; + + if (img_is_array) + { + expr += ", " + img_exp + ".get_array_size()"; + if (img_dim == DimCube && msl_options.emulate_cube_array) + expr += " / 6"; + } + + expr += ")"; + + emit_op(rslt_type_id, id, expr, should_forward(img_id)); + + break; + } + + case OpImageQueryLod: + { + if (!msl_options.supports_msl_version(2, 2)) + SPIRV_CROSS_THROW("ImageQueryLod is only supported on MSL 2.2 and up."); + uint32_t result_type = ops[0]; + uint32_t id = ops[1]; + uint32_t image_id = ops[2]; + uint32_t coord_id = ops[3]; + emit_uninitialized_temporary_expression(result_type, id); + + auto sampler_expr = to_sampler_expression(image_id); + auto *combined = maybe_get(image_id); + auto image_expr = combined ? to_expression(combined->image) : to_expression(image_id); + + // TODO: It is unclear if calculcate_clamped_lod also conditionally rounds + // the reported LOD based on the sampler. NEAREST miplevel should + // round the LOD, but LINEAR miplevel should not round. 
+ // Let's hope this does not become an issue ... + statement(to_expression(id), ".x = ", image_expr, ".calculate_clamped_lod(", sampler_expr, ", ", + to_expression(coord_id), ");"); + statement(to_expression(id), ".y = ", image_expr, ".calculate_unclamped_lod(", sampler_expr, ", ", + to_expression(coord_id), ");"); + register_control_dependent_expression(id); + break; + } + +#define MSL_ImgQry(qrytype) \ + do \ + { \ + uint32_t rslt_type_id = ops[0]; \ + auto &rslt_type = get(rslt_type_id); \ + uint32_t id = ops[1]; \ + uint32_t img_id = ops[2]; \ + string img_exp = to_expression(img_id); \ + string expr = type_to_glsl(rslt_type) + "(" + img_exp + ".get_num_" #qrytype "())"; \ + emit_op(rslt_type_id, id, expr, should_forward(img_id)); \ + } while (false) + + case OpImageQueryLevels: + MSL_ImgQry(mip_levels); + break; + + case OpImageQuerySamples: + MSL_ImgQry(samples); + break; + + case OpImage: + { + uint32_t result_type = ops[0]; + uint32_t id = ops[1]; + auto *combined = maybe_get(ops[2]); + + if (combined) + { + auto &e = emit_op(result_type, id, to_expression(combined->image), true, true); + auto *var = maybe_get_backing_variable(combined->image); + if (var) + e.loaded_from = var->self; + } + else + { + auto *var = maybe_get_backing_variable(ops[2]); + SPIRExpression *e; + if (var && has_extended_decoration(var->self, SPIRVCrossDecorationDynamicImageSampler)) + e = &emit_op(result_type, id, join(to_expression(ops[2]), ".plane0"), true, true); + else + e = &emit_op(result_type, id, to_expression(ops[2]), true, true); + if (var) + e->loaded_from = var->self; + } + break; + } + + // Casting + case OpQuantizeToF16: + { + uint32_t result_type = ops[0]; + uint32_t id = ops[1]; + uint32_t arg = ops[2]; + + string exp; + auto &type = get(result_type); + + switch (type.vecsize) + { + case 1: + exp = join("float(half(", to_expression(arg), "))"); + break; + case 2: + exp = join("float2(half2(", to_expression(arg), "))"); + break; + case 3: + exp = join("float3(half3(", 
to_expression(arg), "))"); + break; + case 4: + exp = join("float4(half4(", to_expression(arg), "))"); + break; + default: + SPIRV_CROSS_THROW("Illegal argument to OpQuantizeToF16."); + } + + emit_op(result_type, id, exp, should_forward(arg)); + break; + } + + case OpInBoundsAccessChain: + case OpAccessChain: + case OpPtrAccessChain: + if (is_tessellation_shader()) + { + if (!emit_tessellation_access_chain(ops, instruction.length)) + CompilerGLSL::emit_instruction(instruction); + } + else + CompilerGLSL::emit_instruction(instruction); + fix_up_interpolant_access_chain(ops, instruction.length); + break; + + case OpStore: + if (is_out_of_bounds_tessellation_level(ops[0])) + break; + + if (maybe_emit_array_assignment(ops[0], ops[1])) + break; + + CompilerGLSL::emit_instruction(instruction); + break; + + // Compute barriers + case OpMemoryBarrier: + emit_barrier(0, ops[0], ops[1]); + break; + + case OpControlBarrier: + // In GLSL a memory barrier is often followed by a control barrier. + // But in MSL, memory barriers are also control barriers, so don't + // emit a simple control barrier if a memory barrier has just been emitted. 
+ if (previous_instruction_opcode != OpMemoryBarrier) + emit_barrier(ops[0], ops[1], ops[2]); + break; + + case OpOuterProduct: + { + uint32_t result_type = ops[0]; + uint32_t id = ops[1]; + uint32_t a = ops[2]; + uint32_t b = ops[3]; + + auto &type = get(result_type); + string expr = type_to_glsl_constructor(type); + expr += "("; + for (uint32_t col = 0; col < type.columns; col++) + { + expr += to_enclosed_expression(a); + expr += " * "; + expr += to_extract_component_expression(b, col); + if (col + 1 < type.columns) + expr += ", "; + } + expr += ")"; + emit_op(result_type, id, expr, should_forward(a) && should_forward(b)); + inherit_expression_dependencies(id, a); + inherit_expression_dependencies(id, b); + break; + } + + case OpVectorTimesMatrix: + case OpMatrixTimesVector: + { + if (!msl_options.invariant_float_math) + { + CompilerGLSL::emit_instruction(instruction); + break; + } + + // If the matrix needs transpose, just flip the multiply order. + auto *e = maybe_get(ops[opcode == OpMatrixTimesVector ? 
2 : 3]); + if (e && e->need_transpose) + { + e->need_transpose = false; + string expr; + + if (opcode == OpMatrixTimesVector) + { + expr = join("spvFMulVectorMatrix(", to_enclosed_unpacked_expression(ops[3]), ", ", + to_unpacked_row_major_matrix_expression(ops[2]), ")"); + } + else + { + expr = join("spvFMulMatrixVector(", to_unpacked_row_major_matrix_expression(ops[3]), ", ", + to_enclosed_unpacked_expression(ops[2]), ")"); + } + + bool forward = should_forward(ops[2]) && should_forward(ops[3]); + emit_op(ops[0], ops[1], expr, forward); + e->need_transpose = true; + inherit_expression_dependencies(ops[1], ops[2]); + inherit_expression_dependencies(ops[1], ops[3]); + } + else + { + if (opcode == OpMatrixTimesVector) + MSL_BFOP(spvFMulMatrixVector); + else + MSL_BFOP(spvFMulVectorMatrix); + } + break; + } + + case OpMatrixTimesMatrix: + { + if (!msl_options.invariant_float_math) + { + CompilerGLSL::emit_instruction(instruction); + break; + } + + auto *a = maybe_get(ops[2]); + auto *b = maybe_get(ops[3]); + + // If both matrices need transpose, we can multiply in flipped order and tag the expression as transposed. + // a^T * b^T = (b * a)^T. 
+ if (a && b && a->need_transpose && b->need_transpose) + { + a->need_transpose = false; + b->need_transpose = false; + + auto expr = + join("spvFMulMatrixMatrix(", enclose_expression(to_unpacked_row_major_matrix_expression(ops[3])), ", ", + enclose_expression(to_unpacked_row_major_matrix_expression(ops[2])), ")"); + + bool forward = should_forward(ops[2]) && should_forward(ops[3]); + auto &e = emit_op(ops[0], ops[1], expr, forward); + e.need_transpose = true; + a->need_transpose = true; + b->need_transpose = true; + inherit_expression_dependencies(ops[1], ops[2]); + inherit_expression_dependencies(ops[1], ops[3]); + } + else + MSL_BFOP(spvFMulMatrixMatrix); + + break; + } + + case OpIAddCarry: + case OpISubBorrow: + { + uint32_t result_type = ops[0]; + uint32_t result_id = ops[1]; + uint32_t op0 = ops[2]; + uint32_t op1 = ops[3]; + auto &type = get(result_type); + emit_uninitialized_temporary_expression(result_type, result_id); + + auto &res_type = get(type.member_types[1]); + if (opcode == OpIAddCarry) + { + statement(to_expression(result_id), ".", to_member_name(type, 0), " = ", to_enclosed_expression(op0), " + ", + to_enclosed_expression(op1), ";"); + statement(to_expression(result_id), ".", to_member_name(type, 1), " = select(", type_to_glsl(res_type), + "(1), ", type_to_glsl(res_type), "(0), ", to_expression(result_id), ".", to_member_name(type, 0), + " >= max(", to_expression(op0), ", ", to_expression(op1), "));"); + } + else + { + statement(to_expression(result_id), ".", to_member_name(type, 0), " = ", to_enclosed_expression(op0), " - ", + to_enclosed_expression(op1), ";"); + statement(to_expression(result_id), ".", to_member_name(type, 1), " = select(", type_to_glsl(res_type), + "(1), ", type_to_glsl(res_type), "(0), ", to_enclosed_expression(op0), + " >= ", to_enclosed_expression(op1), ");"); + } + break; + } + + case OpUMulExtended: + case OpSMulExtended: + { + uint32_t result_type = ops[0]; + uint32_t result_id = ops[1]; + uint32_t op0 = ops[2]; + 
uint32_t op1 = ops[3]; + auto &type = get(result_type); + emit_uninitialized_temporary_expression(result_type, result_id); + + statement(to_expression(result_id), ".", to_member_name(type, 0), " = ", to_enclosed_expression(op0), " * ", + to_enclosed_expression(op1), ";"); + statement(to_expression(result_id), ".", to_member_name(type, 1), " = mulhi(", to_expression(op0), ", ", + to_expression(op1), ");"); + break; + } + + case OpArrayLength: + { + auto &type = expression_type(ops[2]); + uint32_t offset = type_struct_member_offset(type, ops[3]); + uint32_t stride = type_struct_member_array_stride(type, ops[3]); + + auto expr = join("(", to_buffer_size_expression(ops[2]), " - ", offset, ") / ", stride); + emit_op(ops[0], ops[1], expr, true); + break; + } + + // SPV_INTEL_shader_integer_functions2 + case OpUCountLeadingZerosINTEL: + MSL_UFOP(clz); + break; + + case OpUCountTrailingZerosINTEL: + MSL_UFOP(ctz); + break; + + case OpAbsISubINTEL: + case OpAbsUSubINTEL: + MSL_BFOP(absdiff); + break; + + case OpIAddSatINTEL: + case OpUAddSatINTEL: + MSL_BFOP(addsat); + break; + + case OpIAverageINTEL: + case OpUAverageINTEL: + MSL_BFOP(hadd); + break; + + case OpIAverageRoundedINTEL: + case OpUAverageRoundedINTEL: + MSL_BFOP(rhadd); + break; + + case OpISubSatINTEL: + case OpUSubSatINTEL: + MSL_BFOP(subsat); + break; + + case OpIMul32x16INTEL: + { + uint32_t result_type = ops[0]; + uint32_t id = ops[1]; + uint32_t a = ops[2], b = ops[3]; + bool forward = should_forward(a) && should_forward(b); + emit_op(result_type, id, join("int(short(", to_expression(a), ")) * int(short(", to_expression(b), "))"), + forward); + inherit_expression_dependencies(id, a); + inherit_expression_dependencies(id, b); + break; + } + + case OpUMul32x16INTEL: + { + uint32_t result_type = ops[0]; + uint32_t id = ops[1]; + uint32_t a = ops[2], b = ops[3]; + bool forward = should_forward(a) && should_forward(b); + emit_op(result_type, id, join("uint(ushort(", to_expression(a), ")) * uint(ushort(", 
to_expression(b), "))"), + forward); + inherit_expression_dependencies(id, a); + inherit_expression_dependencies(id, b); + break; + } + + // SPV_EXT_demote_to_helper_invocation + case OpDemoteToHelperInvocationEXT: + if (!msl_options.supports_msl_version(2, 3)) + SPIRV_CROSS_THROW("discard_fragment() does not formally have demote semantics until MSL 2.3."); + CompilerGLSL::emit_instruction(instruction); + break; + + case OpIsHelperInvocationEXT: + if (msl_options.is_ios() && !msl_options.supports_msl_version(2, 3)) + SPIRV_CROSS_THROW("simd_is_helper_thread() requires MSL 2.3 on iOS."); + else if (msl_options.is_macos() && !msl_options.supports_msl_version(2, 1)) + SPIRV_CROSS_THROW("simd_is_helper_thread() requires MSL 2.1 on macOS."); + emit_op(ops[0], ops[1], "simd_is_helper_thread()", false); + break; + + case OpBeginInvocationInterlockEXT: + case OpEndInvocationInterlockEXT: + if (!msl_options.supports_msl_version(2, 0)) + SPIRV_CROSS_THROW("Raster order groups require MSL 2.0."); + break; // Nothing to do in the body + + default: + CompilerGLSL::emit_instruction(instruction); + break; + } + + previous_instruction_opcode = opcode; +} + +void CompilerMSL::emit_texture_op(const Instruction &i, bool sparse) +{ + if (sparse) + SPIRV_CROSS_THROW("Sparse feedback not yet supported in MSL."); + + if (msl_options.use_framebuffer_fetch_subpasses) + { + auto *ops = stream(i); + + uint32_t result_type_id = ops[0]; + uint32_t id = ops[1]; + uint32_t img = ops[2]; + + auto &type = expression_type(img); + auto &imgtype = get(type.self); + + // Use Metal's native frame-buffer fetch API for subpass inputs. + if (imgtype.image.dim == DimSubpassData) + { + // Subpass inputs cannot be invalidated, + // so just forward the expression directly. 
+			string expr = to_expression(img);
+			emit_op(result_type_id, id, expr, true);
+			return;
+		}
+	}
+
+	// Fallback to default implementation
+	CompilerGLSL::emit_texture_op(i, sparse);
+}
+
+// Emits an MSL barrier statement followed by the flushes that must accompany it.
+// Each argument is an ID that is resolved with evaluate_constant_u32(), or 0 when absent:
+//   id_exe_scope - execution scope (treated as ScopeInvocation when 0)
+//   id_mem_scope - memory scope (treated as ScopeInvocation when 0)
+//   id_mem_sem   - memory semantics mask (treated as MemorySemanticsMaskNone when 0)
+// Nothing is emitted outside compute and tessellation control shaders, nor when
+// subgroups are emulated and only a subgroup-or-narrower scope with no semantics is requested.
+void CompilerMSL::emit_barrier(uint32_t id_exe_scope, uint32_t id_mem_scope, uint32_t id_mem_sem)
+{
+	if (get_execution_model() != ExecutionModelGLCompute && get_execution_model() != ExecutionModelTessellationControl)
+		return;
+
+	uint32_t exe_scope = id_exe_scope ? evaluate_constant_u32(id_exe_scope) : uint32_t(ScopeInvocation);
+	uint32_t mem_scope = id_mem_scope ? evaluate_constant_u32(id_mem_scope) : uint32_t(ScopeInvocation);
+	// Use the wider of the two scopes (smaller value)
+	exe_scope = min(exe_scope, mem_scope);
+
+	if (msl_options.emulate_subgroups && exe_scope >= ScopeSubgroup && !id_mem_sem)
+		// In this case, we assume a "subgroup" size of 1. The barrier, then, is a noop.
+		return;
+
+	// Pick the barrier function: simdgroup_barrier is only selected on iOS with MSL 1.2+
+	// or anywhere with MSL 2+, and only when the scope is subgroup or narrower.
+	string bar_stmt;
+	if ((msl_options.is_ios() && msl_options.supports_msl_version(1, 2)) || msl_options.supports_msl_version(2))
+		bar_stmt = exe_scope < ScopeSubgroup ? "threadgroup_barrier" : "simdgroup_barrier";
+	else
+		bar_stmt = "threadgroup_barrier";
+	bar_stmt += "(";
+
+	uint32_t mem_sem = id_mem_sem ? evaluate_constant_u32(id_mem_sem) : uint32_t(MemorySemanticsMaskNone);
+
+	// Use the | operator to combine flags if we can.
+	if (msl_options.supports_msl_version(1, 2))
+	{
+		string mem_flags = "";
+		// For tesc shaders, this also affects objects in the Output storage class.
+		// Since in Metal, these are placed in a device buffer, we have to sync device memory here.
+		if (get_execution_model() == ExecutionModelTessellationControl ||
+		    (mem_sem & (MemorySemanticsUniformMemoryMask | MemorySemanticsCrossWorkgroupMemoryMask)))
+			mem_flags += "mem_flags::mem_device";
+
+		// Fix tessellation patch function processing
+		// (tesc shaders therefore always request mem_threadgroup as well, whatever mem_sem says).
+		if (get_execution_model() == ExecutionModelTessellationControl ||
+		    (mem_sem & (MemorySemanticsSubgroupMemoryMask | MemorySemanticsWorkgroupMemoryMask)))
+		{
+			if (!mem_flags.empty())
+				mem_flags += " | ";
+			mem_flags += "mem_flags::mem_threadgroup";
+		}
+		if (mem_sem & MemorySemanticsImageMemoryMask)
+		{
+			if (!mem_flags.empty())
+				mem_flags += " | ";
+			mem_flags += "mem_flags::mem_texture";
+		}
+
+		if (mem_flags.empty())
+			mem_flags = "mem_flags::mem_none";
+
+		bar_stmt += mem_flags;
+	}
+	else
+	{
+		// Without the | operator exactly one flag is emitted; the first matching case below wins.
+		if ((mem_sem & (MemorySemanticsUniformMemoryMask | MemorySemanticsCrossWorkgroupMemoryMask)) &&
+		    (mem_sem & (MemorySemanticsSubgroupMemoryMask | MemorySemanticsWorkgroupMemoryMask)))
+			bar_stmt += "mem_flags::mem_device_and_threadgroup";
+		else if (mem_sem & (MemorySemanticsUniformMemoryMask | MemorySemanticsCrossWorkgroupMemoryMask))
+			bar_stmt += "mem_flags::mem_device";
+		else if (mem_sem & (MemorySemanticsSubgroupMemoryMask | MemorySemanticsWorkgroupMemoryMask))
+			bar_stmt += "mem_flags::mem_threadgroup";
+		else if (mem_sem & MemorySemanticsImageMemoryMask)
+			bar_stmt += "mem_flags::mem_texture";
+		else
+			bar_stmt += "mem_flags::mem_none";
+	}
+
+	bar_stmt += ");";
+
+	statement(bar_stmt);
+
+	// Once the barrier is emitted, flush control-dependent expressions of the current block
+	// and all active variables.
+	assert(current_emitting_block);
+	flush_control_dependent_expressions(current_emitting_block->self);
+	flush_all_active_variables();
+}
+
+void CompilerMSL::emit_array_copy(const string &lhs, uint32_t rhs_id, StorageClass lhs_storage,
+                                  StorageClass rhs_storage)
+{
+	// Allow Metal to use the array template to make arrays a value type.
+	// This, however, cannot be used for threadgroup address specifiers, so consider the custom array copy as fallback.
+ bool lhs_thread = (lhs_storage == StorageClassOutput || lhs_storage == StorageClassFunction || + lhs_storage == StorageClassGeneric || lhs_storage == StorageClassPrivate); + bool rhs_thread = (rhs_storage == StorageClassInput || rhs_storage == StorageClassFunction || + rhs_storage == StorageClassGeneric || rhs_storage == StorageClassPrivate); + + // If threadgroup storage qualifiers are *not* used: + // Avoid spvCopy* wrapper functions; Otherwise, spvUnsafeArray<> template cannot be used with that storage qualifier. + if (lhs_thread && rhs_thread && !using_builtin_array()) + { + statement(lhs, " = ", to_expression(rhs_id), ";"); + } + else + { + // Assignment from an array initializer is fine. + auto &type = expression_type(rhs_id); + auto *var = maybe_get_backing_variable(rhs_id); + + // Unfortunately, we cannot template on address space in MSL, + // so explicit address space redirection it is ... + bool is_constant = false; + if (ir.ids[rhs_id].get_type() == TypeConstant) + { + is_constant = true; + } + else if (var && var->remapped_variable && var->statically_assigned && + ir.ids[var->static_expression].get_type() == TypeConstant) + { + is_constant = true; + } + else if (rhs_storage == StorageClassUniform) + { + is_constant = true; + } + + // For the case where we have OpLoad triggering an array copy, + // we cannot easily detect this case ahead of time since it's + // context dependent. We might have to force a recompile here + // if this is the only use of array copies in our shader. 
+ if (type.array.size() > 1) + { + if (type.array.size() > kArrayCopyMultidimMax) + SPIRV_CROSS_THROW("Cannot support this many dimensions for arrays of arrays."); + auto func = static_cast(SPVFuncImplArrayCopyMultidimBase + type.array.size()); + add_spv_func_and_recompile(func); + } + else + add_spv_func_and_recompile(SPVFuncImplArrayCopy); + + const char *tag = nullptr; + if (lhs_thread && is_constant) + tag = "FromConstantToStack"; + else if (lhs_storage == StorageClassWorkgroup && is_constant) + tag = "FromConstantToThreadGroup"; + else if (lhs_thread && rhs_thread) + tag = "FromStackToStack"; + else if (lhs_storage == StorageClassWorkgroup && rhs_thread) + tag = "FromStackToThreadGroup"; + else if (lhs_thread && rhs_storage == StorageClassWorkgroup) + tag = "FromThreadGroupToStack"; + else if (lhs_storage == StorageClassWorkgroup && rhs_storage == StorageClassWorkgroup) + tag = "FromThreadGroupToThreadGroup"; + else if (lhs_storage == StorageClassStorageBuffer && rhs_storage == StorageClassStorageBuffer) + tag = "FromDeviceToDevice"; + else if (lhs_storage == StorageClassStorageBuffer && is_constant) + tag = "FromConstantToDevice"; + else if (lhs_storage == StorageClassStorageBuffer && rhs_storage == StorageClassWorkgroup) + tag = "FromThreadGroupToDevice"; + else if (lhs_storage == StorageClassStorageBuffer && rhs_thread) + tag = "FromStackToDevice"; + else if (lhs_storage == StorageClassWorkgroup && rhs_storage == StorageClassStorageBuffer) + tag = "FromDeviceToThreadGroup"; + else if (lhs_thread && rhs_storage == StorageClassStorageBuffer) + tag = "FromDeviceToStack"; + else + SPIRV_CROSS_THROW("Unknown storage class used for copying arrays."); + + // Pass internal array of spvUnsafeArray<> into wrapper functions + if (lhs_thread && !msl_options.force_native_arrays) + statement("spvArrayCopy", tag, type.array.size(), "(", lhs, ".elements, ", to_expression(rhs_id), ");"); + else if (rhs_thread && !msl_options.force_native_arrays) + statement("spvArrayCopy", 
tag, type.array.size(), "(", lhs, ", ", to_expression(rhs_id), ".elements);"); + else + statement("spvArrayCopy", tag, type.array.size(), "(", lhs, ", ", to_expression(rhs_id), ");"); + } +} + +// Since MSL does not allow arrays to be copied via simple variable assignment, +// if the LHS and RHS represent an assignment of an entire array, it must be +// implemented by calling an array copy function. +// Returns whether the struct assignment was emitted. +bool CompilerMSL::maybe_emit_array_assignment(uint32_t id_lhs, uint32_t id_rhs) +{ + // We only care about assignments of an entire array + auto &type = expression_type(id_rhs); + if (type.array.size() == 0) + return false; + + auto *var = maybe_get(id_lhs); + + // Is this a remapped, static constant? Don't do anything. + if (var && var->remapped_variable && var->statically_assigned) + return true; + + if (ir.ids[id_rhs].get_type() == TypeConstant && var && var->deferred_declaration) + { + // Special case, if we end up declaring a variable when assigning the constant array, + // we can avoid the copy by directly assigning the constant expression. + // This is likely necessary to be able to use a variable as a true look-up table, as it is unlikely + // the compiler will be able to optimize the spvArrayCopy() into a constant LUT. + // After a variable has been declared, we can no longer assign constant arrays in MSL unfortunately. + statement(to_expression(id_lhs), " = ", constant_expression(get(id_rhs)), ";"); + return true; + } + + // Ensure the LHS variable has been declared + auto *p_v_lhs = maybe_get_backing_variable(id_lhs); + if (p_v_lhs) + flush_variable_declaration(p_v_lhs->self); + + emit_array_copy(to_expression(id_lhs), id_rhs, get_expression_effective_storage_class(id_lhs), + get_expression_effective_storage_class(id_rhs)); + register_write(id_lhs); + + return true; +} + +// Emits one of the atomic functions. 
In MSL, the atomic functions operate on pointers +void CompilerMSL::emit_atomic_func_op(uint32_t result_type, uint32_t result_id, const char *op, uint32_t mem_order_1, + uint32_t mem_order_2, bool has_mem_order_2, uint32_t obj, uint32_t op1, + bool op1_is_pointer, bool op1_is_literal, uint32_t op2) +{ + string exp = string(op) + "("; + + auto &type = get_pointee_type(expression_type(obj)); + exp += "("; + auto *var = maybe_get_backing_variable(obj); + if (!var) + SPIRV_CROSS_THROW("No backing variable for atomic operation."); + + // Emulate texture2D atomic operations + const auto &res_type = get(var->basetype); + if (res_type.storage == StorageClassUniformConstant && res_type.basetype == SPIRType::Image) + { + exp += "device"; + } + else + { + exp += get_argument_address_space(*var); + } + + exp += " atomic_"; + exp += type_to_glsl(type); + exp += "*)"; + + exp += "&"; + exp += to_enclosed_expression(obj); + + bool is_atomic_compare_exchange_strong = op1_is_pointer && op1; + + if (is_atomic_compare_exchange_strong) + { + assert(strcmp(op, "atomic_compare_exchange_weak_explicit") == 0); + assert(op2); + assert(has_mem_order_2); + exp += ", &"; + exp += to_name(result_id); + exp += ", "; + exp += to_expression(op2); + exp += ", "; + exp += get_memory_order(mem_order_1); + exp += ", "; + exp += get_memory_order(mem_order_2); + exp += ")"; + + // MSL only supports the weak atomic compare exchange, so emit a CAS loop here. + // The MSL function returns false if the atomic write fails OR the comparison test fails, + // so we must validate that it wasn't the comparison test that failed before continuing + // the CAS loop, otherwise it will loop infinitely, with the comparison test always failing. + // The function updates the comparitor value from the memory value, so the additional + // comparison test evaluates the memory value against the expected value. 
+ emit_uninitialized_temporary_expression(result_type, result_id); + statement("do"); + begin_scope(); + statement(to_name(result_id), " = ", to_expression(op1), ";"); + end_scope_decl(join("while (!", exp, " && ", to_name(result_id), " == ", to_enclosed_expression(op1), ")")); + } + else + { + assert(strcmp(op, "atomic_compare_exchange_weak_explicit") != 0); + if (op1) + { + if (op1_is_literal) + exp += join(", ", op1); + else + exp += ", " + to_expression(op1); + } + if (op2) + exp += ", " + to_expression(op2); + + exp += string(", ") + get_memory_order(mem_order_1); + if (has_mem_order_2) + exp += string(", ") + get_memory_order(mem_order_2); + + exp += ")"; + + if (strcmp(op, "atomic_store_explicit") != 0) + emit_op(result_type, result_id, exp, false); + else + statement(exp, ";"); + } + + flush_all_atomic_capable_variables(); +} + +// Metal only supports relaxed memory order for now +const char *CompilerMSL::get_memory_order(uint32_t) +{ + return "memory_order_relaxed"; +} + +// Override for MSL-specific extension syntax instructions +void CompilerMSL::emit_glsl_op(uint32_t result_type, uint32_t id, uint32_t eop, const uint32_t *args, uint32_t count) +{ + auto op = static_cast(eop); + + // If we need to do implicit bitcasts, make sure we do it with the correct type. + uint32_t integer_width = get_integer_width_for_glsl_instruction(op, args, count); + auto int_type = to_signed_basetype(integer_width); + auto uint_type = to_unsigned_basetype(integer_width); + + switch (op) + { + case GLSLstd450Atan2: + emit_binary_func_op(result_type, id, args[0], args[1], "atan2"); + break; + case GLSLstd450InverseSqrt: + emit_unary_func_op(result_type, id, args[0], "rsqrt"); + break; + case GLSLstd450RoundEven: + emit_unary_func_op(result_type, id, args[0], "rint"); + break; + + case GLSLstd450FindILsb: + { + // In this template version of findLSB, we return T. 
+ auto basetype = expression_type(args[0]).basetype; + emit_unary_func_op_cast(result_type, id, args[0], "spvFindLSB", basetype, basetype); + break; + } + + case GLSLstd450FindSMsb: + emit_unary_func_op_cast(result_type, id, args[0], "spvFindSMSB", int_type, int_type); + break; + + case GLSLstd450FindUMsb: + emit_unary_func_op_cast(result_type, id, args[0], "spvFindUMSB", uint_type, uint_type); + break; + + case GLSLstd450PackSnorm4x8: + emit_unary_func_op(result_type, id, args[0], "pack_float_to_snorm4x8"); + break; + case GLSLstd450PackUnorm4x8: + emit_unary_func_op(result_type, id, args[0], "pack_float_to_unorm4x8"); + break; + case GLSLstd450PackSnorm2x16: + emit_unary_func_op(result_type, id, args[0], "pack_float_to_snorm2x16"); + break; + case GLSLstd450PackUnorm2x16: + emit_unary_func_op(result_type, id, args[0], "pack_float_to_unorm2x16"); + break; + + case GLSLstd450PackHalf2x16: + { + auto expr = join("as_type(half2(", to_expression(args[0]), "))"); + emit_op(result_type, id, expr, should_forward(args[0])); + inherit_expression_dependencies(id, args[0]); + break; + } + + case GLSLstd450UnpackSnorm4x8: + emit_unary_func_op(result_type, id, args[0], "unpack_snorm4x8_to_float"); + break; + case GLSLstd450UnpackUnorm4x8: + emit_unary_func_op(result_type, id, args[0], "unpack_unorm4x8_to_float"); + break; + case GLSLstd450UnpackSnorm2x16: + emit_unary_func_op(result_type, id, args[0], "unpack_snorm2x16_to_float"); + break; + case GLSLstd450UnpackUnorm2x16: + emit_unary_func_op(result_type, id, args[0], "unpack_unorm2x16_to_float"); + break; + + case GLSLstd450UnpackHalf2x16: + { + auto expr = join("float2(as_type(", to_expression(args[0]), "))"); + emit_op(result_type, id, expr, should_forward(args[0])); + inherit_expression_dependencies(id, args[0]); + break; + } + + case GLSLstd450PackDouble2x32: + emit_unary_func_op(result_type, id, args[0], "unsupported_GLSLstd450PackDouble2x32"); // Currently unsupported + break; + case GLSLstd450UnpackDouble2x32: + 
emit_unary_func_op(result_type, id, args[0], "unsupported_GLSLstd450UnpackDouble2x32"); // Currently unsupported + break; + + case GLSLstd450MatrixInverse: + { + auto &mat_type = get(result_type); + switch (mat_type.columns) + { + case 2: + emit_unary_func_op(result_type, id, args[0], "spvInverse2x2"); + break; + case 3: + emit_unary_func_op(result_type, id, args[0], "spvInverse3x3"); + break; + case 4: + emit_unary_func_op(result_type, id, args[0], "spvInverse4x4"); + break; + default: + break; + } + break; + } + + case GLSLstd450FMin: + // If the result type isn't float, don't bother calling the specific + // precise::/fast:: version. Metal doesn't have those for half and + // double types. + if (get(result_type).basetype != SPIRType::Float) + emit_binary_func_op(result_type, id, args[0], args[1], "min"); + else + emit_binary_func_op(result_type, id, args[0], args[1], "fast::min"); + break; + + case GLSLstd450FMax: + if (get(result_type).basetype != SPIRType::Float) + emit_binary_func_op(result_type, id, args[0], args[1], "max"); + else + emit_binary_func_op(result_type, id, args[0], args[1], "fast::max"); + break; + + case GLSLstd450FClamp: + // TODO: If args[1] is 0 and args[2] is 1, emit a saturate() call. 
+ if (get(result_type).basetype != SPIRType::Float) + emit_trinary_func_op(result_type, id, args[0], args[1], args[2], "clamp"); + else + emit_trinary_func_op(result_type, id, args[0], args[1], args[2], "fast::clamp"); + break; + + case GLSLstd450NMin: + if (get(result_type).basetype != SPIRType::Float) + emit_binary_func_op(result_type, id, args[0], args[1], "min"); + else + emit_binary_func_op(result_type, id, args[0], args[1], "precise::min"); + break; + + case GLSLstd450NMax: + if (get(result_type).basetype != SPIRType::Float) + emit_binary_func_op(result_type, id, args[0], args[1], "max"); + else + emit_binary_func_op(result_type, id, args[0], args[1], "precise::max"); + break; + + case GLSLstd450NClamp: + // TODO: If args[1] is 0 and args[2] is 1, emit a saturate() call. + if (get(result_type).basetype != SPIRType::Float) + emit_trinary_func_op(result_type, id, args[0], args[1], args[2], "clamp"); + else + emit_trinary_func_op(result_type, id, args[0], args[1], args[2], "precise::clamp"); + break; + + case GLSLstd450InterpolateAtCentroid: + { + // We can't just emit the expression normally, because the qualified name contains a call to the default + // interpolate method, or refers to a local variable. We saved the interface index we need; use it to construct + // the base for the method call. 
+ uint32_t interface_index = get_extended_decoration(args[0], SPIRVCrossDecorationInterfaceMemberIndex); + string component; + if (has_extended_decoration(args[0], SPIRVCrossDecorationInterpolantComponentExpr)) + { + uint32_t index_expr = get_extended_decoration(args[0], SPIRVCrossDecorationInterpolantComponentExpr); + auto *c = maybe_get(index_expr); + if (!c || c->specialization) + component = join("[", to_expression(index_expr), "]"); + else + component = join(".", index_to_swizzle(c->scalar())); + } + emit_op(result_type, id, + join(to_name(stage_in_var_id), ".", to_member_name(get_stage_in_struct_type(), interface_index), + ".interpolate_at_centroid()", component), + should_forward(args[0])); + break; + } + + case GLSLstd450InterpolateAtSample: + { + uint32_t interface_index = get_extended_decoration(args[0], SPIRVCrossDecorationInterfaceMemberIndex); + string component; + if (has_extended_decoration(args[0], SPIRVCrossDecorationInterpolantComponentExpr)) + { + uint32_t index_expr = get_extended_decoration(args[0], SPIRVCrossDecorationInterpolantComponentExpr); + auto *c = maybe_get(index_expr); + if (!c || c->specialization) + component = join("[", to_expression(index_expr), "]"); + else + component = join(".", index_to_swizzle(c->scalar())); + } + emit_op(result_type, id, + join(to_name(stage_in_var_id), ".", to_member_name(get_stage_in_struct_type(), interface_index), + ".interpolate_at_sample(", to_expression(args[1]), ")", component), + should_forward(args[0]) && should_forward(args[1])); + break; + } + + case GLSLstd450InterpolateAtOffset: + { + uint32_t interface_index = get_extended_decoration(args[0], SPIRVCrossDecorationInterfaceMemberIndex); + string component; + if (has_extended_decoration(args[0], SPIRVCrossDecorationInterpolantComponentExpr)) + { + uint32_t index_expr = get_extended_decoration(args[0], SPIRVCrossDecorationInterpolantComponentExpr); + auto *c = maybe_get(index_expr); + if (!c || c->specialization) + component = join("[", 
to_expression(index_expr), "]"); + else + component = join(".", index_to_swizzle(c->scalar())); + } + // Like Direct3D, Metal puts the (0, 0) at the upper-left corner, not the center as SPIR-V and GLSL do. + // Offset the offset by (1/2 - 1/16), or 0.4375, to compensate for this. + // It has to be (1/2 - 1/16) and not 1/2, or several CTS tests subtly break on Intel. + emit_op(result_type, id, + join(to_name(stage_in_var_id), ".", to_member_name(get_stage_in_struct_type(), interface_index), + ".interpolate_at_offset(", to_expression(args[1]), " + 0.4375)", component), + should_forward(args[0]) && should_forward(args[1])); + break; + } + + case GLSLstd450Distance: + // MSL does not support scalar versions here. + if (expression_type(args[0]).vecsize == 1) + { + // Equivalent to length(a - b) -> abs(a - b). + emit_op(result_type, id, + join("abs(", to_enclosed_unpacked_expression(args[0]), " - ", + to_enclosed_unpacked_expression(args[1]), ")"), + should_forward(args[0]) && should_forward(args[1])); + inherit_expression_dependencies(id, args[0]); + inherit_expression_dependencies(id, args[1]); + } + else + CompilerGLSL::emit_glsl_op(result_type, id, eop, args, count); + break; + + case GLSLstd450Length: + // MSL does not support scalar versions here. + if (expression_type(args[0]).vecsize == 1) + { + // Equivalent to abs(). + emit_unary_func_op(result_type, id, args[0], "abs"); + } + else + CompilerGLSL::emit_glsl_op(result_type, id, eop, args, count); + break; + + case GLSLstd450Normalize: + // MSL does not support scalar versions here. + if (expression_type(args[0]).vecsize == 1) + { + // Returns -1 or 1 for valid input, sign() does the job. 
+ emit_unary_func_op(result_type, id, args[0], "sign"); + } + else + CompilerGLSL::emit_glsl_op(result_type, id, eop, args, count); + break; + + case GLSLstd450Reflect: + if (get(result_type).vecsize == 1) + emit_binary_func_op(result_type, id, args[0], args[1], "spvReflect"); + else + CompilerGLSL::emit_glsl_op(result_type, id, eop, args, count); + break; + + case GLSLstd450Refract: + if (get(result_type).vecsize == 1) + emit_trinary_func_op(result_type, id, args[0], args[1], args[2], "spvRefract"); + else + CompilerGLSL::emit_glsl_op(result_type, id, eop, args, count); + break; + + case GLSLstd450FaceForward: + if (get(result_type).vecsize == 1) + emit_trinary_func_op(result_type, id, args[0], args[1], args[2], "spvFaceForward"); + else + CompilerGLSL::emit_glsl_op(result_type, id, eop, args, count); + break; + + case GLSLstd450Modf: + case GLSLstd450Frexp: + { + // Special case. If the variable is a scalar access chain, we cannot use it directly. We have to emit a temporary. + auto *ptr = maybe_get(args[1]); + if (ptr && ptr->access_chain && is_scalar(expression_type(args[1]))) + { + register_call_out_argument(args[1]); + forced_temporaries.insert(id); + + // Need to create temporaries and copy over to access chain after. + // We cannot directly take the reference of a vector swizzle in MSL, even if it's scalar ... + uint32_t &tmp_id = extra_sub_expressions[id]; + if (!tmp_id) + tmp_id = ir.increase_bound_by(1); + + uint32_t tmp_type_id = get_pointee_type_id(ptr->expression_type); + emit_uninitialized_temporary_expression(tmp_type_id, tmp_id); + emit_binary_func_op(result_type, id, args[0], tmp_id, eop == GLSLstd450Modf ? 
"modf" : "frexp"); + statement(to_expression(args[1]), " = ", to_expression(tmp_id), ";"); + } + else + CompilerGLSL::emit_glsl_op(result_type, id, eop, args, count); + break; + } + + default: + CompilerGLSL::emit_glsl_op(result_type, id, eop, args, count); + break; + } +} + +void CompilerMSL::emit_spv_amd_shader_trinary_minmax_op(uint32_t result_type, uint32_t id, uint32_t eop, + const uint32_t *args, uint32_t count) +{ + enum AMDShaderTrinaryMinMax + { + FMin3AMD = 1, + UMin3AMD = 2, + SMin3AMD = 3, + FMax3AMD = 4, + UMax3AMD = 5, + SMax3AMD = 6, + FMid3AMD = 7, + UMid3AMD = 8, + SMid3AMD = 9 + }; + + if (!msl_options.supports_msl_version(2, 1)) + SPIRV_CROSS_THROW("Trinary min/max functions require MSL 2.1."); + + auto op = static_cast(eop); + + switch (op) + { + case FMid3AMD: + case UMid3AMD: + case SMid3AMD: + emit_trinary_func_op(result_type, id, args[0], args[1], args[2], "median3"); + break; + default: + CompilerGLSL::emit_spv_amd_shader_trinary_minmax_op(result_type, id, eop, args, count); + break; + } +} + +// Emit a structure declaration for the specified interface variable. +void CompilerMSL::emit_interface_block(uint32_t ib_var_id) +{ + if (ib_var_id) + { + auto &ib_var = get(ib_var_id); + auto &ib_type = get_variable_data_type(ib_var); + assert(ib_type.basetype == SPIRType::Struct && !ib_type.member_types.empty()); + emit_struct(ib_type); + } +} + +// Emits the declaration signature of the specified function. +// If this is the entry point function, Metal-specific return value and function arguments are added. +void CompilerMSL::emit_function_prototype(SPIRFunction &func, const Bitset &) +{ + if (func.self != ir.default_entry_point) + add_function_overload(func); + + local_variable_names = resource_names; + string decl; + + processing_entry_point = func.self == ir.default_entry_point; + + // Metal helper functions must be static force-inline otherwise they will cause problems when linked together in a single Metallib. 
+ if (!processing_entry_point) + statement(force_inline); + + auto &type = get(func.return_type); + + if (!type.array.empty() && msl_options.force_native_arrays) + { + // We cannot return native arrays in MSL, so "return" through an out variable. + decl += "void"; + } + else + { + decl += func_type_decl(type); + } + + decl += " "; + decl += to_name(func.self); + decl += "("; + + if (!type.array.empty() && msl_options.force_native_arrays) + { + // Fake arrays returns by writing to an out array instead. + decl += "thread "; + decl += type_to_glsl(type); + decl += " (&spvReturnValue)"; + decl += type_to_array_glsl(type); + if (!func.arguments.empty()) + decl += ", "; + } + + if (processing_entry_point) + { + if (msl_options.argument_buffers) + decl += entry_point_args_argument_buffer(!func.arguments.empty()); + else + decl += entry_point_args_classic(!func.arguments.empty()); + + // If entry point function has variables that require early declaration, + // ensure they each have an empty initializer, creating one if needed. + // This is done at this late stage because the initialization expression + // is cleared after each compilation pass. + for (auto var_id : vars_needing_early_declaration) + { + auto &ed_var = get(var_id); + ID &initializer = ed_var.initializer; + if (!initializer) + initializer = ir.increase_bound_by(1); + + // Do not override proper initializers. + if (ir.ids[initializer].get_type() == TypeNone || ir.ids[initializer].get_type() == TypeExpression) + set(ed_var.initializer, "{}", ed_var.basetype, true); + } + } + + for (auto &arg : func.arguments) + { + uint32_t name_id = arg.id; + + auto *var = maybe_get(arg.id); + if (var) + { + // If we need to modify the name of the variable, make sure we modify the original variable. + // Our alias is just a shadow variable. 
+ if (arg.alias_global_variable && var->basevariable) + name_id = var->basevariable; + + var->parameter = &arg; // Hold a pointer to the parameter so we can invalidate the readonly field if needed. + } + + add_local_variable_name(name_id); + + decl += argument_decl(arg); + + bool is_dynamic_img_sampler = has_extended_decoration(arg.id, SPIRVCrossDecorationDynamicImageSampler); + + auto &arg_type = get(arg.type); + if (arg_type.basetype == SPIRType::SampledImage && !is_dynamic_img_sampler) + { + // Manufacture automatic plane args for multiplanar texture + uint32_t planes = 1; + if (auto *constexpr_sampler = find_constexpr_sampler(name_id)) + if (constexpr_sampler->ycbcr_conversion_enable) + planes = constexpr_sampler->planes; + for (uint32_t i = 1; i < planes; i++) + decl += join(", ", argument_decl(arg), plane_name_suffix, i); + + // Manufacture automatic sampler arg for SampledImage texture + if (arg_type.image.dim != DimBuffer) + decl += join(", thread const ", sampler_type(arg_type, arg.id), " ", to_sampler_expression(arg.id)); + } + + // Manufacture automatic swizzle arg. + if (msl_options.swizzle_texture_samples && has_sampled_images && is_sampled_image_type(arg_type) && + !is_dynamic_img_sampler) + { + bool arg_is_array = !arg_type.array.empty(); + decl += join(", constant uint", arg_is_array ? "* " : "& ", to_swizzle_expression(arg.id)); + } + + if (buffers_requiring_array_length.count(name_id)) + { + bool arg_is_array = !arg_type.array.empty(); + decl += join(", constant uint", arg_is_array ? "* " : "& ", to_buffer_size_expression(name_id)); + } + + if (&arg != &func.arguments.back()) + decl += ", "; + } + + decl += ")"; + statement(decl); +} + +static bool needs_chroma_reconstruction(const MSLConstexprSampler *constexpr_sampler) +{ + // For now, only multiplanar images need explicit reconstruction. GBGR and BGRG images + // use implicit reconstruction. 
+ return constexpr_sampler && constexpr_sampler->ycbcr_conversion_enable && constexpr_sampler->planes > 1; +} + +// Returns the texture sampling function string for the specified image and sampling characteristics. +string CompilerMSL::to_function_name(const TextureFunctionNameArguments &args) +{ + VariableID img = args.base.img; + auto &imgtype = *args.base.imgtype; + + const MSLConstexprSampler *constexpr_sampler = nullptr; + bool is_dynamic_img_sampler = false; + if (auto *var = maybe_get_backing_variable(img)) + { + constexpr_sampler = find_constexpr_sampler(var->basevariable ? var->basevariable : VariableID(var->self)); + is_dynamic_img_sampler = has_extended_decoration(var->self, SPIRVCrossDecorationDynamicImageSampler); + } + + // Special-case gather. We have to alter the component being looked up + // in the swizzle case. + if (msl_options.swizzle_texture_samples && args.base.is_gather && !is_dynamic_img_sampler && + (!constexpr_sampler || !constexpr_sampler->ycbcr_conversion_enable)) + { + add_spv_func_and_recompile(imgtype.image.depth ? SPVFuncImplGatherCompareSwizzle : SPVFuncImplGatherSwizzle); + return imgtype.image.depth ? "spvGatherCompareSwizzle" : "spvGatherSwizzle"; + } + + auto *combined = maybe_get(img); + + // Texture reference + string fname; + if (needs_chroma_reconstruction(constexpr_sampler) && !is_dynamic_img_sampler) + { + if (constexpr_sampler->planes != 2 && constexpr_sampler->planes != 3) + SPIRV_CROSS_THROW("Unhandled number of color image planes!"); + // 444 images aren't downsampled, so we don't need to do linear filtering. 
+ if (constexpr_sampler->resolution == MSL_FORMAT_RESOLUTION_444 || + constexpr_sampler->chroma_filter == MSL_SAMPLER_FILTER_NEAREST) + { + if (constexpr_sampler->planes == 2) + add_spv_func_and_recompile(SPVFuncImplChromaReconstructNearest2Plane); + else + add_spv_func_and_recompile(SPVFuncImplChromaReconstructNearest3Plane); + fname = "spvChromaReconstructNearest"; + } + else // Linear with a downsampled format + { + fname = "spvChromaReconstructLinear"; + switch (constexpr_sampler->resolution) + { + case MSL_FORMAT_RESOLUTION_444: + assert(false); + break; // not reached + case MSL_FORMAT_RESOLUTION_422: + switch (constexpr_sampler->x_chroma_offset) + { + case MSL_CHROMA_LOCATION_COSITED_EVEN: + if (constexpr_sampler->planes == 2) + add_spv_func_and_recompile(SPVFuncImplChromaReconstructLinear422CositedEven2Plane); + else + add_spv_func_and_recompile(SPVFuncImplChromaReconstructLinear422CositedEven3Plane); + fname += "422CositedEven"; + break; + case MSL_CHROMA_LOCATION_MIDPOINT: + if (constexpr_sampler->planes == 2) + add_spv_func_and_recompile(SPVFuncImplChromaReconstructLinear422Midpoint2Plane); + else + add_spv_func_and_recompile(SPVFuncImplChromaReconstructLinear422Midpoint3Plane); + fname += "422Midpoint"; + break; + default: + SPIRV_CROSS_THROW("Invalid chroma location."); + } + break; + case MSL_FORMAT_RESOLUTION_420: + fname += "420"; + switch (constexpr_sampler->x_chroma_offset) + { + case MSL_CHROMA_LOCATION_COSITED_EVEN: + switch (constexpr_sampler->y_chroma_offset) + { + case MSL_CHROMA_LOCATION_COSITED_EVEN: + if (constexpr_sampler->planes == 2) + add_spv_func_and_recompile( + SPVFuncImplChromaReconstructLinear420XCositedEvenYCositedEven2Plane); + else + add_spv_func_and_recompile( + SPVFuncImplChromaReconstructLinear420XCositedEvenYCositedEven3Plane); + fname += "XCositedEvenYCositedEven"; + break; + case MSL_CHROMA_LOCATION_MIDPOINT: + if (constexpr_sampler->planes == 2) + add_spv_func_and_recompile( + 
SPVFuncImplChromaReconstructLinear420XCositedEvenYMidpoint2Plane); + else + add_spv_func_and_recompile( + SPVFuncImplChromaReconstructLinear420XCositedEvenYMidpoint3Plane); + fname += "XCositedEvenYMidpoint"; + break; + default: + SPIRV_CROSS_THROW("Invalid Y chroma location."); + } + break; + case MSL_CHROMA_LOCATION_MIDPOINT: + switch (constexpr_sampler->y_chroma_offset) + { + case MSL_CHROMA_LOCATION_COSITED_EVEN: + if (constexpr_sampler->planes == 2) + add_spv_func_and_recompile( + SPVFuncImplChromaReconstructLinear420XMidpointYCositedEven2Plane); + else + add_spv_func_and_recompile( + SPVFuncImplChromaReconstructLinear420XMidpointYCositedEven3Plane); + fname += "XMidpointYCositedEven"; + break; + case MSL_CHROMA_LOCATION_MIDPOINT: + if (constexpr_sampler->planes == 2) + add_spv_func_and_recompile(SPVFuncImplChromaReconstructLinear420XMidpointYMidpoint2Plane); + else + add_spv_func_and_recompile(SPVFuncImplChromaReconstructLinear420XMidpointYMidpoint3Plane); + fname += "XMidpointYMidpoint"; + break; + default: + SPIRV_CROSS_THROW("Invalid Y chroma location."); + } + break; + default: + SPIRV_CROSS_THROW("Invalid X chroma location."); + } + break; + default: + SPIRV_CROSS_THROW("Invalid format resolution."); + } + } + } + else + { + fname = to_expression(combined ? combined->image : img) + "."; + + // Texture function and sampler + if (args.base.is_fetch) + fname += "read"; + else if (args.base.is_gather) + fname += "gather"; + else + fname += "sample"; + + if (args.has_dref) + fname += "_compare"; + } + + return fname; +} + +string CompilerMSL::convert_to_f32(const string &expr, uint32_t components) +{ + SPIRType t; + t.basetype = SPIRType::Float; + t.vecsize = components; + t.columns = 1; + return join(type_to_glsl_constructor(t), "(", expr, ")"); +} + +static inline bool sampling_type_needs_f32_conversion(const SPIRType &type) +{ + // Double is not supported to begin with, but doesn't hurt to check for completion. 
+ return type.basetype == SPIRType::Half || type.basetype == SPIRType::Double; +} + +// Returns the function args for a texture sampling function for the specified image and sampling characteristics. +string CompilerMSL::to_function_args(const TextureFunctionArguments &args, bool *p_forward) +{ + VariableID img = args.base.img; + auto &imgtype = *args.base.imgtype; + uint32_t lod = args.lod; + uint32_t grad_x = args.grad_x; + uint32_t grad_y = args.grad_y; + uint32_t bias = args.bias; + + const MSLConstexprSampler *constexpr_sampler = nullptr; + bool is_dynamic_img_sampler = false; + if (auto *var = maybe_get_backing_variable(img)) + { + constexpr_sampler = find_constexpr_sampler(var->basevariable ? var->basevariable : VariableID(var->self)); + is_dynamic_img_sampler = has_extended_decoration(var->self, SPIRVCrossDecorationDynamicImageSampler); + } + + string farg_str; + bool forward = true; + + if (!is_dynamic_img_sampler) + { + // Texture reference (for some cases) + if (needs_chroma_reconstruction(constexpr_sampler)) + { + // Multiplanar images need two or three textures. + farg_str += to_expression(img); + for (uint32_t i = 1; i < constexpr_sampler->planes; i++) + farg_str += join(", ", to_expression(img), plane_name_suffix, i); + } + else if ((!constexpr_sampler || !constexpr_sampler->ycbcr_conversion_enable) && + msl_options.swizzle_texture_samples && args.base.is_gather) + { + auto *combined = maybe_get(img); + farg_str += to_expression(combined ? combined->image : img); + } + + // Sampler reference + if (!args.base.is_fetch) + { + if (!farg_str.empty()) + farg_str += ", "; + farg_str += to_sampler_expression(img); + } + + if ((!constexpr_sampler || !constexpr_sampler->ycbcr_conversion_enable) && + msl_options.swizzle_texture_samples && args.base.is_gather) + { + // Add the swizzle constant from the swizzle buffer. 
+ farg_str += ", " + to_swizzle_expression(img); + used_swizzle_buffer = true; + } + + // Swizzled gather puts the component before the other args, to allow template + // deduction to work. + if (args.component && msl_options.swizzle_texture_samples) + { + forward = should_forward(args.component); + farg_str += ", " + to_component_argument(args.component); + } + } + + // Texture coordinates + forward = forward && should_forward(args.coord); + auto coord_expr = to_enclosed_expression(args.coord); + auto &coord_type = expression_type(args.coord); + bool coord_is_fp = type_is_floating_point(coord_type); + bool is_cube_fetch = false; + + string tex_coords = coord_expr; + uint32_t alt_coord_component = 0; + + switch (imgtype.image.dim) + { + + case Dim1D: + if (coord_type.vecsize > 1) + tex_coords = enclose_expression(tex_coords) + ".x"; + + if (args.base.is_fetch) + tex_coords = "uint(" + round_fp_tex_coords(tex_coords, coord_is_fp) + ")"; + else if (sampling_type_needs_f32_conversion(coord_type)) + tex_coords = convert_to_f32(tex_coords, 1); + + if (msl_options.texture_1D_as_2D) + { + if (args.base.is_fetch) + tex_coords = "uint2(" + tex_coords + ", 0)"; + else + tex_coords = "float2(" + tex_coords + ", 0.5)"; + } + + alt_coord_component = 1; + break; + + case DimBuffer: + if (coord_type.vecsize > 1) + tex_coords = enclose_expression(tex_coords) + ".x"; + + if (msl_options.texture_buffer_native) + { + tex_coords = "uint(" + round_fp_tex_coords(tex_coords, coord_is_fp) + ")"; + } + else + { + // Metal texel buffer textures are 2D, so convert 1D coord to 2D. + // Support for Metal 2.1's new texture_buffer type. 
+ if (args.base.is_fetch) + { + if (msl_options.texel_buffer_texture_width > 0) + { + tex_coords = "spvTexelBufferCoord(" + round_fp_tex_coords(tex_coords, coord_is_fp) + ")"; + } + else + { + tex_coords = "spvTexelBufferCoord(" + round_fp_tex_coords(tex_coords, coord_is_fp) + ", " + + to_expression(img) + ")"; + } + } + } + + alt_coord_component = 1; + break; + + case DimSubpassData: + // If we're using Metal's native frame-buffer fetch API for subpass inputs, + // this path will not be hit. + tex_coords = "uint2(gl_FragCoord.xy)"; + alt_coord_component = 2; + break; + + case Dim2D: + if (coord_type.vecsize > 2) + tex_coords = enclose_expression(tex_coords) + ".xy"; + + if (args.base.is_fetch) + tex_coords = "uint2(" + round_fp_tex_coords(tex_coords, coord_is_fp) + ")"; + else if (sampling_type_needs_f32_conversion(coord_type)) + tex_coords = convert_to_f32(tex_coords, 2); + + alt_coord_component = 2; + break; + + case Dim3D: + if (coord_type.vecsize > 3) + tex_coords = enclose_expression(tex_coords) + ".xyz"; + + if (args.base.is_fetch) + tex_coords = "uint3(" + round_fp_tex_coords(tex_coords, coord_is_fp) + ")"; + else if (sampling_type_needs_f32_conversion(coord_type)) + tex_coords = convert_to_f32(tex_coords, 3); + + alt_coord_component = 3; + break; + + case DimCube: + if (args.base.is_fetch) + { + is_cube_fetch = true; + tex_coords += ".xy"; + tex_coords = "uint2(" + round_fp_tex_coords(tex_coords, coord_is_fp) + ")"; + } + else + { + if (coord_type.vecsize > 3) + tex_coords = enclose_expression(tex_coords) + ".xyz"; + } + + if (sampling_type_needs_f32_conversion(coord_type)) + tex_coords = convert_to_f32(tex_coords, 3); + + alt_coord_component = 3; + break; + + default: + break; + } + + if (args.base.is_fetch && (args.offset || args.coffset)) + { + uint32_t offset_expr = args.offset ? args.offset : args.coffset; + // Fetch offsets must be applied directly to the coordinate. 
+ forward = forward && should_forward(offset_expr); + auto &type = expression_type(offset_expr); + if (imgtype.image.dim == Dim1D && msl_options.texture_1D_as_2D) + { + if (type.basetype != SPIRType::UInt) + tex_coords += join(" + uint2(", bitcast_expression(SPIRType::UInt, offset_expr), ", 0)"); + else + tex_coords += join(" + uint2(", to_enclosed_expression(offset_expr), ", 0)"); + } + else + { + if (type.basetype != SPIRType::UInt) + tex_coords += " + " + bitcast_expression(SPIRType::UInt, offset_expr); + else + tex_coords += " + " + to_enclosed_expression(offset_expr); + } + } + + // If projection, use alt coord as divisor + if (args.base.is_proj) + { + if (sampling_type_needs_f32_conversion(coord_type)) + tex_coords += " / " + convert_to_f32(to_extract_component_expression(args.coord, alt_coord_component), 1); + else + tex_coords += " / " + to_extract_component_expression(args.coord, alt_coord_component); + } + + if (!farg_str.empty()) + farg_str += ", "; + + if (imgtype.image.dim == DimCube && imgtype.image.arrayed && msl_options.emulate_cube_array) + { + farg_str += "spvCubemapTo2DArrayFace(" + tex_coords + ").xy"; + + if (is_cube_fetch) + farg_str += ", uint(" + to_extract_component_expression(args.coord, 2) + ")"; + else + farg_str += + ", uint(spvCubemapTo2DArrayFace(" + tex_coords + ").z) + (uint(" + + round_fp_tex_coords(to_extract_component_expression(args.coord, alt_coord_component), coord_is_fp) + + ") * 6u)"; + + add_spv_func_and_recompile(SPVFuncImplCubemapTo2DArrayFace); + } + else + { + farg_str += tex_coords; + + // If fetch from cube, add face explicitly + if (is_cube_fetch) + { + // Special case for cube arrays, face and layer are packed in one dimension. 
+ if (imgtype.image.arrayed) + farg_str += ", uint(" + to_extract_component_expression(args.coord, 2) + ") % 6u"; + else + farg_str += + ", uint(" + round_fp_tex_coords(to_extract_component_expression(args.coord, 2), coord_is_fp) + ")"; + } + + // If array, use alt coord + if (imgtype.image.arrayed) + { + // Special case for cube arrays, face and layer are packed in one dimension. + if (imgtype.image.dim == DimCube && args.base.is_fetch) + { + farg_str += ", uint(" + to_extract_component_expression(args.coord, 2) + ") / 6u"; + } + else + { + farg_str += + ", uint(" + + round_fp_tex_coords(to_extract_component_expression(args.coord, alt_coord_component), coord_is_fp) + + ")"; + if (imgtype.image.dim == DimSubpassData) + { + if (msl_options.multiview) + farg_str += " + gl_ViewIndex"; + else if (msl_options.arrayed_subpass_input) + farg_str += " + gl_Layer"; + } + } + } + else if (imgtype.image.dim == DimSubpassData) + { + if (msl_options.multiview) + farg_str += ", gl_ViewIndex"; + else if (msl_options.arrayed_subpass_input) + farg_str += ", gl_Layer"; + } + } + + // Depth compare reference value + if (args.dref) + { + forward = forward && should_forward(args.dref); + farg_str += ", "; + + auto &dref_type = expression_type(args.dref); + + string dref_expr; + if (args.base.is_proj) + dref_expr = join(to_enclosed_expression(args.dref), " / ", + to_extract_component_expression(args.coord, alt_coord_component)); + else + dref_expr = to_expression(args.dref); + + if (sampling_type_needs_f32_conversion(dref_type)) + dref_expr = convert_to_f32(dref_expr, 1); + + farg_str += dref_expr; + + if (msl_options.is_macos() && (grad_x || grad_y)) + { + // For sample compare, MSL does not support gradient2d for all targets (only iOS apparently according to docs). + // However, the most common case here is to have a constant gradient of 0, as that is the only way to express + // LOD == 0 in GLSL with sampler2DArrayShadow (cascaded shadow mapping). 
+ // We will detect a compile-time constant 0 value for gradient and promote that to level(0) on MSL. + bool constant_zero_x = !grad_x || expression_is_constant_null(grad_x); + bool constant_zero_y = !grad_y || expression_is_constant_null(grad_y); + if (constant_zero_x && constant_zero_y) + { + lod = 0; + grad_x = 0; + grad_y = 0; + farg_str += ", level(0)"; + } + else if (!msl_options.supports_msl_version(2, 3)) + { + SPIRV_CROSS_THROW("Using non-constant 0.0 gradient() qualifier for sample_compare. This is not " + "supported on macOS prior to MSL 2.3."); + } + } + + if (msl_options.is_macos() && bias) + { + // Bias is not supported either on macOS with sample_compare. + // Verify it is compile-time zero, and drop the argument. + if (expression_is_constant_null(bias)) + { + bias = 0; + } + else if (!msl_options.supports_msl_version(2, 3)) + { + SPIRV_CROSS_THROW("Using non-constant 0.0 bias() qualifier for sample_compare. This is not supported " + "on macOS prior to MSL 2.3."); + } + } + } + + // LOD Options + // Metal does not support LOD for 1D textures. + if (bias && (imgtype.image.dim != Dim1D || msl_options.texture_1D_as_2D)) + { + forward = forward && should_forward(bias); + farg_str += ", bias(" + to_expression(bias) + ")"; + } + + // Metal does not support LOD for 1D textures. + if (lod && (imgtype.image.dim != Dim1D || msl_options.texture_1D_as_2D)) + { + forward = forward && should_forward(lod); + if (args.base.is_fetch) + { + farg_str += ", " + to_expression(lod); + } + else + { + farg_str += ", level(" + to_expression(lod) + ")"; + } + } + else if (args.base.is_fetch && !lod && (imgtype.image.dim != Dim1D || msl_options.texture_1D_as_2D) && + imgtype.image.dim != DimBuffer && !imgtype.image.ms && imgtype.image.sampled != 2) + { + // Lod argument is optional in OpImageFetch, but we require a LOD value, pick 0 as the default. + // Check for sampled type as well, because is_fetch is also used for OpImageRead in MSL. 
+ farg_str += ", 0"; + } + + // Metal does not support LOD for 1D textures. + if ((grad_x || grad_y) && (imgtype.image.dim != Dim1D || msl_options.texture_1D_as_2D)) + { + forward = forward && should_forward(grad_x); + forward = forward && should_forward(grad_y); + string grad_opt; + switch (imgtype.image.dim) + { + case Dim1D: + case Dim2D: + grad_opt = "2d"; + break; + case Dim3D: + grad_opt = "3d"; + break; + case DimCube: + if (imgtype.image.arrayed && msl_options.emulate_cube_array) + grad_opt = "2d"; + else + grad_opt = "cube"; + break; + default: + grad_opt = "unsupported_gradient_dimension"; + break; + } + farg_str += ", gradient" + grad_opt + "(" + to_expression(grad_x) + ", " + to_expression(grad_y) + ")"; + } + + if (args.min_lod) + { + if (!msl_options.supports_msl_version(2, 2)) + SPIRV_CROSS_THROW("min_lod_clamp() is only supported in MSL 2.2+ and up."); + + forward = forward && should_forward(args.min_lod); + farg_str += ", min_lod_clamp(" + to_expression(args.min_lod) + ")"; + } + + // Add offsets + string offset_expr; + const SPIRType *offset_type = nullptr; + if (args.coffset && !args.base.is_fetch) + { + forward = forward && should_forward(args.coffset); + offset_expr = to_expression(args.coffset); + offset_type = &expression_type(args.coffset); + } + else if (args.offset && !args.base.is_fetch) + { + forward = forward && should_forward(args.offset); + offset_expr = to_expression(args.offset); + offset_type = &expression_type(args.offset); + } + + if (!offset_expr.empty()) + { + switch (imgtype.image.dim) + { + case Dim1D: + if (!msl_options.texture_1D_as_2D) + break; + if (offset_type->vecsize > 1) + offset_expr = enclose_expression(offset_expr) + ".x"; + + farg_str += join(", int2(", offset_expr, ", 0)"); + break; + + case Dim2D: + if (offset_type->vecsize > 2) + offset_expr = enclose_expression(offset_expr) + ".xy"; + + farg_str += ", " + offset_expr; + break; + + case Dim3D: + if (offset_type->vecsize > 3) + offset_expr = 
enclose_expression(offset_expr) + ".xyz"; + + farg_str += ", " + offset_expr; + break; + + default: + break; + } + } + + if (args.component) + { + // If 2D has gather component, ensure it also has an offset arg + if (imgtype.image.dim == Dim2D && offset_expr.empty()) + farg_str += ", int2(0)"; + + if (!msl_options.swizzle_texture_samples || is_dynamic_img_sampler) + { + forward = forward && should_forward(args.component); + + uint32_t image_var = 0; + if (const auto *combined = maybe_get(img)) + { + if (const auto *img_var = maybe_get_backing_variable(combined->image)) + image_var = img_var->self; + } + else if (const auto *var = maybe_get_backing_variable(img)) + { + image_var = var->self; + } + + if (image_var == 0 || !image_is_comparison(expression_type(image_var), image_var)) + farg_str += ", " + to_component_argument(args.component); + } + } + + if (args.sample) + { + forward = forward && should_forward(args.sample); + farg_str += ", "; + farg_str += to_expression(args.sample); + } + + *p_forward = forward; + + return farg_str; +} + +// If the texture coordinates are floating point, invokes MSL round() function to round them. +string CompilerMSL::round_fp_tex_coords(string tex_coords, bool coord_is_fp) +{ + return coord_is_fp ? ("round(" + tex_coords + ")") : tex_coords; +} + +// Returns a string to use in an image sampling function argument. +// The ID must be a scalar constant. +string CompilerMSL::to_component_argument(uint32_t id) +{ + uint32_t component_index = evaluate_constant_u32(id); + switch (component_index) + { + case 0: + return "component::x"; + case 1: + return "component::y"; + case 2: + return "component::z"; + case 3: + return "component::w"; + + default: + SPIRV_CROSS_THROW("The value (" + to_string(component_index) + ") of OpConstant ID " + to_string(id) + + " is not a valid Component index, which must be one of 0, 1, 2, or 3."); + return "component::x"; + } +} + +// Establish sampled image as expression object and assign the sampler to it. 
+void CompilerMSL::emit_sampled_image_op(uint32_t result_type, uint32_t result_id, uint32_t image_id, uint32_t samp_id)
+{
+    // Record the (image, sampler) pair under result_id so later texture ops can look both up.
+    set<SPIRCombinedImageSampler>(result_id, result_type, image_id, samp_id);
+}
+
+// Builds the MSL expression for a texture instruction.
+// The GLSL backend produces the core call; this wrapper adds, depending on the sampler:
+//  - Y'CbCr model/range conversion helpers plus the constexpr sampler's component swizzle, or
+//  - a spvTextureSwizzle(...) wrapper fed from the swizzle buffer.
+// forward and inherited_expressions are passed through to CompilerGLSL::to_texture_op;
+// inherited_expressions is replaced by a single temporary when a temporary has to be emitted.
+string CompilerMSL::to_texture_op(const Instruction &i, bool sparse, bool *forward,
+                                  SmallVector<uint32_t> &inherited_expressions)
+{
+    auto *ops = stream(i);
+    uint32_t result_type_id = ops[0];
+    uint32_t img = ops[2];
+    auto &result_type = get<SPIRType>(result_type_id);
+    auto op = static_cast<Op>(i.op);
+    bool is_gather = (op == OpImageGather || op == OpImageDrefGather);
+
+    // Bypass pointers because we need the real image struct
+    auto &type = expression_type(img);
+    auto &imgtype = get<SPIRType>(type.self);
+
+    const MSLConstexprSampler *constexpr_sampler = nullptr;
+    bool is_dynamic_img_sampler = false;
+    if (auto *var = maybe_get_backing_variable(img))
+    {
+        constexpr_sampler = find_constexpr_sampler(var->basevariable ? var->basevariable : VariableID(var->self));
+        is_dynamic_img_sampler = has_extended_decoration(var->self, SPIRVCrossDecorationDynamicImageSampler);
+    }
+
+    // Both conditions are tested twice: once to open the wrapper calls before the inner
+    // expression, and once to close them after it.
+    const bool needs_ycbcr =
+        constexpr_sampler && constexpr_sampler->ycbcr_conversion_enable && !is_dynamic_img_sampler;
+    const bool needs_texture_swizzle = msl_options.swizzle_texture_samples && !is_gather &&
+                                       is_sampled_image_type(imgtype) && !is_dynamic_img_sampler;
+
+    string expr;
+    if (needs_ycbcr)
+    {
+        // If this needs sampler Y'CbCr conversion, we need to do some additional
+        // processing.
+        switch (constexpr_sampler->ycbcr_model)
+        {
+        case MSL_SAMPLER_YCBCR_MODEL_CONVERSION_RGB_IDENTITY:
+        case MSL_SAMPLER_YCBCR_MODEL_CONVERSION_YCBCR_IDENTITY:
+            // Default
+            break;
+        case MSL_SAMPLER_YCBCR_MODEL_CONVERSION_YCBCR_BT_709:
+            add_spv_func_and_recompile(SPVFuncImplConvertYCbCrBT709);
+            expr += "spvConvertYCbCrBT709(";
+            break;
+        case MSL_SAMPLER_YCBCR_MODEL_CONVERSION_YCBCR_BT_601:
+            add_spv_func_and_recompile(SPVFuncImplConvertYCbCrBT601);
+            expr += "spvConvertYCbCrBT601(";
+            break;
+        case MSL_SAMPLER_YCBCR_MODEL_CONVERSION_YCBCR_BT_2020:
+            add_spv_func_and_recompile(SPVFuncImplConvertYCbCrBT2020);
+            expr += "spvConvertYCbCrBT2020(";
+            break;
+        default:
+            SPIRV_CROSS_THROW("Invalid Y'CbCr model conversion.");
+        }
+
+        if (constexpr_sampler->ycbcr_model != MSL_SAMPLER_YCBCR_MODEL_CONVERSION_RGB_IDENTITY)
+        {
+            switch (constexpr_sampler->ycbcr_range)
+            {
+            case MSL_SAMPLER_YCBCR_RANGE_ITU_FULL:
+                add_spv_func_and_recompile(SPVFuncImplExpandITUFullRange);
+                expr += "spvExpandITUFullRange(";
+                break;
+            case MSL_SAMPLER_YCBCR_RANGE_ITU_NARROW:
+                add_spv_func_and_recompile(SPVFuncImplExpandITUNarrowRange);
+                expr += "spvExpandITUNarrowRange(";
+                break;
+            default:
+                SPIRV_CROSS_THROW("Invalid Y'CbCr range.");
+            }
+        }
+    }
+    else if (needs_texture_swizzle)
+    {
+        add_spv_func_and_recompile(SPVFuncImplTextureSwizzle);
+        expr += "spvTextureSwizzle(";
+    }
+
+    string inner_expr = CompilerGLSL::to_texture_op(i, sparse, forward, inherited_expressions);
+
+    if (needs_ycbcr)
+    {
+        if (!constexpr_sampler->swizzle_is_identity())
+        {
+            static const char swizzle_names[] = "rgba";
+            if (!constexpr_sampler->swizzle_has_one_or_zero())
+            {
+                // If we can, do it inline.
+                expr += inner_expr + ".";
+                for (uint32_t c = 0; c < 4; c++)
+                {
+                    switch (constexpr_sampler->swizzle[c])
+                    {
+                    case MSL_COMPONENT_SWIZZLE_IDENTITY:
+                        expr += swizzle_names[c];
+                        break;
+                    case MSL_COMPONENT_SWIZZLE_R:
+                    case MSL_COMPONENT_SWIZZLE_G:
+                    case MSL_COMPONENT_SWIZZLE_B:
+                    case MSL_COMPONENT_SWIZZLE_A:
+                        expr += swizzle_names[constexpr_sampler->swizzle[c] - MSL_COMPONENT_SWIZZLE_R];
+                        break;
+                    default:
+                        SPIRV_CROSS_THROW("Invalid component swizzle.");
+                    }
+                }
+            }
+            else
+            {
+                // Otherwise, we need to emit a temporary and swizzle that.
+                uint32_t temp_id = ir.increase_bound_by(1);
+                emit_op(result_type_id, temp_id, inner_expr, false);
+                for (auto &inherit : inherited_expressions)
+                    inherit_expression_dependencies(temp_id, inherit);
+                inherited_expressions.clear();
+                inherited_expressions.push_back(temp_id);
+
+                switch (op)
+                {
+                case OpImageSampleDrefImplicitLod:
+                case OpImageSampleImplicitLod:
+                case OpImageSampleProjImplicitLod:
+                case OpImageSampleProjDrefImplicitLod:
+                    register_control_dependent_expression(temp_id);
+                    break;
+
+                default:
+                    break;
+                }
+                expr += type_to_glsl(result_type) + "(";
+                for (uint32_t c = 0; c < 4; c++)
+                {
+                    switch (constexpr_sampler->swizzle[c])
+                    {
+                    case MSL_COMPONENT_SWIZZLE_IDENTITY:
+                        expr += to_expression(temp_id) + "." + swizzle_names[c];
+                        break;
+                    case MSL_COMPONENT_SWIZZLE_ZERO:
+                        expr += "0";
+                        break;
+                    case MSL_COMPONENT_SWIZZLE_ONE:
+                        expr += "1";
+                        break;
+                    case MSL_COMPONENT_SWIZZLE_R:
+                    case MSL_COMPONENT_SWIZZLE_G:
+                    case MSL_COMPONENT_SWIZZLE_B:
+                    case MSL_COMPONENT_SWIZZLE_A:
+                        expr += to_expression(temp_id) + "." +
+                                swizzle_names[constexpr_sampler->swizzle[c] - MSL_COMPONENT_SWIZZLE_R];
+                        break;
+                    default:
+                        SPIRV_CROSS_THROW("Invalid component swizzle.");
+                    }
+                    if (c < 3)
+                        expr += ", ";
+                }
+                expr += ")";
+            }
+        }
+        else
+            expr += inner_expr;
+
+        // Close the calls opened before inner_expr: the range expansion takes the bit depth
+        // as its second argument, and the model conversion (if any) wraps that.
+        if (constexpr_sampler->ycbcr_model != MSL_SAMPLER_YCBCR_MODEL_CONVERSION_RGB_IDENTITY)
+        {
+            expr += join(", ", constexpr_sampler->bpc, ")");
+            if (constexpr_sampler->ycbcr_model != MSL_SAMPLER_YCBCR_MODEL_CONVERSION_YCBCR_IDENTITY)
+                expr += ")";
+        }
+    }
+    else
+    {
+        expr += inner_expr;
+        if (needs_texture_swizzle)
+        {
+            // Add the swizzle constant from the swizzle buffer.
+            expr += ", " + to_swizzle_expression(img) + ")";
+            used_swizzle_buffer = true;
+        }
+    }
+
+    return expr;
+}
+
+// Maps a component swizzle to the name of the matching spvSwizzle enumerator in generated MSL.
+static string create_swizzle(MSLComponentSwizzle swizzle)
+{
+    switch (swizzle)
+    {
+    case MSL_COMPONENT_SWIZZLE_IDENTITY:
+        return "spvSwizzle::none";
+    case MSL_COMPONENT_SWIZZLE_ZERO:
+        return "spvSwizzle::zero";
+    case MSL_COMPONENT_SWIZZLE_ONE:
+        return "spvSwizzle::one";
+    case MSL_COMPONENT_SWIZZLE_R:
+        return "spvSwizzle::red";
+    case MSL_COMPONENT_SWIZZLE_G:
+        return "spvSwizzle::green";
+    case MSL_COMPONENT_SWIZZLE_B:
+        return "spvSwizzle::blue";
+    case MSL_COMPONENT_SWIZZLE_A:
+        return "spvSwizzle::alpha";
+    default:
+        SPIRV_CROSS_THROW("Invalid component swizzle.");
+        return "";
+    }
+}
+
+// Returns a string representation of the ID, usable as a function arg.
+// Manufacture automatic sampler arg for SampledImage texture.
+// Besides the argument itself, the result may carry extra comma-separated arguments:
+// plane textures, the sampler, Y'CbCr sampler info, a swizzle constant, a buffer size
+// and the "_atomic" companion of an image used with atomics.
+string CompilerMSL::to_func_call_arg(const SPIRFunction::Parameter &arg, uint32_t id)
+{
+    string arg_str;
+
+    auto &type = expression_type(id);
+    bool is_dynamic_img_sampler = has_extended_decoration(arg.id, SPIRVCrossDecorationDynamicImageSampler);
+    // If the argument *itself* is a "dynamic" combined-image sampler, then we can just pass that around.
+    bool arg_is_dynamic_img_sampler = has_extended_decoration(id, SPIRVCrossDecorationDynamicImageSampler);
+    if (is_dynamic_img_sampler && !arg_is_dynamic_img_sampler)
+        arg_str = join("spvDynamicImageSampler<", type_to_glsl(get<SPIRType>(type.image.type)), ">(");
+
+    auto *c = maybe_get<SPIRConstant>(id);
+    if (msl_options.force_native_arrays && c && !get<SPIRType>(c->constant_type).array.empty())
+    {
+        // If we are passing a constant array directly to a function for some reason,
+        // the callee will expect an argument in thread const address space
+        // (since we can only bind to arrays with references in MSL).
+        // To resolve this, we must emit a copy in this address space.
+        // This kind of code gen should be rare enough that performance is not a real concern.
+        // Inline the SPIR-V to avoid this kind of suboptimal codegen.
+        //
+        // We risk calling this inside a continue block (invalid code),
+        // so just create a thread local copy in the current function.
+        arg_str = join("_", id, "_array_copy");
+        auto &constants = current_function->constant_arrays_needed_on_stack;
+        auto itr = find(begin(constants), end(constants), ID(id));
+        if (itr == end(constants))
+        {
+            force_recompile();
+            constants.push_back(id);
+        }
+    }
+    else
+        arg_str += CompilerGLSL::to_func_call_arg(arg, id);
+
+    // Need to check the base variable in case we need to apply a qualified alias.
+    uint32_t var_id = 0;
+    auto *var = maybe_get<SPIRVariable>(id);
+    if (var)
+        var_id = var->basevariable;
+
+    if (!arg_is_dynamic_img_sampler)
+    {
+        auto *constexpr_sampler = find_constexpr_sampler(var_id ? var_id : id);
+        if (type.basetype == SPIRType::SampledImage)
+        {
+            // Manufacture automatic plane args for multiplanar texture
+            uint32_t planes = 1;
+            if (constexpr_sampler && constexpr_sampler->ycbcr_conversion_enable)
+            {
+                planes = constexpr_sampler->planes;
+                // If this parameter isn't aliasing a global, then we need to use
+                // the special "dynamic image-sampler" class to pass it--and we need
+                // to use it for *every* non-alias parameter, in case a combined
+                // image-sampler with a Y'CbCr conversion is passed. Hopefully, this
+                // pathological case is so rare that it should never be hit in practice.
+                if (!arg.alias_global_variable)
+                    add_spv_func_and_recompile(SPVFuncImplDynamicImageSampler);
+            }
+            for (uint32_t i = 1; i < planes; i++)
+                arg_str += join(", ", CompilerGLSL::to_func_call_arg(arg, id), plane_name_suffix, i);
+            // Manufacture automatic sampler arg if the arg is a SampledImage texture.
+            if (type.image.dim != DimBuffer)
+                arg_str += ", " + to_sampler_expression(var_id ? var_id : id);
+
+            // Add sampler Y'CbCr conversion info if we have it
+            if (is_dynamic_img_sampler && constexpr_sampler && constexpr_sampler->ycbcr_conversion_enable)
+            {
+                // Only settings that differ from the defaults are listed.
+                SmallVector<string> samp_args;
+
+                switch (constexpr_sampler->resolution)
+                {
+                case MSL_FORMAT_RESOLUTION_444:
+                    // Default
+                    break;
+                case MSL_FORMAT_RESOLUTION_422:
+                    samp_args.push_back("spvFormatResolution::_422");
+                    break;
+                case MSL_FORMAT_RESOLUTION_420:
+                    samp_args.push_back("spvFormatResolution::_420");
+                    break;
+                default:
+                    SPIRV_CROSS_THROW("Invalid format resolution.");
+                }
+
+                if (constexpr_sampler->chroma_filter != MSL_SAMPLER_FILTER_NEAREST)
+                    samp_args.push_back("spvChromaFilter::linear");
+
+                if (constexpr_sampler->x_chroma_offset != MSL_CHROMA_LOCATION_COSITED_EVEN)
+                    samp_args.push_back("spvXChromaLocation::midpoint");
+                if (constexpr_sampler->y_chroma_offset != MSL_CHROMA_LOCATION_COSITED_EVEN)
+                    samp_args.push_back("spvYChromaLocation::midpoint");
+                switch (constexpr_sampler->ycbcr_model)
+                {
+                case MSL_SAMPLER_YCBCR_MODEL_CONVERSION_RGB_IDENTITY:
+                    // Default
+                    break;
+                case MSL_SAMPLER_YCBCR_MODEL_CONVERSION_YCBCR_IDENTITY:
+                    samp_args.push_back("spvYCbCrModelConversion::ycbcr_identity");
+                    break;
+                case MSL_SAMPLER_YCBCR_MODEL_CONVERSION_YCBCR_BT_709:
+                    samp_args.push_back("spvYCbCrModelConversion::ycbcr_bt_709");
+                    break;
+                case MSL_SAMPLER_YCBCR_MODEL_CONVERSION_YCBCR_BT_601:
+                    samp_args.push_back("spvYCbCrModelConversion::ycbcr_bt_601");
+                    break;
+                case MSL_SAMPLER_YCBCR_MODEL_CONVERSION_YCBCR_BT_2020:
+                    samp_args.push_back("spvYCbCrModelConversion::ycbcr_bt_2020");
+                    break;
+                default:
+                    SPIRV_CROSS_THROW("Invalid Y'CbCr model conversion.");
+                }
+                if (constexpr_sampler->ycbcr_range != MSL_SAMPLER_YCBCR_RANGE_ITU_FULL)
+                    samp_args.push_back("spvYCbCrRange::itu_narrow");
+                samp_args.push_back(join("spvComponentBits(", constexpr_sampler->bpc, ")"));
+                arg_str += join(", spvYCbCrSampler(", merge(samp_args), ")");
+            }
+        }
+
+        if (is_dynamic_img_sampler && constexpr_sampler && constexpr_sampler->ycbcr_conversion_enable)
+            // Pack the four component swizzles into one uint, component 0 in the low byte.
+            arg_str += join(", (uint(", create_swizzle(constexpr_sampler->swizzle[3]), ") << 24) | (uint(",
+                            create_swizzle(constexpr_sampler->swizzle[2]), ") << 16) | (uint(",
+                            create_swizzle(constexpr_sampler->swizzle[1]), ") << 8) | uint(",
+                            create_swizzle(constexpr_sampler->swizzle[0]), ")");
+        else if (msl_options.swizzle_texture_samples && has_sampled_images && is_sampled_image_type(type))
+            arg_str += ", " + to_swizzle_expression(var_id ? var_id : id);
+
+        if (buffers_requiring_array_length.count(var_id))
+            arg_str += ", " + to_buffer_size_expression(var_id ? var_id : id);
+
+        // Closes the spvDynamicImageSampler<...>( constructor opened at the top.
+        if (is_dynamic_img_sampler)
+            arg_str += ")";
+    }
+
+    // Emulate texture2D atomic operations
+    auto *backing_var = maybe_get_backing_variable(var_id);
+    if (backing_var && atomic_image_vars.count(backing_var->self))
+    {
+        arg_str += ", " + to_expression(var_id) + "_atomic";
+    }
+
+    return arg_str;
+}
+
+// If the ID represents a sampled image that has been assigned a sampler already,
+// generate an expression for the sampler, otherwise generate a fake sampler name
+// by appending a suffix to the expression constructed from the ID.
+string CompilerMSL::to_sampler_expression(uint32_t id)
+{
+    auto *combined = maybe_get<SPIRCombinedImageSampler>(id);
+    auto expr = to_expression(combined ? combined->image : VariableID(id));
+    auto index = expr.find_first_of('[');
+
+    uint32_t samp_id = 0;
+    if (combined)
+        samp_id = combined->sampler;
+
+    if (index == string::npos)
+        return samp_id ? to_expression(samp_id) : expr + sampler_name_suffix;
+    else
+    {
+        // Insert the suffix before the array subscript: name[i] -> name<suffix>[i].
+        auto image_expr = expr.substr(0, index);
+        auto array_expr = expr.substr(index);
+        return samp_id ? to_expression(samp_id) : (image_expr + sampler_name_suffix + array_expr);
+    }
+}
+
+// Returns the name of the swizzle constant that accompanies the image identified by id.
+// The name is the image expression with swizzle_name_suffix inserted before any array subscript.
+string CompilerMSL::to_swizzle_expression(uint32_t id)
+{
+    auto *combined = maybe_get<SPIRCombinedImageSampler>(id);
+
+    auto expr = to_expression(combined ? combined->image : VariableID(id));
+    auto index = expr.find_first_of('[');
+
+    // If an image is part of an argument buffer translate this to a legal identifier.
+    // Only '.' characters in front of the subscript are rewritten.
+    string::size_type period = 0;
+    while ((period = expr.find_first_of('.', period)) != string::npos && period < index)
+        expr[period] = '_';
+
+    if (index == string::npos)
+        return expr + swizzle_name_suffix;
+    else
+    {
+        auto image_expr = expr.substr(0, index);
+        auto array_expr = expr.substr(index);
+        return image_expr + swizzle_name_suffix + array_expr;
+    }
+}
+
+// Returns the name of the buffer-size variable that accompanies the buffer identified by id.
+// The name is the buffer expression with buffer_size_name_suffix inserted before any array subscript.
+string CompilerMSL::to_buffer_size_expression(uint32_t id)
+{
+    auto expr = to_expression(id);
+    // NOTE(review): the subscript position is taken before expr is rewritten below. If
+    // address_of_expression() can change the length of an expression that also contains '[',
+    // this position would be stale -- confirm that combination cannot occur.
+    auto index = expr.find_first_of('[');
+
+    // This is quite crude, but we need to translate the reference name (*spvDescriptorSetN.name) to
+    // the pointer expression spvDescriptorSetN.name to make a reasonable expression here.
+    // This only happens if we have argument buffers and we are using OpArrayLength on a lone SSBO in that set.
+    if (expr.size() >= 3 && expr[0] == '(' && expr[1] == '*')
+        expr = address_of_expression(expr);
+
+    // If a buffer is part of an argument buffer translate this to a legal identifier.
+    for (auto &c : expr)
+        if (c == '.')
+            c = '_';
+
+    if (index == string::npos)
+        return expr + buffer_size_name_suffix;
+    else
+    {
+        auto buffer_expr = expr.substr(0, index);
+        auto array_expr = expr.substr(index);
+        return buffer_expr + buffer_size_name_suffix + array_expr;
+    }
+}
+
+// Checks whether the type is a Block all of whose members have DecorationPatch.
+bool CompilerMSL::is_patch_block(const SPIRType &type)
+{
+    if (!has_decoration(type.self, DecorationBlock))
+        return false;
+
+    // One member without DecorationPatch is enough to reject the block.
+    // A block with no members passes.
+    for (uint32_t i = 0; i < type.member_types.size(); i++)
+    {
+        if (!has_member_decoration(type.self, i, DecorationPatch))
+            return false;
+    }
+
+    return true;
+}
+
+// Checks whether the ID is a row_major matrix that requires conversion before use
+bool CompilerMSL::is_non_native_row_major_matrix(uint32_t id)
+{
+    // An expression records its own transpose requirement; any other ID falls back
+    // to its RowMajor decoration.
+    auto *e = maybe_get<SPIRExpression>(id);
+    if (e)
+        return e->need_transpose;
+    else
+        return has_decoration(id, DecorationRowMajor);
+}
+
+// Checks whether the member is a row_major matrix that requires conversion before use
+bool CompilerMSL::member_is_non_native_row_major_matrix(const SPIRType &type, uint32_t index)
+{
+    return has_member_decoration(type.self, index, DecorationRowMajor);
+}
+
+// Wraps a row-major matrix expression in transpose(), unpacking it first when it has a
+// remapped physical type or is packed. Non-matrix types are delegated to the GLSL backend.
+string CompilerMSL::convert_row_major_matrix(string exp_str, const SPIRType &exp_type, uint32_t physical_type_id,
+                                             bool is_packed)
+{
+    if (!is_matrix(exp_type))
+    {
+        return CompilerGLSL::convert_row_major_matrix(move(exp_str), exp_type, physical_type_id, is_packed);
+    }
+    else
+    {
+        strip_enclosed_expression(exp_str);
+        if (physical_type_id != 0 || is_packed)
+            exp_str = unpack_expression_type(exp_str, exp_type, physical_type_id, is_packed, true);
+        return join("transpose(", exp_str, ")");
+    }
+}
+
+// Called automatically at the end of the entry point function
+void CompilerMSL::emit_fixup()
+{
+    // The fixups only apply when a position output exists and output is not being
+    // captured to a buffer.
+    if (is_vertex_like_shader() && stage_out_var_id && !qual_pos_var_name.empty() && !capture_output_to_buffer)
+    {
+        if (options.vertex.fixup_clipspace)
+            statement(qual_pos_var_name, ".z = (", qual_pos_var_name, ".z + ", qual_pos_var_name,
+                      ".w) * 0.5; // Adjust clip-space for Metal");
+
+        if (options.vertex.flip_vert_y)
+            statement(qual_pos_var_name, ".y = -(", qual_pos_var_name, ".y);", " // Invert Y-axis for Metal");
+    }
+}
+
+// Return a string defining a structure member, with padding and packing.
+string CompilerMSL::to_struct_member(const SPIRType &type, uint32_t member_type_id, uint32_t index,
+                                     const string &qualifier)
+{
+    // A remapped member is declared with its physical type instead of its logical one.
+    if (member_is_remapped_physical_type(type, index))
+        member_type_id = get_extended_member_decoration(type.self, index, SPIRVCrossDecorationPhysicalTypeID);
+    auto &physical_type = get<SPIRType>(member_type_id);
+
+    // If this member is packed, mark it as so.
+    string pack_pfx;
+
+    // Allow Metal to use the array<T> template to make arrays a value type
+    uint32_t orig_id = 0;
+    if (has_extended_member_decoration(type.self, index, SPIRVCrossDecorationInterfaceOrigID))
+        orig_id = get_extended_member_decoration(type.self, index, SPIRVCrossDecorationInterfaceOrigID);
+
+    bool row_major = false;
+    if (is_matrix(physical_type))
+        row_major = has_member_decoration(type.self, index, DecorationRowMajor);
+
+    SPIRType row_major_physical_type;
+    const SPIRType *declared_type = &physical_type;
+
+    // If a struct is being declared with physical layout,
+    // do not use array<T> wrappers.
+    // This avoids a lot of complicated cases with packed vectors and matrices,
+    // and generally we cannot copy full arrays in and out of buffers into Function
+    // address space.
+    // Array of resources should also be declared as builtin arrays.
+    // The flag is reset to false just before returning.
+    if (has_member_decoration(type.self, index, DecorationOffset))
+        is_using_builtin_array = true;
+    else if (has_extended_member_decoration(type.self, index, SPIRVCrossDecorationResourceIndexPrimary))
+        is_using_builtin_array = true;
+
+    if (member_is_packed_physical_type(type, index))
+    {
+        // If we're packing a matrix, output an appropriate typedef
+        if (physical_type.basetype == SPIRType::Struct)
+        {
+            SPIRV_CROSS_THROW("Cannot emit a packed struct currently.");
+        }
+        else if (is_matrix(physical_type))
+        {
+            uint32_t rows = physical_type.vecsize;
+            uint32_t cols = physical_type.columns;
+            pack_pfx = "packed_";
+            if (row_major)
+            {
+                // These are stored transposed.
+                rows = physical_type.columns;
+                cols = physical_type.vecsize;
+                pack_pfx = "packed_rm_";
+            }
+            // The typedef declares an array of `cols` packed vectors with `rows` elements each.
+            string base_type = physical_type.width == 16 ? "half" : "float";
+            string td_line = "typedef ";
+            td_line += "packed_" + base_type + to_string(rows);
+            td_line += " " + pack_pfx;
+            // Use the actual matrix size here.
+            td_line += base_type + to_string(physical_type.columns) + "x" + to_string(physical_type.vecsize);
+            td_line += "[" + to_string(cols) + "]";
+            td_line += ";";
+            add_typedef_line(td_line);
+        }
+        else if (!is_scalar(physical_type)) // scalar type is already packed.
+            pack_pfx = "packed_";
+    }
+    else if (row_major)
+    {
+        // Need to declare type with flipped vecsize/columns.
+        row_major_physical_type = physical_type;
+        swap(row_major_physical_type.vecsize, row_major_physical_type.columns);
+        declared_type = &row_major_physical_type;
+    }
+
+    // Very specifically, image load-store in argument buffers are disallowed on MSL on iOS.
+    if (msl_options.is_ios() && physical_type.basetype == SPIRType::Image && physical_type.image.sampled == 2)
+    {
+        if (!has_decoration(orig_id, DecorationNonWritable))
+            SPIRV_CROSS_THROW("Writable images are not allowed in argument buffers on iOS.");
+    }
+
+    // Array information is baked into these types.
+    string array_type;
+    if (physical_type.basetype != SPIRType::Image && physical_type.basetype != SPIRType::Sampler &&
+        physical_type.basetype != SPIRType::SampledImage)
+    {
+        BuiltIn builtin = BuiltInMax;
+        if (is_member_builtin(type, index, &builtin))
+            is_using_builtin_array = true;
+        array_type = type_to_array_glsl(physical_type);
+    }
+
+    auto result = join(pack_pfx, type_to_glsl(*declared_type, orig_id), " ", qualifier, to_member_name(type, index),
+                       member_attribute_qualifier(type, index), array_type, ";");
+
+    is_using_builtin_array = false;
+    return result;
+}
+
+// Emit a structure member, padding and packing to maintain the correct member alignments.
+void CompilerMSL::emit_struct_member(const SPIRType &type, uint32_t member_type_id, uint32_t index,
+                                     const string &qualifier, uint32_t)
+{
+	// If this member requires padding to maintain its declared offset, emit a dummy padding member before it.
+	if (has_extended_member_decoration(type.self, index, SPIRVCrossDecorationPaddingTarget))
+	{
+		uint32_t pad_len = get_extended_member_decoration(type.self, index, SPIRVCrossDecorationPaddingTarget);
+		statement("char _m", index, "_pad", "[", pad_len, "];");
+	}
+
+	// Handle HLSL-style 0-based vertex/instance index.
+	// builtin_declaration is raised only for the duration of the member declaration.
+	builtin_declaration = true;
+	statement(to_struct_member(type, member_type_id, index, qualifier));
+	builtin_declaration = false;
+}
+
+// Emits trailing padding so the struct reaches the size recorded in its PaddingTarget
+// extended decoration. Throws if the struct is already larger than the target.
+void CompilerMSL::emit_struct_padding_target(const SPIRType &type)
+{
+	uint32_t struct_size = get_declared_struct_size_msl(type, true, true);
+	uint32_t target_size = get_extended_decoration(type.self, SPIRVCrossDecorationPaddingTarget);
+	if (target_size < struct_size)
+		SPIRV_CROSS_THROW("Cannot pad with negative bytes.");
+	else if (target_size > struct_size)
+		statement("char _m0_final_padding[", target_size - struct_size, "];");
+}
+
+// Return a MSL qualifier for the specified function attribute member.
+// The result is either an empty string or a " [[...]]" attribute chosen from the entry
+// point's execution model, the storage class of the enclosing type, and whether the
+// member is a builtin. Members with a primary resource index get an [[id(n)]] attribute
+// regardless of stage.
+string CompilerMSL::member_attribute_qualifier(const SPIRType &type, uint32_t index)
+{
+	auto &execution = get_entry_point();
+
+	uint32_t mbr_type_id = type.member_types[index];
+	// The template argument is required: it cannot be deduced from the uint32_t ID.
+	auto &mbr_type = get<SPIRType>(mbr_type_id);
+
+	BuiltIn builtin = BuiltInMax;
+	bool is_builtin = is_member_builtin(type, index, &builtin);
+
+	if (has_extended_member_decoration(type.self, index, SPIRVCrossDecorationResourceIndexPrimary))
+	{
+		string quals = join(
+		    " [[id(", get_extended_member_decoration(type.self, index, SPIRVCrossDecorationResourceIndexPrimary), ")");
+		if (interlocked_resources.count(
+		        get_extended_member_decoration(type.self, index, SPIRVCrossDecorationInterfaceOrigID)))
+			quals += ", raster_order_group(0)";
+		quals += "]]";
+		return quals;
+	}
+
+	// Vertex function inputs
+	if (execution.model == ExecutionModelVertex && type.storage == StorageClassInput)
+	{
+		if (is_builtin)
+		{
+			switch (builtin)
+			{
+			case BuiltInVertexId:
+			case BuiltInVertexIndex:
+			case BuiltInBaseVertex:
+			case BuiltInInstanceId:
+			case BuiltInInstanceIndex:
+			case BuiltInBaseInstance:
+				if (msl_options.vertex_for_tessellation)
+					return "";
+				return string(" [[") + builtin_qualifier(builtin) + "]]";
+
+			case BuiltInDrawIndex:
+				SPIRV_CROSS_THROW("DrawIndex is not supported in MSL.");
+
+			default:
+				return "";
+			}
+		}
+		uint32_t locn = get_ordered_member_location(type.self, index);
+		if (locn != k_unknown_location)
+			return string(" [[attribute(") + convert_to_string(locn) + ")]]";
+	}
+
+	// Vertex and tessellation evaluation function outputs
+	if (((execution.model == ExecutionModelVertex && !msl_options.vertex_for_tessellation) ||
+	     execution.model == ExecutionModelTessellationEvaluation) &&
+	    type.storage == StorageClassOutput)
+	{
+		if (is_builtin)
+		{
+			switch (builtin)
+			{
+			case BuiltInPointSize:
+				// Only mark the PointSize builtin if really rendering points.
+				// Some shaders may include a PointSize builtin even when used to render
+				// non-point topologies, and Metal will reject this builtin when compiling
+				// the shader into a render pipeline that uses a non-point topology.
+				return msl_options.enable_point_size_builtin ? (string(" [[") + builtin_qualifier(builtin) + "]]") : "";
+
+			case BuiltInViewportIndex:
+				if (!msl_options.supports_msl_version(2, 0))
+					SPIRV_CROSS_THROW("ViewportIndex requires Metal 2.0.");
+				/* fallthrough */
+			case BuiltInPosition:
+			case BuiltInLayer:
+				return string(" [[") + builtin_qualifier(builtin) + "]]" + (mbr_type.array.empty() ? "" : " ");
+
+			case BuiltInClipDistance:
+				if (has_member_decoration(type.self, index, DecorationLocation))
+					return join(" [[user(clip", get_member_decoration(type.self, index, DecorationLocation), ")]]");
+				else
+					return string(" [[") + builtin_qualifier(builtin) + "]]" + (mbr_type.array.empty() ? "" : " ");
+
+			default:
+				return "";
+			}
+		}
+		// Start from "unknown" so the comparison below never reads an indeterminate value,
+		// even if the location lookup does not write the component.
+		uint32_t comp = k_unknown_component;
+		uint32_t locn = get_ordered_member_location(type.self, index, &comp);
+		if (locn != k_unknown_location)
+		{
+			if (comp != k_unknown_component)
+				return string(" [[user(locn") + convert_to_string(locn) + "_" + convert_to_string(comp) + ")]]";
+			else
+				return string(" [[user(locn") + convert_to_string(locn) + ")]]";
+		}
+	}
+
+	// Tessellation control function inputs
+	if (execution.model == ExecutionModelTessellationControl && type.storage == StorageClassInput)
+	{
+		if (is_builtin)
+		{
+			switch (builtin)
+			{
+			case BuiltInInvocationId:
+			case BuiltInPrimitiveId:
+				if (msl_options.multi_patch_workgroup)
+					return "";
+				return string(" [[") + builtin_qualifier(builtin) + "]]" + (mbr_type.array.empty() ? "" : " ");
+			case BuiltInSubgroupLocalInvocationId: // FIXME: Should work in any stage
+			case BuiltInSubgroupSize: // FIXME: Should work in any stage
+				if (msl_options.emulate_subgroups)
+					return "";
+				return string(" [[") + builtin_qualifier(builtin) + "]]" + (mbr_type.array.empty() ? "" : " ");
+			case BuiltInPatchVertices:
+				return "";
+			// Others come from stage input.
+			default:
+				break;
+			}
+		}
+		if (msl_options.multi_patch_workgroup)
+			return "";
+		uint32_t locn = get_ordered_member_location(type.self, index);
+		if (locn != k_unknown_location)
+			return string(" [[attribute(") + convert_to_string(locn) + ")]]";
+	}
+
+	// Tessellation control function outputs
+	if (execution.model == ExecutionModelTessellationControl && type.storage == StorageClassOutput)
+	{
+		// For this type of shader, we always arrange for it to capture its
+		// output to a buffer. For this reason, qualifiers are irrelevant here.
+		return "";
+	}
+
+	// Tessellation evaluation function inputs
+	if (execution.model == ExecutionModelTessellationEvaluation && type.storage == StorageClassInput)
+	{
+		if (is_builtin)
+		{
+			switch (builtin)
+			{
+			case BuiltInPrimitiveId:
+			case BuiltInTessCoord:
+				return string(" [[") + builtin_qualifier(builtin) + "]]";
+			case BuiltInPatchVertices:
+				return "";
+			// Others come from stage input.
+			default:
+				break;
+			}
+		}
+		// The special control point array must not be marked with an attribute.
+		if (get_type(type.member_types[index]).basetype == SPIRType::ControlPointArray)
+			return "";
+		uint32_t locn = get_ordered_member_location(type.self, index);
+		if (locn != k_unknown_location)
+			return string(" [[attribute(") + convert_to_string(locn) + ")]]";
+	}
+
+	// Tessellation evaluation function outputs were handled above.
+
+	// Fragment function inputs
+	if (execution.model == ExecutionModelFragment && type.storage == StorageClassInput)
+	{
+		string quals;
+		if (is_builtin)
+		{
+			switch (builtin)
+			{
+			case BuiltInViewIndex:
+				if (!msl_options.multiview || !msl_options.multiview_layered_rendering)
+					break;
+				/* fallthrough */
+			case BuiltInFrontFacing:
+			case BuiltInPointCoord:
+			case BuiltInFragCoord:
+			case BuiltInSampleId:
+			case BuiltInSampleMask:
+			case BuiltInLayer:
+			case BuiltInBaryCoordNV:
+			case BuiltInBaryCoordNoPerspNV:
+				quals = builtin_qualifier(builtin);
+				break;
+
+			case BuiltInClipDistance:
+				return join(" [[user(clip", get_member_decoration(type.self, index, DecorationLocation), ")]]");
+
+			default:
+				break;
+			}
+		}
+		else
+		{
+			// Start from "unknown" so the comparison below never reads an indeterminate value.
+			uint32_t comp = k_unknown_component;
+			uint32_t locn = get_ordered_member_location(type.self, index, &comp);
+			if (locn != k_unknown_location)
+			{
+				// For user-defined attributes, this is fine. From Vulkan spec:
+				// A user-defined output variable is considered to match an input variable in the subsequent stage if
+				// the two variables are declared with the same Location and Component decoration and match in type
+				// and decoration, except that interpolation decorations are not required to match. For the purposes
+				// of interface matching, variables declared without a Component decoration are considered to have a
+				// Component decoration of zero.
+
+				if (comp != k_unknown_component && comp != 0)
+					quals = string("user(locn") + convert_to_string(locn) + "_" + convert_to_string(comp) + ")";
+				else
+					quals = string("user(locn") + convert_to_string(locn) + ")";
+			}
+		}
+
+		if (builtin == BuiltInBaryCoordNV || builtin == BuiltInBaryCoordNoPerspNV)
+		{
+			if (has_member_decoration(type.self, index, DecorationFlat) ||
+			    has_member_decoration(type.self, index, DecorationCentroid) ||
+			    has_member_decoration(type.self, index, DecorationSample) ||
+			    has_member_decoration(type.self, index, DecorationNoPerspective))
+			{
+				// NoPerspective is baked into the builtin type.
+				SPIRV_CROSS_THROW(
+				    "Flat, Centroid, Sample, NoPerspective decorations are not supported for BaryCoord inputs.");
+			}
+		}
+
+		// Don't bother decorating integers with the 'flat' attribute; it's
+		// the default (in fact, the only option). Also don't bother with the
+		// FragCoord builtin; it's always noperspective on Metal.
+		if (!type_is_integral(mbr_type) && (!is_builtin || builtin != BuiltInFragCoord))
+		{
+			if (has_member_decoration(type.self, index, DecorationFlat))
+			{
+				if (!quals.empty())
+					quals += ", ";
+				quals += "flat";
+			}
+			else if (has_member_decoration(type.self, index, DecorationCentroid))
+			{
+				if (!quals.empty())
+					quals += ", ";
+				if (has_member_decoration(type.self, index, DecorationNoPerspective))
+					quals += "centroid_no_perspective";
+				else
+					quals += "centroid_perspective";
+			}
+			else if (has_member_decoration(type.self, index, DecorationSample))
+			{
+				if (!quals.empty())
+					quals += ", ";
+				if (has_member_decoration(type.self, index, DecorationNoPerspective))
+					quals += "sample_no_perspective";
+				else
+					quals += "sample_perspective";
+			}
+			else if (has_member_decoration(type.self, index, DecorationNoPerspective))
+			{
+				if (!quals.empty())
+					quals += ", ";
+				quals += "center_no_perspective";
+			}
+		}
+
+		if (!quals.empty())
+			return " [[" + quals + "]]";
+	}
+
+	// Fragment function outputs
+	if (execution.model == ExecutionModelFragment && type.storage == StorageClassOutput)
+	{
+		if (is_builtin)
+		{
+			switch (builtin)
+			{
+			case BuiltInFragStencilRefEXT:
+				// Similar to PointSize, only mark FragStencilRef if there's a stencil buffer.
+				// Some shaders may include a FragStencilRef builtin even when used to render
+				// without a stencil attachment, and Metal will reject this builtin
+				// when compiling the shader into a render pipeline that does not set
+				// stencilAttachmentPixelFormat.
+				if (!msl_options.enable_frag_stencil_ref_builtin)
+					return "";
+				if (!msl_options.supports_msl_version(2, 1))
+					SPIRV_CROSS_THROW("Stencil export only supported in MSL 2.1 and up.");
+				return string(" [[") + builtin_qualifier(builtin) + "]]";
+
+			case BuiltInFragDepth:
+				// Ditto FragDepth.
+				if (!msl_options.enable_frag_depth_builtin)
+					return "";
+				/* fallthrough */
+			case BuiltInSampleMask:
+				return string(" [[") + builtin_qualifier(builtin) + "]]";
+
+			default:
+				return "";
+			}
+		}
+		uint32_t locn = get_ordered_member_location(type.self, index);
+		// Metal will likely complain about missing color attachments, too.
+		// The shift is unsigned and only performed for locations that have a bit in the mask;
+		// a signed "1 << locn" is undefined for locn >= 31. Locations with no mask bit cannot
+		// be disabled through the mask and fall through to be declared normally.
+		if (locn != k_unknown_location && locn < 32 && !(msl_options.enable_frag_output_mask & (1u << locn)))
+			return "";
+		if (locn != k_unknown_location && has_member_decoration(type.self, index, DecorationIndex))
+			return join(" [[color(", locn, "), index(", get_member_decoration(type.self, index, DecorationIndex),
+			            ")]]");
+		else if (locn != k_unknown_location)
+			return join(" [[color(", locn, ")]]");
+		else if (has_member_decoration(type.self, index, DecorationIndex))
+			return join(" [[index(", get_member_decoration(type.self, index, DecorationIndex), ")]]");
+		else
+			return "";
+	}
+
+	// Compute function inputs
+	if (execution.model == ExecutionModelGLCompute && type.storage == StorageClassInput)
+	{
+		if (is_builtin)
+		{
+			switch (builtin)
+			{
+			case BuiltInNumSubgroups:
+			case BuiltInSubgroupId:
+			case BuiltInSubgroupLocalInvocationId: // FIXME: Should work in any stage
+			case BuiltInSubgroupSize: // FIXME: Should work in any stage
+				if (msl_options.emulate_subgroups)
+					break;
+				/* fallthrough */
+			case BuiltInGlobalInvocationId:
+			case BuiltInWorkgroupId:
+			case BuiltInNumWorkgroups:
+			case BuiltInLocalInvocationId:
+			case BuiltInLocalInvocationIndex:
+				return string(" [[") + builtin_qualifier(builtin) + "]]";
+
+			default:
+				return "";
+			}
+		}
+	}
+
+	return "";
+}
+
+// Returns the location decoration of the member with the specified index in the specified type.
+// If the location of the member has been explicitly set, that location is used. If not, this
+// function assumes the members are ordered in their location order, and simply returns the
+// index as the location.
+uint32_t CompilerMSL::get_ordered_member_location(uint32_t type_id, uint32_t index, uint32_t *comp)
+{
+	// Always hand the caller a defined component, including on the fallback path where
+	// the member has no decoration record; callers compare it against k_unknown_component.
+	if (comp)
+		*comp = k_unknown_component;
+
+	auto &m = ir.meta[type_id];
+	if (index < m.members.size())
+	{
+		auto &dec = m.members[index];
+		if (comp && dec.decoration_flags.get(DecorationComponent))
+			*comp = dec.component;
+		if (dec.decoration_flags.get(DecorationLocation))
+			return dec.location;
+	}
+
+	return index;
+}
+
+// Returns the type declaration for a function, including the
+// entry type if the current function is the entry point function
+string CompilerMSL::func_type_decl(SPIRType &type)
+{
+	// The regular function return type. If not processing the entry point function, that's all we need
+	string return_type = type_to_glsl(type) + type_to_array_glsl(type);
+	if (!processing_entry_point)
+		return return_type;
+
+	// If an outgoing interface block has been defined, and it should be returned, override the entry point return type
+	bool ep_should_return_output = !get_is_rasterization_disabled();
+	if (stage_out_var_id && ep_should_return_output)
+		return_type = type_to_glsl(get_stage_out_struct_type()) + type_to_array_glsl(type);
+
+	// Prepend an entry type, based on the execution model
+	string entry_type;
+	auto &execution = get_entry_point();
+	switch (execution.model)
+	{
+	case ExecutionModelVertex:
+		if (msl_options.vertex_for_tessellation && !msl_options.supports_msl_version(1, 2))
+			SPIRV_CROSS_THROW("Tessellation requires Metal 1.2.");
+		entry_type = msl_options.vertex_for_tessellation ? "kernel" : "vertex";
+		break;
+	case ExecutionModelTessellationEvaluation:
+		if (!msl_options.supports_msl_version(1, 2))
+			SPIRV_CROSS_THROW("Tessellation requires Metal 1.2.");
+		if (execution.flags.get(ExecutionModeIsolines))
+			SPIRV_CROSS_THROW("Metal does not support isoline tessellation.");
+		// On iOS the patch attribute is emitted without the control point count.
+		if (msl_options.is_ios())
+			entry_type =
+			    join("[[ patch(", execution.flags.get(ExecutionModeTriangles) ? "triangle" : "quad", ") ]] vertex");
+		else
+			entry_type = join("[[ patch(", execution.flags.get(ExecutionModeTriangles) ? "triangle" : "quad", ", ",
+			                  execution.output_vertices, ") ]] vertex");
+		break;
+	case ExecutionModelFragment:
+		entry_type = execution.flags.get(ExecutionModeEarlyFragmentTests) ||
+		                     execution.flags.get(ExecutionModePostDepthCoverage) ?
+		                 "[[ early_fragment_tests ]] fragment" :
+		                 "fragment";
+		break;
+	case ExecutionModelTessellationControl:
+		if (!msl_options.supports_msl_version(1, 2))
+			SPIRV_CROSS_THROW("Tessellation requires Metal 1.2.");
+		if (execution.flags.get(ExecutionModeIsolines))
+			SPIRV_CROSS_THROW("Metal does not support isoline tessellation.");
+		/* fallthrough */
+	case ExecutionModelGLCompute:
+	case ExecutionModelKernel:
+		entry_type = "kernel";
+		break;
+	default:
+		entry_type = "unknown";
+		break;
+	}
+
+	return entry_type + " " + return_type;
+}
+
+// In MSL, address space qualifiers are required for all pointer or reference variables
+string CompilerMSL::get_argument_address_space(const SPIRVariable &argument)
+{
+	// The template argument is required: it cannot be deduced from the ID.
+	const auto &type = get<SPIRType>(argument.basetype);
+	return get_type_address_space(type, argument.self, true);
+}
+
+// Returns the MSL address space qualifier for the given type and ID, prefixed with
+// "volatile " when the ID is decorated Volatile or Coherent. When no storage-class rule
+// applies, pointers (and ControlPointArray arguments) get "thread" and plain values get
+// an empty address space.
+string CompilerMSL::get_type_address_space(const SPIRType &type, uint32_t id, bool argument)
+{
+	// This can be called for variable pointer contexts as well, so be very careful about which method we choose.
+	Bitset flags;
+	auto *var = maybe_get<SPIRVariable>(id);
+	if (var && type.basetype == SPIRType::Struct &&
+	    (has_decoration(type.self, DecorationBlock) || has_decoration(type.self, DecorationBufferBlock)))
+		flags = get_buffer_block_flags(id);
+	else
+		flags = get_decoration_bitset(id);
+
+	const char *addr_space = nullptr;
+	switch (type.storage)
+	{
+	case StorageClassWorkgroup:
+		addr_space = "threadgroup";
+		break;
+
+	case StorageClassStorageBuffer:
+	{
+		// For arguments from variable pointers, we use the write count deduction, so
+		// we should not assume any constness here. Only for global SSBOs.
+		bool readonly = false;
+		if (!var || has_decoration(type.self, DecorationBlock))
+			readonly = flags.get(DecorationNonWritable);
+
+		addr_space = readonly ? "const device" : "device";
+		break;
+	}
+
+	case StorageClassUniform:
+	case StorageClassUniformConstant:
+	case StorageClassPushConstant:
+		if (type.basetype == SPIRType::Struct)
+		{
+			bool ssbo = has_decoration(type.self, DecorationBufferBlock);
+			if (ssbo)
+				addr_space = flags.get(DecorationNonWritable) ? "const device" : "device";
+			else
+				addr_space = "constant";
+		}
+		else if (!argument)
+		{
+			addr_space = "constant";
+		}
+		else if (type_is_msl_framebuffer_fetch(type))
+		{
+			// Subpass inputs are passed around by value.
+			addr_space = "";
+		}
+		break;
+
+	case StorageClassFunction:
+	case StorageClassGeneric:
+		break;
+
+	case StorageClassInput:
+		if (get_execution_model() == ExecutionModelTessellationControl && var &&
+		    var->basevariable == stage_in_ptr_var_id)
+			addr_space = msl_options.multi_patch_workgroup ? "constant" : "threadgroup";
+		if (get_execution_model() == ExecutionModelFragment && var && var->basevariable == stage_in_var_id)
+			addr_space = "thread";
+		break;
+
+	case StorageClassOutput:
+		if (capture_output_to_buffer)
+			addr_space = "device";
+		break;
+
+	default:
+		break;
+	}
+
+	if (!addr_space)
+		// No address space for plain values.
+		addr_space = type.pointer || (argument && type.basetype == SPIRType::ControlPointArray) ? "thread" : "";
+
+	return join(flags.get(DecorationVolatile) || flags.get(DecorationCoherent) ? "volatile " : "", addr_space);
+}
+
+// Returns "restrict" (with a trailing space when 'space' is true) if the ID is decorated
+// Restrict, and an empty string otherwise. For variables of Block/BufferBlock struct type
+// the buffer block flags are consulted instead of the ID's own decorations.
+const char *CompilerMSL::to_restrict(uint32_t id, bool space)
+{
+	// This can be called for variable pointer contexts as well, so be very careful about which method we choose.
+	Bitset flags;
+	if (ir.ids[id].get_type() == TypeVariable)
+	{
+		uint32_t type_id = expression_type_id(id);
+		auto &type = expression_type(id);
+		if (type.basetype == SPIRType::Struct &&
+		    (has_decoration(type_id, DecorationBlock) || has_decoration(type_id, DecorationBufferBlock)))
+			flags = get_buffer_block_flags(id);
+		else
+			flags = get_decoration_bitset(id);
+	}
+	else
+		flags = get_decoration_bitset(id);
+
+	return flags.get(DecorationRestrict) ? (space ? "restrict " : "restrict") : "";
+}
+
+// Builds the [[stage_in]] entry point argument declaration, or returns an empty string
+// when there is no stage-in variable or when a tessellation control shader uses
+// multi-patch workgroups.
+string CompilerMSL::entry_point_arg_stage_in()
+{
+	string decl;
+
+	if (get_execution_model() == ExecutionModelTessellationControl && msl_options.multi_patch_workgroup)
+		return decl;
+
+	// Stage-in structure
+	uint32_t stage_in_id;
+	if (get_execution_model() == ExecutionModelTessellationEvaluation)
+		stage_in_id = patch_stage_in_var_id;
+	else
+		stage_in_id = stage_in_var_id;
+
+	if (stage_in_id)
+	{
+		// The template argument is required: it cannot be deduced from the uint32_t ID.
+		auto &var = get<SPIRVariable>(stage_in_id);
+		auto &type = get_variable_data_type(var);
+
+		add_resource_name(var.self);
+		decl = join(type_to_glsl(type), " ", to_name(var.self), " [[stage_in]]");
+	}
+
+	return decl;
+}
+
+// Returns true if this input builtin should be a direct parameter on a shader function parameter list,
+// and false for builtins that should be passed or calculated some other way.
+bool CompilerMSL::is_direct_input_builtin(BuiltIn bi_type)
+{
+	switch (bi_type)
+	{
+	// Vertex function in
+	case BuiltInVertexId:
+	case BuiltInVertexIndex:
+	case BuiltInBaseVertex:
+	case BuiltInInstanceId:
+	case BuiltInInstanceIndex:
+	case BuiltInBaseInstance:
+		return get_execution_model() != ExecutionModelVertex || !msl_options.vertex_for_tessellation;
+	// Tess. control function in
+	case BuiltInPosition:
+	case BuiltInPointSize:
+	case BuiltInClipDistance:
+	case BuiltInCullDistance:
+	case BuiltInPatchVertices:
+		return false;
+	case BuiltInInvocationId:
+	case BuiltInPrimitiveId:
+		return get_execution_model() != ExecutionModelTessellationControl || !msl_options.multi_patch_workgroup;
+	// Tess. evaluation function in
+	case BuiltInTessLevelInner:
+	case BuiltInTessLevelOuter:
+		return false;
+	// Fragment function in
+	case BuiltInSamplePosition:
+	case BuiltInHelperInvocation:
+	case BuiltInBaryCoordNV:
+	case BuiltInBaryCoordNoPerspNV:
+		return false;
+	case BuiltInViewIndex:
+		return get_execution_model() == ExecutionModelFragment && msl_options.multiview &&
+		       msl_options.multiview_layered_rendering;
+	// Compute function in
+	case BuiltInSubgroupId:
+	case BuiltInNumSubgroups:
+		return !msl_options.emulate_subgroups;
+	// Any stage function in
+	case BuiltInDeviceIndex:
+	case BuiltInSubgroupEqMask:
+	case BuiltInSubgroupGeMask:
+	case BuiltInSubgroupGtMask:
+	case BuiltInSubgroupLeMask:
+	case BuiltInSubgroupLtMask:
+		return false;
+	case BuiltInSubgroupSize:
+		if (msl_options.fixed_subgroup_size != 0)
+			return false;
+		/* fallthrough */
+	case BuiltInSubgroupLocalInvocationId:
+		return !msl_options.emulate_subgroups;
+	default:
+		return true;
+	}
+}
+
+// Returns true if this is a fragment shader that runs per sample, and false otherwise.
+bool CompilerMSL::is_sample_rate() const +{ + auto &caps = get_declared_capabilities(); + return get_execution_model() == ExecutionModelFragment && + (msl_options.force_sample_rate_shading || + std::find(caps.begin(), caps.end(), CapabilitySampleRateShading) != caps.end() || + (msl_options.use_framebuffer_fetch_subpasses && need_subpass_input)); +} + +void CompilerMSL::entry_point_args_builtin(string &ep_args) +{ + // Builtin variables + SmallVector, 8> active_builtins; + ir.for_each_typed_id([&](uint32_t var_id, SPIRVariable &var) { + auto bi_type = BuiltIn(get_decoration(var_id, DecorationBuiltIn)); + + // Don't emit SamplePosition as a separate parameter. In the entry + // point, we get that by calling get_sample_position() on the sample ID. + if (var.storage == StorageClassInput && is_builtin_variable(var) && + get_variable_data_type(var).basetype != SPIRType::Struct && + get_variable_data_type(var).basetype != SPIRType::ControlPointArray) + { + // If the builtin is not part of the active input builtin set, don't emit it. + // Relevant for multiple entry-point modules which might declare unused builtins. + if (!active_input_builtins.get(bi_type) || !interface_variable_exists_in_entry_point(var_id)) + return; + + // Remember this variable. We may need to correct its type. + active_builtins.push_back(make_pair(&var, bi_type)); + + if (is_direct_input_builtin(bi_type)) + { + if (!ep_args.empty()) + ep_args += ", "; + + // Handle HLSL-style 0-based vertex/instance index. 
+ builtin_declaration = true; + ep_args += builtin_type_decl(bi_type, var_id) + " " + to_expression(var_id); + ep_args += " [[" + builtin_qualifier(bi_type); + if (bi_type == BuiltInSampleMask && get_entry_point().flags.get(ExecutionModePostDepthCoverage)) + { + if (!msl_options.supports_msl_version(2)) + SPIRV_CROSS_THROW("Post-depth coverage requires MSL 2.0."); + if (msl_options.is_macos() && !msl_options.supports_msl_version(2, 3)) + SPIRV_CROSS_THROW("Post-depth coverage on Mac requires MSL 2.3."); + ep_args += ", post_depth_coverage"; + } + ep_args += "]]"; + builtin_declaration = false; + } + } + + if (var.storage == StorageClassInput && + has_extended_decoration(var_id, SPIRVCrossDecorationBuiltInDispatchBase)) + { + // This is a special implicit builtin, not corresponding to any SPIR-V builtin, + // which holds the base that was passed to vkCmdDispatchBase() or vkCmdDrawIndexed(). If it's present, + // assume we emitted it for a good reason. + assert(msl_options.supports_msl_version(1, 2)); + if (!ep_args.empty()) + ep_args += ", "; + + ep_args += type_to_glsl(get_variable_data_type(var)) + " " + to_expression(var_id) + " [[grid_origin]]"; + } + + if (var.storage == StorageClassInput && + has_extended_decoration(var_id, SPIRVCrossDecorationBuiltInStageInputSize)) + { + // This is another special implicit builtin, not corresponding to any SPIR-V builtin, + // which holds the number of vertices and instances to draw. If it's present, + // assume we emitted it for a good reason. + assert(msl_options.supports_msl_version(1, 2)); + if (!ep_args.empty()) + ep_args += ", "; + + ep_args += type_to_glsl(get_variable_data_type(var)) + " " + to_expression(var_id) + " [[grid_size]]"; + } + }); + + // Correct the types of all encountered active builtins. We couldn't do this before + // because ensure_correct_builtin_type() may increase the bound, which isn't allowed + // while iterating over IDs. 
+ for (auto &var : active_builtins) + var.first->basetype = ensure_correct_builtin_type(var.first->basetype, var.second); + + // Handle HLSL-style 0-based vertex/instance index. + if (needs_base_vertex_arg == TriState::Yes) + ep_args += built_in_func_arg(BuiltInBaseVertex, !ep_args.empty()); + + if (needs_base_instance_arg == TriState::Yes) + ep_args += built_in_func_arg(BuiltInBaseInstance, !ep_args.empty()); + + if (capture_output_to_buffer) + { + // Add parameters to hold the indirect draw parameters and the shader output. This has to be handled + // specially because it needs to be a pointer, not a reference. + if (stage_out_var_id) + { + if (!ep_args.empty()) + ep_args += ", "; + ep_args += join("device ", type_to_glsl(get_stage_out_struct_type()), "* ", output_buffer_var_name, + " [[buffer(", msl_options.shader_output_buffer_index, ")]]"); + } + + if (get_execution_model() == ExecutionModelTessellationControl) + { + if (!ep_args.empty()) + ep_args += ", "; + ep_args += + join("constant uint* spvIndirectParams [[buffer(", msl_options.indirect_params_buffer_index, ")]]"); + } + else if (stage_out_var_id && + !(get_execution_model() == ExecutionModelVertex && msl_options.vertex_for_tessellation)) + { + if (!ep_args.empty()) + ep_args += ", "; + ep_args += + join("device uint* spvIndirectParams [[buffer(", msl_options.indirect_params_buffer_index, ")]]"); + } + + if (get_execution_model() == ExecutionModelVertex && msl_options.vertex_for_tessellation && + (active_input_builtins.get(BuiltInVertexIndex) || active_input_builtins.get(BuiltInVertexId)) && + msl_options.vertex_index_type != Options::IndexType::None) + { + // Add the index buffer so we can set gl_VertexIndex correctly. 
+ if (!ep_args.empty()) + ep_args += ", "; + switch (msl_options.vertex_index_type) + { + case Options::IndexType::None: + break; + case Options::IndexType::UInt16: + ep_args += join("const device ushort* ", index_buffer_var_name, " [[buffer(", + msl_options.shader_index_buffer_index, ")]]"); + break; + case Options::IndexType::UInt32: + ep_args += join("const device uint* ", index_buffer_var_name, " [[buffer(", + msl_options.shader_index_buffer_index, ")]]"); + break; + } + } + + // Tessellation control shaders get three additional parameters: + // a buffer to hold the per-patch data, a buffer to hold the per-patch + // tessellation levels, and a block of workgroup memory to hold the + // input control point data. + if (get_execution_model() == ExecutionModelTessellationControl) + { + if (patch_stage_out_var_id) + { + if (!ep_args.empty()) + ep_args += ", "; + ep_args += + join("device ", type_to_glsl(get_patch_stage_out_struct_type()), "* ", patch_output_buffer_var_name, + " [[buffer(", convert_to_string(msl_options.shader_patch_output_buffer_index), ")]]"); + } + if (!ep_args.empty()) + ep_args += ", "; + ep_args += join("device ", get_tess_factor_struct_name(), "* ", tess_factor_buffer_var_name, " [[buffer(", + convert_to_string(msl_options.shader_tess_factor_buffer_index), ")]]"); + if (stage_in_var_id) + { + if (!ep_args.empty()) + ep_args += ", "; + if (msl_options.multi_patch_workgroup) + { + ep_args += join("device ", type_to_glsl(get_stage_in_struct_type()), "* ", input_buffer_var_name, + " [[buffer(", convert_to_string(msl_options.shader_input_buffer_index), ")]]"); + } + else + { + ep_args += join("threadgroup ", type_to_glsl(get_stage_in_struct_type()), "* ", input_wg_var_name, + " [[threadgroup(", convert_to_string(msl_options.shader_input_wg_index), ")]]"); + } + } + } + } +} + +string CompilerMSL::entry_point_args_argument_buffer(bool append_comma) +{ + string ep_args = entry_point_arg_stage_in(); + Bitset claimed_bindings; + + for (uint32_t i = 0; 
i < kMaxArgumentBuffers; i++) + { + uint32_t id = argument_buffer_ids[i]; + if (id == 0) + continue; + + add_resource_name(id); + auto &var = get(id); + auto &type = get_variable_data_type(var); + + if (!ep_args.empty()) + ep_args += ", "; + + // Check if the argument buffer binding itself has been remapped. + uint32_t buffer_binding; + auto itr = resource_bindings.find({ get_entry_point().model, i, kArgumentBufferBinding }); + if (itr != end(resource_bindings)) + { + buffer_binding = itr->second.first.msl_buffer; + itr->second.second = true; + } + else + { + // As a fallback, directly map desc set <-> binding. + // If that was taken, take the next buffer binding. + if (claimed_bindings.get(i)) + buffer_binding = next_metal_resource_index_buffer; + else + buffer_binding = i; + } + + claimed_bindings.set(buffer_binding); + + ep_args += get_argument_address_space(var) + " " + type_to_glsl(type) + "& " + to_restrict(id) + to_name(id); + ep_args += " [[buffer(" + convert_to_string(buffer_binding) + ")]]"; + + next_metal_resource_index_buffer = max(next_metal_resource_index_buffer, buffer_binding + 1); + } + + entry_point_args_discrete_descriptors(ep_args); + entry_point_args_builtin(ep_args); + + if (!ep_args.empty() && append_comma) + ep_args += ", "; + + return ep_args; +} + +const MSLConstexprSampler *CompilerMSL::find_constexpr_sampler(uint32_t id) const +{ + // Try by ID. + { + auto itr = constexpr_samplers_by_id.find(id); + if (itr != end(constexpr_samplers_by_id)) + return &itr->second; + } + + // Try by binding. 
+ { + uint32_t desc_set = get_decoration(id, DecorationDescriptorSet); + uint32_t binding = get_decoration(id, DecorationBinding); + + auto itr = constexpr_samplers_by_binding.find({ desc_set, binding }); + if (itr != end(constexpr_samplers_by_binding)) + return &itr->second; + } + + return nullptr; +} + +void CompilerMSL::entry_point_args_discrete_descriptors(string &ep_args) +{ + // Output resources, sorted by resource index & type + // We need to sort to work around a bug on macOS 10.13 with NVidia drivers where switching between shaders + // with different order of buffers can result in issues with buffer assignments inside the driver. + struct Resource + { + SPIRVariable *var; + string name; + SPIRType::BaseType basetype; + uint32_t index; + uint32_t plane; + uint32_t secondary_index; + }; + + SmallVector resources; + + ir.for_each_typed_id([&](uint32_t var_id, SPIRVariable &var) { + if ((var.storage == StorageClassUniform || var.storage == StorageClassUniformConstant || + var.storage == StorageClassPushConstant || var.storage == StorageClassStorageBuffer) && + !is_hidden_variable(var)) + { + auto &type = get_variable_data_type(var); + + // Very specifically, image load-store in argument buffers are disallowed on MSL on iOS. + // But we won't know when the argument buffer is encoded whether this image will have + // a NonWritable decoration. So just use discrete arguments for all storage images + // on iOS. 
+ if (!(msl_options.is_ios() && type.basetype == SPIRType::Image && type.image.sampled == 2) && + var.storage != StorageClassPushConstant) + { + uint32_t desc_set = get_decoration(var_id, DecorationDescriptorSet); + if (descriptor_set_is_argument_buffer(desc_set)) + return; + } + + const MSLConstexprSampler *constexpr_sampler = nullptr; + if (type.basetype == SPIRType::SampledImage || type.basetype == SPIRType::Sampler) + { + constexpr_sampler = find_constexpr_sampler(var_id); + if (constexpr_sampler) + { + // Mark this ID as a constexpr sampler for later in case it came from set/bindings. + constexpr_samplers_by_id[var_id] = *constexpr_sampler; + } + } + + // Emulate texture2D atomic operations + uint32_t secondary_index = 0; + if (atomic_image_vars.count(var.self)) + { + secondary_index = get_metal_resource_index(var, SPIRType::AtomicCounter, 0); + } + + if (type.basetype == SPIRType::SampledImage) + { + add_resource_name(var_id); + + uint32_t plane_count = 1; + if (constexpr_sampler && constexpr_sampler->ycbcr_conversion_enable) + plane_count = constexpr_sampler->planes; + + for (uint32_t i = 0; i < plane_count; i++) + resources.push_back({ &var, to_name(var_id), SPIRType::Image, + get_metal_resource_index(var, SPIRType::Image, i), i, secondary_index }); + + if (type.image.dim != DimBuffer && !constexpr_sampler) + { + resources.push_back({ &var, to_sampler_expression(var_id), SPIRType::Sampler, + get_metal_resource_index(var, SPIRType::Sampler), 0, 0 }); + } + } + else if (!constexpr_sampler) + { + // constexpr samplers are not declared as resources. 
+ add_resource_name(var_id); + resources.push_back({ &var, to_name(var_id), type.basetype, + get_metal_resource_index(var, type.basetype), 0, secondary_index }); + } + } + }); + + sort(resources.begin(), resources.end(), [](const Resource &lhs, const Resource &rhs) { + return tie(lhs.basetype, lhs.index) < tie(rhs.basetype, rhs.index); + }); + + for (auto &r : resources) + { + auto &var = *r.var; + auto &type = get_variable_data_type(var); + + uint32_t var_id = var.self; + + switch (r.basetype) + { + case SPIRType::Struct: + { + auto &m = ir.meta[type.self]; + if (m.members.size() == 0) + break; + if (!type.array.empty()) + { + if (type.array.size() > 1) + SPIRV_CROSS_THROW("Arrays of arrays of buffers are not supported."); + + // Metal doesn't directly support this, so we must expand the + // array. We'll declare a local array to hold these elements + // later. + uint32_t array_size = to_array_size_literal(type); + + if (array_size == 0) + SPIRV_CROSS_THROW("Unsized arrays of buffers are not supported in MSL."); + + // Allow Metal to use the array template to make arrays a value type + is_using_builtin_array = true; + buffer_arrays.push_back(var_id); + for (uint32_t i = 0; i < array_size; ++i) + { + if (!ep_args.empty()) + ep_args += ", "; + ep_args += get_argument_address_space(var) + " " + type_to_glsl(type) + "* " + to_restrict(var_id) + + r.name + "_" + convert_to_string(i); + ep_args += " [[buffer(" + convert_to_string(r.index + i) + ")"; + if (interlocked_resources.count(var_id)) + ep_args += ", raster_order_group(0)"; + ep_args += "]]"; + } + is_using_builtin_array = false; + } + else + { + if (!ep_args.empty()) + ep_args += ", "; + ep_args += + get_argument_address_space(var) + " " + type_to_glsl(type) + "& " + to_restrict(var_id) + r.name; + ep_args += " [[buffer(" + convert_to_string(r.index) + ")"; + if (interlocked_resources.count(var_id)) + ep_args += ", raster_order_group(0)"; + ep_args += "]]"; + } + break; + } + case SPIRType::Sampler: + if 
(!ep_args.empty()) + ep_args += ", "; + ep_args += sampler_type(type, var_id) + " " + r.name; + ep_args += " [[sampler(" + convert_to_string(r.index) + ")]]"; + break; + case SPIRType::Image: + { + if (!ep_args.empty()) + ep_args += ", "; + + // Use Metal's native frame-buffer fetch API for subpass inputs. + const auto &basetype = get(var.basetype); + if (!type_is_msl_framebuffer_fetch(basetype)) + { + ep_args += image_type_glsl(type, var_id) + " " + r.name; + if (r.plane > 0) + ep_args += join(plane_name_suffix, r.plane); + ep_args += " [[texture(" + convert_to_string(r.index) + ")"; + if (interlocked_resources.count(var_id)) + ep_args += ", raster_order_group(0)"; + ep_args += "]]"; + } + else + { + if (msl_options.is_macos() && !msl_options.supports_msl_version(2, 3)) + SPIRV_CROSS_THROW("Framebuffer fetch on Mac is not supported before MSL 2.3."); + ep_args += image_type_glsl(type, var_id) + " " + r.name; + ep_args += " [[color(" + convert_to_string(r.index) + ")]]"; + } + + // Emulate texture2D atomic operations + if (atomic_image_vars.count(var.self)) + { + ep_args += ", device atomic_" + type_to_glsl(get(basetype.image.type), 0); + ep_args += "* " + r.name + "_atomic"; + ep_args += " [[buffer(" + convert_to_string(r.secondary_index) + ")"; + if (interlocked_resources.count(var_id)) + ep_args += ", raster_order_group(0)"; + ep_args += "]]"; + } + break; + } + default: + if (!ep_args.empty()) + ep_args += ", "; + if (!type.pointer) + ep_args += get_type_address_space(get(var.basetype), var_id) + " " + + type_to_glsl(type, var_id) + "& " + r.name; + else + ep_args += type_to_glsl(type, var_id) + " " + r.name; + ep_args += " [[buffer(" + convert_to_string(r.index) + ")"; + if (interlocked_resources.count(var_id)) + ep_args += ", raster_order_group(0)"; + ep_args += "]]"; + break; + } + } +} + +// Returns a string containing a comma-delimited list of args for the entry point function +// This is the "classic" method of MSL 1 when we don't have argument buffer 
support. +string CompilerMSL::entry_point_args_classic(bool append_comma) +{ + string ep_args = entry_point_arg_stage_in(); + entry_point_args_discrete_descriptors(ep_args); + entry_point_args_builtin(ep_args); + + if (!ep_args.empty() && append_comma) + ep_args += ", "; + + return ep_args; +} + +void CompilerMSL::fix_up_shader_inputs_outputs() +{ + auto &entry_func = this->get(ir.default_entry_point); + + // Emit a guard to ensure we don't execute beyond the last vertex. + // Vertex shaders shouldn't have the problems with barriers in non-uniform control flow that + // tessellation control shaders do, so early returns should be OK. We may need to revisit this + // if it ever becomes possible to use barriers from a vertex shader. + if (get_execution_model() == ExecutionModelVertex && msl_options.vertex_for_tessellation) + { + entry_func.fixup_hooks_in.push_back([this]() { + statement("if (any(", to_expression(builtin_invocation_id_id), + " >= ", to_expression(builtin_stage_input_size_id), "))"); + statement(" return;"); + }); + } + + // Look for sampled images and buffer. Add hooks to set up the swizzle constants or array lengths. + ir.for_each_typed_id([&](uint32_t, SPIRVariable &var) { + auto &type = get_variable_data_type(var); + uint32_t var_id = var.self; + bool ssbo = has_decoration(type.self, DecorationBufferBlock); + + if (var.storage == StorageClassUniformConstant && !is_hidden_variable(var)) + { + if (msl_options.swizzle_texture_samples && has_sampled_images && is_sampled_image_type(type)) + { + entry_func.fixup_hooks_in.push_back([this, &type, &var, var_id]() { + bool is_array_type = !type.array.empty(); + + uint32_t desc_set = get_decoration(var_id, DecorationDescriptorSet); + if (descriptor_set_is_argument_buffer(desc_set)) + { + statement("constant uint", is_array_type ? "* " : "& ", to_swizzle_expression(var_id), + is_array_type ? 
" = &" : " = ", to_name(argument_buffer_ids[desc_set]), + ".spvSwizzleConstants", "[", + convert_to_string(get_metal_resource_index(var, SPIRType::Image)), "];"); + } + else + { + // If we have an array of images, we need to be able to index into it, so take a pointer instead. + statement("constant uint", is_array_type ? "* " : "& ", to_swizzle_expression(var_id), + is_array_type ? " = &" : " = ", to_name(swizzle_buffer_id), "[", + convert_to_string(get_metal_resource_index(var, SPIRType::Image)), "];"); + } + }); + } + } + else if ((var.storage == StorageClassStorageBuffer || (var.storage == StorageClassUniform && ssbo)) && + !is_hidden_variable(var)) + { + if (buffers_requiring_array_length.count(var.self)) + { + entry_func.fixup_hooks_in.push_back([this, &type, &var, var_id]() { + bool is_array_type = !type.array.empty(); + + uint32_t desc_set = get_decoration(var_id, DecorationDescriptorSet); + if (descriptor_set_is_argument_buffer(desc_set)) + { + statement("constant uint", is_array_type ? "* " : "& ", to_buffer_size_expression(var_id), + is_array_type ? " = &" : " = ", to_name(argument_buffer_ids[desc_set]), + ".spvBufferSizeConstants", "[", + convert_to_string(get_metal_resource_index(var, SPIRType::Image)), "];"); + } + else + { + // If we have an array of images, we need to be able to index into it, so take a pointer instead. + statement("constant uint", is_array_type ? "* " : "& ", to_buffer_size_expression(var_id), + is_array_type ? 
" = &" : " = ", to_name(buffer_size_buffer_id), "[", + convert_to_string(get_metal_resource_index(var, type.basetype)), "];"); + } + }); + } + } + }); + + // Builtin variables + ir.for_each_typed_id([this, &entry_func](uint32_t, SPIRVariable &var) { + uint32_t var_id = var.self; + BuiltIn bi_type = ir.meta[var_id].decoration.builtin_type; + + if (var.storage == StorageClassInput && is_builtin_variable(var) && active_input_builtins.get(bi_type)) + { + switch (bi_type) + { + case BuiltInSamplePosition: + entry_func.fixup_hooks_in.push_back([=]() { + statement(builtin_type_decl(bi_type), " ", to_expression(var_id), " = get_sample_position(", + to_expression(builtin_sample_id_id), ");"); + }); + break; + case BuiltInFragCoord: + if (is_sample_rate()) + { + entry_func.fixup_hooks_in.push_back([=]() { + statement(to_expression(var_id), ".xy += get_sample_position(", + to_expression(builtin_sample_id_id), ") - 0.5;"); + }); + } + break; + case BuiltInHelperInvocation: + if (msl_options.is_ios() && !msl_options.supports_msl_version(2, 3)) + SPIRV_CROSS_THROW("simd_is_helper_thread() requires version 2.3 on iOS."); + else if (msl_options.is_macos() && !msl_options.supports_msl_version(2, 1)) + SPIRV_CROSS_THROW("simd_is_helper_thread() requires version 2.1 on macOS."); + + entry_func.fixup_hooks_in.push_back([=]() { + statement(builtin_type_decl(bi_type), " ", to_expression(var_id), " = simd_is_helper_thread();"); + }); + break; + case BuiltInInvocationId: + // This is direct-mapped without multi-patch workgroups. + if (get_execution_model() != ExecutionModelTessellationControl || !msl_options.multi_patch_workgroup) + break; + + entry_func.fixup_hooks_in.push_back([=]() { + statement(builtin_type_decl(bi_type), " ", to_expression(var_id), " = ", + to_expression(builtin_invocation_id_id), ".x % ", this->get_entry_point().output_vertices, + ";"); + }); + break; + case BuiltInPrimitiveId: + // This is natively supported by fragment and tessellation evaluation shaders. 
+ // In tessellation control shaders, this is direct-mapped without multi-patch workgroups. + if (get_execution_model() != ExecutionModelTessellationControl || !msl_options.multi_patch_workgroup) + break; + + entry_func.fixup_hooks_in.push_back([=]() { + statement(builtin_type_decl(bi_type), " ", to_expression(var_id), " = min(", + to_expression(builtin_invocation_id_id), ".x / ", this->get_entry_point().output_vertices, + ", spvIndirectParams[1]);"); + }); + break; + case BuiltInPatchVertices: + if (get_execution_model() == ExecutionModelTessellationEvaluation) + entry_func.fixup_hooks_in.push_back([=]() { + statement(builtin_type_decl(bi_type), " ", to_expression(var_id), " = ", + to_expression(patch_stage_in_var_id), ".gl_in.size();"); + }); + else + entry_func.fixup_hooks_in.push_back([=]() { + statement(builtin_type_decl(bi_type), " ", to_expression(var_id), " = spvIndirectParams[0];"); + }); + break; + case BuiltInTessCoord: + // Emit a fixup to account for the shifted domain. Don't do this for triangles; + // MoltenVK will just reverse the winding order instead. + if (msl_options.tess_domain_origin_lower_left && !get_entry_point().flags.get(ExecutionModeTriangles)) + { + string tc = to_expression(var_id); + entry_func.fixup_hooks_in.push_back([=]() { statement(tc, ".y = 1.0 - ", tc, ".y;"); }); + } + break; + case BuiltInSubgroupId: + if (!msl_options.emulate_subgroups) + break; + // For subgroup emulation, this is the same as the local invocation index. + entry_func.fixup_hooks_in.push_back([=]() { + statement(builtin_type_decl(bi_type), " ", to_expression(var_id), " = ", + to_expression(builtin_local_invocation_index_id), ";"); + }); + break; + case BuiltInNumSubgroups: + if (!msl_options.emulate_subgroups) + break; + // For subgroup emulation, this is the same as the workgroup size. 
+ entry_func.fixup_hooks_in.push_back([=]() { + auto &type = expression_type(builtin_workgroup_size_id); + string size_expr = to_expression(builtin_workgroup_size_id); + if (type.vecsize >= 3) + size_expr = join(size_expr, ".x * ", size_expr, ".y * ", size_expr, ".z"); + else if (type.vecsize == 2) + size_expr = join(size_expr, ".x * ", size_expr, ".y"); + statement(builtin_type_decl(bi_type), " ", to_expression(var_id), " = ", size_expr, ";"); + }); + break; + case BuiltInSubgroupLocalInvocationId: + if (!msl_options.emulate_subgroups) + break; + // For subgroup emulation, assume subgroups of size 1. + entry_func.fixup_hooks_in.push_back( + [=]() { statement(builtin_type_decl(bi_type), " ", to_expression(var_id), " = 0;"); }); + break; + case BuiltInSubgroupSize: + if (msl_options.emulate_subgroups) + { + // For subgroup emulation, assume subgroups of size 1. + entry_func.fixup_hooks_in.push_back( + [=]() { statement(builtin_type_decl(bi_type), " ", to_expression(var_id), " = 1;"); }); + } + else if (msl_options.fixed_subgroup_size != 0) + { + entry_func.fixup_hooks_in.push_back([=]() { + statement(builtin_type_decl(bi_type), " ", to_expression(var_id), " = ", + msl_options.fixed_subgroup_size, ";"); + }); + } + break; + case BuiltInSubgroupEqMask: + if (msl_options.is_ios() && !msl_options.supports_msl_version(2, 2)) + SPIRV_CROSS_THROW("Subgroup ballot functionality requires Metal 2.2 on iOS."); + if (!msl_options.supports_msl_version(2, 1)) + SPIRV_CROSS_THROW("Subgroup ballot functionality requires Metal 2.1."); + entry_func.fixup_hooks_in.push_back([=]() { + if (msl_options.is_ios()) + { + statement(builtin_type_decl(bi_type), " ", to_expression(var_id), " = ", "uint4(1 << ", + to_expression(builtin_subgroup_invocation_id_id), ", uint3(0));"); + } + else + { + statement(builtin_type_decl(bi_type), " ", to_expression(var_id), " = ", + to_expression(builtin_subgroup_invocation_id_id), " >= 32 ? 
uint4(0, (1 << (", + to_expression(builtin_subgroup_invocation_id_id), " - 32)), uint2(0)) : uint4(1 << ", + to_expression(builtin_subgroup_invocation_id_id), ", uint3(0));"); + } + }); + break; + case BuiltInSubgroupGeMask: + if (msl_options.is_ios() && !msl_options.supports_msl_version(2, 2)) + SPIRV_CROSS_THROW("Subgroup ballot functionality requires Metal 2.2 on iOS."); + if (!msl_options.supports_msl_version(2, 1)) + SPIRV_CROSS_THROW("Subgroup ballot functionality requires Metal 2.1."); + if (msl_options.fixed_subgroup_size != 0) + add_spv_func_and_recompile(SPVFuncImplSubgroupBallot); + entry_func.fixup_hooks_in.push_back([=]() { + // Case where index < 32, size < 32: + // mask0 = bfi(0, 0xFFFFFFFF, index, size - index); + // mask1 = bfi(0, 0xFFFFFFFF, 0, 0); // Gives 0 + // Case where index < 32 but size >= 32: + // mask0 = bfi(0, 0xFFFFFFFF, index, 32 - index); + // mask1 = bfi(0, 0xFFFFFFFF, 0, size - 32); + // Case where index >= 32: + // mask0 = bfi(0, 0xFFFFFFFF, 32, 0); // Gives 0 + // mask1 = bfi(0, 0xFFFFFFFF, index - 32, size - index); + // This is expressed without branches to avoid divergent + // control flow--hence the complicated min/max expressions. + // This is further complicated by the fact that if you attempt + // to bfi/bfe out-of-bounds on Metal, undefined behavior is the + // result. + if (msl_options.fixed_subgroup_size > 32) + { + // Don't use the subgroup size variable with fixed subgroup sizes, + // since the variables could be defined in the wrong order. 
+ statement(builtin_type_decl(bi_type), " ", to_expression(var_id), + " = uint4(insert_bits(0u, 0xFFFFFFFF, min(", + to_expression(builtin_subgroup_invocation_id_id), ", 32u), (uint)max(32 - (int)", + to_expression(builtin_subgroup_invocation_id_id), + ", 0)), insert_bits(0u, 0xFFFFFFFF," + " (uint)max((int)", + to_expression(builtin_subgroup_invocation_id_id), " - 32, 0), ", + msl_options.fixed_subgroup_size, " - max(", + to_expression(builtin_subgroup_invocation_id_id), + ", 32u)), uint2(0));"); + } + else if (msl_options.fixed_subgroup_size != 0) + { + statement(builtin_type_decl(bi_type), " ", to_expression(var_id), + " = uint4(insert_bits(0u, 0xFFFFFFFF, ", + to_expression(builtin_subgroup_invocation_id_id), ", ", + msl_options.fixed_subgroup_size, " - ", + to_expression(builtin_subgroup_invocation_id_id), + "), uint3(0));"); + } + else if (msl_options.is_ios()) + { + // On iOS, the SIMD-group size will currently never exceed 32. + statement(builtin_type_decl(bi_type), " ", to_expression(var_id), + " = uint4(insert_bits(0u, 0xFFFFFFFF, ", + to_expression(builtin_subgroup_invocation_id_id), ", ", + to_expression(builtin_subgroup_size_id), " - ", + to_expression(builtin_subgroup_invocation_id_id), "), uint3(0));"); + } + else + { + statement(builtin_type_decl(bi_type), " ", to_expression(var_id), + " = uint4(insert_bits(0u, 0xFFFFFFFF, min(", + to_expression(builtin_subgroup_invocation_id_id), ", 32u), (uint)max(min((int)", + to_expression(builtin_subgroup_size_id), ", 32) - (int)", + to_expression(builtin_subgroup_invocation_id_id), + ", 0)), insert_bits(0u, 0xFFFFFFFF, (uint)max((int)", + to_expression(builtin_subgroup_invocation_id_id), " - 32, 0), (uint)max((int)", + to_expression(builtin_subgroup_size_id), " - (int)max(", + to_expression(builtin_subgroup_invocation_id_id), ", 32u), 0)), uint2(0));"); + } + }); + break; + case BuiltInSubgroupGtMask: + if (msl_options.is_ios() && !msl_options.supports_msl_version(2, 2)) + SPIRV_CROSS_THROW("Subgroup ballot 
functionality requires Metal 2.2 on iOS."); + if (!msl_options.supports_msl_version(2, 1)) + SPIRV_CROSS_THROW("Subgroup ballot functionality requires Metal 2.1."); + add_spv_func_and_recompile(SPVFuncImplSubgroupBallot); + entry_func.fixup_hooks_in.push_back([=]() { + // The same logic applies here, except now the index is one + // more than the subgroup invocation ID. + if (msl_options.fixed_subgroup_size > 32) + { + statement(builtin_type_decl(bi_type), " ", to_expression(var_id), + " = uint4(insert_bits(0u, 0xFFFFFFFF, min(", + to_expression(builtin_subgroup_invocation_id_id), " + 1, 32u), (uint)max(32 - (int)", + to_expression(builtin_subgroup_invocation_id_id), + " - 1, 0)), insert_bits(0u, 0xFFFFFFFF, (uint)max((int)", + to_expression(builtin_subgroup_invocation_id_id), " + 1 - 32, 0), ", + msl_options.fixed_subgroup_size, " - max(", + to_expression(builtin_subgroup_invocation_id_id), + " + 1, 32u)), uint2(0));"); + } + else if (msl_options.fixed_subgroup_size != 0) + { + statement(builtin_type_decl(bi_type), " ", to_expression(var_id), + " = uint4(insert_bits(0u, 0xFFFFFFFF, ", + to_expression(builtin_subgroup_invocation_id_id), " + 1, ", + msl_options.fixed_subgroup_size, " - ", + to_expression(builtin_subgroup_invocation_id_id), + " - 1), uint3(0));"); + } + else if (msl_options.is_ios()) + { + statement(builtin_type_decl(bi_type), " ", to_expression(var_id), + " = uint4(insert_bits(0u, 0xFFFFFFFF, ", + to_expression(builtin_subgroup_invocation_id_id), " + 1, ", + to_expression(builtin_subgroup_size_id), " - ", + to_expression(builtin_subgroup_invocation_id_id), " - 1), uint3(0));"); + } + else + { + statement(builtin_type_decl(bi_type), " ", to_expression(var_id), + " = uint4(insert_bits(0u, 0xFFFFFFFF, min(", + to_expression(builtin_subgroup_invocation_id_id), " + 1, 32u), (uint)max(min((int)", + to_expression(builtin_subgroup_size_id), ", 32) - (int)", + to_expression(builtin_subgroup_invocation_id_id), + " - 1, 0)), insert_bits(0u, 0xFFFFFFFF, 
(uint)max((int)", + to_expression(builtin_subgroup_invocation_id_id), " + 1 - 32, 0), (uint)max((int)", + to_expression(builtin_subgroup_size_id), " - (int)max(", + to_expression(builtin_subgroup_invocation_id_id), " + 1, 32u), 0)), uint2(0));"); + } + }); + break; + case BuiltInSubgroupLeMask: + if (msl_options.is_ios() && !msl_options.supports_msl_version(2, 2)) + SPIRV_CROSS_THROW("Subgroup ballot functionality requires Metal 2.2 on iOS."); + if (!msl_options.supports_msl_version(2, 1)) + SPIRV_CROSS_THROW("Subgroup ballot functionality requires Metal 2.1."); + add_spv_func_and_recompile(SPVFuncImplSubgroupBallot); + entry_func.fixup_hooks_in.push_back([=]() { + if (msl_options.is_ios()) + { + statement(builtin_type_decl(bi_type), " ", to_expression(var_id), + " = uint4(extract_bits(0xFFFFFFFF, 0, ", + to_expression(builtin_subgroup_invocation_id_id), " + 1), uint3(0));"); + } + else + { + statement(builtin_type_decl(bi_type), " ", to_expression(var_id), + " = uint4(extract_bits(0xFFFFFFFF, 0, min(", + to_expression(builtin_subgroup_invocation_id_id), + " + 1, 32u)), extract_bits(0xFFFFFFFF, 0, (uint)max((int)", + to_expression(builtin_subgroup_invocation_id_id), " + 1 - 32, 0)), uint2(0));"); + } + }); + break; + case BuiltInSubgroupLtMask: + if (msl_options.is_ios() && !msl_options.supports_msl_version(2, 2)) + SPIRV_CROSS_THROW("Subgroup ballot functionality requires Metal 2.2 on iOS."); + if (!msl_options.supports_msl_version(2, 1)) + SPIRV_CROSS_THROW("Subgroup ballot functionality requires Metal 2.1."); + add_spv_func_and_recompile(SPVFuncImplSubgroupBallot); + entry_func.fixup_hooks_in.push_back([=]() { + if (msl_options.is_ios()) + { + statement(builtin_type_decl(bi_type), " ", to_expression(var_id), + " = uint4(extract_bits(0xFFFFFFFF, 0, ", + to_expression(builtin_subgroup_invocation_id_id), "), uint3(0));"); + } + else + { + statement(builtin_type_decl(bi_type), " ", to_expression(var_id), + " = uint4(extract_bits(0xFFFFFFFF, 0, min(", + 
to_expression(builtin_subgroup_invocation_id_id), + ", 32u)), extract_bits(0xFFFFFFFF, 0, (uint)max((int)", + to_expression(builtin_subgroup_invocation_id_id), " - 32, 0)), uint2(0));"); + } + }); + break; + case BuiltInViewIndex: + if (!msl_options.multiview) + { + // According to the Vulkan spec, when not running under a multiview + // render pass, ViewIndex is 0. + entry_func.fixup_hooks_in.push_back([=]() { + statement("const ", builtin_type_decl(bi_type), " ", to_expression(var_id), " = 0;"); + }); + } + else if (msl_options.view_index_from_device_index) + { + // In this case, we take the view index from that of the device we're running on. + entry_func.fixup_hooks_in.push_back([=]() { + statement("const ", builtin_type_decl(bi_type), " ", to_expression(var_id), " = ", + msl_options.device_index, ";"); + }); + // We actually don't want to set the render_target_array_index here. + // Since every physical device is rendering a different view, + // there's no need for layered rendering here. + } + else if (!msl_options.multiview_layered_rendering) + { + // In this case, the views are rendered one at a time. The view index, then, + // is just the first part of the "view mask". + entry_func.fixup_hooks_in.push_back([=]() { + statement("const ", builtin_type_decl(bi_type), " ", to_expression(var_id), " = ", + to_expression(view_mask_buffer_id), "[0];"); + }); + } + else if (get_execution_model() == ExecutionModelFragment) + { + // Because we adjusted the view index in the vertex shader, we have to + // adjust it back here. + entry_func.fixup_hooks_in.push_back([=]() { + statement(to_expression(var_id), " += ", to_expression(view_mask_buffer_id), "[0];"); + }); + } + else if (get_execution_model() == ExecutionModelVertex) + { + // Metal provides no special support for multiview, so we smuggle + // the view index in the instance index. 
+ entry_func.fixup_hooks_in.push_back([=]() { + statement(builtin_type_decl(bi_type), " ", to_expression(var_id), " = ", + to_expression(view_mask_buffer_id), "[0] + (", to_expression(builtin_instance_idx_id), + " - ", to_expression(builtin_base_instance_id), ") % ", + to_expression(view_mask_buffer_id), "[1];"); + statement(to_expression(builtin_instance_idx_id), " = (", + to_expression(builtin_instance_idx_id), " - ", + to_expression(builtin_base_instance_id), ") / ", to_expression(view_mask_buffer_id), + "[1] + ", to_expression(builtin_base_instance_id), ";"); + }); + // In addition to setting the variable itself, we also need to + // set the render_target_array_index with it on output. We have to + // offset this by the base view index, because Metal isn't in on + // our little game here. + entry_func.fixup_hooks_out.push_back([=]() { + statement(to_expression(builtin_layer_id), " = ", to_expression(var_id), " - ", + to_expression(view_mask_buffer_id), "[0];"); + }); + } + break; + case BuiltInDeviceIndex: + // Metal pipelines belong to the devices which create them, so we'll + // need to create a MTLPipelineState for every MTLDevice in a grouped + // VkDevice. We can assume, then, that the device index is constant. + entry_func.fixup_hooks_in.push_back([=]() { + statement("const ", builtin_type_decl(bi_type), " ", to_expression(var_id), " = ", + msl_options.device_index, ";"); + }); + break; + case BuiltInWorkgroupId: + if (!msl_options.dispatch_base || !active_input_builtins.get(BuiltInWorkgroupId)) + break; + + // The vkCmdDispatchBase() command lets the client set the base value + // of WorkgroupId. Metal has no direct equivalent; we must make this + // adjustment ourselves. 
+ entry_func.fixup_hooks_in.push_back([=]() { + statement(to_expression(var_id), " += ", to_dereferenced_expression(builtin_dispatch_base_id), ";"); + }); + break; + case BuiltInGlobalInvocationId: + if (!msl_options.dispatch_base || !active_input_builtins.get(BuiltInGlobalInvocationId)) + break; + + // GlobalInvocationId is defined as LocalInvocationId + WorkgroupId * WorkgroupSize. + // This needs to be adjusted too. + entry_func.fixup_hooks_in.push_back([=]() { + auto &execution = this->get_entry_point(); + uint32_t workgroup_size_id = execution.workgroup_size.constant; + if (workgroup_size_id) + statement(to_expression(var_id), " += ", to_dereferenced_expression(builtin_dispatch_base_id), + " * ", to_expression(workgroup_size_id), ";"); + else + statement(to_expression(var_id), " += ", to_dereferenced_expression(builtin_dispatch_base_id), + " * uint3(", execution.workgroup_size.x, ", ", execution.workgroup_size.y, ", ", + execution.workgroup_size.z, ");"); + }); + break; + case BuiltInVertexId: + case BuiltInVertexIndex: + // This is direct-mapped normally. + if (!msl_options.vertex_for_tessellation) + break; + + entry_func.fixup_hooks_in.push_back([=]() { + builtin_declaration = true; + switch (msl_options.vertex_index_type) + { + case Options::IndexType::None: + statement(builtin_type_decl(bi_type), " ", to_expression(var_id), " = ", + to_expression(builtin_invocation_id_id), ".x + ", + to_expression(builtin_dispatch_base_id), ".x;"); + break; + case Options::IndexType::UInt16: + case Options::IndexType::UInt32: + statement(builtin_type_decl(bi_type), " ", to_expression(var_id), " = ", index_buffer_var_name, + "[", to_expression(builtin_invocation_id_id), ".x] + ", + to_expression(builtin_dispatch_base_id), ".x;"); + break; + } + builtin_declaration = false; + }); + break; + case BuiltInBaseVertex: + // This is direct-mapped normally. 
+ if (!msl_options.vertex_for_tessellation) + break; + + entry_func.fixup_hooks_in.push_back([=]() { + statement(builtin_type_decl(bi_type), " ", to_expression(var_id), " = ", + to_expression(builtin_dispatch_base_id), ".x;"); + }); + break; + case BuiltInInstanceId: + case BuiltInInstanceIndex: + // This is direct-mapped normally. + if (!msl_options.vertex_for_tessellation) + break; + + entry_func.fixup_hooks_in.push_back([=]() { + builtin_declaration = true; + statement(builtin_type_decl(bi_type), " ", to_expression(var_id), " = ", + to_expression(builtin_invocation_id_id), ".y + ", to_expression(builtin_dispatch_base_id), + ".y;"); + builtin_declaration = false; + }); + break; + case BuiltInBaseInstance: + // This is direct-mapped normally. + if (!msl_options.vertex_for_tessellation) + break; + + entry_func.fixup_hooks_in.push_back([=]() { + statement(builtin_type_decl(bi_type), " ", to_expression(var_id), " = ", + to_expression(builtin_dispatch_base_id), ".y;"); + }); + break; + default: + break; + } + } + else if (var.storage == StorageClassOutput && is_builtin_variable(var) && active_output_builtins.get(bi_type)) + { + if (bi_type == BuiltInSampleMask && get_execution_model() == ExecutionModelFragment && + msl_options.additional_fixed_sample_mask != 0xffffffff) + { + // If the additional fixed sample mask was set, we need to adjust the sample_mask + // output to reflect that. If the shader outputs the sample_mask itself too, we need + // to AND the two masks to get the final one. 
+ if (does_shader_write_sample_mask) + { + entry_func.fixup_hooks_out.push_back([=]() { + statement(to_expression(builtin_sample_mask_id), + " &= ", msl_options.additional_fixed_sample_mask, ";"); + }); + } + else + { + entry_func.fixup_hooks_out.push_back([=]() { + statement(to_expression(builtin_sample_mask_id), " = ", + msl_options.additional_fixed_sample_mask, ";"); + }); + } + } + } + }); +} + +// Returns the Metal index of the resource of the specified type as used by the specified variable. +uint32_t CompilerMSL::get_metal_resource_index(SPIRVariable &var, SPIRType::BaseType basetype, uint32_t plane) +{ + auto &execution = get_entry_point(); + auto &var_dec = ir.meta[var.self].decoration; + auto &var_type = get(var.basetype); + uint32_t var_desc_set = (var.storage == StorageClassPushConstant) ? kPushConstDescSet : var_dec.set; + uint32_t var_binding = (var.storage == StorageClassPushConstant) ? kPushConstBinding : var_dec.binding; + + // If a matching binding has been specified, find and use it. + auto itr = resource_bindings.find({ execution.model, var_desc_set, var_binding }); + + // Atomic helper buffers for image atomics need to use secondary bindings as well. + bool use_secondary_binding = (var_type.basetype == SPIRType::SampledImage && basetype == SPIRType::Sampler) || + basetype == SPIRType::AtomicCounter; + + auto resource_decoration = + use_secondary_binding ? 
SPIRVCrossDecorationResourceIndexSecondary : SPIRVCrossDecorationResourceIndexPrimary; + + if (plane == 1) + resource_decoration = SPIRVCrossDecorationResourceIndexTertiary; + if (plane == 2) + resource_decoration = SPIRVCrossDecorationResourceIndexQuaternary; + + if (itr != end(resource_bindings)) + { + auto &remap = itr->second; + remap.second = true; + switch (basetype) + { + case SPIRType::Image: + set_extended_decoration(var.self, resource_decoration, remap.first.msl_texture + plane); + return remap.first.msl_texture + plane; + case SPIRType::Sampler: + set_extended_decoration(var.self, resource_decoration, remap.first.msl_sampler); + return remap.first.msl_sampler; + default: + set_extended_decoration(var.self, resource_decoration, remap.first.msl_buffer); + return remap.first.msl_buffer; + } + } + + // If we have already allocated an index, keep using it. + if (has_extended_decoration(var.self, resource_decoration)) + return get_extended_decoration(var.self, resource_decoration); + + // Allow user to enable decoration binding + if (msl_options.enable_decoration_binding) + { + // If there is no explicit mapping of bindings to MSL, use the declared binding. + if (has_decoration(var.self, DecorationBinding)) + { + var_binding = get_decoration(var.self, DecorationBinding); + // Avoid emitting sentinel bindings. + if (var_binding < 0x80000000u) + return var_binding; + } + } + + // If we did not explicitly remap, allocate bindings on demand. + // We cannot reliably use Binding decorations since SPIR-V and MSL's binding models are very different. 
+ + bool allocate_argument_buffer_ids = false; + + if (var.storage != StorageClassPushConstant) + allocate_argument_buffer_ids = descriptor_set_is_argument_buffer(var_desc_set); + + uint32_t binding_stride = 1; + auto &type = get(var.basetype); + for (uint32_t i = 0; i < uint32_t(type.array.size()); i++) + binding_stride *= to_array_size_literal(type, i); + + assert(binding_stride != 0); + + // If a binding has not been specified, revert to incrementing resource indices. + uint32_t resource_index; + + if (type_is_msl_framebuffer_fetch(type)) + { + // Frame-buffer fetch gets its fallback resource index from the input attachment index, + // which is then treated as color index. + resource_index = get_decoration(var.self, DecorationInputAttachmentIndex); + } + else if (allocate_argument_buffer_ids) + { + // Allocate from a flat ID binding space. + resource_index = next_metal_resource_ids[var_desc_set]; + next_metal_resource_ids[var_desc_set] += binding_stride; + } + else + { + // Allocate from plain bindings which are allocated per resource type. 
+ switch (basetype) + { + case SPIRType::Image: + resource_index = next_metal_resource_index_texture; + next_metal_resource_index_texture += binding_stride; + break; + case SPIRType::Sampler: + resource_index = next_metal_resource_index_sampler; + next_metal_resource_index_sampler += binding_stride; + break; + default: + resource_index = next_metal_resource_index_buffer; + next_metal_resource_index_buffer += binding_stride; + break; + } + } + + set_extended_decoration(var.self, resource_decoration, resource_index); + return resource_index; +} + +/* Returns true only when the type is an Image with DimSubpassData dimensionality and msl_options.use_framebuffer_fetch_subpasses is set. */ bool CompilerMSL::type_is_msl_framebuffer_fetch(const SPIRType &type) const +{ + return type.basetype == SPIRType::Image && type.image.dim == DimSubpassData && + msl_options.use_framebuffer_fetch_subpasses; +} + +string CompilerMSL::argument_decl(const SPIRFunction::Parameter &arg) +{ + auto &var = get(arg.id); + auto &type = get_variable_data_type(var); + auto &var_type = get(arg.type); + StorageClass storage = var_type.storage; + bool is_pointer = var_type.pointer; + + // If we need to modify the name of the variable, make sure we use the original variable. + // Our alias is just a shadow variable. + uint32_t name_id = var.self; + if (arg.alias_global_variable && var.basevariable) + name_id = var.basevariable; + + bool constref = !arg.alias_global_variable && is_pointer && arg.write_count == 0; + // Framebuffer fetch is plain value, const looks out of place, but it is not wrong. + if (type_is_msl_framebuffer_fetch(type)) + constref = false; + + bool type_is_image = type.basetype == SPIRType::Image || type.basetype == SPIRType::SampledImage || + type.basetype == SPIRType::Sampler; + + // Arrays of images/samplers in MSL are always const.
+ if (!type.array.empty() && type_is_image) + constref = true; + + string decl; + if (constref) + decl += "const "; + + // If this is a combined image-sampler for a 2D image with floating-point type, + // we emitted the 'spvDynamicImageSampler' type, and this is *not* an alias parameter + // for a global, then we need to emit a "dynamic" combined image-sampler. + // Unfortunately, this is necessary to properly support passing around + // combined image-samplers with Y'CbCr conversions on them. + bool is_dynamic_img_sampler = !arg.alias_global_variable && type.basetype == SPIRType::SampledImage && + type.image.dim == Dim2D && type_is_floating_point(get(type.image.type)) && + spv_function_implementations.count(SPVFuncImplDynamicImageSampler); + + // Allow Metal to use the array template to make arrays a value type + string address_space = get_argument_address_space(var); + bool builtin = is_builtin_variable(var); + is_using_builtin_array = builtin; + if (address_space == "threadgroup") + is_using_builtin_array = true; + + if (var.basevariable && (var.basevariable == stage_in_ptr_var_id || var.basevariable == stage_out_ptr_var_id)) + decl += type_to_glsl(type, arg.id); + else if (builtin) + decl += builtin_type_decl(static_cast(get_decoration(arg.id, DecorationBuiltIn)), arg.id); + else if ((storage == StorageClassUniform || storage == StorageClassStorageBuffer) && is_array(type)) + { + is_using_builtin_array = true; + decl += join(type_to_glsl(type, arg.id), "*"); + } + else if (is_dynamic_img_sampler) + { + decl += join("spvDynamicImageSampler<", type_to_glsl(get(type.image.type)), ">"); + // Mark the variable so that we can handle passing it to another function. 
+ set_extended_decoration(arg.id, SPIRVCrossDecorationDynamicImageSampler); + } + else + decl += type_to_glsl(type, arg.id); + + bool opaque_handle = storage == StorageClassUniformConstant; + + if (!builtin && !opaque_handle && !is_pointer && + (storage == StorageClassFunction || storage == StorageClassGeneric)) + { + // If the argument is a pure value and not an opaque type, we will pass by value. + if (msl_options.force_native_arrays && is_array(type)) + { + // We are receiving an array by value. This is problematic. + // We cannot be sure of the target address space since we are supposed to receive a copy, + // but this is not possible with MSL without some extra work. + // We will have to assume we're getting a reference in thread address space. + // If we happen to get a reference in constant address space, the caller must emit a copy and pass that. + // Thread const therefore becomes the only logical choice, since we cannot "create" a constant array from + // non-constant arrays, but we can create thread const from constant. + decl = string("thread const ") + decl; + decl += " (&"; + const char *restrict_kw = to_restrict(name_id); + if (*restrict_kw) + { + decl += " "; + decl += restrict_kw; + } + decl += to_expression(name_id); + decl += ")"; + decl += type_to_array_glsl(type); + } + else + { + if (!address_space.empty()) + decl = join(address_space, " ", decl); + decl += " "; + decl += to_expression(name_id); + } + } + else if (is_array(type) && !type_is_image) + { + // Arrays of images and samplers are special cased. + if (!address_space.empty()) + decl = join(address_space, " ", decl); + + if (msl_options.argument_buffers) + { + uint32_t desc_set = get_decoration(name_id, DecorationDescriptorSet); + if ((storage == StorageClassUniform || storage == StorageClassStorageBuffer) && + descriptor_set_is_argument_buffer(desc_set)) + { + // An awkward case where we need to emit *more* address space declarations (yay!). 
+ // An example is where we pass down an array of buffer pointers to leaf functions. + // It's a constant array containing pointers to constants. + // The pointer array is always constant however. E.g. + // device SSBO * constant (&array)[N]. + // const device SSBO * constant (&array)[N]. + // constant SSBO * constant (&array)[N]. + // However, this only matters for argument buffers, since for MSL 1.0 style codegen, + // we emit the buffer array on stack instead, and that seems to work just fine apparently. + + // If the argument was marked as being in device address space, any pointer to member would + // be const device, not constant. + if (argument_buffer_device_storage_mask & (1u << desc_set)) + decl += " const device"; + else + decl += " constant"; + } + } + + decl += " (&"; + const char *restrict_kw = to_restrict(name_id); + if (*restrict_kw) + { + decl += " "; + decl += restrict_kw; + } + decl += to_expression(name_id); + decl += ")"; + decl += type_to_array_glsl(type); + } + else if (!opaque_handle && (!pull_model_inputs.count(var.basevariable) || type.basetype == SPIRType::Struct)) + { + // If this is going to be a reference to a variable pointer, the address space + // for the reference has to go before the '&', but after the '*'. 
+ if (!address_space.empty()) + { + if (decl.back() == '*') + decl += join(" ", address_space, " "); + else + decl = join(address_space, " ", decl); + } + decl += "&"; + decl += " "; + decl += to_restrict(name_id); + decl += to_expression(name_id); + } + else + { + if (!address_space.empty()) + decl = join(address_space, " ", decl); + decl += " "; + decl += to_expression(name_id); + } + + // Emulate texture2D atomic operations + auto *backing_var = maybe_get_backing_variable(name_id); + if (backing_var && atomic_image_vars.count(backing_var->self)) + { + decl += ", device atomic_" + type_to_glsl(get(var_type.image.type), 0); + decl += "* " + to_expression(name_id) + "_atomic"; + } + + is_using_builtin_array = false; + + return decl; +} + +// If we're currently in the entry point function, and the object +// has a qualified name, use it, otherwise use the standard name. The alias is read from the id's meta decoration; a missing meta entry or an empty qualified_alias falls through to Compiler::to_name(). +string CompilerMSL::to_name(uint32_t id, bool allow_alias) const +{ + if (current_function && (current_function->self == ir.default_entry_point)) + { + auto *m = ir.find_meta(id); + if (m && !m->decoration.qualified_alias.empty()) + return m->decoration.qualified_alias; + } + return Compiler::to_name(id, allow_alias); +} + +// Returns a name that combines the name of the struct with the name of the member, except for Builtins +string CompilerMSL::to_qualified_member_name(const SPIRType &type, uint32_t index) +{ + // Don't qualify Builtin names because they are unique and are treated as such when building expressions + BuiltIn builtin = BuiltInMax; + if (is_member_builtin(type, index, &builtin)) + return builtin_to_glsl(builtin, type.storage); + + // Strip any underscore prefix from member name + string mbr_name = to_member_name(type, index); + size_t startPos = mbr_name.find_first_not_of("_"); + mbr_name = (startPos != string::npos) ?
mbr_name.substr(startPos) : ""; + return join(to_name(type.self), "_", mbr_name); +} + +// Ensures that the specified name is permanently usable by prepending a prefix +// if the first chars are _ and a digit, which indicate a transient name. +string CompilerMSL::ensure_valid_name(string name, string pfx) +{ + return (name.size() >= 2 && name[0] == '_' && isdigit(name[1])) ? (pfx + name) : name; +} + +// Replace all names that match MSL keywords or Metal Standard Library functions. +void CompilerMSL::replace_illegal_names() +{ + // FIXME: MSL and GLSL are doing two different things here. + // Agree on convention and remove this override. + static const unordered_set keywords = { + "kernel", + "vertex", + "fragment", + "compute", + "bias", + "level", + "gradient2d", + "gradientcube", + "gradient3d", + "min_lod_clamp", + "assert", + "VARIABLE_TRACEPOINT", + "STATIC_DATA_TRACEPOINT", + "STATIC_DATA_TRACEPOINT_V", + "METAL_ALIGN", + "METAL_ASM", + "METAL_CONST", + "METAL_DEPRECATED", + "METAL_ENABLE_IF", + "METAL_FUNC", + "METAL_INTERNAL", + "METAL_NON_NULL_RETURN", + "METAL_NORETURN", + "METAL_NOTHROW", + "METAL_PURE", + "METAL_UNAVAILABLE", + "METAL_IMPLICIT", + "METAL_EXPLICIT", + "METAL_CONST_ARG", + "METAL_ARG_UNIFORM", + "METAL_ZERO_ARG", + "METAL_VALID_LOD_ARG", + "METAL_VALID_LEVEL_ARG", + "METAL_VALID_STORE_ORDER", + "METAL_VALID_LOAD_ORDER", + "METAL_VALID_COMPARE_EXCHANGE_FAILURE_ORDER", + "METAL_COMPATIBLE_COMPARE_EXCHANGE_ORDERS", + "METAL_VALID_RENDER_TARGET", + "is_function_constant_defined", + "CHAR_BIT", + "SCHAR_MAX", + "SCHAR_MIN", + "UCHAR_MAX", + "CHAR_MAX", + "CHAR_MIN", + "USHRT_MAX", + "SHRT_MAX", + "SHRT_MIN", + "UINT_MAX", + "INT_MAX", + "INT_MIN", + "FLT_DIG", + "FLT_MANT_DIG", + "FLT_MAX_10_EXP", + "FLT_MAX_EXP", + "FLT_MIN_10_EXP", + "FLT_MIN_EXP", + "FLT_RADIX", + "FLT_MAX", + "FLT_MIN", + "FLT_EPSILON", + "FP_ILOGB0", + "FP_ILOGBNAN", + "MAXFLOAT", + "HUGE_VALF", + "INFINITY", + "NAN", + "M_E_F", + "M_LOG2E_F", + "M_LOG10E_F", + 
"M_LN2_F", + "M_LN10_F", + "M_PI_F", + "M_PI_2_F", + "M_PI_4_F", + "M_1_PI_F", + "M_2_PI_F", + "M_2_SQRTPI_F", + "M_SQRT2_F", + "M_SQRT1_2_F", + "HALF_DIG", + "HALF_MANT_DIG", + "HALF_MAX_10_EXP", + "HALF_MAX_EXP", + "HALF_MIN_10_EXP", + "HALF_MIN_EXP", + "HALF_RADIX", + "HALF_MAX", + "HALF_MIN", + "HALF_EPSILON", + "MAXHALF", + "HUGE_VALH", + "M_E_H", + "M_LOG2E_H", + "M_LOG10E_H", + "M_LN2_H", + "M_LN10_H", + "M_PI_H", + "M_PI_2_H", + "M_PI_4_H", + "M_1_PI_H", + "M_2_PI_H", + "M_2_SQRTPI_H", + "M_SQRT2_H", + "M_SQRT1_2_H", + "DBL_DIG", + "DBL_MANT_DIG", + "DBL_MAX_10_EXP", + "DBL_MAX_EXP", + "DBL_MIN_10_EXP", + "DBL_MIN_EXP", + "DBL_RADIX", + "DBL_MAX", + "DBL_MIN", + "DBL_EPSILON", + "HUGE_VAL", + "M_E", + "M_LOG2E", + "M_LOG10E", + "M_LN2", + "M_LN10", + "M_PI", + "M_PI_2", + "M_PI_4", + "M_1_PI", + "M_2_PI", + "M_2_SQRTPI", + "M_SQRT2", + "M_SQRT1_2", + "quad_broadcast", + }; + + static const unordered_set illegal_func_names = { + "main", + "saturate", + "assert", + "fmin3", + "fmax3", + "VARIABLE_TRACEPOINT", + "STATIC_DATA_TRACEPOINT", + "STATIC_DATA_TRACEPOINT_V", + "METAL_ALIGN", + "METAL_ASM", + "METAL_CONST", + "METAL_DEPRECATED", + "METAL_ENABLE_IF", + "METAL_FUNC", + "METAL_INTERNAL", + "METAL_NON_NULL_RETURN", + "METAL_NORETURN", + "METAL_NOTHROW", + "METAL_PURE", + "METAL_UNAVAILABLE", + "METAL_IMPLICIT", + "METAL_EXPLICIT", + "METAL_CONST_ARG", + "METAL_ARG_UNIFORM", + "METAL_ZERO_ARG", + "METAL_VALID_LOD_ARG", + "METAL_VALID_LEVEL_ARG", + "METAL_VALID_STORE_ORDER", + "METAL_VALID_LOAD_ORDER", + "METAL_VALID_COMPARE_EXCHANGE_FAILURE_ORDER", + "METAL_COMPATIBLE_COMPARE_EXCHANGE_ORDERS", + "METAL_VALID_RENDER_TARGET", + "is_function_constant_defined", + "CHAR_BIT", + "SCHAR_MAX", + "SCHAR_MIN", + "UCHAR_MAX", + "CHAR_MAX", + "CHAR_MIN", + "USHRT_MAX", + "SHRT_MAX", + "SHRT_MIN", + "UINT_MAX", + "INT_MAX", + "INT_MIN", + "FLT_DIG", + "FLT_MANT_DIG", + "FLT_MAX_10_EXP", + "FLT_MAX_EXP", + "FLT_MIN_10_EXP", + "FLT_MIN_EXP", + "FLT_RADIX", + "FLT_MAX", + 
"FLT_MIN", + "FLT_EPSILON", + "FP_ILOGB0", + "FP_ILOGBNAN", + "MAXFLOAT", + "HUGE_VALF", + "INFINITY", + "NAN", + "M_E_F", + "M_LOG2E_F", + "M_LOG10E_F", + "M_LN2_F", + "M_LN10_F", + "M_PI_F", + "M_PI_2_F", + "M_PI_4_F", + "M_1_PI_F", + "M_2_PI_F", + "M_2_SQRTPI_F", + "M_SQRT2_F", + "M_SQRT1_2_F", + "HALF_DIG", + "HALF_MANT_DIG", + "HALF_MAX_10_EXP", + "HALF_MAX_EXP", + "HALF_MIN_10_EXP", + "HALF_MIN_EXP", + "HALF_RADIX", + "HALF_MAX", + "HALF_MIN", + "HALF_EPSILON", + "MAXHALF", + "HUGE_VALH", + "M_E_H", + "M_LOG2E_H", + "M_LOG10E_H", + "M_LN2_H", + "M_LN10_H", + "M_PI_H", + "M_PI_2_H", + "M_PI_4_H", + "M_1_PI_H", + "M_2_PI_H", + "M_2_SQRTPI_H", + "M_SQRT2_H", + "M_SQRT1_2_H", + "DBL_DIG", + "DBL_MANT_DIG", + "DBL_MAX_10_EXP", + "DBL_MAX_EXP", + "DBL_MIN_10_EXP", + "DBL_MIN_EXP", + "DBL_RADIX", + "DBL_MAX", + "DBL_MIN", + "DBL_EPSILON", + "HUGE_VAL", + "M_E", + "M_LOG2E", + "M_LOG10E", + "M_LN2", + "M_LN10", + "M_PI", + "M_PI_2", + "M_PI_4", + "M_1_PI", + "M_2_PI", + "M_2_SQRTPI", + "M_SQRT2", + "M_SQRT1_2", + }; + + ir.for_each_typed_id([&](uint32_t self, SPIRVariable &) { + auto *meta = ir.find_meta(self); + if (!meta) + return; + + auto &dec = meta->decoration; + if (keywords.find(dec.alias) != end(keywords)) + dec.alias += "0"; + }); + + ir.for_each_typed_id([&](uint32_t self, SPIRFunction &) { + auto *meta = ir.find_meta(self); + if (!meta) + return; + + auto &dec = meta->decoration; + if (illegal_func_names.find(dec.alias) != end(illegal_func_names)) + dec.alias += "0"; + }); + + ir.for_each_typed_id([&](uint32_t self, SPIRType &) { + auto *meta = ir.find_meta(self); + if (!meta) + return; + + for (auto &mbr_dec : meta->members) + if (keywords.find(mbr_dec.alias) != end(keywords)) + mbr_dec.alias += "0"; + }); + + for (auto &entry : ir.entry_points) + { + // Change both the entry point name and the alias, to keep them synced. 
+ string &ep_name = entry.second.name; + if (illegal_func_names.find(ep_name) != end(illegal_func_names)) + ep_name += "0"; + + // Always write this because entry point might have been renamed earlier. + ir.meta[entry.first].decoration.alias = ep_name; + } + + CompilerGLSL::replace_illegal_names(); +} + +/* Returns "->" or "." followed by the member name. The index is first mapped through type.member_type_index_redirection when it is in range. "->" is chosen when base is a Uniform or StorageBuffer variable whose basetype is an array and type is decorated Block or BufferBlock, or when ptr_chain is false and should_dereference(base) holds; otherwise "." is used. */ string CompilerMSL::to_member_reference(uint32_t base, const SPIRType &type, uint32_t index, bool ptr_chain) +{ + if (index < uint32_t(type.member_type_index_redirection.size())) + index = type.member_type_index_redirection[index]; + + auto *var = maybe_get(base); + // If this is a buffer array, we have to dereference the buffer pointers. + // Otherwise, if this is a pointer expression, dereference it. + + bool declared_as_pointer = false; + + if (var) + { + // Only allow -> dereference for block types. This is so we get expressions like + // buffer[i]->first_member.second_member, rather than buffer[i]->first->second. + bool is_block = has_decoration(type.self, DecorationBlock) || has_decoration(type.self, DecorationBufferBlock); + + bool is_buffer_variable = + is_block && (var->storage == StorageClassUniform || var->storage == StorageClassStorageBuffer); + declared_as_pointer = is_buffer_variable && is_array(get(var->basetype)); + } + + if (declared_as_pointer || (!ptr_chain && should_dereference(base))) + return join("->", to_member_name(type, index)); + else + return join(".", to_member_name(type, index)); +} + +/* Returns "threadgroup " (with trailing space) when the expression type of id has Workgroup storage, otherwise an empty string. */ string CompilerMSL::to_qualifiers_glsl(uint32_t id) +{ + string quals; + + auto &type = expression_type(id); + if (type.storage == StorageClassWorkgroup) + quals += "threadgroup "; + + return quals; +} + +// The optional id parameter indicates the object whose type we are trying +// to find the description for. It is optional. Most type descriptions do not +// depend on a specific object's use of that type. +string CompilerMSL::type_to_glsl(const SPIRType &type, uint32_t id) +{ + string type_name; + + // Pointer?
+ if (type.pointer) + { + const char *restrict_kw; + type_name = join(get_type_address_space(type, id), " ", type_to_glsl(get(type.parent_type), id)); + + switch (type.basetype) + { + case SPIRType::Image: + case SPIRType::SampledImage: + case SPIRType::Sampler: + // These are handles. + break; + default: + // Anything else can be a raw pointer. + type_name += "*"; + restrict_kw = to_restrict(id); + if (*restrict_kw) + { + type_name += " "; + type_name += restrict_kw; + } + break; + } + return type_name; + } + + switch (type.basetype) + { + case SPIRType::Struct: + // Need OpName lookup here to get a "sensible" name for a struct. + // Allow Metal to use the array template to make arrays a value type + type_name = to_name(type.self); + break; + + case SPIRType::Image: + case SPIRType::SampledImage: + return image_type_glsl(type, id); + + case SPIRType::Sampler: + return sampler_type(type, id); + + case SPIRType::Void: + return "void"; + + case SPIRType::AtomicCounter: + return "atomic_uint"; + + case SPIRType::ControlPointArray: + return join("patch_control_point<", type_to_glsl(get(type.parent_type), id), ">"); + + case SPIRType::Interpolant: + return join("interpolant<", type_to_glsl(get(type.parent_type), id), ", interpolation::", + has_decoration(type.self, DecorationNoPerspective) ? 
"no_perspective" : "perspective", ">"); + + // Scalars + case SPIRType::Boolean: + type_name = "bool"; + break; + case SPIRType::Char: + case SPIRType::SByte: + type_name = "char"; + break; + case SPIRType::UByte: + type_name = "uchar"; + break; + case SPIRType::Short: + type_name = "short"; + break; + case SPIRType::UShort: + type_name = "ushort"; + break; + case SPIRType::Int: + type_name = "int"; + break; + case SPIRType::UInt: + type_name = "uint"; + break; + case SPIRType::Int64: + if (!msl_options.supports_msl_version(2, 2)) + SPIRV_CROSS_THROW("64-bit integers are only supported in MSL 2.2 and above."); + type_name = "long"; + break; + case SPIRType::UInt64: + if (!msl_options.supports_msl_version(2, 2)) + SPIRV_CROSS_THROW("64-bit integers are only supported in MSL 2.2 and above."); + type_name = "ulong"; + break; + case SPIRType::Half: + type_name = "half"; + break; + case SPIRType::Float: + type_name = "float"; + break; + case SPIRType::Double: + type_name = "double"; // Currently unsupported + break; + + default: + return "unknown_type"; + } + + // Matrix? + if (type.columns > 1) + type_name += to_string(type.columns) + "x"; + + // Vector or Matrix? 
+ if (type.vecsize > 1) + type_name += to_string(type.vecsize); + + if (type.array.empty() || using_builtin_array()) + { + return type_name; + } + else + { + // Allow Metal to use the array template to make arrays a value type + add_spv_func_and_recompile(SPVFuncImplUnsafeArray); + string res; + string sizes; + + for (uint32_t i = 0; i < uint32_t(type.array.size()); i++) + { + res += "spvUnsafeArray<"; + sizes += ", "; + sizes += to_array_size(type, i); + sizes += ">"; + } + + res += type_name + sizes; + return res; + } +} + +/* Returns the array suffix produced by CompilerGLSL::type_to_array_glsl() for AtomicCounter and ControlPointArray types, and for any other type while using_builtin_array() is true; otherwise returns an empty string. */ string CompilerMSL::type_to_array_glsl(const SPIRType &type) +{ + // Allow Metal to use the array template to make arrays a value type + switch (type.basetype) + { + case SPIRType::AtomicCounter: + case SPIRType::ControlPointArray: + { + return CompilerGLSL::type_to_array_glsl(type); + } + default: + { + if (using_builtin_array()) + return CompilerGLSL::type_to_array_glsl(type); + else + return ""; + } + } +} + +// Threadgroup arrays can't have a wrapper type, so for Workgroup variables is_using_builtin_array is set to true around the CompilerGLSL::variable_decl() call and then set back to false (not restored to its previous value). +std::string CompilerMSL::variable_decl(const SPIRVariable &variable) +{ + if (variable.storage == StorageClassWorkgroup) + { + is_using_builtin_array = true; + } + std::string expr = CompilerGLSL::variable_decl(variable); + if (variable.storage == StorageClassWorkgroup) + { + is_using_builtin_array = false; + } + return expr; +} + +// GCC workaround of lambdas calling protected funcs; forwards its arguments unchanged to CompilerGLSL::variable_decl(). +std::string CompilerMSL::variable_decl(const SPIRType &type, const std::string &name, uint32_t id) +{ + return CompilerGLSL::variable_decl(type, name, id); +} + +std::string CompilerMSL::sampler_type(const SPIRType &type, uint32_t id) +{ + auto *var = maybe_get(id); + if (var && var->basevariable) + { + // Check against the base variable, and not a fake ID which might have been generated for this variable.
+ id = var->basevariable; + } + + if (!type.array.empty()) + { + if (!msl_options.supports_msl_version(2)) + SPIRV_CROSS_THROW("MSL 2.0 or greater is required for arrays of samplers."); + + if (type.array.size() > 1) + SPIRV_CROSS_THROW("Arrays of arrays of samplers are not supported in MSL."); + + // Arrays of samplers in MSL must be declared with a special array syntax ala C++11 std::array. + // If we have a runtime array, it could be a variable-count descriptor set binding. + uint32_t array_size = to_array_size_literal(type); + if (array_size == 0) + array_size = get_resource_array_size(id); + + if (array_size == 0) + SPIRV_CROSS_THROW("Unsized array of samplers is not supported in MSL."); + + auto &parent = get(get_pointee_type(type).parent_type); + return join("array<", sampler_type(parent, id), ", ", array_size, ">"); + } + else + return "sampler"; +} + +// Returns an MSL string describing the SPIR-V image type +string CompilerMSL::image_type_glsl(const SPIRType &type, uint32_t id) +{ + auto *var = maybe_get(id); + if (var && var->basevariable) + { + // For comparison images, check against the base variable, + // and not the fake ID which might have been generated for this variable. + id = var->basevariable; + } + + if (!type.array.empty()) + { + uint32_t major = 2, minor = 0; + if (msl_options.is_ios()) + { + major = 1; + minor = 2; + } + if (!msl_options.supports_msl_version(major, minor)) + { + if (msl_options.is_ios()) + SPIRV_CROSS_THROW("MSL 1.2 or greater is required for arrays of textures."); + else + SPIRV_CROSS_THROW("MSL 2.0 or greater is required for arrays of textures."); + } + + if (type.array.size() > 1) + SPIRV_CROSS_THROW("Arrays of arrays of textures are not supported in MSL."); + + // Arrays of images in MSL must be declared with a special array syntax ala C++11 std::array. + // If we have a runtime array, it could be a variable-count descriptor set binding. 
+ uint32_t array_size = to_array_size_literal(type); + if (array_size == 0) + array_size = get_resource_array_size(id); + + if (array_size == 0) + SPIRV_CROSS_THROW("Unsized array of images is not supported in MSL."); + + auto &parent = get(get_pointee_type(type).parent_type); + return join("array<", image_type_glsl(parent, id), ", ", array_size, ">"); + } + + string img_type_name; + + // Bypass pointers because we need the real image struct + auto &img_type = get(type.self).image; + if (image_is_comparison(type, id)) + { + switch (img_type.dim) + { + case Dim1D: + case Dim2D: + if (img_type.dim == Dim1D && !msl_options.texture_1D_as_2D) + { + // Use a native Metal 1D texture + img_type_name += "depth1d_unsupported_by_metal"; + break; + } + + if (img_type.ms && img_type.arrayed) + { + if (!msl_options.supports_msl_version(2, 1)) + SPIRV_CROSS_THROW("Multisampled array textures are supported from 2.1."); + img_type_name += "depth2d_ms_array"; + } + else if (img_type.ms) + img_type_name += "depth2d_ms"; + else if (img_type.arrayed) + img_type_name += "depth2d_array"; + else + img_type_name += "depth2d"; + break; + case Dim3D: + img_type_name += "depth3d_unsupported_by_metal"; + break; + case DimCube: + if (!msl_options.emulate_cube_array) + img_type_name += (img_type.arrayed ? "depthcube_array" : "depthcube"); + else + img_type_name += (img_type.arrayed ? 
"depth2d_array" : "depthcube"); + break; + default: + img_type_name += "unknown_depth_texture_type"; + break; + } + } + else + { + switch (img_type.dim) + { + case DimBuffer: + if (img_type.ms || img_type.arrayed) + SPIRV_CROSS_THROW("Cannot use texel buffers with multisampling or array layers."); + + if (msl_options.texture_buffer_native) + { + if (!msl_options.supports_msl_version(2, 1)) + SPIRV_CROSS_THROW("Native texture_buffer type is only supported in MSL 2.1."); + img_type_name = "texture_buffer"; + } + else + img_type_name += "texture2d"; + break; + case Dim1D: + case Dim2D: + case DimSubpassData: + { + bool subpass_array = + img_type.dim == DimSubpassData && (msl_options.multiview || msl_options.arrayed_subpass_input); + if (img_type.dim == Dim1D && !msl_options.texture_1D_as_2D) + { + // Use a native Metal 1D texture + img_type_name += (img_type.arrayed ? "texture1d_array" : "texture1d"); + break; + } + + // Use Metal's native frame-buffer fetch API for subpass inputs. + if (type_is_msl_framebuffer_fetch(type)) + { + auto img_type_4 = get(img_type.type); + img_type_4.vecsize = 4; + return type_to_glsl(img_type_4); + } + if (img_type.ms && (img_type.arrayed || subpass_array)) + { + if (!msl_options.supports_msl_version(2, 1)) + SPIRV_CROSS_THROW("Multisampled array textures are supported from 2.1."); + img_type_name += "texture2d_ms_array"; + } + else if (img_type.ms) + img_type_name += "texture2d_ms"; + else if (img_type.arrayed || subpass_array) + img_type_name += "texture2d_array"; + else + img_type_name += "texture2d"; + break; + } + case Dim3D: + img_type_name += "texture3d"; + break; + case DimCube: + if (!msl_options.emulate_cube_array) + img_type_name += (img_type.arrayed ? "texturecube_array" : "texturecube"); + else + img_type_name += (img_type.arrayed ? 
"texture2d_array" : "texturecube"); + break; + default: + img_type_name += "unknown_texture_type"; + break; + } + } + + // Append the pixel type + img_type_name += "<"; + img_type_name += type_to_glsl(get(img_type.type)); + + // For unsampled images, append the sample/read/write access qualifier. + // For kernel images, the access qualifier my be supplied directly by SPIR-V. + // Otherwise it may be set based on whether the image is read from or written to within the shader. + if (type.basetype == SPIRType::Image && type.image.sampled == 2 && type.image.dim != DimSubpassData) + { + switch (img_type.access) + { + case AccessQualifierReadOnly: + img_type_name += ", access::read"; + break; + + case AccessQualifierWriteOnly: + img_type_name += ", access::write"; + break; + + case AccessQualifierReadWrite: + img_type_name += ", access::read_write"; + break; + + default: + { + auto *p_var = maybe_get_backing_variable(id); + if (p_var && p_var->basevariable) + p_var = maybe_get(p_var->basevariable); + if (p_var && !has_decoration(p_var->self, DecorationNonWritable)) + { + img_type_name += ", access::"; + + if (!has_decoration(p_var->self, DecorationNonReadable)) + img_type_name += "read_"; + + img_type_name += "write"; + } + break; + } + } + } + + img_type_name += ">"; + + return img_type_name; +} + +void CompilerMSL::emit_subgroup_op(const Instruction &i) +{ + const uint32_t *ops = stream(i); + auto op = static_cast(i.op); + + if (msl_options.emulate_subgroups) + { + // In this mode, only the GroupNonUniform cap is supported. The only op + // we need to handle, then, is OpGroupNonUniformElect. + if (op != OpGroupNonUniformElect) + SPIRV_CROSS_THROW("Subgroup emulation does not support operations other than Elect."); + // In this mode, the subgroup size is assumed to be one, so every invocation + // is elected. + emit_op(ops[0], ops[1], "true", true); + return; + } + + // Metal 2.0 is required. 
iOS only supports quad ops on 11.0 (2.0), with + // full support in 13.0 (2.2). macOS only supports broadcast and shuffle on + // 10.13 (2.0), with full support in 10.14 (2.1). + // Note that Apple GPUs before A13 make no distinction between a quad-group + // and a SIMD-group; all SIMD-groups are quad-groups on those. + if (!msl_options.supports_msl_version(2)) + SPIRV_CROSS_THROW("Subgroups are only supported in Metal 2.0 and up."); + + // If we need to do implicit bitcasts, make sure we do it with the correct type. + uint32_t integer_width = get_integer_width_for_instruction(i); + auto int_type = to_signed_basetype(integer_width); + auto uint_type = to_unsigned_basetype(integer_width); + + if (msl_options.is_ios() && (!msl_options.supports_msl_version(2, 3) || !msl_options.ios_use_simdgroup_functions)) + { + switch (op) + { + default: + SPIRV_CROSS_THROW("Subgroup ops beyond broadcast, ballot, and shuffle on iOS require Metal 2.3 and up."); + case OpGroupNonUniformBroadcastFirst: + if (!msl_options.supports_msl_version(2, 2)) + SPIRV_CROSS_THROW("BroadcastFirst on iOS requires Metal 2.2 and up."); + break; + case OpGroupNonUniformElect: + if (!msl_options.supports_msl_version(2, 2)) + SPIRV_CROSS_THROW("Elect on iOS requires Metal 2.2 and up."); + break; + case OpGroupNonUniformAny: + case OpGroupNonUniformAll: + case OpGroupNonUniformAllEqual: + case OpGroupNonUniformBallot: + case OpGroupNonUniformInverseBallot: + case OpGroupNonUniformBallotBitExtract: + case OpGroupNonUniformBallotFindLSB: + case OpGroupNonUniformBallotFindMSB: + case OpGroupNonUniformBallotBitCount: + if (!msl_options.supports_msl_version(2, 2)) + SPIRV_CROSS_THROW("Ballot ops on iOS requires Metal 2.2 and up."); + break; + case OpGroupNonUniformBroadcast: + case OpGroupNonUniformShuffle: + case OpGroupNonUniformShuffleXor: + case OpGroupNonUniformShuffleUp: + case OpGroupNonUniformShuffleDown: + case OpGroupNonUniformQuadSwap: + case OpGroupNonUniformQuadBroadcast: + break; + } + } + + if 
(msl_options.is_macos() && !msl_options.supports_msl_version(2, 1)) + { + switch (op) + { + default: + SPIRV_CROSS_THROW("Subgroup ops beyond broadcast and shuffle on macOS require Metal 2.1 and up."); + case OpGroupNonUniformBroadcast: + case OpGroupNonUniformShuffle: + case OpGroupNonUniformShuffleXor: + case OpGroupNonUniformShuffleUp: + case OpGroupNonUniformShuffleDown: + break; + } + } + + uint32_t result_type = ops[0]; + uint32_t id = ops[1]; + + auto scope = static_cast(evaluate_constant_u32(ops[2])); + if (scope != ScopeSubgroup) + SPIRV_CROSS_THROW("Only subgroup scope is supported."); + + switch (op) + { + case OpGroupNonUniformElect: + if (msl_options.is_ios() && !msl_options.ios_use_simdgroup_functions) + emit_op(result_type, id, "quad_is_first()", false); + else + emit_op(result_type, id, "simd_is_first()", false); + break; + + case OpGroupNonUniformBroadcast: + emit_binary_func_op(result_type, id, ops[3], ops[4], "spvSubgroupBroadcast"); + break; + + case OpGroupNonUniformBroadcastFirst: + emit_unary_func_op(result_type, id, ops[3], "spvSubgroupBroadcastFirst"); + break; + + case OpGroupNonUniformBallot: + emit_unary_func_op(result_type, id, ops[3], "spvSubgroupBallot"); + break; + + case OpGroupNonUniformInverseBallot: + emit_binary_func_op(result_type, id, ops[3], builtin_subgroup_invocation_id_id, "spvSubgroupBallotBitExtract"); + break; + + case OpGroupNonUniformBallotBitExtract: + emit_binary_func_op(result_type, id, ops[3], ops[4], "spvSubgroupBallotBitExtract"); + break; + + case OpGroupNonUniformBallotFindLSB: + emit_binary_func_op(result_type, id, ops[3], builtin_subgroup_size_id, "spvSubgroupBallotFindLSB"); + break; + + case OpGroupNonUniformBallotFindMSB: + emit_binary_func_op(result_type, id, ops[3], builtin_subgroup_size_id, "spvSubgroupBallotFindMSB"); + break; + + case OpGroupNonUniformBallotBitCount: + { + auto operation = static_cast(ops[3]); + switch (operation) + { + case GroupOperationReduce: + emit_binary_func_op(result_type, 
id, ops[4], builtin_subgroup_size_id, "spvSubgroupBallotBitCount"); + break; + case GroupOperationInclusiveScan: + emit_binary_func_op(result_type, id, ops[4], builtin_subgroup_invocation_id_id, + "spvSubgroupBallotInclusiveBitCount"); + break; + case GroupOperationExclusiveScan: + emit_binary_func_op(result_type, id, ops[4], builtin_subgroup_invocation_id_id, + "spvSubgroupBallotExclusiveBitCount"); + break; + default: + SPIRV_CROSS_THROW("Invalid BitCount operation."); + break; + } + break; + } + + case OpGroupNonUniformShuffle: + emit_binary_func_op(result_type, id, ops[3], ops[4], "spvSubgroupShuffle"); + break; + + case OpGroupNonUniformShuffleXor: + emit_binary_func_op(result_type, id, ops[3], ops[4], "spvSubgroupShuffleXor"); + break; + + case OpGroupNonUniformShuffleUp: + emit_binary_func_op(result_type, id, ops[3], ops[4], "spvSubgroupShuffleUp"); + break; + + case OpGroupNonUniformShuffleDown: + emit_binary_func_op(result_type, id, ops[3], ops[4], "spvSubgroupShuffleDown"); + break; + + case OpGroupNonUniformAll: + if (msl_options.is_ios() && !msl_options.ios_use_simdgroup_functions) + emit_unary_func_op(result_type, id, ops[3], "quad_all"); + else + emit_unary_func_op(result_type, id, ops[3], "simd_all"); + break; + + case OpGroupNonUniformAny: + if (msl_options.is_ios() && !msl_options.ios_use_simdgroup_functions) + emit_unary_func_op(result_type, id, ops[3], "quad_any"); + else + emit_unary_func_op(result_type, id, ops[3], "simd_any"); + break; + + case OpGroupNonUniformAllEqual: + emit_unary_func_op(result_type, id, ops[3], "spvSubgroupAllEqual"); + break; + + // clang-format off +#define MSL_GROUP_OP(op, msl_op) \ +case OpGroupNonUniform##op: \ + { \ + auto operation = static_cast(ops[3]); \ + if (operation == GroupOperationReduce) \ + emit_unary_func_op(result_type, id, ops[4], "simd_" #msl_op); \ + else if (operation == GroupOperationInclusiveScan) \ + emit_unary_func_op(result_type, id, ops[4], "simd_prefix_inclusive_" #msl_op); \ + else if 
(operation == GroupOperationExclusiveScan) \ + emit_unary_func_op(result_type, id, ops[4], "simd_prefix_exclusive_" #msl_op); \ + else if (operation == GroupOperationClusteredReduce) \ + { \ + /* Only cluster sizes of 4 are supported. */ \ + uint32_t cluster_size = evaluate_constant_u32(ops[5]); \ + if (cluster_size != 4) \ + SPIRV_CROSS_THROW("Metal only supports quad ClusteredReduce."); \ + emit_unary_func_op(result_type, id, ops[4], "quad_" #msl_op); \ + } \ + else \ + SPIRV_CROSS_THROW("Invalid group operation."); \ + break; \ + } + MSL_GROUP_OP(FAdd, sum) + MSL_GROUP_OP(FMul, product) + MSL_GROUP_OP(IAdd, sum) + MSL_GROUP_OP(IMul, product) +#undef MSL_GROUP_OP + // The others, unfortunately, don't support InclusiveScan or ExclusiveScan. + +#define MSL_GROUP_OP(op, msl_op) \ +case OpGroupNonUniform##op: \ + { \ + auto operation = static_cast(ops[3]); \ + if (operation == GroupOperationReduce) \ + emit_unary_func_op(result_type, id, ops[4], "simd_" #msl_op); \ + else if (operation == GroupOperationInclusiveScan) \ + SPIRV_CROSS_THROW("Metal doesn't support InclusiveScan for OpGroupNonUniform" #op "."); \ + else if (operation == GroupOperationExclusiveScan) \ + SPIRV_CROSS_THROW("Metal doesn't support ExclusiveScan for OpGroupNonUniform" #op "."); \ + else if (operation == GroupOperationClusteredReduce) \ + { \ + /* Only cluster sizes of 4 are supported. 
*/ \ + uint32_t cluster_size = evaluate_constant_u32(ops[5]); \ + if (cluster_size != 4) \ + SPIRV_CROSS_THROW("Metal only supports quad ClusteredReduce."); \ + emit_unary_func_op(result_type, id, ops[4], "quad_" #msl_op); \ + } \ + else \ + SPIRV_CROSS_THROW("Invalid group operation."); \ + break; \ + } + +#define MSL_GROUP_OP_CAST(op, msl_op, type) \ +case OpGroupNonUniform##op: \ + { \ + auto operation = static_cast(ops[3]); \ + if (operation == GroupOperationReduce) \ + emit_unary_func_op_cast(result_type, id, ops[4], "simd_" #msl_op, type, type); \ + else if (operation == GroupOperationInclusiveScan) \ + SPIRV_CROSS_THROW("Metal doesn't support InclusiveScan for OpGroupNonUniform" #op "."); \ + else if (operation == GroupOperationExclusiveScan) \ + SPIRV_CROSS_THROW("Metal doesn't support ExclusiveScan for OpGroupNonUniform" #op "."); \ + else if (operation == GroupOperationClusteredReduce) \ + { \ + /* Only cluster sizes of 4 are supported. */ \ + uint32_t cluster_size = evaluate_constant_u32(ops[5]); \ + if (cluster_size != 4) \ + SPIRV_CROSS_THROW("Metal only supports quad ClusteredReduce."); \ + emit_unary_func_op_cast(result_type, id, ops[4], "quad_" #msl_op, type, type); \ + } \ + else \ + SPIRV_CROSS_THROW("Invalid group operation."); \ + break; \ + } + + MSL_GROUP_OP(FMin, min) + MSL_GROUP_OP(FMax, max) + MSL_GROUP_OP_CAST(SMin, min, int_type) + MSL_GROUP_OP_CAST(SMax, max, int_type) + MSL_GROUP_OP_CAST(UMin, min, uint_type) + MSL_GROUP_OP_CAST(UMax, max, uint_type) + MSL_GROUP_OP(BitwiseAnd, and) + MSL_GROUP_OP(BitwiseOr, or) + MSL_GROUP_OP(BitwiseXor, xor) + MSL_GROUP_OP(LogicalAnd, and) + MSL_GROUP_OP(LogicalOr, or) + MSL_GROUP_OP(LogicalXor, xor) + // clang-format on +#undef MSL_GROUP_OP +#undef MSL_GROUP_OP_CAST + + case OpGroupNonUniformQuadSwap: + emit_binary_func_op(result_type, id, ops[3], ops[4], "spvQuadSwap"); + break; + + case OpGroupNonUniformQuadBroadcast: + emit_binary_func_op(result_type, id, ops[3], ops[4], "spvQuadBroadcast"); + 
break; + + default: + SPIRV_CROSS_THROW("Invalid opcode for subgroup."); + } + + register_control_dependent_expression(id); +} + +string CompilerMSL::bitcast_glsl_op(const SPIRType &out_type, const SPIRType &in_type) +{ + if (out_type.basetype == in_type.basetype) + return ""; + + assert(out_type.basetype != SPIRType::Boolean); + assert(in_type.basetype != SPIRType::Boolean); + + bool integral_cast = type_is_integral(out_type) && type_is_integral(in_type); + bool same_size_cast = out_type.width == in_type.width; + + if (integral_cast && same_size_cast) + { + // Trivial bitcast case, casts between integers. + return type_to_glsl(out_type); + } + else + { + // Fall back to the catch-all bitcast in MSL. + return "as_type<" + type_to_glsl(out_type) + ">"; + } +} + +bool CompilerMSL::emit_complex_bitcast(uint32_t, uint32_t, uint32_t) +{ + return false; +} + +// Returns an MSL string identifying the name of a SPIR-V builtin. +// Output builtins are qualified with the name of the stage out structure. +string CompilerMSL::builtin_to_glsl(BuiltIn builtin, StorageClass storage) +{ + switch (builtin) + { + + // Handle HLSL-style 0-based vertex/instance index. 
+ // Override GLSL compiler strictness + case BuiltInVertexId: + ensure_builtin(StorageClassInput, BuiltInVertexId); + if (msl_options.enable_base_index_zero && msl_options.supports_msl_version(1, 1) && + (msl_options.ios_support_base_vertex_instance || msl_options.is_macos())) + { + if (builtin_declaration) + { + if (needs_base_vertex_arg != TriState::No) + needs_base_vertex_arg = TriState::Yes; + return "gl_VertexID"; + } + else + { + ensure_builtin(StorageClassInput, BuiltInBaseVertex); + return "(gl_VertexID - gl_BaseVertex)"; + } + } + else + { + return "gl_VertexID"; + } + case BuiltInInstanceId: + ensure_builtin(StorageClassInput, BuiltInInstanceId); + if (msl_options.enable_base_index_zero && msl_options.supports_msl_version(1, 1) && + (msl_options.ios_support_base_vertex_instance || msl_options.is_macos())) + { + if (builtin_declaration) + { + if (needs_base_instance_arg != TriState::No) + needs_base_instance_arg = TriState::Yes; + return "gl_InstanceID"; + } + else + { + ensure_builtin(StorageClassInput, BuiltInBaseInstance); + return "(gl_InstanceID - gl_BaseInstance)"; + } + } + else + { + return "gl_InstanceID"; + } + case BuiltInVertexIndex: + ensure_builtin(StorageClassInput, BuiltInVertexIndex); + if (msl_options.enable_base_index_zero && msl_options.supports_msl_version(1, 1) && + (msl_options.ios_support_base_vertex_instance || msl_options.is_macos())) + { + if (builtin_declaration) + { + if (needs_base_vertex_arg != TriState::No) + needs_base_vertex_arg = TriState::Yes; + return "gl_VertexIndex"; + } + else + { + ensure_builtin(StorageClassInput, BuiltInBaseVertex); + return "(gl_VertexIndex - gl_BaseVertex)"; + } + } + else + { + return "gl_VertexIndex"; + } + case BuiltInInstanceIndex: + ensure_builtin(StorageClassInput, BuiltInInstanceIndex); + if (msl_options.enable_base_index_zero && msl_options.supports_msl_version(1, 1) && + (msl_options.ios_support_base_vertex_instance || msl_options.is_macos())) + { + if (builtin_declaration) + { + if 
(needs_base_instance_arg != TriState::No) + needs_base_instance_arg = TriState::Yes; + return "gl_InstanceIndex"; + } + else + { + ensure_builtin(StorageClassInput, BuiltInBaseInstance); + return "(gl_InstanceIndex - gl_BaseInstance)"; + } + } + else + { + return "gl_InstanceIndex"; + } + case BuiltInBaseVertex: + if (msl_options.supports_msl_version(1, 1) && + (msl_options.ios_support_base_vertex_instance || msl_options.is_macos())) + { + needs_base_vertex_arg = TriState::No; + return "gl_BaseVertex"; + } + else + { + SPIRV_CROSS_THROW("BaseVertex requires Metal 1.1 and Mac or Apple A9+ hardware."); + } + case BuiltInBaseInstance: + if (msl_options.supports_msl_version(1, 1) && + (msl_options.ios_support_base_vertex_instance || msl_options.is_macos())) + { + needs_base_instance_arg = TriState::No; + return "gl_BaseInstance"; + } + else + { + SPIRV_CROSS_THROW("BaseInstance requires Metal 1.1 and Mac or Apple A9+ hardware."); + } + case BuiltInDrawIndex: + SPIRV_CROSS_THROW("DrawIndex is not supported in MSL."); + + // When used in the entry function, output builtins are qualified with output struct name. + // Test storage class as NOT Input, as output builtins might be part of generic type. + // Also don't do this for tessellation control shaders. 
+ case BuiltInViewportIndex: + if (!msl_options.supports_msl_version(2, 0)) + SPIRV_CROSS_THROW("ViewportIndex requires Metal 2.0."); + /* fallthrough */ + case BuiltInFragDepth: + case BuiltInFragStencilRefEXT: + if ((builtin == BuiltInFragDepth && !msl_options.enable_frag_depth_builtin) || + (builtin == BuiltInFragStencilRefEXT && !msl_options.enable_frag_stencil_ref_builtin)) + break; + /* fallthrough */ + case BuiltInPosition: + case BuiltInPointSize: + case BuiltInClipDistance: + case BuiltInCullDistance: + case BuiltInLayer: + case BuiltInSampleMask: + if (get_execution_model() == ExecutionModelTessellationControl) + break; + if (storage != StorageClassInput && current_function && (current_function->self == ir.default_entry_point)) + return stage_out_var_name + "." + CompilerGLSL::builtin_to_glsl(builtin, storage); + + break; + + case BuiltInBaryCoordNV: + case BuiltInBaryCoordNoPerspNV: + if (storage == StorageClassInput && current_function && (current_function->self == ir.default_entry_point)) + return stage_in_var_name + "." 
+ CompilerGLSL::builtin_to_glsl(builtin, storage); + break; + + case BuiltInTessLevelOuter: + if (get_execution_model() == ExecutionModelTessellationEvaluation) + { + if (storage != StorageClassOutput && !get_entry_point().flags.get(ExecutionModeTriangles) && + current_function && (current_function->self == ir.default_entry_point)) + return join(patch_stage_in_var_name, ".", CompilerGLSL::builtin_to_glsl(builtin, storage)); + else + break; + } + if (storage != StorageClassInput && current_function && (current_function->self == ir.default_entry_point)) + return join(tess_factor_buffer_var_name, "[", to_expression(builtin_primitive_id_id), + "].edgeTessellationFactor"); + break; + + case BuiltInTessLevelInner: + if (get_execution_model() == ExecutionModelTessellationEvaluation) + { + if (storage != StorageClassOutput && !get_entry_point().flags.get(ExecutionModeTriangles) && + current_function && (current_function->self == ir.default_entry_point)) + return join(patch_stage_in_var_name, ".", CompilerGLSL::builtin_to_glsl(builtin, storage)); + else + break; + } + if (storage != StorageClassInput && current_function && (current_function->self == ir.default_entry_point)) + return join(tess_factor_buffer_var_name, "[", to_expression(builtin_primitive_id_id), + "].insideTessellationFactor"); + break; + + default: + break; + } + + return CompilerGLSL::builtin_to_glsl(builtin, storage); +} + +// Returns an MSL string attribute qualifer for a SPIR-V builtin +string CompilerMSL::builtin_qualifier(BuiltIn builtin) +{ + auto &execution = get_entry_point(); + + switch (builtin) + { + // Vertex function in + case BuiltInVertexId: + return "vertex_id"; + case BuiltInVertexIndex: + return "vertex_id"; + case BuiltInBaseVertex: + return "base_vertex"; + case BuiltInInstanceId: + return "instance_id"; + case BuiltInInstanceIndex: + return "instance_id"; + case BuiltInBaseInstance: + return "base_instance"; + case BuiltInDrawIndex: + SPIRV_CROSS_THROW("DrawIndex is not supported in 
MSL."); + + // Vertex function out + case BuiltInClipDistance: + return "clip_distance"; + case BuiltInPointSize: + return "point_size"; + case BuiltInPosition: + if (position_invariant) + { + if (!msl_options.supports_msl_version(2, 1)) + SPIRV_CROSS_THROW("Invariant position is only supported on MSL 2.1 and up."); + return "position, invariant"; + } + else + return "position"; + case BuiltInLayer: + return "render_target_array_index"; + case BuiltInViewportIndex: + if (!msl_options.supports_msl_version(2, 0)) + SPIRV_CROSS_THROW("ViewportIndex requires Metal 2.0."); + return "viewport_array_index"; + + // Tess. control function in + case BuiltInInvocationId: + if (msl_options.multi_patch_workgroup) + { + // Shouldn't be reached. + SPIRV_CROSS_THROW("InvocationId is computed manually with multi-patch workgroups in MSL."); + } + return "thread_index_in_threadgroup"; + case BuiltInPatchVertices: + // Shouldn't be reached. + SPIRV_CROSS_THROW("PatchVertices is derived from the auxiliary buffer in MSL."); + case BuiltInPrimitiveId: + switch (execution.model) + { + case ExecutionModelTessellationControl: + if (msl_options.multi_patch_workgroup) + { + // Shouldn't be reached. + SPIRV_CROSS_THROW("PrimitiveId is computed manually with multi-patch workgroups in MSL."); + } + return "threadgroup_position_in_grid"; + case ExecutionModelTessellationEvaluation: + return "patch_id"; + case ExecutionModelFragment: + if (msl_options.is_ios() && !msl_options.supports_msl_version(2, 3)) + SPIRV_CROSS_THROW("PrimitiveId on iOS requires MSL 2.3."); + else if (msl_options.is_macos() && !msl_options.supports_msl_version(2, 2)) + SPIRV_CROSS_THROW("PrimitiveId on macOS requires MSL 2.2."); + return "primitive_id"; + default: + SPIRV_CROSS_THROW("PrimitiveId is not supported in this execution model."); + } + + // Tess. control function out + case BuiltInTessLevelOuter: + case BuiltInTessLevelInner: + // Shouldn't be reached. 
+ SPIRV_CROSS_THROW("Tessellation levels are handled specially in MSL."); + + // Tess. evaluation function in + case BuiltInTessCoord: + return "position_in_patch"; + + // Fragment function in + case BuiltInFrontFacing: + return "front_facing"; + case BuiltInPointCoord: + return "point_coord"; + case BuiltInFragCoord: + return "position"; + case BuiltInSampleId: + return "sample_id"; + case BuiltInSampleMask: + return "sample_mask"; + case BuiltInSamplePosition: + // Shouldn't be reached. + SPIRV_CROSS_THROW("Sample position is retrieved by a function in MSL."); + case BuiltInViewIndex: + if (execution.model != ExecutionModelFragment) + SPIRV_CROSS_THROW("ViewIndex is handled specially outside fragment shaders."); + // The ViewIndex was implicitly used in the prior stages to set the render_target_array_index, + // so we can get it from there. + return "render_target_array_index"; + + // Fragment function out + case BuiltInFragDepth: + if (execution.flags.get(ExecutionModeDepthGreater)) + return "depth(greater)"; + else if (execution.flags.get(ExecutionModeDepthLess)) + return "depth(less)"; + else + return "depth(any)"; + + case BuiltInFragStencilRefEXT: + return "stencil"; + + // Compute function in + case BuiltInGlobalInvocationId: + return "thread_position_in_grid"; + + case BuiltInWorkgroupId: + return "threadgroup_position_in_grid"; + + case BuiltInNumWorkgroups: + return "threadgroups_per_grid"; + + case BuiltInLocalInvocationId: + return "thread_position_in_threadgroup"; + + case BuiltInLocalInvocationIndex: + return "thread_index_in_threadgroup"; + + case BuiltInSubgroupSize: + if (msl_options.emulate_subgroups || msl_options.fixed_subgroup_size != 0) + // Shouldn't be reached. 
+ SPIRV_CROSS_THROW("Emitting threads_per_simdgroup attribute with fixed subgroup size??"); + if (execution.model == ExecutionModelFragment) + { + if (!msl_options.supports_msl_version(2, 2)) + SPIRV_CROSS_THROW("threads_per_simdgroup requires Metal 2.2 in fragment shaders."); + return "threads_per_simdgroup"; + } + else + { + // thread_execution_width is an alias for threads_per_simdgroup, and it's only available since 1.0, + // but not in fragment. + return "thread_execution_width"; + } + + case BuiltInNumSubgroups: + if (msl_options.emulate_subgroups) + // Shouldn't be reached. + SPIRV_CROSS_THROW("NumSubgroups is handled specially with emulation."); + if (!msl_options.supports_msl_version(2)) + SPIRV_CROSS_THROW("Subgroup builtins require Metal 2.0."); + return msl_options.is_ios() ? "quadgroups_per_threadgroup" : "simdgroups_per_threadgroup"; + + case BuiltInSubgroupId: + if (msl_options.emulate_subgroups) + // Shouldn't be reached. + SPIRV_CROSS_THROW("SubgroupId is handled specially with emulation."); + if (!msl_options.supports_msl_version(2)) + SPIRV_CROSS_THROW("Subgroup builtins require Metal 2.0."); + return msl_options.is_ios() ? "quadgroup_index_in_threadgroup" : "simdgroup_index_in_threadgroup"; + + case BuiltInSubgroupLocalInvocationId: + if (msl_options.emulate_subgroups) + // Shouldn't be reached. + SPIRV_CROSS_THROW("SubgroupLocalInvocationId is handled specially with emulation."); + if (execution.model == ExecutionModelFragment) + { + if (!msl_options.supports_msl_version(2, 2)) + SPIRV_CROSS_THROW("thread_index_in_simdgroup requires Metal 2.2 in fragment shaders."); + return "thread_index_in_simdgroup"; + } + else if (execution.model == ExecutionModelKernel || execution.model == ExecutionModelGLCompute || + execution.model == ExecutionModelTessellationControl || + (execution.model == ExecutionModelVertex && msl_options.vertex_for_tessellation)) + { + // We are generating a Metal kernel function. 
+ if (!msl_options.supports_msl_version(2)) + SPIRV_CROSS_THROW("Subgroup builtins in kernel functions require Metal 2.0."); + return msl_options.is_ios() ? "thread_index_in_quadgroup" : "thread_index_in_simdgroup"; + } + else + SPIRV_CROSS_THROW("Subgroup builtins are not available in this type of function."); + + case BuiltInSubgroupEqMask: + case BuiltInSubgroupGeMask: + case BuiltInSubgroupGtMask: + case BuiltInSubgroupLeMask: + case BuiltInSubgroupLtMask: + // Shouldn't be reached. + SPIRV_CROSS_THROW("Subgroup ballot masks are handled specially in MSL."); + + case BuiltInBaryCoordNV: + // TODO: AMD barycentrics as well? Seem to have different swizzle and 2 components rather than 3. + if (msl_options.is_ios() && !msl_options.supports_msl_version(2, 3)) + SPIRV_CROSS_THROW("Barycentrics are only supported in MSL 2.3 and above on iOS."); + else if (!msl_options.supports_msl_version(2, 2)) + SPIRV_CROSS_THROW("Barycentrics are only supported in MSL 2.2 and above on macOS."); + return "barycentric_coord, center_perspective"; + + case BuiltInBaryCoordNoPerspNV: + // TODO: AMD barycentrics as well? Seem to have different swizzle and 2 components rather than 3. 
+ if (msl_options.is_ios() && !msl_options.supports_msl_version(2, 3)) + SPIRV_CROSS_THROW("Barycentrics are only supported in MSL 2.3 and above on iOS."); + else if (!msl_options.supports_msl_version(2, 2)) + SPIRV_CROSS_THROW("Barycentrics are only supported in MSL 2.2 and above on macOS."); + return "barycentric_coord, center_no_perspective"; + + default: + return "unsupported-built-in"; + } +} + +// Returns an MSL string type declaration for a SPIR-V builtin +string CompilerMSL::builtin_type_decl(BuiltIn builtin, uint32_t id) +{ + const SPIREntryPoint &execution = get_entry_point(); + switch (builtin) + { + // Vertex function in + case BuiltInVertexId: + return "uint"; + case BuiltInVertexIndex: + return "uint"; + case BuiltInBaseVertex: + return "uint"; + case BuiltInInstanceId: + return "uint"; + case BuiltInInstanceIndex: + return "uint"; + case BuiltInBaseInstance: + return "uint"; + case BuiltInDrawIndex: + SPIRV_CROSS_THROW("DrawIndex is not supported in MSL."); + + // Vertex function out + case BuiltInClipDistance: + return "float"; + case BuiltInPointSize: + return "float"; + case BuiltInPosition: + return "float4"; + case BuiltInLayer: + return "uint"; + case BuiltInViewportIndex: + if (!msl_options.supports_msl_version(2, 0)) + SPIRV_CROSS_THROW("ViewportIndex requires Metal 2.0."); + return "uint"; + + // Tess. control function in + case BuiltInInvocationId: + return "uint"; + case BuiltInPatchVertices: + return "uint"; + case BuiltInPrimitiveId: + return "uint"; + + // Tess. control function out + case BuiltInTessLevelInner: + if (execution.model == ExecutionModelTessellationEvaluation) + return !execution.flags.get(ExecutionModeTriangles) ? "float2" : "float"; + return "half"; + case BuiltInTessLevelOuter: + if (execution.model == ExecutionModelTessellationEvaluation) + return !execution.flags.get(ExecutionModeTriangles) ? "float4" : "float"; + return "half"; + + // Tess. 
evaluation function in + case BuiltInTessCoord: + return execution.flags.get(ExecutionModeTriangles) ? "float3" : "float2"; + + // Fragment function in + case BuiltInFrontFacing: + return "bool"; + case BuiltInPointCoord: + return "float2"; + case BuiltInFragCoord: + return "float4"; + case BuiltInSampleId: + return "uint"; + case BuiltInSampleMask: + return "uint"; + case BuiltInSamplePosition: + return "float2"; + case BuiltInViewIndex: + return "uint"; + + case BuiltInHelperInvocation: + return "bool"; + + case BuiltInBaryCoordNV: + case BuiltInBaryCoordNoPerspNV: + // Use the type as declared, can be 1, 2 or 3 components. + return type_to_glsl(get_variable_data_type(get(id))); + + // Fragment function out + case BuiltInFragDepth: + return "float"; + + case BuiltInFragStencilRefEXT: + return "uint"; + + // Compute function in + case BuiltInGlobalInvocationId: + case BuiltInLocalInvocationId: + case BuiltInNumWorkgroups: + case BuiltInWorkgroupId: + return "uint3"; + case BuiltInLocalInvocationIndex: + case BuiltInNumSubgroups: + case BuiltInSubgroupId: + case BuiltInSubgroupSize: + case BuiltInSubgroupLocalInvocationId: + return "uint"; + case BuiltInSubgroupEqMask: + case BuiltInSubgroupGeMask: + case BuiltInSubgroupGtMask: + case BuiltInSubgroupLeMask: + case BuiltInSubgroupLtMask: + return "uint4"; + + case BuiltInDeviceIndex: + return "int"; + + default: + return "unsupported-built-in-type"; + } +} + +// Returns the declaration of a built-in argument to a function +string CompilerMSL::built_in_func_arg(BuiltIn builtin, bool prefix_comma) +{ + string bi_arg; + if (prefix_comma) + bi_arg += ", "; + + // Handle HLSL-style 0-based vertex/instance index. 
+ builtin_declaration = true; + bi_arg += builtin_type_decl(builtin); + bi_arg += " " + builtin_to_glsl(builtin, StorageClassInput); + bi_arg += " [[" + builtin_qualifier(builtin) + "]]"; + builtin_declaration = false; + + return bi_arg; +} + +const SPIRType &CompilerMSL::get_physical_member_type(const SPIRType &type, uint32_t index) const +{ + if (member_is_remapped_physical_type(type, index)) + return get(get_extended_member_decoration(type.self, index, SPIRVCrossDecorationPhysicalTypeID)); + else + return get(type.member_types[index]); +} + +SPIRType CompilerMSL::get_presumed_input_type(const SPIRType &ib_type, uint32_t index) const +{ + SPIRType type = get_physical_member_type(ib_type, index); + uint32_t loc = get_member_decoration(ib_type.self, index, DecorationLocation); + if (inputs_by_location.count(loc)) + { + if (inputs_by_location.at(loc).vecsize > type.vecsize) + type.vecsize = inputs_by_location.at(loc).vecsize; + } + return type; +} + +uint32_t CompilerMSL::get_declared_type_array_stride_msl(const SPIRType &type, bool is_packed, bool row_major) const +{ + // Array stride in MSL is always size * array_size. sizeof(float3) == 16, + // unlike GLSL and HLSL where array stride would be 16 and size 12. + + // We could use parent type here and recurse, but that makes creating physical type remappings + // far more complicated. We'd rather just create the final type, and ignore having to create the entire type + // hierarchy in order to compute this value, so make a temporary type on the stack. + + auto basic_type = type; + basic_type.array.clear(); + basic_type.array_size_literal.clear(); + uint32_t value_size = get_declared_type_size_msl(basic_type, is_packed, row_major); + + uint32_t dimensions = uint32_t(type.array.size()); + assert(dimensions > 0); + dimensions--; + + // Multiply together every dimension, except the last one. 
+ for (uint32_t dim = 0; dim < dimensions; dim++) + { + uint32_t array_size = to_array_size_literal(type, dim); + value_size *= max(array_size, 1u); + } + + return value_size; +} + +uint32_t CompilerMSL::get_declared_struct_member_array_stride_msl(const SPIRType &type, uint32_t index) const +{ + return get_declared_type_array_stride_msl(get_physical_member_type(type, index), + member_is_packed_physical_type(type, index), + has_member_decoration(type.self, index, DecorationRowMajor)); +} + +uint32_t CompilerMSL::get_declared_input_array_stride_msl(const SPIRType &type, uint32_t index) const +{ + return get_declared_type_array_stride_msl(get_presumed_input_type(type, index), false, + has_member_decoration(type.self, index, DecorationRowMajor)); +} + +uint32_t CompilerMSL::get_declared_type_matrix_stride_msl(const SPIRType &type, bool packed, bool row_major) const +{ + // For packed matrices, we just use the size of the vector type. + // Otherwise, MatrixStride == alignment, which is the size of the underlying vector type. + if (packed) + return (type.width / 8) * ((row_major && type.columns > 1) ? 
type.columns : type.vecsize); + else + return get_declared_type_alignment_msl(type, false, row_major); +} + +uint32_t CompilerMSL::get_declared_struct_member_matrix_stride_msl(const SPIRType &type, uint32_t index) const +{ + return get_declared_type_matrix_stride_msl(get_physical_member_type(type, index), + member_is_packed_physical_type(type, index), + has_member_decoration(type.self, index, DecorationRowMajor)); +} + +uint32_t CompilerMSL::get_declared_input_matrix_stride_msl(const SPIRType &type, uint32_t index) const +{ + return get_declared_type_matrix_stride_msl(get_presumed_input_type(type, index), false, + has_member_decoration(type.self, index, DecorationRowMajor)); +} + +uint32_t CompilerMSL::get_declared_struct_size_msl(const SPIRType &struct_type, bool ignore_alignment, + bool ignore_padding) const +{ + // If we have a target size, that is the declared size as well. + if (!ignore_padding && has_extended_decoration(struct_type.self, SPIRVCrossDecorationPaddingTarget)) + return get_extended_decoration(struct_type.self, SPIRVCrossDecorationPaddingTarget); + + if (struct_type.member_types.empty()) + return 0; + + uint32_t mbr_cnt = uint32_t(struct_type.member_types.size()); + + // In MSL, a struct's alignment is equal to the maximum alignment of any of its members. + uint32_t alignment = 1; + + if (!ignore_alignment) + { + for (uint32_t i = 0; i < mbr_cnt; i++) + { + uint32_t mbr_alignment = get_declared_struct_member_alignment_msl(struct_type, i); + alignment = max(alignment, mbr_alignment); + } + } + + // Last member will always be matched to the final Offset decoration, but size of struct in MSL now depends + // on physical size in MSL, and the size of the struct itself is then aligned to struct alignment. 
+ uint32_t spirv_offset = type_struct_member_offset(struct_type, mbr_cnt - 1); + uint32_t msl_size = spirv_offset + get_declared_struct_member_size_msl(struct_type, mbr_cnt - 1); + msl_size = (msl_size + alignment - 1) & ~(alignment - 1); + return msl_size; +} + +// Returns the byte size of a struct member. +uint32_t CompilerMSL::get_declared_type_size_msl(const SPIRType &type, bool is_packed, bool row_major) const +{ + switch (type.basetype) + { + case SPIRType::Unknown: + case SPIRType::Void: + case SPIRType::AtomicCounter: + case SPIRType::Image: + case SPIRType::SampledImage: + case SPIRType::Sampler: + SPIRV_CROSS_THROW("Querying size of opaque object."); + + default: + { + if (!type.array.empty()) + { + uint32_t array_size = to_array_size_literal(type); + return get_declared_type_array_stride_msl(type, is_packed, row_major) * max(array_size, 1u); + } + + if (type.basetype == SPIRType::Struct) + return get_declared_struct_size_msl(type); + + if (is_packed) + { + return type.vecsize * type.columns * (type.width / 8); + } + else + { + // An unpacked 3-element vector or matrix column is the same memory size as a 4-element. + uint32_t vecsize = type.vecsize; + uint32_t columns = type.columns; + + if (row_major && columns > 1) + swap(vecsize, columns); + + if (vecsize == 3) + vecsize = 4; + + return vecsize * columns * (type.width / 8); + } + } + } +} + +uint32_t CompilerMSL::get_declared_struct_member_size_msl(const SPIRType &type, uint32_t index) const +{ + return get_declared_type_size_msl(get_physical_member_type(type, index), + member_is_packed_physical_type(type, index), + has_member_decoration(type.self, index, DecorationRowMajor)); +} + +uint32_t CompilerMSL::get_declared_input_size_msl(const SPIRType &type, uint32_t index) const +{ + return get_declared_type_size_msl(get_presumed_input_type(type, index), false, + has_member_decoration(type.self, index, DecorationRowMajor)); +} + +// Returns the byte alignment of a type. 
+uint32_t CompilerMSL::get_declared_type_alignment_msl(const SPIRType &type, bool is_packed, bool row_major) const +{ + switch (type.basetype) + { + case SPIRType::Unknown: + case SPIRType::Void: + case SPIRType::AtomicCounter: + case SPIRType::Image: + case SPIRType::SampledImage: + case SPIRType::Sampler: + SPIRV_CROSS_THROW("Querying alignment of opaque object."); + + case SPIRType::Int64: + SPIRV_CROSS_THROW("long types are not supported in buffers in MSL."); + case SPIRType::UInt64: + SPIRV_CROSS_THROW("ulong types are not supported in buffers in MSL."); + case SPIRType::Double: + SPIRV_CROSS_THROW("double types are not supported in buffers in MSL."); + + case SPIRType::Struct: + { + // In MSL, a struct's alignment is equal to the maximum alignment of any of its members. + uint32_t alignment = 1; + for (uint32_t i = 0; i < type.member_types.size(); i++) + alignment = max(alignment, uint32_t(get_declared_struct_member_alignment_msl(type, i))); + return alignment; + } + + default: + { + // Alignment of packed type is the same as the underlying component or column size. + // Alignment of unpacked type is the same as the vector size. + // Alignment of 3-elements vector is the same as 4-elements (including packed using column). + if (is_packed) + { + // If we have packed_T and friends, the alignment is always scalar. + return type.width / 8; + } + else + { + // This is the general rule for MSL. Size == alignment. + uint32_t vecsize = (row_major && type.columns > 1) ? type.columns : type.vecsize; + return (type.width / 8) * (vecsize == 3 ? 
4 : vecsize); + } + } + } +} + +uint32_t CompilerMSL::get_declared_struct_member_alignment_msl(const SPIRType &type, uint32_t index) const +{ + return get_declared_type_alignment_msl(get_physical_member_type(type, index), + member_is_packed_physical_type(type, index), + has_member_decoration(type.self, index, DecorationRowMajor)); +} + +uint32_t CompilerMSL::get_declared_input_alignment_msl(const SPIRType &type, uint32_t index) const +{ + return get_declared_type_alignment_msl(get_presumed_input_type(type, index), false, + has_member_decoration(type.self, index, DecorationRowMajor)); +} + +bool CompilerMSL::skip_argument(uint32_t) const +{ + return false; +} + +void CompilerMSL::analyze_sampled_image_usage() +{ + if (msl_options.swizzle_texture_samples) + { + SampledImageScanner scanner(*this); + traverse_all_reachable_opcodes(get(ir.default_entry_point), scanner); + } +} + +bool CompilerMSL::SampledImageScanner::handle(spv::Op opcode, const uint32_t *args, uint32_t length) +{ + switch (opcode) + { + case OpLoad: + case OpImage: + case OpSampledImage: + { + if (length < 3) + return false; + + uint32_t result_type = args[0]; + auto &type = compiler.get(result_type); + if ((type.basetype != SPIRType::Image && type.basetype != SPIRType::SampledImage) || type.image.sampled != 1) + return true; + + uint32_t id = args[1]; + compiler.set(id, "", result_type, true); + break; + } + case OpImageSampleExplicitLod: + case OpImageSampleProjExplicitLod: + case OpImageSampleDrefExplicitLod: + case OpImageSampleProjDrefExplicitLod: + case OpImageSampleImplicitLod: + case OpImageSampleProjImplicitLod: + case OpImageSampleDrefImplicitLod: + case OpImageSampleProjDrefImplicitLod: + case OpImageFetch: + case OpImageGather: + case OpImageDrefGather: + compiler.has_sampled_images = + compiler.has_sampled_images || compiler.is_sampled_image_type(compiler.expression_type(args[2])); + compiler.needs_swizzle_buffer_def = compiler.needs_swizzle_buffer_def || compiler.has_sampled_images; + 
break; + default: + break; + } + return true; +} + +// If a needed custom function wasn't added before, add it and force a recompile. +void CompilerMSL::add_spv_func_and_recompile(SPVFuncImpl spv_func) +{ + if (spv_function_implementations.count(spv_func) == 0) + { + spv_function_implementations.insert(spv_func); + suppress_missing_prototypes = true; + force_recompile(); + } +} + +bool CompilerMSL::OpCodePreprocessor::handle(Op opcode, const uint32_t *args, uint32_t length) +{ + // Since MSL exists in a single execution scope, function prototype declarations are not + // needed, and clutter the output. If secondary functions are output (either as a SPIR-V + // function implementation or as indicated by the presence of OpFunctionCall), then set + // suppress_missing_prototypes to suppress compiler warnings of missing function prototypes. + + // Mark if the input requires the implementation of an SPIR-V function that does not exist in Metal. + SPVFuncImpl spv_func = get_spv_func_impl(opcode, args); + if (spv_func != SPVFuncImplNone) + { + compiler.spv_function_implementations.insert(spv_func); + suppress_missing_prototypes = true; + } + + switch (opcode) + { + + case OpFunctionCall: + suppress_missing_prototypes = true; + break; + + // Emulate texture2D atomic operations + case OpImageTexelPointer: + { + auto *var = compiler.maybe_get_backing_variable(args[2]); + image_pointers[args[1]] = var ? 
var->self : ID(0); + break; + } + + case OpImageWrite: + if (!compiler.msl_options.supports_msl_version(2, 2)) + uses_resource_write = true; + break; + + case OpStore: + check_resource_write(args[0]); + break; + + // Emulate texture2D atomic operations + case OpAtomicExchange: + case OpAtomicCompareExchange: + case OpAtomicCompareExchangeWeak: + case OpAtomicIIncrement: + case OpAtomicIDecrement: + case OpAtomicIAdd: + case OpAtomicISub: + case OpAtomicSMin: + case OpAtomicUMin: + case OpAtomicSMax: + case OpAtomicUMax: + case OpAtomicAnd: + case OpAtomicOr: + case OpAtomicXor: + { + uses_atomics = true; + auto it = image_pointers.find(args[2]); + if (it != image_pointers.end()) + { + compiler.atomic_image_vars.insert(it->second); + } + check_resource_write(args[2]); + break; + } + + case OpAtomicStore: + { + uses_atomics = true; + auto it = image_pointers.find(args[0]); + if (it != image_pointers.end()) + { + compiler.atomic_image_vars.insert(it->second); + } + check_resource_write(args[0]); + break; + } + + case OpAtomicLoad: + { + uses_atomics = true; + auto it = image_pointers.find(args[2]); + if (it != image_pointers.end()) + { + compiler.atomic_image_vars.insert(it->second); + } + break; + } + + case OpGroupNonUniformInverseBallot: + needs_subgroup_invocation_id = true; + break; + + case OpGroupNonUniformBallotFindLSB: + case OpGroupNonUniformBallotFindMSB: + needs_subgroup_size = true; + break; + + case OpGroupNonUniformBallotBitCount: + if (args[3] == GroupOperationReduce) + needs_subgroup_size = true; + else + needs_subgroup_invocation_id = true; + break; + + case OpArrayLength: + { + auto *var = compiler.maybe_get_backing_variable(args[2]); + if (var) + compiler.buffers_requiring_array_length.insert(var->self); + break; + } + + case OpInBoundsAccessChain: + case OpAccessChain: + case OpPtrAccessChain: + { + // OpArrayLength might want to know if taking ArrayLength of an array of SSBOs. 
+ uint32_t result_type = args[0]; + uint32_t id = args[1]; + uint32_t ptr = args[2]; + + compiler.set(id, "", result_type, true); + compiler.register_read(id, ptr, true); + compiler.ir.ids[id].set_allow_type_rewrite(); + break; + } + + case OpExtInst: + { + uint32_t extension_set = args[2]; + if (compiler.get(extension_set).ext == SPIRExtension::GLSL) + { + auto op_450 = static_cast(args[3]); + switch (op_450) + { + case GLSLstd450InterpolateAtCentroid: + case GLSLstd450InterpolateAtSample: + case GLSLstd450InterpolateAtOffset: + { + if (!compiler.msl_options.supports_msl_version(2, 3)) + SPIRV_CROSS_THROW("Pull-model interpolation requires MSL 2.3."); + // Fragment varyings used with pull-model interpolation need special handling, + // due to the way pull-model interpolation works in Metal. + auto *var = compiler.maybe_get_backing_variable(args[4]); + if (var) + { + compiler.pull_model_inputs.insert(var->self); + auto &var_type = compiler.get_variable_element_type(*var); + // In addition, if this variable has a 'Sample' decoration, we need the sample ID + // in order to do default interpolation. + if (compiler.has_decoration(var->self, DecorationSample)) + { + needs_sample_id = true; + } + else if (var_type.basetype == SPIRType::Struct) + { + // Now we need to check each member and see if it has this decoration. + for (uint32_t i = 0; i < var_type.member_types.size(); ++i) + { + if (compiler.has_member_decoration(var_type.self, i, DecorationSample)) + { + needs_sample_id = true; + break; + } + } + } + } + break; + } + default: + break; + } + } + break; + } + + default: + break; + } + + // If it has one, keep track of the instruction's result type, mapped by ID + uint32_t result_type, result_id; + if (compiler.instruction_to_result_type(result_type, result_id, opcode, args, length)) + result_types[result_id] = result_type; + + return true; +} + +// If the variable is a Uniform or StorageBuffer, mark that a resource has been written to. 
+void CompilerMSL::OpCodePreprocessor::check_resource_write(uint32_t var_id) +{ + auto *p_var = compiler.maybe_get_backing_variable(var_id); + StorageClass sc = p_var ? p_var->storage : StorageClassMax; + if (!compiler.msl_options.supports_msl_version(2, 1) && + (sc == StorageClassUniform || sc == StorageClassStorageBuffer)) + uses_resource_write = true; +} + +// Returns an enumeration of a SPIR-V function that needs to be output for certain Op codes. +CompilerMSL::SPVFuncImpl CompilerMSL::OpCodePreprocessor::get_spv_func_impl(Op opcode, const uint32_t *args) +{ + switch (opcode) + { + case OpFMod: + return SPVFuncImplMod; + + case OpFAdd: + if (compiler.msl_options.invariant_float_math) + { + return SPVFuncImplFAdd; + } + break; + + case OpFMul: + case OpOuterProduct: + case OpMatrixTimesVector: + case OpVectorTimesMatrix: + case OpMatrixTimesMatrix: + if (compiler.msl_options.invariant_float_math) + { + return SPVFuncImplFMul; + } + break; + + case OpTypeArray: + { + // Allow Metal to use the array template to make arrays a value type + return SPVFuncImplUnsafeArray; + } + + // Emulate texture2D atomic operations + case OpAtomicExchange: + case OpAtomicCompareExchange: + case OpAtomicCompareExchangeWeak: + case OpAtomicIIncrement: + case OpAtomicIDecrement: + case OpAtomicIAdd: + case OpAtomicISub: + case OpAtomicSMin: + case OpAtomicUMin: + case OpAtomicSMax: + case OpAtomicUMax: + case OpAtomicAnd: + case OpAtomicOr: + case OpAtomicXor: + case OpAtomicLoad: + case OpAtomicStore: + { + auto it = image_pointers.find(args[opcode == OpAtomicStore ? 0 : 2]); + if (it != image_pointers.end()) + { + uint32_t tid = compiler.get(it->second).basetype; + if (tid && compiler.get(tid).image.dim == Dim2D) + return SPVFuncImplImage2DAtomicCoords; + } + break; + } + + case OpImageFetch: + case OpImageRead: + case OpImageWrite: + { + // Retrieve the image type, and if it's a Buffer, emit a texel coordinate function + uint32_t tid = result_types[args[opcode == OpImageWrite ? 
0 : 2]]; + if (tid && compiler.get(tid).image.dim == DimBuffer && !compiler.msl_options.texture_buffer_native) + return SPVFuncImplTexelBufferCoords; + break; + } + + case OpExtInst: + { + uint32_t extension_set = args[2]; + if (compiler.get(extension_set).ext == SPIRExtension::GLSL) + { + auto op_450 = static_cast(args[3]); + switch (op_450) + { + case GLSLstd450Radians: + return SPVFuncImplRadians; + case GLSLstd450Degrees: + return SPVFuncImplDegrees; + case GLSLstd450FindILsb: + return SPVFuncImplFindILsb; + case GLSLstd450FindSMsb: + return SPVFuncImplFindSMsb; + case GLSLstd450FindUMsb: + return SPVFuncImplFindUMsb; + case GLSLstd450SSign: + return SPVFuncImplSSign; + case GLSLstd450Reflect: + { + auto &type = compiler.get(args[0]); + if (type.vecsize == 1) + return SPVFuncImplReflectScalar; + break; + } + case GLSLstd450Refract: + { + auto &type = compiler.get(args[0]); + if (type.vecsize == 1) + return SPVFuncImplRefractScalar; + break; + } + case GLSLstd450FaceForward: + { + auto &type = compiler.get(args[0]); + if (type.vecsize == 1) + return SPVFuncImplFaceForwardScalar; + break; + } + case GLSLstd450MatrixInverse: + { + auto &mat_type = compiler.get(args[0]); + switch (mat_type.columns) + { + case 2: + return SPVFuncImplInverse2x2; + case 3: + return SPVFuncImplInverse3x3; + case 4: + return SPVFuncImplInverse4x4; + default: + break; + } + break; + } + default: + break; + } + } + break; + } + + case OpGroupNonUniformBroadcast: + return SPVFuncImplSubgroupBroadcast; + + case OpGroupNonUniformBroadcastFirst: + return SPVFuncImplSubgroupBroadcastFirst; + + case OpGroupNonUniformBallot: + return SPVFuncImplSubgroupBallot; + + case OpGroupNonUniformInverseBallot: + case OpGroupNonUniformBallotBitExtract: + return SPVFuncImplSubgroupBallotBitExtract; + + case OpGroupNonUniformBallotFindLSB: + return SPVFuncImplSubgroupBallotFindLSB; + + case OpGroupNonUniformBallotFindMSB: + return SPVFuncImplSubgroupBallotFindMSB; + + case OpGroupNonUniformBallotBitCount: + 
return SPVFuncImplSubgroupBallotBitCount; + + case OpGroupNonUniformAllEqual: + return SPVFuncImplSubgroupAllEqual; + + case OpGroupNonUniformShuffle: + return SPVFuncImplSubgroupShuffle; + + case OpGroupNonUniformShuffleXor: + return SPVFuncImplSubgroupShuffleXor; + + case OpGroupNonUniformShuffleUp: + return SPVFuncImplSubgroupShuffleUp; + + case OpGroupNonUniformShuffleDown: + return SPVFuncImplSubgroupShuffleDown; + + case OpGroupNonUniformQuadBroadcast: + return SPVFuncImplQuadBroadcast; + + case OpGroupNonUniformQuadSwap: + return SPVFuncImplQuadSwap; + + default: + break; + } + return SPVFuncImplNone; +} + +// Sort both type and meta member content based on builtin status (put builtins at end), +// then by the required sorting aspect. +void CompilerMSL::MemberSorter::sort() +{ + // Create a temporary array of consecutive member indices and sort it based on how + // the members should be reordered, based on builtin and sorting aspect meta info. + size_t mbr_cnt = type.member_types.size(); + SmallVector mbr_idxs(mbr_cnt); + std::iota(mbr_idxs.begin(), mbr_idxs.end(), 0); // Fill with consecutive indices + std::stable_sort(mbr_idxs.begin(), mbr_idxs.end(), *this); // Sort member indices based on sorting aspect + + bool sort_is_identity = true; + for (uint32_t mbr_idx = 0; mbr_idx < mbr_cnt; mbr_idx++) + { + if (mbr_idx != mbr_idxs[mbr_idx]) + { + sort_is_identity = false; + break; + } + } + + if (sort_is_identity) + return; + + if (meta.members.size() < type.member_types.size()) + { + // This should never trigger in normal circumstances, but to be safe. + meta.members.resize(type.member_types.size()); + } + + // Move type and meta member info to the order defined by the sorted member indices. + // This is done by creating temporary copies of both member types and meta, and then + // copying back to the original content at the sorted indices. 
+ auto mbr_types_cpy = type.member_types; + auto mbr_meta_cpy = meta.members; + for (uint32_t mbr_idx = 0; mbr_idx < mbr_cnt; mbr_idx++) + { + type.member_types[mbr_idx] = mbr_types_cpy[mbr_idxs[mbr_idx]]; + meta.members[mbr_idx] = mbr_meta_cpy[mbr_idxs[mbr_idx]]; + } + + if (sort_aspect == SortAspect::Offset) + { + // If we're sorting by Offset, this might affect user code which accesses a buffer block. + // We will need to redirect member indices from one index to sorted index. + type.member_type_index_redirection = std::move(mbr_idxs); + } +} + +// Sort first by builtin status (put builtins at end), then by the sorting aspect. +bool CompilerMSL::MemberSorter::operator()(uint32_t mbr_idx1, uint32_t mbr_idx2) +{ + auto &mbr_meta1 = meta.members[mbr_idx1]; + auto &mbr_meta2 = meta.members[mbr_idx2]; + if (mbr_meta1.builtin != mbr_meta2.builtin) + return mbr_meta2.builtin; + else + switch (sort_aspect) + { + case Location: + return mbr_meta1.location < mbr_meta2.location; + case LocationReverse: + return mbr_meta1.location > mbr_meta2.location; + case Offset: + return mbr_meta1.offset < mbr_meta2.offset; + case OffsetThenLocationReverse: + return (mbr_meta1.offset < mbr_meta2.offset) || + ((mbr_meta1.offset == mbr_meta2.offset) && (mbr_meta1.location > mbr_meta2.location)); + case Alphabetical: + return mbr_meta1.alias < mbr_meta2.alias; + default: + return false; + } +} + +CompilerMSL::MemberSorter::MemberSorter(SPIRType &t, Meta &m, SortAspect sa) + : type(t) + , meta(m) + , sort_aspect(sa) +{ + // Ensure enough meta info is available + meta.members.resize(max(type.member_types.size(), meta.members.size())); +} + +void CompilerMSL::remap_constexpr_sampler(VariableID id, const MSLConstexprSampler &sampler) +{ + auto &type = get(get(id).basetype); + if (type.basetype != SPIRType::SampledImage && type.basetype != SPIRType::Sampler) + SPIRV_CROSS_THROW("Can only remap SampledImage and Sampler type."); + if (!type.array.empty()) + SPIRV_CROSS_THROW("Can not remap array 
of samplers."); + constexpr_samplers_by_id[id] = sampler; +} + +void CompilerMSL::remap_constexpr_sampler_by_binding(uint32_t desc_set, uint32_t binding, + const MSLConstexprSampler &sampler) +{ + constexpr_samplers_by_binding[{ desc_set, binding }] = sampler; +} + +void CompilerMSL::cast_from_builtin_load(uint32_t source_id, std::string &expr, const SPIRType &expr_type) +{ + auto *var = maybe_get_backing_variable(source_id); + if (var) + source_id = var->self; + + // Only interested in standalone builtin variables. + if (!has_decoration(source_id, DecorationBuiltIn)) + return; + + auto builtin = static_cast(get_decoration(source_id, DecorationBuiltIn)); + auto expected_type = expr_type.basetype; + auto expected_width = expr_type.width; + switch (builtin) + { + case BuiltInGlobalInvocationId: + case BuiltInLocalInvocationId: + case BuiltInWorkgroupId: + case BuiltInLocalInvocationIndex: + case BuiltInWorkgroupSize: + case BuiltInNumWorkgroups: + case BuiltInLayer: + case BuiltInViewportIndex: + case BuiltInFragStencilRefEXT: + case BuiltInPrimitiveId: + case BuiltInSubgroupSize: + case BuiltInSubgroupLocalInvocationId: + case BuiltInViewIndex: + case BuiltInVertexIndex: + case BuiltInInstanceIndex: + case BuiltInBaseInstance: + case BuiltInBaseVertex: + expected_type = SPIRType::UInt; + expected_width = 32; + break; + + case BuiltInTessLevelInner: + case BuiltInTessLevelOuter: + if (get_execution_model() == ExecutionModelTessellationControl) + { + expected_type = SPIRType::Half; + expected_width = 16; + } + break; + + default: + break; + } + + if (expected_type != expr_type.basetype) + { + if (expected_width != expr_type.width) + { + // These are of different widths, so we cannot do a straight bitcast. 
+ expr = join(type_to_glsl(expr_type), "(", expr, ")"); + } + else + { + expr = bitcast_expression(expr_type, expected_type, expr); + } + } + + if (builtin == BuiltInTessCoord && get_entry_point().flags.get(ExecutionModeQuads) && expr_type.vecsize == 3) + { + // In SPIR-V, this is always a vec3, even for quads. In Metal, though, it's a float2 for quads. + // The code is expecting a float3, so we need to widen this. + expr = join("float3(", expr, ", 0)"); + } +} + +void CompilerMSL::cast_to_builtin_store(uint32_t target_id, std::string &expr, const SPIRType &expr_type) +{ + auto *var = maybe_get_backing_variable(target_id); + if (var) + target_id = var->self; + + // Only interested in standalone builtin variables. + if (!has_decoration(target_id, DecorationBuiltIn)) + return; + + auto builtin = static_cast(get_decoration(target_id, DecorationBuiltIn)); + auto expected_type = expr_type.basetype; + auto expected_width = expr_type.width; + switch (builtin) + { + case BuiltInLayer: + case BuiltInViewportIndex: + case BuiltInFragStencilRefEXT: + case BuiltInPrimitiveId: + case BuiltInViewIndex: + expected_type = SPIRType::UInt; + expected_width = 32; + break; + + case BuiltInTessLevelInner: + case BuiltInTessLevelOuter: + expected_type = SPIRType::Half; + expected_width = 16; + break; + + default: + break; + } + + if (expected_type != expr_type.basetype) + { + if (expected_width != expr_type.width) + { + // These are of different widths, so we cannot do a straight bitcast. + auto type = expr_type; + type.basetype = expected_type; + type.width = expected_width; + expr = join(type_to_glsl(type), "(", expr, ")"); + } + else + { + auto type = expr_type; + type.basetype = expected_type; + expr = bitcast_expression(type, expr_type.basetype, expr); + } + } +} + +string CompilerMSL::to_initializer_expression(const SPIRVariable &var) +{ + // We risk getting an array initializer here with MSL. If we have an array. + // FIXME: We cannot handle non-constant arrays being initialized. 
+ // We will need to inject spvArrayCopy here somehow ... + auto &type = get(var.basetype); + string expr; + if (ir.ids[var.initializer].get_type() == TypeConstant && + (!type.array.empty() || type.basetype == SPIRType::Struct)) + expr = constant_expression(get(var.initializer)); + else + expr = CompilerGLSL::to_initializer_expression(var); + // If the initializer has more vector components than the variable, add a swizzle. + // FIXME: This can't handle arrays or structs. + auto &init_type = expression_type(var.initializer); + if (type.array.empty() && type.basetype != SPIRType::Struct && init_type.vecsize > type.vecsize) + expr = enclose_expression(expr + vector_swizzle(type.vecsize, 0)); + return expr; +} + +string CompilerMSL::to_zero_initialized_expression(uint32_t) +{ + return "{}"; +} + +bool CompilerMSL::descriptor_set_is_argument_buffer(uint32_t desc_set) const +{ + if (!msl_options.argument_buffers) + return false; + if (desc_set >= kMaxArgumentBuffers) + return false; + + return (argument_buffer_discrete_mask & (1u << desc_set)) == 0; +} + +void CompilerMSL::analyze_argument_buffers() +{ + // Gather all used resources and sort them out into argument buffers. + // Each argument buffer corresponds to a descriptor set in SPIR-V. + // The [[id(N)]] values used correspond to the resource mapping we have for MSL. + // Otherwise, the binding number is used, but this is generally not safe some types like + // combined image samplers and arrays of resources. Metal needs different indices here, + // while SPIR-V can have one descriptor set binding. To use argument buffers in practice, + // you will need to use the remapping from the API. + for (auto &id : argument_buffer_ids) + id = 0; + + // Output resources, sorted by resource index & type. 
+ struct Resource + { + SPIRVariable *var; + string name; + SPIRType::BaseType basetype; + uint32_t index; + uint32_t plane; + }; + SmallVector resources_in_set[kMaxArgumentBuffers]; + SmallVector inline_block_vars; + + bool set_needs_swizzle_buffer[kMaxArgumentBuffers] = {}; + bool set_needs_buffer_sizes[kMaxArgumentBuffers] = {}; + bool needs_buffer_sizes = false; + + ir.for_each_typed_id([&](uint32_t self, SPIRVariable &var) { + if ((var.storage == StorageClassUniform || var.storage == StorageClassUniformConstant || + var.storage == StorageClassStorageBuffer) && + !is_hidden_variable(var)) + { + uint32_t desc_set = get_decoration(self, DecorationDescriptorSet); + // Ignore if it's part of a push descriptor set. + if (!descriptor_set_is_argument_buffer(desc_set)) + return; + + uint32_t var_id = var.self; + auto &type = get_variable_data_type(var); + + if (desc_set >= kMaxArgumentBuffers) + SPIRV_CROSS_THROW("Descriptor set index is out of range."); + + const MSLConstexprSampler *constexpr_sampler = nullptr; + if (type.basetype == SPIRType::SampledImage || type.basetype == SPIRType::Sampler) + { + constexpr_sampler = find_constexpr_sampler(var_id); + if (constexpr_sampler) + { + // Mark this ID as a constexpr sampler for later in case it came from set/bindings. 
+ constexpr_samplers_by_id[var_id] = *constexpr_sampler; + } + } + + uint32_t binding = get_decoration(var_id, DecorationBinding); + if (type.basetype == SPIRType::SampledImage) + { + add_resource_name(var_id); + + uint32_t plane_count = 1; + if (constexpr_sampler && constexpr_sampler->ycbcr_conversion_enable) + plane_count = constexpr_sampler->planes; + + for (uint32_t i = 0; i < plane_count; i++) + { + uint32_t image_resource_index = get_metal_resource_index(var, SPIRType::Image, i); + resources_in_set[desc_set].push_back( + { &var, to_name(var_id), SPIRType::Image, image_resource_index, i }); + } + + if (type.image.dim != DimBuffer && !constexpr_sampler) + { + uint32_t sampler_resource_index = get_metal_resource_index(var, SPIRType::Sampler); + resources_in_set[desc_set].push_back( + { &var, to_sampler_expression(var_id), SPIRType::Sampler, sampler_resource_index, 0 }); + } + } + else if (inline_uniform_blocks.count(SetBindingPair{ desc_set, binding })) + { + inline_block_vars.push_back(var_id); + } + else if (!constexpr_sampler) + { + // constexpr samplers are not declared as resources. + // Inline uniform blocks are always emitted at the end. + if (!msl_options.is_ios() || type.basetype != SPIRType::Image || type.image.sampled != 2) + { + add_resource_name(var_id); + resources_in_set[desc_set].push_back( + { &var, to_name(var_id), type.basetype, get_metal_resource_index(var, type.basetype), 0 }); + + // Emulate texture2D atomic operations + if (atomic_image_vars.count(var.self)) + { + uint32_t buffer_resource_index = get_metal_resource_index(var, SPIRType::AtomicCounter, 0); + resources_in_set[desc_set].push_back( + { &var, to_name(var_id) + "_atomic", SPIRType::Struct, buffer_resource_index, 0 }); + } + } + } + + // Check if this descriptor set needs a swizzle buffer. 
+ if (needs_swizzle_buffer_def && is_sampled_image_type(type)) + set_needs_swizzle_buffer[desc_set] = true; + else if (buffers_requiring_array_length.count(var_id) != 0) + { + set_needs_buffer_sizes[desc_set] = true; + needs_buffer_sizes = true; + } + } + }); + + if (needs_swizzle_buffer_def || needs_buffer_sizes) + { + uint32_t uint_ptr_type_id = 0; + + // We might have to add a swizzle buffer resource to the set. + for (uint32_t desc_set = 0; desc_set < kMaxArgumentBuffers; desc_set++) + { + if (!set_needs_swizzle_buffer[desc_set] && !set_needs_buffer_sizes[desc_set]) + continue; + + if (uint_ptr_type_id == 0) + { + uint_ptr_type_id = ir.increase_bound_by(1); + + // Create a buffer to hold extra data, including the swizzle constants. + SPIRType uint_type_pointer = get_uint_type(); + uint_type_pointer.pointer = true; + uint_type_pointer.pointer_depth = 1; + uint_type_pointer.parent_type = get_uint_type_id(); + uint_type_pointer.storage = StorageClassUniform; + set(uint_ptr_type_id, uint_type_pointer); + set_decoration(uint_ptr_type_id, DecorationArrayStride, 4); + } + + if (set_needs_swizzle_buffer[desc_set]) + { + uint32_t var_id = ir.increase_bound_by(1); + auto &var = set(var_id, uint_ptr_type_id, StorageClassUniformConstant); + set_name(var_id, "spvSwizzleConstants"); + set_decoration(var_id, DecorationDescriptorSet, desc_set); + set_decoration(var_id, DecorationBinding, kSwizzleBufferBinding); + resources_in_set[desc_set].push_back( + { &var, to_name(var_id), SPIRType::UInt, get_metal_resource_index(var, SPIRType::UInt), 0 }); + } + + if (set_needs_buffer_sizes[desc_set]) + { + uint32_t var_id = ir.increase_bound_by(1); + auto &var = set(var_id, uint_ptr_type_id, StorageClassUniformConstant); + set_name(var_id, "spvBufferSizeConstants"); + set_decoration(var_id, DecorationDescriptorSet, desc_set); + set_decoration(var_id, DecorationBinding, kBufferSizeBufferBinding); + resources_in_set[desc_set].push_back( + { &var, to_name(var_id), SPIRType::UInt, 
get_metal_resource_index(var, SPIRType::UInt), 0 }); + } + } + } + + // Now add inline uniform blocks. + for (uint32_t var_id : inline_block_vars) + { + auto &var = get(var_id); + uint32_t desc_set = get_decoration(var_id, DecorationDescriptorSet); + add_resource_name(var_id); + resources_in_set[desc_set].push_back( + { &var, to_name(var_id), SPIRType::Struct, get_metal_resource_index(var, SPIRType::Struct), 0 }); + } + + for (uint32_t desc_set = 0; desc_set < kMaxArgumentBuffers; desc_set++) + { + auto &resources = resources_in_set[desc_set]; + if (resources.empty()) + continue; + + assert(descriptor_set_is_argument_buffer(desc_set)); + + uint32_t next_id = ir.increase_bound_by(3); + uint32_t type_id = next_id + 1; + uint32_t ptr_type_id = next_id + 2; + argument_buffer_ids[desc_set] = next_id; + + auto &buffer_type = set(type_id); + + buffer_type.basetype = SPIRType::Struct; + + if ((argument_buffer_device_storage_mask & (1u << desc_set)) != 0) + { + buffer_type.storage = StorageClassStorageBuffer; + // Make sure the argument buffer gets marked as const device. + set_decoration(next_id, DecorationNonWritable); + // Need to mark the type as a Block to enable this. + set_decoration(type_id, DecorationBlock); + } + else + buffer_type.storage = StorageClassUniform; + + set_name(type_id, join("spvDescriptorSetBuffer", desc_set)); + + auto &ptr_type = set(ptr_type_id); + ptr_type = buffer_type; + ptr_type.pointer = true; + ptr_type.pointer_depth = 1; + ptr_type.parent_type = type_id; + + uint32_t buffer_variable_id = next_id; + set(buffer_variable_id, ptr_type_id, StorageClassUniform); + set_name(buffer_variable_id, join("spvDescriptorSet", desc_set)); + + // Ids must be emitted in ID order. 
+ sort(begin(resources), end(resources), [&](const Resource &lhs, const Resource &rhs) -> bool { + return tie(lhs.index, lhs.basetype) < tie(rhs.index, rhs.basetype); + }); + + uint32_t member_index = 0; + for (auto &resource : resources) + { + auto &var = *resource.var; + auto &type = get_variable_data_type(var); + string mbr_name = ensure_valid_name(resource.name, "m"); + if (resource.plane > 0) + mbr_name += join(plane_name_suffix, resource.plane); + set_member_name(buffer_type.self, member_index, mbr_name); + + if (resource.basetype == SPIRType::Sampler && type.basetype != SPIRType::Sampler) + { + // Have to synthesize a sampler type here. + + bool type_is_array = !type.array.empty(); + uint32_t sampler_type_id = ir.increase_bound_by(type_is_array ? 2 : 1); + auto &new_sampler_type = set(sampler_type_id); + new_sampler_type.basetype = SPIRType::Sampler; + new_sampler_type.storage = StorageClassUniformConstant; + + if (type_is_array) + { + uint32_t sampler_type_array_id = sampler_type_id + 1; + auto &sampler_type_array = set(sampler_type_array_id); + sampler_type_array = new_sampler_type; + sampler_type_array.array = type.array; + sampler_type_array.array_size_literal = type.array_size_literal; + sampler_type_array.parent_type = sampler_type_id; + buffer_type.member_types.push_back(sampler_type_array_id); + } + else + buffer_type.member_types.push_back(sampler_type_id); + } + else + { + uint32_t binding = get_decoration(var.self, DecorationBinding); + SetBindingPair pair = { desc_set, binding }; + + if (resource.basetype == SPIRType::Image || resource.basetype == SPIRType::Sampler || + resource.basetype == SPIRType::SampledImage) + { + // Drop pointer information when we emit the resources into a struct. 
+ buffer_type.member_types.push_back(get_variable_data_type_id(var)); + if (resource.plane == 0) + set_qualified_name(var.self, join(to_name(buffer_variable_id), ".", mbr_name)); + } + else if (buffers_requiring_dynamic_offset.count(pair)) + { + // Don't set the qualified name here; we'll define a variable holding the corrected buffer address later. + buffer_type.member_types.push_back(var.basetype); + buffers_requiring_dynamic_offset[pair].second = var.self; + } + else if (inline_uniform_blocks.count(pair)) + { + // Put the buffer block itself into the argument buffer. + buffer_type.member_types.push_back(get_variable_data_type_id(var)); + set_qualified_name(var.self, join(to_name(buffer_variable_id), ".", mbr_name)); + } + else if (atomic_image_vars.count(var.self)) + { + // Emulate texture2D atomic operations. + // Don't set the qualified name: it's already set for this variable, + // and the code that references the buffer manually appends "_atomic" + // to the name. + uint32_t offset = ir.increase_bound_by(2); + uint32_t atomic_type_id = offset; + uint32_t type_ptr_id = offset + 1; + + SPIRType atomic_type; + atomic_type.basetype = SPIRType::AtomicCounter; + atomic_type.width = 32; + atomic_type.vecsize = 1; + set(atomic_type_id, atomic_type); + + atomic_type.pointer = true; + atomic_type.parent_type = atomic_type_id; + atomic_type.storage = StorageClassStorageBuffer; + auto &atomic_ptr_type = set(type_ptr_id, atomic_type); + atomic_ptr_type.self = atomic_type_id; + + buffer_type.member_types.push_back(type_ptr_id); + } + else + { + // Resources will be declared as pointers not references, so automatically dereference as appropriate. 
+ buffer_type.member_types.push_back(var.basetype); + if (type.array.empty()) + set_qualified_name(var.self, join("(*", to_name(buffer_variable_id), ".", mbr_name, ")")); + else + set_qualified_name(var.self, join(to_name(buffer_variable_id), ".", mbr_name)); + } + } + + set_extended_member_decoration(buffer_type.self, member_index, SPIRVCrossDecorationResourceIndexPrimary, + resource.index); + set_extended_member_decoration(buffer_type.self, member_index, SPIRVCrossDecorationInterfaceOrigID, + var.self); + member_index++; + } + } +} + +void CompilerMSL::activate_argument_buffer_resources() +{ + // For ABI compatibility, force-enable all resources which are part of argument buffers. + ir.for_each_typed_id([&](uint32_t self, const SPIRVariable &) { + if (!has_decoration(self, DecorationDescriptorSet)) + return; + + uint32_t desc_set = get_decoration(self, DecorationDescriptorSet); + if (descriptor_set_is_argument_buffer(desc_set)) + active_interface_variables.insert(self); + }); +} + +bool CompilerMSL::using_builtin_array() const +{ + return msl_options.force_native_arrays || is_using_builtin_array; +} + +void CompilerMSL::set_combined_sampler_suffix(const char *suffix) +{ + sampler_name_suffix = suffix; +} + +const char *CompilerMSL::get_combined_sampler_suffix() const +{ + return sampler_name_suffix.c_str(); +} diff --git a/dep/spirv-cross/spirv_msl.hpp b/dep/spirv-cross/spirv_msl.hpp new file mode 100644 index 000000000..003fb83ae --- /dev/null +++ b/dep/spirv-cross/spirv_msl.hpp @@ -0,0 +1,1085 @@ +/* + * Copyright 2016-2020 The Brenwill Workshop Ltd. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * At your option, you may choose to accept this material under either:
+ * 1. The Apache License, Version 2.0, found at , or
+ * 2. The MIT License, found at .
+ * SPDX-License-Identifier: Apache-2.0 OR MIT.
+ */
+
+#ifndef SPIRV_CROSS_MSL_HPP
+#define SPIRV_CROSS_MSL_HPP
+
+#include "spirv_glsl.hpp"
+#include // NOTE(review): the header names of these five system includes are missing in this copy - restore from upstream.
+#include
+#include
+#include
+#include
+
+namespace SPIRV_CROSS_NAMESPACE
+{
+
+// Indicates the format of a shader input. The enumerators distinguish 8-bit and 16-bit
+// unsigned integer inputs, two ANY16 / ANY32 variants, and "other" (value 0) for everything else.
+// The MSL_VERTEX_FORMAT_* names are deprecated aliases of the matching MSL_SHADER_INPUT_FORMAT_* values.
+enum MSLShaderInputFormat
+{
+	MSL_SHADER_INPUT_FORMAT_OTHER = 0,
+	MSL_SHADER_INPUT_FORMAT_UINT8 = 1,
+	MSL_SHADER_INPUT_FORMAT_UINT16 = 2,
+	MSL_SHADER_INPUT_FORMAT_ANY16 = 3, // NOTE(review): presumably "any 16-bit format"; not described in this header - confirm.
+	MSL_SHADER_INPUT_FORMAT_ANY32 = 4, // NOTE(review): presumably "any 32-bit format"; not described in this header - confirm.
+
+	// Deprecated aliases.
+	MSL_VERTEX_FORMAT_OTHER = MSL_SHADER_INPUT_FORMAT_OTHER,
+	MSL_VERTEX_FORMAT_UINT8 = MSL_SHADER_INPUT_FORMAT_UINT8,
+	MSL_VERTEX_FORMAT_UINT16 = MSL_SHADER_INPUT_FORMAT_UINT16,
+
+	MSL_SHADER_INPUT_FORMAT_INT_MAX = 0x7fffffff // Not a real format; presumably pads the enum to a 32-bit range - TODO confirm.
+};
+
+// Defines MSL characteristics of an input variable at a particular location.
+// After compilation, it is possible to query whether or not this location was used.
+// If vecsize is nonzero, it must be greater than or equal to the vecsize declared in the shader,
+// or behavior is undefined.
+struct MSLShaderInput +{ + uint32_t location = 0; + MSLShaderInputFormat format = MSL_SHADER_INPUT_FORMAT_OTHER; + spv::BuiltIn builtin = spv::BuiltInMax; + uint32_t vecsize = 0; +}; + +// Matches the binding index of a MSL resource for a binding within a descriptor set. +// Taken together, the stage, desc_set and binding combine to form a reference to a resource +// descriptor used in a particular shading stage. The count field indicates the number of +// resources consumed by this binding, if the binding represents an array of resources. +// If the resource array is a run-time-sized array, which are legal in GLSL or SPIR-V, this value +// will be used to declare the array size in MSL, which does not support run-time-sized arrays. +// For resources that are not held in a run-time-sized array, the count field does not need to be populated. +// If using MSL 2.0 argument buffers, the descriptor set is not marked as a discrete descriptor set, +// and (for iOS only) the resource is not a storage image (sampled != 2), the binding reference we +// remap to will become an [[id(N)]] attribute within the "descriptor set" argument buffer structure. +// For resources which are bound in the "classic" MSL 1.0 way or discrete descriptors, the remap will become a +// [[buffer(N)]], [[texture(N)]] or [[sampler(N)]] depending on the resource types used. 
+struct MSLResourceBinding +{ + spv::ExecutionModel stage = spv::ExecutionModelMax; + uint32_t desc_set = 0; + uint32_t binding = 0; + uint32_t count = 0; + uint32_t msl_buffer = 0; + uint32_t msl_texture = 0; + uint32_t msl_sampler = 0; +}; + +enum MSLSamplerCoord +{ + MSL_SAMPLER_COORD_NORMALIZED = 0, + MSL_SAMPLER_COORD_PIXEL = 1, + MSL_SAMPLER_INT_MAX = 0x7fffffff +}; + +enum MSLSamplerFilter +{ + MSL_SAMPLER_FILTER_NEAREST = 0, + MSL_SAMPLER_FILTER_LINEAR = 1, + MSL_SAMPLER_FILTER_INT_MAX = 0x7fffffff +}; + +enum MSLSamplerMipFilter +{ + MSL_SAMPLER_MIP_FILTER_NONE = 0, + MSL_SAMPLER_MIP_FILTER_NEAREST = 1, + MSL_SAMPLER_MIP_FILTER_LINEAR = 2, + MSL_SAMPLER_MIP_FILTER_INT_MAX = 0x7fffffff +}; + +enum MSLSamplerAddress +{ + MSL_SAMPLER_ADDRESS_CLAMP_TO_ZERO = 0, + MSL_SAMPLER_ADDRESS_CLAMP_TO_EDGE = 1, + MSL_SAMPLER_ADDRESS_CLAMP_TO_BORDER = 2, + MSL_SAMPLER_ADDRESS_REPEAT = 3, + MSL_SAMPLER_ADDRESS_MIRRORED_REPEAT = 4, + MSL_SAMPLER_ADDRESS_INT_MAX = 0x7fffffff +}; + +enum MSLSamplerCompareFunc +{ + MSL_SAMPLER_COMPARE_FUNC_NEVER = 0, + MSL_SAMPLER_COMPARE_FUNC_LESS = 1, + MSL_SAMPLER_COMPARE_FUNC_LESS_EQUAL = 2, + MSL_SAMPLER_COMPARE_FUNC_GREATER = 3, + MSL_SAMPLER_COMPARE_FUNC_GREATER_EQUAL = 4, + MSL_SAMPLER_COMPARE_FUNC_EQUAL = 5, + MSL_SAMPLER_COMPARE_FUNC_NOT_EQUAL = 6, + MSL_SAMPLER_COMPARE_FUNC_ALWAYS = 7, + MSL_SAMPLER_COMPARE_FUNC_INT_MAX = 0x7fffffff +}; + +enum MSLSamplerBorderColor +{ + MSL_SAMPLER_BORDER_COLOR_TRANSPARENT_BLACK = 0, + MSL_SAMPLER_BORDER_COLOR_OPAQUE_BLACK = 1, + MSL_SAMPLER_BORDER_COLOR_OPAQUE_WHITE = 2, + MSL_SAMPLER_BORDER_COLOR_INT_MAX = 0x7fffffff +}; + +enum MSLFormatResolution +{ + MSL_FORMAT_RESOLUTION_444 = 0, + MSL_FORMAT_RESOLUTION_422, + MSL_FORMAT_RESOLUTION_420, + MSL_FORMAT_RESOLUTION_INT_MAX = 0x7fffffff +}; + +enum MSLChromaLocation +{ + MSL_CHROMA_LOCATION_COSITED_EVEN = 0, + MSL_CHROMA_LOCATION_MIDPOINT, + MSL_CHROMA_LOCATION_INT_MAX = 0x7fffffff +}; + +enum MSLComponentSwizzle +{ + 
MSL_COMPONENT_SWIZZLE_IDENTITY = 0, + MSL_COMPONENT_SWIZZLE_ZERO, + MSL_COMPONENT_SWIZZLE_ONE, + MSL_COMPONENT_SWIZZLE_R, + MSL_COMPONENT_SWIZZLE_G, + MSL_COMPONENT_SWIZZLE_B, + MSL_COMPONENT_SWIZZLE_A, + MSL_COMPONENT_SWIZZLE_INT_MAX = 0x7fffffff +}; + +enum MSLSamplerYCbCrModelConversion +{ + MSL_SAMPLER_YCBCR_MODEL_CONVERSION_RGB_IDENTITY = 0, + MSL_SAMPLER_YCBCR_MODEL_CONVERSION_YCBCR_IDENTITY, + MSL_SAMPLER_YCBCR_MODEL_CONVERSION_YCBCR_BT_709, + MSL_SAMPLER_YCBCR_MODEL_CONVERSION_YCBCR_BT_601, + MSL_SAMPLER_YCBCR_MODEL_CONVERSION_YCBCR_BT_2020, + MSL_SAMPLER_YCBCR_MODEL_CONVERSION_INT_MAX = 0x7fffffff +}; + +enum MSLSamplerYCbCrRange +{ + MSL_SAMPLER_YCBCR_RANGE_ITU_FULL = 0, + MSL_SAMPLER_YCBCR_RANGE_ITU_NARROW, + MSL_SAMPLER_YCBCR_RANGE_INT_MAX = 0x7fffffff +}; + +struct MSLConstexprSampler +{ + MSLSamplerCoord coord = MSL_SAMPLER_COORD_NORMALIZED; + MSLSamplerFilter min_filter = MSL_SAMPLER_FILTER_NEAREST; + MSLSamplerFilter mag_filter = MSL_SAMPLER_FILTER_NEAREST; + MSLSamplerMipFilter mip_filter = MSL_SAMPLER_MIP_FILTER_NONE; + MSLSamplerAddress s_address = MSL_SAMPLER_ADDRESS_CLAMP_TO_EDGE; + MSLSamplerAddress t_address = MSL_SAMPLER_ADDRESS_CLAMP_TO_EDGE; + MSLSamplerAddress r_address = MSL_SAMPLER_ADDRESS_CLAMP_TO_EDGE; + MSLSamplerCompareFunc compare_func = MSL_SAMPLER_COMPARE_FUNC_NEVER; + MSLSamplerBorderColor border_color = MSL_SAMPLER_BORDER_COLOR_TRANSPARENT_BLACK; + float lod_clamp_min = 0.0f; + float lod_clamp_max = 1000.0f; + int max_anisotropy = 1; + + // Sampler Y'CbCr conversion parameters + uint32_t planes = 0; + MSLFormatResolution resolution = MSL_FORMAT_RESOLUTION_444; + MSLSamplerFilter chroma_filter = MSL_SAMPLER_FILTER_NEAREST; + MSLChromaLocation x_chroma_offset = MSL_CHROMA_LOCATION_COSITED_EVEN; + MSLChromaLocation y_chroma_offset = MSL_CHROMA_LOCATION_COSITED_EVEN; + MSLComponentSwizzle swizzle[4]; // IDENTITY, IDENTITY, IDENTITY, IDENTITY + MSLSamplerYCbCrModelConversion ycbcr_model = 
MSL_SAMPLER_YCBCR_MODEL_CONVERSION_RGB_IDENTITY; + MSLSamplerYCbCrRange ycbcr_range = MSL_SAMPLER_YCBCR_RANGE_ITU_FULL; + uint32_t bpc = 8; + + bool compare_enable = false; + bool lod_clamp_enable = false; + bool anisotropy_enable = false; + bool ycbcr_conversion_enable = false; + + MSLConstexprSampler() + { + for (uint32_t i = 0; i < 4; i++) + swizzle[i] = MSL_COMPONENT_SWIZZLE_IDENTITY; + } + bool swizzle_is_identity() const + { + return (swizzle[0] == MSL_COMPONENT_SWIZZLE_IDENTITY && swizzle[1] == MSL_COMPONENT_SWIZZLE_IDENTITY && + swizzle[2] == MSL_COMPONENT_SWIZZLE_IDENTITY && swizzle[3] == MSL_COMPONENT_SWIZZLE_IDENTITY); + } + bool swizzle_has_one_or_zero() const + { + return (swizzle[0] == MSL_COMPONENT_SWIZZLE_ZERO || swizzle[0] == MSL_COMPONENT_SWIZZLE_ONE || + swizzle[1] == MSL_COMPONENT_SWIZZLE_ZERO || swizzle[1] == MSL_COMPONENT_SWIZZLE_ONE || + swizzle[2] == MSL_COMPONENT_SWIZZLE_ZERO || swizzle[2] == MSL_COMPONENT_SWIZZLE_ONE || + swizzle[3] == MSL_COMPONENT_SWIZZLE_ZERO || swizzle[3] == MSL_COMPONENT_SWIZZLE_ONE); + } +}; + +// Special constant used in a MSLResourceBinding desc_set +// element to indicate the bindings for the push constants. +// Kinda deprecated. Just use ResourceBindingPushConstant{DescriptorSet,Binding} directly. +static const uint32_t kPushConstDescSet = ResourceBindingPushConstantDescriptorSet; + +// Special constant used in a MSLResourceBinding binding +// element to indicate the bindings for the push constants. +// Kinda deprecated. Just use ResourceBindingPushConstant{DescriptorSet,Binding} directly. +static const uint32_t kPushConstBinding = ResourceBindingPushConstantBinding; + +// Special constant used in a MSLResourceBinding binding +// element to indicate the buffer binding for swizzle buffers. +static const uint32_t kSwizzleBufferBinding = ~(1u); + +// Special constant used in a MSLResourceBinding binding +// element to indicate the buffer binding for buffer size buffers to support OpArrayLength. 
+static const uint32_t kBufferSizeBufferBinding = ~(2u); + +// Special constant used in a MSLResourceBinding binding +// element to indicate the buffer binding used for the argument buffer itself. +// This buffer binding should be kept as small as possible as all automatic bindings for buffers +// will start at max(kArgumentBufferBinding) + 1. +static const uint32_t kArgumentBufferBinding = ~(3u); + +static const uint32_t kMaxArgumentBuffers = 8; + +// The arbitrary maximum for the nesting of array of array copies. +static const uint32_t kArrayCopyMultidimMax = 6; + +// Decompiles SPIR-V to Metal Shading Language +class CompilerMSL : public CompilerGLSL +{ +public: + // Options for compiling to Metal Shading Language + struct Options + { + typedef enum + { + iOS = 0, + macOS = 1 + } Platform; + + Platform platform = macOS; + uint32_t msl_version = make_msl_version(1, 2); + uint32_t texel_buffer_texture_width = 4096; // Width of 2D Metal textures used as 1D texel buffers + uint32_t r32ui_linear_texture_alignment = 4; + uint32_t r32ui_alignment_constant_id = 65535; + uint32_t swizzle_buffer_index = 30; + uint32_t indirect_params_buffer_index = 29; + uint32_t shader_output_buffer_index = 28; + uint32_t shader_patch_output_buffer_index = 27; + uint32_t shader_tess_factor_buffer_index = 26; + uint32_t buffer_size_buffer_index = 25; + uint32_t view_mask_buffer_index = 24; + uint32_t dynamic_offsets_buffer_index = 23; + uint32_t shader_input_buffer_index = 22; + uint32_t shader_index_buffer_index = 21; + uint32_t shader_input_wg_index = 0; + uint32_t device_index = 0; + uint32_t enable_frag_output_mask = 0xffffffff; + // Metal doesn't allow setting a fixed sample mask directly in the pipeline. + // We can evade this restriction by ANDing the internal sample_mask output + // of the shader with the additional fixed sample mask. 
+ uint32_t additional_fixed_sample_mask = 0xffffffff; + bool enable_point_size_builtin = true; + bool enable_frag_depth_builtin = true; + bool enable_frag_stencil_ref_builtin = true; + bool disable_rasterization = false; + bool capture_output_to_buffer = false; + bool swizzle_texture_samples = false; + bool tess_domain_origin_lower_left = false; + bool multiview = false; + bool multiview_layered_rendering = true; + bool view_index_from_device_index = false; + bool dispatch_base = false; + bool texture_1D_as_2D = false; + + // Enable use of MSL 2.0 indirect argument buffers. + // MSL 2.0 must also be enabled. + bool argument_buffers = false; + + // Ensures vertex and instance indices start at zero. This reflects the behavior of HLSL with SV_VertexID and SV_InstanceID. + bool enable_base_index_zero = false; + + // Fragment output in MSL must have at least as many components as the render pass. + // Add support to explicit pad out components. + bool pad_fragment_output_components = false; + + // Specifies whether the iOS target version supports the [[base_vertex]] and [[base_instance]] attributes. + bool ios_support_base_vertex_instance = false; + + // Use Metal's native frame-buffer fetch API for subpass inputs. + bool use_framebuffer_fetch_subpasses = false; + + // Enables use of "fma" intrinsic for invariant float math + bool invariant_float_math = false; + + // Emulate texturecube_array with texture2d_array for iOS where this type is not available + bool emulate_cube_array = false; + + // Allow user to enable decoration binding + bool enable_decoration_binding = false; + + // Requires MSL 2.1, use the native support for texel buffers. + bool texture_buffer_native = false; + + // Forces all resources which are part of an argument buffer to be considered active. + // This ensures ABI compatibility between shaders where some resources might be unused, + // and would otherwise declare a different IAB. 
+ bool force_active_argument_buffer_resources = false; + + // Forces the use of plain arrays, which works around certain driver bugs on certain versions + // of Intel Macbooks. See https://github.com/KhronosGroup/SPIRV-Cross/issues/1210. + // May reduce performance in scenarios where arrays are copied around as value-types. + bool force_native_arrays = false; + + // If a shader writes clip distance, also emit user varyings which + // can be read in subsequent stages. + bool enable_clip_distance_user_varying = true; + + // In a tessellation control shader, assume that more than one patch can be processed in a + // single workgroup. This requires changes to the way the InvocationId and PrimitiveId + // builtins are processed, but should result in more efficient usage of the GPU. + bool multi_patch_workgroup = false; + + // If set, a vertex shader will be compiled as part of a tessellation pipeline. + // It will be translated as a compute kernel, so it can use the global invocation ID + // to index the output buffer. + bool vertex_for_tessellation = false; + + // Assume that SubpassData images have multiple layers. Layered input attachments + // are addressed relative to the Layer output from the vertex pipeline. This option + // has no effect with multiview, since all input attachments are assumed to be layered + // and will be addressed using the current ViewIndex. + bool arrayed_subpass_input = false; + + // Whether to use SIMD-group or quadgroup functions to implement group non-uniform + // operations. Some GPUs on iOS do not support the SIMD-group functions, only the + // quadgroup functions. + bool ios_use_simdgroup_functions = false; + + // If set, the subgroup size will be assumed to be one, and subgroup-related + // builtins and operations will be emitted accordingly. This mode is intended to + // be used by MoltenVK on hardware/software configurations which do not provide + // sufficient support for subgroups.
+ bool emulate_subgroups = false; + + // If nonzero, a fixed subgroup size to assume. Metal, similarly to VK_EXT_subgroup_size_control, + // allows the SIMD-group size (aka thread execution width) to vary depending on + // register usage and requirements. In certain circumstances--for example, a pipeline + // in MoltenVK without VK_PIPELINE_SHADER_STAGE_CREATE_ALLOW_VARYING_SUBGROUP_SIZE_BIT_EXT-- + // this is undesirable. This fixes the value of the SubgroupSize builtin, instead of + // mapping it to the Metal builtin [[thread_execution_width]]. If the thread + // execution width is reduced, the extra invocations will appear to be inactive. + // If zero, the SubgroupSize will be allowed to vary, and the builtin will be mapped + // to the Metal [[thread_execution_width]] builtin. + uint32_t fixed_subgroup_size = 0; + + enum class IndexType + { + None = 0, + UInt16 = 1, + UInt32 = 2 + }; + + // The type of index in the index buffer, if present. For a compute shader, Metal + // requires specifying the indexing at pipeline creation, rather than at draw time + // as with graphics pipelines. This means we must create three different pipelines, + // for no indexing, 16-bit indices, and 32-bit indices. Each requires different + // handling for the gl_VertexIndex builtin. We may as well, then, create three + // different shaders for these three scenarios. + IndexType vertex_index_type = IndexType::None; + + // If set, a dummy [[sample_id]] input is added to a fragment shader if none is present. + // This will force the shader to run at sample rate, assuming Metal does not optimize + // the extra threads away. 
+ bool force_sample_rate_shading = false; + + bool is_ios() const + { + return platform == iOS; + } + + bool is_macos() const + { + return platform == macOS; + } + + void set_msl_version(uint32_t major, uint32_t minor = 0, uint32_t patch = 0) + { + msl_version = make_msl_version(major, minor, patch); + } + + bool supports_msl_version(uint32_t major, uint32_t minor = 0, uint32_t patch = 0) const + { + return msl_version >= make_msl_version(major, minor, patch); + } + + static uint32_t make_msl_version(uint32_t major, uint32_t minor = 0, uint32_t patch = 0) + { + return (major * 10000) + (minor * 100) + patch; + } + }; + + const Options &get_msl_options() const + { + return msl_options; + } + + void set_msl_options(const Options &opts) + { + msl_options = opts; + } + + // Provide feedback to calling API to allow runtime to disable pipeline + // rasterization if vertex shader requires rasterization to be disabled. + bool get_is_rasterization_disabled() const + { + return is_rasterization_disabled && (get_entry_point().model == spv::ExecutionModelVertex || + get_entry_point().model == spv::ExecutionModelTessellationControl || + get_entry_point().model == spv::ExecutionModelTessellationEvaluation); + } + + // Provide feedback to calling API to allow it to pass an auxiliary + // swizzle buffer if the shader needs it. + bool needs_swizzle_buffer() const + { + return used_swizzle_buffer; + } + + // Provide feedback to calling API to allow it to pass a buffer + // containing STORAGE_BUFFER buffer sizes to support OpArrayLength. + bool needs_buffer_size_buffer() const + { + return !buffers_requiring_array_length.empty(); + } + + // Provide feedback to calling API to allow it to pass a buffer + // containing the view mask for the current multiview subpass. 
+ bool needs_view_mask_buffer() const + { + return msl_options.multiview && !msl_options.view_index_from_device_index; + } + + // Provide feedback to calling API to allow it to pass a buffer + // containing the dispatch base workgroup ID. + bool needs_dispatch_base_buffer() const + { + return msl_options.dispatch_base && !msl_options.supports_msl_version(1, 2); + } + + // Provide feedback to calling API to allow it to pass an output + // buffer if the shader needs it. + bool needs_output_buffer() const + { + return capture_output_to_buffer && stage_out_var_id != ID(0); + } + + // Provide feedback to calling API to allow it to pass a patch output + // buffer if the shader needs it. + bool needs_patch_output_buffer() const + { + return capture_output_to_buffer && patch_stage_out_var_id != ID(0); + } + + // Provide feedback to calling API to allow it to pass an input threadgroup + // buffer if the shader needs it. + bool needs_input_threadgroup_mem() const + { + return capture_output_to_buffer && stage_in_var_id != ID(0); + } + + explicit CompilerMSL(std::vector<uint32_t> spirv); + CompilerMSL(const uint32_t *ir, size_t word_count); + explicit CompilerMSL(const ParsedIR &ir); + explicit CompilerMSL(ParsedIR &&ir); + + // input is a shader input description used to fix up shader input variables. + // If shader inputs are provided, is_msl_shader_input_used() will return true after + // calling ::compile() if the location was used by the MSL code. + void add_msl_shader_input(const MSLShaderInput &input); + + // resource is a resource binding to indicate the MSL buffer, + // texture or sampler index to use for a particular SPIR-V descriptor set + // and binding. If resource bindings are provided, + // is_msl_resource_binding_used() will return true after calling ::compile() if + // the set/binding combination was used by the MSL code.
+ void add_msl_resource_binding(const MSLResourceBinding &resource); + + // desc_set and binding are the SPIR-V descriptor set and binding of a buffer resource + // in this shader. index is the index within the dynamic offset buffer to use. This + // function marks that resource as using a dynamic offset (VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC + // or VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC). This function only has any effect if argument buffers + // are enabled. If so, the buffer will have its address adjusted at the beginning of the shader with + // an offset taken from the dynamic offset buffer. + void add_dynamic_buffer(uint32_t desc_set, uint32_t binding, uint32_t index); + + // desc_set and binding are the SPIR-V descriptor set and binding of a buffer resource + // in this shader. This function marks that resource as an inline uniform block + // (VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK_EXT). This function only has any effect if argument buffers + // are enabled. If so, the buffer block will be directly embedded into the argument + // buffer, instead of being referenced indirectly via pointer. + void add_inline_uniform_block(uint32_t desc_set, uint32_t binding); + + // When using MSL argument buffers, we can force "classic" MSL 1.0 binding schemes for certain descriptor sets. + // This corresponds to VK_KHR_push_descriptor in Vulkan. + void add_discrete_descriptor_set(uint32_t desc_set); + + // If an argument buffer is large enough, it may need to be in the device storage space rather than + // constant. Opt-in to this behavior here on a per set basis. + void set_argument_buffer_device_address_space(uint32_t desc_set, bool device_storage); + + // Query after compilation is done. This allows you to check if an input location was used by the shader. + bool is_msl_shader_input_used(uint32_t location); + + // NOTE: Only resources which are remapped using add_msl_resource_binding will be reported here. + // Constexpr samplers are always assumed to be emitted. 
+ // No specific MSLResourceBinding remapping is required for constexpr samplers as long as they are remapped + // by remap_constexpr_sampler(_by_binding). + bool is_msl_resource_binding_used(spv::ExecutionModel model, uint32_t set, uint32_t binding) const; + + // This must only be called after a successful call to CompilerMSL::compile(). + // For a variable resource ID obtained through reflection API, report the automatically assigned resource index. + // If the descriptor set was part of an argument buffer, report the [[id(N)]], + // or [[buffer/texture/sampler]] binding for other resources. + // If the resource was a combined image sampler, report the image binding here, + // use the _secondary version of this call to query the sampler half of the resource. + // If no binding exists, uint32_t(-1) is returned. + uint32_t get_automatic_msl_resource_binding(uint32_t id) const; + + // Same as get_automatic_msl_resource_binding, but should only be used for combined image samplers, in which case the + // sampler's binding is returned instead. For any other resource type, -1 is returned. + uint32_t get_automatic_msl_resource_binding_secondary(uint32_t id) const; + + // Same as get_automatic_msl_resource_binding, but should only be used for combined image samplers for multiplanar images, + // in which case the second plane's binding is returned instead. For any other resource type, -1 is returned. + uint32_t get_automatic_msl_resource_binding_tertiary(uint32_t id) const; + + // Same as get_automatic_msl_resource_binding, but should only be used for combined image samplers for triplanar images, + // in which case the third plane's binding is returned instead. For any other resource type, -1 is returned. + uint32_t get_automatic_msl_resource_binding_quaternary(uint32_t id) const; + + // Compiles the SPIR-V code into Metal Shading Language. + std::string compile() override; + + // Remap a sampler with ID to a constexpr sampler. 
+ // Older iOS targets must use constexpr samplers in certain cases (PCF), + // so a static sampler must be used. + // The sampler will not consume a binding, but be declared in the entry point as a constexpr sampler. + // This can be used on both combined image/samplers (sampler2D) or standalone samplers. + // The remapped sampler must not be an array of samplers. + // Prefer remap_constexpr_sampler_by_binding unless you're also doing reflection anyways. + void remap_constexpr_sampler(VariableID id, const MSLConstexprSampler &sampler); + + // Same as remap_constexpr_sampler, except you provide set/binding, rather than variable ID. + // Remaps based on ID take priority over set/binding remaps. + void remap_constexpr_sampler_by_binding(uint32_t desc_set, uint32_t binding, const MSLConstexprSampler &sampler); + + // If using CompilerMSL::Options::pad_fragment_output_components, override the number of components we expect + // to use for a particular location. The default is 4 if number of components is not overridden. + void set_fragment_output_components(uint32_t location, uint32_t components); + + void set_combined_sampler_suffix(const char *suffix); + const char *get_combined_sampler_suffix() const; + +protected: + // An enum of SPIR-V functions that are implemented in additional + // source code that is added to the shader if necessary. + enum SPVFuncImpl + { + SPVFuncImplNone, + SPVFuncImplMod, + SPVFuncImplRadians, + SPVFuncImplDegrees, + SPVFuncImplFindILsb, + SPVFuncImplFindSMsb, + SPVFuncImplFindUMsb, + SPVFuncImplSSign, + SPVFuncImplArrayCopyMultidimBase, + // Unfortunately, we cannot use recursive templates in the MSL compiler properly, + // so stamp out variants up to some arbitrary maximum. 
+ SPVFuncImplArrayCopy = SPVFuncImplArrayCopyMultidimBase + 1, + SPVFuncImplArrayOfArrayCopy2Dim = SPVFuncImplArrayCopyMultidimBase + 2, + SPVFuncImplArrayOfArrayCopy3Dim = SPVFuncImplArrayCopyMultidimBase + 3, + SPVFuncImplArrayOfArrayCopy4Dim = SPVFuncImplArrayCopyMultidimBase + 4, + SPVFuncImplArrayOfArrayCopy5Dim = SPVFuncImplArrayCopyMultidimBase + 5, + SPVFuncImplArrayOfArrayCopy6Dim = SPVFuncImplArrayCopyMultidimBase + 6, + SPVFuncImplTexelBufferCoords, + SPVFuncImplImage2DAtomicCoords, // Emulate texture2D atomic operations + SPVFuncImplFMul, + SPVFuncImplFAdd, + SPVFuncImplCubemapTo2DArrayFace, + SPVFuncImplUnsafeArray, // Allow Metal to use the array template to make arrays a value type + SPVFuncImplInverse4x4, + SPVFuncImplInverse3x3, + SPVFuncImplInverse2x2, + // It is very important that this come before *Swizzle and ChromaReconstruct*, to ensure it's + // emitted before them. + SPVFuncImplForwardArgs, + // Likewise, this must come before *Swizzle. + SPVFuncImplGetSwizzle, + SPVFuncImplTextureSwizzle, + SPVFuncImplGatherSwizzle, + SPVFuncImplGatherCompareSwizzle, + SPVFuncImplSubgroupBroadcast, + SPVFuncImplSubgroupBroadcastFirst, + SPVFuncImplSubgroupBallot, + SPVFuncImplSubgroupBallotBitExtract, + SPVFuncImplSubgroupBallotFindLSB, + SPVFuncImplSubgroupBallotFindMSB, + SPVFuncImplSubgroupBallotBitCount, + SPVFuncImplSubgroupAllEqual, + SPVFuncImplSubgroupShuffle, + SPVFuncImplSubgroupShuffleXor, + SPVFuncImplSubgroupShuffleUp, + SPVFuncImplSubgroupShuffleDown, + SPVFuncImplQuadBroadcast, + SPVFuncImplQuadSwap, + SPVFuncImplReflectScalar, + SPVFuncImplRefractScalar, + SPVFuncImplFaceForwardScalar, + SPVFuncImplChromaReconstructNearest2Plane, + SPVFuncImplChromaReconstructNearest3Plane, + SPVFuncImplChromaReconstructLinear422CositedEven2Plane, + SPVFuncImplChromaReconstructLinear422CositedEven3Plane, + SPVFuncImplChromaReconstructLinear422Midpoint2Plane, + SPVFuncImplChromaReconstructLinear422Midpoint3Plane, + 
SPVFuncImplChromaReconstructLinear420XCositedEvenYCositedEven2Plane, + SPVFuncImplChromaReconstructLinear420XCositedEvenYCositedEven3Plane, + SPVFuncImplChromaReconstructLinear420XMidpointYCositedEven2Plane, + SPVFuncImplChromaReconstructLinear420XMidpointYCositedEven3Plane, + SPVFuncImplChromaReconstructLinear420XCositedEvenYMidpoint2Plane, + SPVFuncImplChromaReconstructLinear420XCositedEvenYMidpoint3Plane, + SPVFuncImplChromaReconstructLinear420XMidpointYMidpoint2Plane, + SPVFuncImplChromaReconstructLinear420XMidpointYMidpoint3Plane, + SPVFuncImplExpandITUFullRange, + SPVFuncImplExpandITUNarrowRange, + SPVFuncImplConvertYCbCrBT709, + SPVFuncImplConvertYCbCrBT601, + SPVFuncImplConvertYCbCrBT2020, + SPVFuncImplDynamicImageSampler, + }; + + // If the underlying resource has been used for comparison then duplicate loads of that resource must be too + // Use Metal's native frame-buffer fetch API for subpass inputs. + void emit_texture_op(const Instruction &i, bool sparse) override; + void emit_binary_unord_op(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1, const char *op); + void emit_instruction(const Instruction &instr) override; + void emit_glsl_op(uint32_t result_type, uint32_t result_id, uint32_t op, const uint32_t *args, + uint32_t count) override; + void emit_spv_amd_shader_trinary_minmax_op(uint32_t result_type, uint32_t result_id, uint32_t op, + const uint32_t *args, uint32_t count) override; + void emit_header() override; + void emit_function_prototype(SPIRFunction &func, const Bitset &return_flags) override; + void emit_sampled_image_op(uint32_t result_type, uint32_t result_id, uint32_t image_id, uint32_t samp_id) override; + void emit_subgroup_op(const Instruction &i) override; + std::string to_texture_op(const Instruction &i, bool sparse, bool *forward, + SmallVector<uint32_t> &inherited_expressions) override; + void emit_fixup() override; + std::string to_struct_member(const SPIRType &type, uint32_t member_type_id, uint32_t index, + const
std::string &qualifier = ""); + void emit_struct_member(const SPIRType &type, uint32_t member_type_id, uint32_t index, + const std::string &qualifier = "", uint32_t base_offset = 0) override; + void emit_struct_padding_target(const SPIRType &type) override; + std::string type_to_glsl(const SPIRType &type, uint32_t id = 0) override; + + // Allow Metal to use the array template to make arrays a value type + std::string type_to_array_glsl(const SPIRType &type) override; + + // Threadgroup arrays can't have a wrapper type + std::string variable_decl(const SPIRVariable &variable) override; + + // GCC workaround of lambdas calling protected functions (for older GCC versions) + std::string variable_decl(const SPIRType &type, const std::string &name, uint32_t id = 0) override; + + std::string image_type_glsl(const SPIRType &type, uint32_t id = 0) override; + std::string sampler_type(const SPIRType &type, uint32_t id); + std::string builtin_to_glsl(spv::BuiltIn builtin, spv::StorageClass storage) override; + std::string to_func_call_arg(const SPIRFunction::Parameter &arg, uint32_t id) override; + std::string to_name(uint32_t id, bool allow_alias = true) const override; + std::string to_function_name(const TextureFunctionNameArguments &args) override; + std::string to_function_args(const TextureFunctionArguments &args, bool *p_forward) override; + std::string to_initializer_expression(const SPIRVariable &var) override; + std::string to_zero_initialized_expression(uint32_t type_id) override; + + std::string unpack_expression_type(std::string expr_str, const SPIRType &type, uint32_t physical_type_id, + bool is_packed, bool row_major) override; + + // Returns true for BuiltInSampleMask because gl_SampleMask[] is an array in SPIR-V, but [[sample_mask]] is a scalar in Metal. 
+ bool builtin_translates_to_nonarray(spv::BuiltIn builtin) const override; + + std::string bitcast_glsl_op(const SPIRType &result_type, const SPIRType &argument_type) override; + bool emit_complex_bitcast(uint32_t result_id, uint32_t id, uint32_t op0) override; + bool skip_argument(uint32_t id) const override; + std::string to_member_reference(uint32_t base, const SPIRType &type, uint32_t index, bool ptr_chain) override; + std::string to_qualifiers_glsl(uint32_t id) override; + void replace_illegal_names() override; + void declare_undefined_values() override; + void declare_constant_arrays(); + + // Constant arrays of non-primitive types (i.e. matrices) won't link properly into Metal libraries + void declare_complex_constant_arrays(); + + bool is_patch_block(const SPIRType &type); + bool is_non_native_row_major_matrix(uint32_t id) override; + bool member_is_non_native_row_major_matrix(const SPIRType &type, uint32_t index) override; + std::string convert_row_major_matrix(std::string exp_str, const SPIRType &exp_type, uint32_t physical_type_id, + bool is_packed) override; + + void preprocess_op_codes(); + void localize_global_variables(); + void extract_global_variables_from_functions(); + void mark_packable_structs(); + void mark_as_packable(SPIRType &type); + + std::unordered_map<uint32_t, std::set<uint32_t>> function_global_vars; + void extract_global_variables_from_function(uint32_t func_id, std::set<uint32_t> &added_arg_ids, + std::unordered_set<uint32_t> &global_var_ids, + std::unordered_set<uint32_t> &processed_func_ids); + uint32_t add_interface_block(spv::StorageClass storage, bool patch = false); + uint32_t add_interface_block_pointer(uint32_t ib_var_id, spv::StorageClass storage); + + struct InterfaceBlockMeta + { + struct LocationMeta + { + uint32_t num_components = 0; + uint32_t ib_index = ~0u; + }; + std::unordered_map<uint32_t, LocationMeta> location_meta; + bool strip_array = false; + }; + + void add_variable_to_interface_block(spv::StorageClass storage, const std::string &ib_var_ref, SPIRType &ib_type, + SPIRVariable &var,
InterfaceBlockMeta &meta); + void add_composite_variable_to_interface_block(spv::StorageClass storage, const std::string &ib_var_ref, + SPIRType &ib_type, SPIRVariable &var, InterfaceBlockMeta &meta); + void add_plain_variable_to_interface_block(spv::StorageClass storage, const std::string &ib_var_ref, + SPIRType &ib_type, SPIRVariable &var, InterfaceBlockMeta &meta); + void add_plain_member_variable_to_interface_block(spv::StorageClass storage, const std::string &ib_var_ref, + SPIRType &ib_type, SPIRVariable &var, uint32_t index, + InterfaceBlockMeta &meta); + void add_composite_member_variable_to_interface_block(spv::StorageClass storage, const std::string &ib_var_ref, + SPIRType &ib_type, SPIRVariable &var, uint32_t index, + InterfaceBlockMeta &meta); + uint32_t get_accumulated_member_location(const SPIRVariable &var, uint32_t mbr_idx, bool strip_array); + void add_tess_level_input_to_interface_block(const std::string &ib_var_ref, SPIRType &ib_type, SPIRVariable &var); + + void fix_up_interface_member_indices(spv::StorageClass storage, uint32_t ib_type_id); + + void mark_location_as_used_by_shader(uint32_t location, const SPIRType &type, spv::StorageClass storage); + uint32_t ensure_correct_builtin_type(uint32_t type_id, spv::BuiltIn builtin); + uint32_t ensure_correct_input_type(uint32_t type_id, uint32_t location, uint32_t num_components = 0); + + void emit_custom_templates(); + void emit_custom_functions(); + void emit_resources(); + void emit_specialization_constants_and_structs(); + void emit_interface_block(uint32_t ib_var_id); + bool maybe_emit_array_assignment(uint32_t id_lhs, uint32_t id_rhs); + uint32_t get_resource_array_size(uint32_t id) const; + + void fix_up_shader_inputs_outputs(); + + std::string func_type_decl(SPIRType &type); + std::string entry_point_args_classic(bool append_comma); + std::string entry_point_args_argument_buffer(bool append_comma); + std::string entry_point_arg_stage_in(); + void entry_point_args_builtin(std::string &args); + 
void entry_point_args_discrete_descriptors(std::string &args); + std::string to_qualified_member_name(const SPIRType &type, uint32_t index); + std::string ensure_valid_name(std::string name, std::string pfx); + std::string to_sampler_expression(uint32_t id); + std::string to_swizzle_expression(uint32_t id); + std::string to_buffer_size_expression(uint32_t id); + bool is_sample_rate() const; + bool is_direct_input_builtin(spv::BuiltIn builtin); + std::string builtin_qualifier(spv::BuiltIn builtin); + std::string builtin_type_decl(spv::BuiltIn builtin, uint32_t id = 0); + std::string built_in_func_arg(spv::BuiltIn builtin, bool prefix_comma); + std::string member_attribute_qualifier(const SPIRType &type, uint32_t index); + std::string argument_decl(const SPIRFunction::Parameter &arg); + std::string round_fp_tex_coords(std::string tex_coords, bool coord_is_fp); + uint32_t get_metal_resource_index(SPIRVariable &var, SPIRType::BaseType basetype, uint32_t plane = 0); + uint32_t get_ordered_member_location(uint32_t type_id, uint32_t index, uint32_t *comp = nullptr); + + // MSL packing rules. These compute the effective packing rules as observed by the MSL compiler in the MSL output. + // These values can change depending on various extended decorations which control packing rules. + // We need to make these rules match up with SPIR-V declared rules. 
+ uint32_t get_declared_type_size_msl(const SPIRType &type, bool packed, bool row_major) const; + uint32_t get_declared_type_array_stride_msl(const SPIRType &type, bool packed, bool row_major) const; + uint32_t get_declared_type_matrix_stride_msl(const SPIRType &type, bool packed, bool row_major) const; + uint32_t get_declared_type_alignment_msl(const SPIRType &type, bool packed, bool row_major) const; + + uint32_t get_declared_struct_member_size_msl(const SPIRType &struct_type, uint32_t index) const; + uint32_t get_declared_struct_member_array_stride_msl(const SPIRType &struct_type, uint32_t index) const; + uint32_t get_declared_struct_member_matrix_stride_msl(const SPIRType &struct_type, uint32_t index) const; + uint32_t get_declared_struct_member_alignment_msl(const SPIRType &struct_type, uint32_t index) const; + + uint32_t get_declared_input_size_msl(const SPIRType &struct_type, uint32_t index) const; + uint32_t get_declared_input_array_stride_msl(const SPIRType &struct_type, uint32_t index) const; + uint32_t get_declared_input_matrix_stride_msl(const SPIRType &struct_type, uint32_t index) const; + uint32_t get_declared_input_alignment_msl(const SPIRType &struct_type, uint32_t index) const; + + const SPIRType &get_physical_member_type(const SPIRType &struct_type, uint32_t index) const; + SPIRType get_presumed_input_type(const SPIRType &struct_type, uint32_t index) const; + + uint32_t get_declared_struct_size_msl(const SPIRType &struct_type, bool ignore_alignment = false, + bool ignore_padding = false) const; + + std::string to_component_argument(uint32_t id); + void align_struct(SPIRType &ib_type, std::unordered_set &aligned_structs); + void mark_scalar_layout_structs(const SPIRType &ib_type); + void mark_struct_members_packed(const SPIRType &type); + void ensure_member_packing_rules_msl(SPIRType &ib_type, uint32_t index); + bool validate_member_packing_rules_msl(const SPIRType &type, uint32_t index) const; + std::string get_argument_address_space(const 
SPIRVariable &argument); + std::string get_type_address_space(const SPIRType &type, uint32_t id, bool argument = false); + const char *to_restrict(uint32_t id, bool space = true); + SPIRType &get_stage_in_struct_type(); + SPIRType &get_stage_out_struct_type(); + SPIRType &get_patch_stage_in_struct_type(); + SPIRType &get_patch_stage_out_struct_type(); + std::string get_tess_factor_struct_name(); + SPIRType &get_uint_type(); + uint32_t get_uint_type_id(); + void emit_atomic_func_op(uint32_t result_type, uint32_t result_id, const char *op, uint32_t mem_order_1, + uint32_t mem_order_2, bool has_mem_order_2, uint32_t op0, uint32_t op1 = 0, + bool op1_is_pointer = false, bool op1_is_literal = false, uint32_t op2 = 0); + const char *get_memory_order(uint32_t spv_mem_sem); + void add_pragma_line(const std::string &line); + void add_typedef_line(const std::string &line); + void emit_barrier(uint32_t id_exe_scope, uint32_t id_mem_scope, uint32_t id_mem_sem); + void emit_array_copy(const std::string &lhs, uint32_t rhs_id, spv::StorageClass lhs_storage, + spv::StorageClass rhs_storage) override; + void build_implicit_builtins(); + uint32_t build_constant_uint_array_pointer(); + void emit_entry_point_declarations() override; + uint32_t builtin_frag_coord_id = 0; + uint32_t builtin_sample_id_id = 0; + uint32_t builtin_sample_mask_id = 0; + uint32_t builtin_vertex_idx_id = 0; + uint32_t builtin_base_vertex_id = 0; + uint32_t builtin_instance_idx_id = 0; + uint32_t builtin_base_instance_id = 0; + uint32_t builtin_view_idx_id = 0; + uint32_t builtin_layer_id = 0; + uint32_t builtin_invocation_id_id = 0; + uint32_t builtin_primitive_id_id = 0; + uint32_t builtin_subgroup_invocation_id_id = 0; + uint32_t builtin_subgroup_size_id = 0; + uint32_t builtin_dispatch_base_id = 0; + uint32_t builtin_stage_input_size_id = 0; + uint32_t builtin_local_invocation_index_id = 0; + uint32_t builtin_workgroup_size_id = 0; + uint32_t swizzle_buffer_id = 0; + uint32_t buffer_size_buffer_id = 0; + 
uint32_t view_mask_buffer_id = 0; + uint32_t dynamic_offsets_buffer_id = 0; + uint32_t uint_type_id = 0; + + bool does_shader_write_sample_mask = false; + + void cast_to_builtin_store(uint32_t target_id, std::string &expr, const SPIRType &expr_type) override; + void cast_from_builtin_load(uint32_t source_id, std::string &expr, const SPIRType &expr_type) override; + void emit_store_statement(uint32_t lhs_expression, uint32_t rhs_expression) override; + + void analyze_sampled_image_usage(); + + void prepare_access_chain_for_scalar_access(std::string &expr, const SPIRType &type, spv::StorageClass storage, + bool &is_packed) override; + void fix_up_interpolant_access_chain(const uint32_t *ops, uint32_t length); + bool emit_tessellation_access_chain(const uint32_t *ops, uint32_t length); + bool emit_tessellation_io_load(uint32_t result_type, uint32_t id, uint32_t ptr); + bool is_out_of_bounds_tessellation_level(uint32_t id_lhs); + + void ensure_builtin(spv::StorageClass storage, spv::BuiltIn builtin); + + void mark_implicit_builtin(spv::StorageClass storage, spv::BuiltIn builtin, uint32_t id); + + std::string convert_to_f32(const std::string &expr, uint32_t components); + + Options msl_options; + std::set spv_function_implementations; + // Must be ordered to ensure declarations are in a specific order. + std::map inputs_by_location; + std::unordered_map inputs_by_builtin; + std::unordered_set inputs_in_use; + std::unordered_map fragment_output_components; + std::set pragma_lines; + std::set typedef_lines; + SmallVector vars_needing_early_declaration; + + std::unordered_map, InternalHasher> resource_bindings; + + uint32_t next_metal_resource_index_buffer = 0; + uint32_t next_metal_resource_index_texture = 0; + uint32_t next_metal_resource_index_sampler = 0; + // Intentionally uninitialized, works around MSVC 2013 bug. 
+ uint32_t next_metal_resource_ids[kMaxArgumentBuffers]; + + VariableID stage_in_var_id = 0; + VariableID stage_out_var_id = 0; + VariableID patch_stage_in_var_id = 0; + VariableID patch_stage_out_var_id = 0; + VariableID stage_in_ptr_var_id = 0; + VariableID stage_out_ptr_var_id = 0; + + // Handle HLSL-style 0-based vertex/instance index. + /* Three-valued flag type. Both needs_base_vertex_arg and needs_base_instance_arg below are initialised to Neutral; where they become No or Yes is not visible in this header. */ enum class TriState + { + Neutral, + No, + Yes + }; + TriState needs_base_vertex_arg = TriState::Neutral; + TriState needs_base_instance_arg = TriState::Neutral; + + bool has_sampled_images = false; + bool builtin_declaration = false; // Handle HLSL-style 0-based vertex/instance index. /* NOTE(review): the remark on builtin_declaration repeats the TriState comment word for word and does not describe this flag; presumably a copy/paste leftover - confirm against upstream. */ + + bool is_using_builtin_array = false; // Force the use of C style array declaration. + bool using_builtin_array() const; + /* The boolean state flags below all default to false. */ + bool is_rasterization_disabled = false; + bool capture_output_to_buffer = false; + bool needs_swizzle_buffer_def = false; + bool used_swizzle_buffer = false; + bool added_builtin_tess_level = false; + bool needs_subgroup_invocation_id = false; + bool needs_subgroup_size = false; + bool needs_sample_id = false; + /* Name and suffix strings; every one except qual_pos_var_name carries a literal default. */ std::string qual_pos_var_name; + std::string stage_in_var_name = "in"; + std::string stage_out_var_name = "out"; + std::string patch_stage_in_var_name = "patchIn"; + std::string patch_stage_out_var_name = "patchOut"; + std::string sampler_name_suffix = "Smplr"; + std::string swizzle_name_suffix = "Swzl"; + std::string buffer_size_name_suffix = "BufferSize"; + std::string plane_name_suffix = "Plane"; + std::string input_wg_var_name = "gl_in"; + std::string input_buffer_var_name = "spvIn"; + std::string output_buffer_var_name = "spvOut"; + std::string patch_output_buffer_var_name = "spvPatchOut"; + std::string tess_factor_buffer_var_name = "spvTessLevel"; + std::string index_buffer_var_name = "spvIndices"; + spv::Op previous_instruction_opcode = spv::OpNop; + + // Must be ordered since declaration is in a specific order.
+ std::map constexpr_samplers_by_id; + std::unordered_map constexpr_samplers_by_binding; + const MSLConstexprSampler *find_constexpr_sampler(uint32_t id) const; + + std::unordered_set buffers_requiring_array_length; + SmallVector buffer_arrays; + std::unordered_set atomic_image_vars; // Emulate texture2D atomic operations + std::unordered_set pull_model_inputs; + + // Must be ordered since array is in a specific order. + std::map> buffers_requiring_dynamic_offset; + + SmallVector disabled_frag_outputs; + + std::unordered_set inline_uniform_blocks; + + uint32_t argument_buffer_ids[kMaxArgumentBuffers]; + uint32_t argument_buffer_discrete_mask = 0; + uint32_t argument_buffer_device_storage_mask = 0; + + void analyze_argument_buffers(); + bool descriptor_set_is_argument_buffer(uint32_t desc_set) const; + + uint32_t get_target_components_for_fragment_location(uint32_t location) const; + uint32_t build_extended_vector_type(uint32_t type_id, uint32_t components, + SPIRType::BaseType basetype = SPIRType::Unknown); + uint32_t build_msl_interpolant_type(uint32_t type_id, bool is_noperspective); + + bool suppress_missing_prototypes = false; + + void add_spv_func_and_recompile(SPVFuncImpl spv_func); + + void activate_argument_buffer_resources(); + + bool type_is_msl_framebuffer_fetch(const SPIRType &type) const; + + // OpcodeHandler that handles several MSL preprocessing operations. 
+ /* Opcode visitor declared inside the enclosing class (presumably CompilerMSL, whose header lies outside this excerpt). It keeps a reference to the compiler passed to its constructor, two lookup tables and six boolean flags that all start out false. Only declarations appear here; handle(), get_spv_func_impl() and check_resource_write() are defined elsewhere. */ struct OpCodePreprocessor : OpcodeHandler + { + OpCodePreprocessor(CompilerMSL &compiler_) + : compiler(compiler_) + { + } + + bool handle(spv::Op opcode, const uint32_t *args, uint32_t length) override; + CompilerMSL::SPVFuncImpl get_spv_func_impl(spv::Op opcode, const uint32_t *args); + void check_resource_write(uint32_t var_id); + + CompilerMSL &compiler; + /* NOTE(review): the two unordered_map members below carry no template arguments in this copy of the patch; the angle-bracket contents appear to have been lost - restore them from upstream before applying. */ std::unordered_map result_types; + std::unordered_map image_pointers; // Emulate texture2D atomic operations + /* suppress_missing_prototypes and the three needs_* flags share their names with members of the enclosing class; presumably they are copied back once the traversal finishes - TODO confirm. */ bool suppress_missing_prototypes = false; + bool uses_atomics = false; + bool uses_resource_write = false; + bool needs_subgroup_invocation_id = false; + bool needs_subgroup_size = false; + bool needs_sample_id = false; + }; + + // OpcodeHandler that scans for uses of sampled images + /* Stores nothing but a reference to the compiler passed to its constructor; the third handle() parameter is left unnamed in this declaration. */ struct SampledImageScanner : OpcodeHandler + { + SampledImageScanner(CompilerMSL &compiler_) + : compiler(compiler_) + { + } + + bool handle(spv::Op opcode, const uint32_t *args, uint32_t) override; + + CompilerMSL &compiler; + }; + + // Sorts the members of a SPIRType and associated Meta info based on a settable sorting + // aspect, which defines which aspect of the struct members will be used to sort them. + // Regardless of the sorting aspect, built-in members always appear at the end of the struct.
+ struct MemberSorter + { + /* The available sorting criteria. The constructor takes one SortAspect value and the struct keeps a SortAspect in sort_aspect; presumably the constructor stores its argument there - the definition is not in this header. */ enum SortAspect + { + Location, + LocationReverse, + Offset, + OffsetThenLocationReverse, + Alphabetical + }; + /* sort() and operator() are only declared here. operator() takes two member indices; presumably it is the ordering predicate that sort() applies - TODO confirm against the definition. */ + void sort(); + bool operator()(uint32_t mbr_idx1, uint32_t mbr_idx2); + MemberSorter(SPIRType &t, Meta &m, SortAspect sa); + /* type and meta are held by reference, so the sorter does not own the struct type or its metadata. */ + SPIRType &type; + Meta &meta; + SortAspect sort_aspect; + }; +}; +} // namespace SPIRV_CROSS_NAMESPACE + +#endif diff --git a/dep/spirv-cross/spirv_parser.cpp b/dep/spirv-cross/spirv_parser.cpp new file mode 100644 index 000000000..b7144c910 --- /dev/null +++ b/dep/spirv-cross/spirv_parser.cpp @@ -0,0 +1,1183 @@ +/* + * Copyright 2018-2020 Arm Limited + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * At your option, you may choose to accept this material under either: + * 1. The Apache License, Version 2.0, found at , or + * 2. The MIT License, found at . + * SPDX-License-Identifier: Apache-2.0 OR MIT.
+ */ + +#include "spirv_parser.hpp" +#include + /* NOTE(review): the header name after the second #include is missing in this copy of the patch, as are the template arguments of vector, static_cast, get, set and SmallVector further down; the angle-bracket contents appear to have been stripped - restore them from upstream before applying. */ +using namespace std; +using namespace spv; + +namespace SPIRV_CROSS_NAMESPACE +{ +/* Takes the SPIR-V word vector by value and moves it into ir.spirv. */ Parser::Parser(vector spirv) +{ + ir.spirv = move(spirv); +} + /* Builds ir.spirv from the word_count words that start at spirv_data. */ +Parser::Parser(const uint32_t *spirv_data, size_t word_count) +{ + ir.spirv = vector(spirv_data, spirv_data + word_count); +} + /* Returns true only for DecorationHlslSemanticGOOGLE; every other decoration yields false. */ +static bool decoration_is_string(Decoration decoration) +{ + switch (decoration) + { + case DecorationHlslSemanticGOOGLE: + return true; + + default: + return false; + } +} + /* Reverses the byte order of a 32-bit word. */ +static inline uint32_t swap_endian(uint32_t v) +{ + return ((v >> 24) & 0x000000ffu) | ((v >> 8) & 0x0000ff00u) | ((v << 8) & 0x00ff0000u) | ((v << 24) & 0xff000000u); +} + /* Accepts exactly the version words listed in the switch (99 and 0x10000 through 0x10500); any other value is rejected. */ +static bool is_valid_spirv_version(uint32_t version) +{ + switch (version) + { + // Allow v99 since it tends to just work. + case 99: + case 0x10000: // SPIR-V 1.0 + case 0x10100: // SPIR-V 1.1 + case 0x10200: // SPIR-V 1.2 + case 0x10300: // SPIR-V 1.3 + case 0x10400: // SPIR-V 1.4 + case 0x10500: // SPIR-V 1.5 + return true; + + default: + return false; + } +} + /* Validates the module header, splits the word stream into Instruction records, parses each record, then applies the forward-pointer fixups. Raises SPIRV_CROSS_THROW when the module has fewer than 5 words, when word 0 is not MagicNumber (after the optional byte swap) or word 1 is not an accepted version, when the ID bound exceeds 0x3fffff, when an instruction has a word count of 0 or extends past the end of the module, or when a function or block is still open after the last instruction. */ +void Parser::parse() +{ + auto &spirv = ir.spirv; + + auto len = spirv.size(); + if (len < 5) + SPIRV_CROSS_THROW("SPIRV file too small."); + + auto s = spirv.data(); + + // Endian-swap if we need to. + if (s[0] == swap_endian(MagicNumber)) + transform(begin(spirv), end(spirv), begin(spirv), [](uint32_t c) { return swap_endian(c); }); + + if (s[0] != MagicNumber || !is_valid_spirv_version(s[1])) + SPIRV_CROSS_THROW("Invalid SPIRV format."); + /* Header word 3 is taken as the ID bound; instruction decoding starts at word index 5. */ + uint32_t bound = s[3]; + + const uint32_t MaximumNumberOfIDs = 0x3fffff; + if (bound > MaximumNumberOfIDs) + SPIRV_CROSS_THROW("ID bound exceeds limit of 0x3fffff.\n"); + + ir.set_id_bounds(bound); + + uint32_t offset = 5; + + SmallVector instructions; + while (offset < len) + { + /* The low 16 bits of the first word are the opcode and the high 16 bits the instruction's total word count; offset and length recorded below describe the operand words that follow the first word. */ Instruction instr = {}; + instr.op = spirv[offset] & 0xffff; + instr.count = (spirv[offset] >> 16) & 0xffff; + + if (instr.count == 0) + SPIRV_CROSS_THROW("SPIR-V instructions cannot consume 0 words.
Invalid SPIR-V file."); + + instr.offset = offset + 1; + instr.length = instr.count - 1; + + offset += instr.count; + + if (offset > spirv.size()) + SPIRV_CROSS_THROW("SPIR-V instruction goes out of bounds."); + + instructions.push_back(instr); + } + + for (auto &i : instructions) + parse(i); + /* Once every instruction has been parsed, copy member_types, basetype and self from each recorded source type (fixup.second) into its target (fixup.first), then drop the fixup list. */ + for (auto &fixup : forward_pointer_fixups) + { + auto &target = get(fixup.first); + auto &source = get(fixup.second); + target.member_types = source.member_types; + target.basetype = source.basetype; + target.self = source.self; + } + forward_pointer_fixups.clear(); + + if (current_function) + SPIRV_CROSS_THROW("Function was not terminated."); + if (current_block) + SPIRV_CROSS_THROW("Block was not terminated."); +} + /* Returns a pointer to the first operand word of instr, nullptr when instr.length is 0, and throws when offset + length exceeds the number of words in ir.spirv. Note that the exception text names Compiler::stream() although this is Parser::stream(). */ +const uint32_t *Parser::stream(const Instruction &instr) const +{ + // If we're not going to use any arguments, just return nullptr. + // We want to avoid case where we return an out of range pointer + // that trips debug assertions on some platforms. + if (!instr.length) + return nullptr; + + if (instr.offset + instr.length > ir.spirv.size()) + SPIRV_CROSS_THROW("Compiler::stream() out of range."); + return &ir.spirv[instr.offset]; +} + /* Decodes a nul-terminated string packed four bytes per word, lowest byte first, beginning at word index offset. Returns the bytes before the first nul; throws if the end of spirv is reached without seeing one. */ +static string extract_string(const vector &spirv, uint32_t offset) +{ + string ret; + for (uint32_t i = offset; i < spirv.size(); i++) + { + uint32_t w = spirv[i]; + + for (uint32_t j = 0; j < 4; j++, w >>= 8) + { + char c = w & 0xff; + if (c == '\0') + return ret; + ret += c; + } + } + + SPIRV_CROSS_THROW("String was not terminated before EOF"); +} + /* Handles a single instruction by switching on its opcode; ops points at the operand words obtained from stream(). The switch continues beyond this excerpt. */ +void Parser::parse(const Instruction &instruction) +{ + auto *ops = stream(instruction); + auto op = static_cast(instruction.op); + uint32_t length = instruction.length; + + switch (op) + { + case OpSourceContinued: + case OpSourceExtension: + case OpNop: + case OpModuleProcessed: + break; + + case OpString: + { + set(ops[0], extract_string(ir.spirv, instruction.offset + 1)); + break; + } + + case OpMemoryModel: + ir.addressing_model = static_cast(ops[0]); + ir.memory_model =
static_cast(ops[1]); + break; + + case OpSource: + { + auto lang = static_cast(ops[0]); + switch (lang) + { + case SourceLanguageESSL: + ir.source.es = true; + ir.source.version = ops[1]; + ir.source.known = true; + ir.source.hlsl = false; + break; + + case SourceLanguageGLSL: + ir.source.es = false; + ir.source.version = ops[1]; + ir.source.known = true; + ir.source.hlsl = false; + break; + + case SourceLanguageHLSL: + // For purposes of cross-compiling, this is GLSL 450. + ir.source.es = false; + ir.source.version = 450; + ir.source.known = true; + ir.source.hlsl = true; + break; + + default: + ir.source.known = false; + break; + } + break; + } + + case OpUndef: + { + uint32_t result_type = ops[0]; + uint32_t id = ops[1]; + set(id, result_type); + if (current_block) + current_block->ops.push_back(instruction); + break; + } + + case OpCapability: + { + uint32_t cap = ops[0]; + if (cap == CapabilityKernel) + SPIRV_CROSS_THROW("Kernel capability not supported."); + + ir.declared_capabilities.push_back(static_cast(ops[0])); + break; + } + + case OpExtension: + { + auto ext = extract_string(ir.spirv, instruction.offset); + ir.declared_extensions.push_back(move(ext)); + break; + } + + case OpExtInstImport: + { + uint32_t id = ops[0]; + auto ext = extract_string(ir.spirv, instruction.offset + 1); + if (ext == "GLSL.std.450") + set(id, SPIRExtension::GLSL); + else if (ext == "DebugInfo") + set(id, SPIRExtension::SPV_debug_info); + else if (ext == "SPV_AMD_shader_ballot") + set(id, SPIRExtension::SPV_AMD_shader_ballot); + else if (ext == "SPV_AMD_shader_explicit_vertex_parameter") + set(id, SPIRExtension::SPV_AMD_shader_explicit_vertex_parameter); + else if (ext == "SPV_AMD_shader_trinary_minmax") + set(id, SPIRExtension::SPV_AMD_shader_trinary_minmax); + else if (ext == "SPV_AMD_gcn_shader") + set(id, SPIRExtension::SPV_AMD_gcn_shader); + else + set(id, SPIRExtension::Unsupported); + + // Other SPIR-V extensions which have ExtInstrs are currently not supported. 
+ + break; + } + + case OpExtInst: + { + // The SPIR-V debug information extended instructions might come at global scope. + if (current_block) + current_block->ops.push_back(instruction); + break; + } + + case OpEntryPoint: + { + auto itr = + ir.entry_points.insert(make_pair(ops[1], SPIREntryPoint(ops[1], static_cast(ops[0]), + extract_string(ir.spirv, instruction.offset + 2)))); + auto &e = itr.first->second; + + // Strings need nul-terminator and consume the whole word. + uint32_t strlen_words = uint32_t((e.name.size() + 1 + 3) >> 2); + + for (uint32_t i = strlen_words + 2; i < instruction.length; i++) + e.interface_variables.push_back(ops[i]); + + // Set the name of the entry point in case OpName is not provided later. + ir.set_name(ops[1], e.name); + + // If we don't have an entry, make the first one our "default". + if (!ir.default_entry_point) + ir.default_entry_point = ops[1]; + break; + } + + case OpExecutionMode: + { + auto &execution = ir.entry_points[ops[0]]; + auto mode = static_cast(ops[1]); + execution.flags.set(mode); + + switch (mode) + { + case ExecutionModeInvocations: + execution.invocations = ops[2]; + break; + + case ExecutionModeLocalSize: + execution.workgroup_size.x = ops[2]; + execution.workgroup_size.y = ops[3]; + execution.workgroup_size.z = ops[4]; + break; + + case ExecutionModeOutputVertices: + execution.output_vertices = ops[2]; + break; + + default: + break; + } + break; + } + + case OpName: + { + uint32_t id = ops[0]; + ir.set_name(id, extract_string(ir.spirv, instruction.offset + 1)); + break; + } + + case OpMemberName: + { + uint32_t id = ops[0]; + uint32_t member = ops[1]; + ir.set_member_name(id, member, extract_string(ir.spirv, instruction.offset + 2)); + break; + } + + case OpDecorationGroup: + { + // Noop, this simply means an ID should be a collector of decorations. + // The meta array is already a flat array of decorations which will contain the relevant decorations. 
+ break; + } + + case OpGroupDecorate: + { + uint32_t group_id = ops[0]; + auto &decorations = ir.meta[group_id].decoration; + auto &flags = decorations.decoration_flags; + + // Copies decorations from one ID to another. Only copy decorations which are set in the group, + // i.e., we cannot just copy the meta structure directly. + for (uint32_t i = 1; i < length; i++) + { + uint32_t target = ops[i]; + flags.for_each_bit([&](uint32_t bit) { + auto decoration = static_cast(bit); + + if (decoration_is_string(decoration)) + { + ir.set_decoration_string(target, decoration, ir.get_decoration_string(group_id, decoration)); + } + else + { + ir.meta[target].decoration_word_offset[decoration] = + ir.meta[group_id].decoration_word_offset[decoration]; + ir.set_decoration(target, decoration, ir.get_decoration(group_id, decoration)); + } + }); + } + break; + } + + case OpGroupMemberDecorate: + { + uint32_t group_id = ops[0]; + auto &flags = ir.meta[group_id].decoration.decoration_flags; + + // Copies decorations from one ID to another. Only copy decorations which are set in the group, + // i.e., we cannot just copy the meta structure directly. + for (uint32_t i = 1; i + 1 < length; i += 2) + { + uint32_t target = ops[i + 0]; + uint32_t index = ops[i + 1]; + flags.for_each_bit([&](uint32_t bit) { + auto decoration = static_cast(bit); + + if (decoration_is_string(decoration)) + ir.set_member_decoration_string(target, index, decoration, + ir.get_decoration_string(group_id, decoration)); + else + ir.set_member_decoration(target, index, decoration, ir.get_decoration(group_id, decoration)); + }); + } + break; + } + + case OpDecorate: + case OpDecorateId: + { + // OpDecorateId technically supports an array of arguments, but our only supported decorations are single uint, + // so merge decorate and decorate-id here. 
+ uint32_t id = ops[0]; + + auto decoration = static_cast(ops[1]); + if (length >= 3) + { + ir.meta[id].decoration_word_offset[decoration] = uint32_t(&ops[2] - ir.spirv.data()); + ir.set_decoration(id, decoration, ops[2]); + } + else + ir.set_decoration(id, decoration); + + break; + } + + case OpDecorateStringGOOGLE: + { + uint32_t id = ops[0]; + auto decoration = static_cast(ops[1]); + ir.set_decoration_string(id, decoration, extract_string(ir.spirv, instruction.offset + 2)); + break; + } + + case OpMemberDecorate: + { + uint32_t id = ops[0]; + uint32_t member = ops[1]; + auto decoration = static_cast(ops[2]); + if (length >= 4) + ir.set_member_decoration(id, member, decoration, ops[3]); + else + ir.set_member_decoration(id, member, decoration); + break; + } + + case OpMemberDecorateStringGOOGLE: + { + uint32_t id = ops[0]; + uint32_t member = ops[1]; + auto decoration = static_cast(ops[2]); + ir.set_member_decoration_string(id, member, decoration, extract_string(ir.spirv, instruction.offset + 3)); + break; + } + + // Build up basic types. + case OpTypeVoid: + { + uint32_t id = ops[0]; + auto &type = set(id); + type.basetype = SPIRType::Void; + break; + } + + case OpTypeBool: + { + uint32_t id = ops[0]; + auto &type = set(id); + type.basetype = SPIRType::Boolean; + type.width = 1; + break; + } + + case OpTypeFloat: + { + uint32_t id = ops[0]; + uint32_t width = ops[1]; + auto &type = set(id); + if (width == 64) + type.basetype = SPIRType::Double; + else if (width == 32) + type.basetype = SPIRType::Float; + else if (width == 16) + type.basetype = SPIRType::Half; + else + SPIRV_CROSS_THROW("Unrecognized bit-width of floating point type."); + type.width = width; + break; + } + + case OpTypeInt: + { + uint32_t id = ops[0]; + uint32_t width = ops[1]; + bool signedness = ops[2] != 0; + auto &type = set(id); + type.basetype = signedness ? 
to_signed_basetype(width) : to_unsigned_basetype(width); + type.width = width; + break; + } + + // Build composite types by "inheriting". + // NOTE: The self member is also copied! For pointers and array modifiers this is a good thing + // since we can refer to decorations on pointee classes which is needed for UBO/SSBO, I/O blocks in geometry/tess etc. + case OpTypeVector: + { + uint32_t id = ops[0]; + uint32_t vecsize = ops[2]; + + auto &base = get(ops[1]); + auto &vecbase = set(id); + + vecbase = base; + vecbase.vecsize = vecsize; + vecbase.self = id; + vecbase.parent_type = ops[1]; + break; + } + + case OpTypeMatrix: + { + uint32_t id = ops[0]; + uint32_t colcount = ops[2]; + + auto &base = get(ops[1]); + auto &matrixbase = set(id); + + matrixbase = base; + matrixbase.columns = colcount; + matrixbase.self = id; + matrixbase.parent_type = ops[1]; + break; + } + + case OpTypeArray: + { + uint32_t id = ops[0]; + auto &arraybase = set(id); + + uint32_t tid = ops[1]; + auto &base = get(tid); + + arraybase = base; + arraybase.parent_type = tid; + + uint32_t cid = ops[2]; + ir.mark_used_as_array_length(cid); + auto *c = maybe_get(cid); + bool literal = c && !c->specialization; + + // We're copying type information into Array types, so we'll need a fixup for any physical pointer + // references. + if (base.forward_pointer) + forward_pointer_fixups.push_back({ id, tid }); + + arraybase.array_size_literal.push_back(literal); + arraybase.array.push_back(literal ? c->scalar() : cid); + // Do NOT set arraybase.self! + break; + } + + case OpTypeRuntimeArray: + { + uint32_t id = ops[0]; + + auto &base = get(ops[1]); + auto &arraybase = set(id); + + // We're copying type information into Array types, so we'll need a fixup for any physical pointer + // references. 
+ if (base.forward_pointer) + forward_pointer_fixups.push_back({ id, ops[1] }); + + arraybase = base; + arraybase.array.push_back(0); + arraybase.array_size_literal.push_back(true); + arraybase.parent_type = ops[1]; + // Do NOT set arraybase.self! + break; + } + + case OpTypeImage: + { + uint32_t id = ops[0]; + auto &type = set(id); + type.basetype = SPIRType::Image; + type.image.type = ops[1]; + type.image.dim = static_cast(ops[2]); + type.image.depth = ops[3] == 1; + type.image.arrayed = ops[4] != 0; + type.image.ms = ops[5] != 0; + type.image.sampled = ops[6]; + type.image.format = static_cast(ops[7]); + type.image.access = (length >= 9) ? static_cast(ops[8]) : AccessQualifierMax; + break; + } + + case OpTypeSampledImage: + { + uint32_t id = ops[0]; + uint32_t imagetype = ops[1]; + auto &type = set(id); + type = get(imagetype); + type.basetype = SPIRType::SampledImage; + type.self = id; + break; + } + + case OpTypeSampler: + { + uint32_t id = ops[0]; + auto &type = set(id); + type.basetype = SPIRType::Sampler; + break; + } + + case OpTypePointer: + { + uint32_t id = ops[0]; + + // Very rarely, we might receive a FunctionPrototype here. + // We won't be able to compile it, but we shouldn't crash when parsing. + // We should be able to reflect. + auto *base = maybe_get(ops[2]); + auto &ptrbase = set(id); + + if (base) + ptrbase = *base; + + ptrbase.pointer = true; + ptrbase.pointer_depth++; + ptrbase.storage = static_cast(ops[1]); + + if (ptrbase.storage == StorageClassAtomicCounter) + ptrbase.basetype = SPIRType::AtomicCounter; + + if (base && base->forward_pointer) + forward_pointer_fixups.push_back({ id, ops[2] }); + + ptrbase.parent_type = ops[2]; + + // Do NOT set ptrbase.self! 
+ break; + } + + case OpTypeForwardPointer: + { + uint32_t id = ops[0]; + auto &ptrbase = set(id); + ptrbase.pointer = true; + ptrbase.pointer_depth++; + ptrbase.storage = static_cast(ops[1]); + ptrbase.forward_pointer = true; + + if (ptrbase.storage == StorageClassAtomicCounter) + ptrbase.basetype = SPIRType::AtomicCounter; + + break; + } + + case OpTypeStruct: + { + uint32_t id = ops[0]; + auto &type = set(id); + type.basetype = SPIRType::Struct; + for (uint32_t i = 1; i < length; i++) + type.member_types.push_back(ops[i]); + + // Check if we have seen this struct type before, with just different + // decorations. + // + // Add workaround for issue #17 as well by looking at OpName for the struct + // types, which we shouldn't normally do. + // We should not normally have to consider type aliases like this to begin with + // however ... glslang issues #304, #307 cover this. + + // For stripped names, never consider struct type aliasing. + // We risk declaring the same struct multiple times, but type-punning is not allowed + // so this is safe. 
+ bool consider_aliasing = !ir.get_name(type.self).empty(); + if (consider_aliasing) + { + for (auto &other : global_struct_cache) + { + if (ir.get_name(type.self) == ir.get_name(other) && + types_are_logically_equivalent(type, get(other))) + { + type.type_alias = other; + break; + } + } + + if (type.type_alias == TypeID(0)) + global_struct_cache.push_back(id); + } + break; + } + + case OpTypeFunction: + { + uint32_t id = ops[0]; + uint32_t ret = ops[1]; + + auto &func = set(id, ret); + for (uint32_t i = 2; i < length; i++) + func.parameter_types.push_back(ops[i]); + break; + } + + case OpTypeAccelerationStructureKHR: + { + uint32_t id = ops[0]; + auto &type = set(id); + type.basetype = SPIRType::AccelerationStructure; + break; + } + + case OpTypeRayQueryProvisionalKHR: + { + uint32_t id = ops[0]; + auto &type = set(id); + type.basetype = SPIRType::RayQuery; + break; + } + + // Variable declaration + // All variables are essentially pointers with a storage qualifier. + case OpVariable: + { + uint32_t type = ops[0]; + uint32_t id = ops[1]; + auto storage = static_cast(ops[2]); + uint32_t initializer = length == 4 ? ops[3] : 0; + + if (storage == StorageClassFunction) + { + if (!current_function) + SPIRV_CROSS_THROW("No function currently in scope"); + current_function->add_local_variable(id); + } + + set(id, type, storage, initializer); + break; + } + + // OpPhi + // OpPhi is a fairly magical opcode. + // It selects temporary variables based on which parent block we *came from*. + // In high-level languages we can "de-SSA" by creating a function local, and flush out temporaries to this function-local + // variable to emulate SSA Phi. + case OpPhi: + { + if (!current_function) + SPIRV_CROSS_THROW("No function currently in scope"); + if (!current_block) + SPIRV_CROSS_THROW("No block currently in scope"); + + uint32_t result_type = ops[0]; + uint32_t id = ops[1]; + + // Instead of a temporary, create a new function-wide temporary with this ID instead. 
+ auto &var = set(id, result_type, spv::StorageClassFunction); + var.phi_variable = true; + + current_function->add_local_variable(id); + + for (uint32_t i = 2; i + 2 <= length; i += 2) + current_block->phi_variables.push_back({ ops[i], ops[i + 1], id }); + break; + } + + // Constants + case OpSpecConstant: + case OpConstant: + { + uint32_t id = ops[1]; + auto &type = get(ops[0]); + + if (type.width > 32) + set(id, ops[0], ops[2] | (uint64_t(ops[3]) << 32), op == OpSpecConstant); + else + set(id, ops[0], ops[2], op == OpSpecConstant); + break; + } + + case OpSpecConstantFalse: + case OpConstantFalse: + { + uint32_t id = ops[1]; + set(id, ops[0], uint32_t(0), op == OpSpecConstantFalse); + break; + } + + case OpSpecConstantTrue: + case OpConstantTrue: + { + uint32_t id = ops[1]; + set(id, ops[0], uint32_t(1), op == OpSpecConstantTrue); + break; + } + + case OpConstantNull: + { + uint32_t id = ops[1]; + uint32_t type = ops[0]; + ir.make_constant_null(id, type, true); + break; + } + + case OpSpecConstantComposite: + case OpConstantComposite: + { + uint32_t id = ops[1]; + uint32_t type = ops[0]; + + auto &ctype = get(type); + + // We can have constants which are structs and arrays. + // In this case, our SPIRConstant will be a list of other SPIRConstant ids which we + // can refer to. + if (ctype.basetype == SPIRType::Struct || !ctype.array.empty()) + { + set(id, type, ops + 2, length - 2, op == OpSpecConstantComposite); + } + else + { + uint32_t elements = length - 2; + if (elements > 4) + SPIRV_CROSS_THROW("OpConstantComposite only supports 1, 2, 3 and 4 elements."); + + SPIRConstant remapped_constant_ops[4]; + const SPIRConstant *c[4]; + for (uint32_t i = 0; i < elements; i++) + { + // Specialization constants operations can also be part of this. + // We do not know their value, so any attempt to query SPIRConstant later + // will fail. We can only propagate the ID of the expression and use to_expression on it. 
+ auto *constant_op = maybe_get(ops[2 + i]); + auto *undef_op = maybe_get(ops[2 + i]); + if (constant_op) + { + if (op == OpConstantComposite) + SPIRV_CROSS_THROW("Specialization constant operation used in OpConstantComposite."); + + remapped_constant_ops[i].make_null(get(constant_op->basetype)); + remapped_constant_ops[i].self = constant_op->self; + remapped_constant_ops[i].constant_type = constant_op->basetype; + remapped_constant_ops[i].specialization = true; + c[i] = &remapped_constant_ops[i]; + } + else if (undef_op) + { + // Undefined, just pick 0. + remapped_constant_ops[i].make_null(get(undef_op->basetype)); + remapped_constant_ops[i].constant_type = undef_op->basetype; + c[i] = &remapped_constant_ops[i]; + } + else + c[i] = &get(ops[2 + i]); + } + set(id, type, c, elements, op == OpSpecConstantComposite); + } + break; + } + + // Functions + case OpFunction: + { + uint32_t res = ops[0]; + uint32_t id = ops[1]; + // Control + uint32_t type = ops[3]; + + if (current_function) + SPIRV_CROSS_THROW("Must end a function before starting a new one!"); + + current_function = &set(id, res, type); + break; + } + + case OpFunctionParameter: + { + uint32_t type = ops[0]; + uint32_t id = ops[1]; + + if (!current_function) + SPIRV_CROSS_THROW("Must be in a function!"); + + current_function->add_parameter(type, id); + set(id, type, StorageClassFunction); + break; + } + + case OpFunctionEnd: + { + if (current_block) + { + // Very specific error message, but seems to come up quite often. + SPIRV_CROSS_THROW( + "Cannot end a function before ending the current block.\n" + "Likely cause: If this SPIR-V was created from glslang HLSL, make sure the entry point is valid."); + } + current_function = nullptr; + break; + } + + // Blocks + case OpLabel: + { + // OpLabel always starts a block. 
+ if (!current_function) + SPIRV_CROSS_THROW("Blocks cannot exist outside functions!"); + + uint32_t id = ops[0]; + + current_function->blocks.push_back(id); + if (!current_function->entry_block) + current_function->entry_block = id; + + if (current_block) + SPIRV_CROSS_THROW("Cannot start a block before ending the current block."); + + current_block = &set(id); + break; + } + + // Branch instructions end blocks. + case OpBranch: + { + if (!current_block) + SPIRV_CROSS_THROW("Trying to end a non-existing block."); + + uint32_t target = ops[0]; + current_block->terminator = SPIRBlock::Direct; + current_block->next_block = target; + current_block = nullptr; + break; + } + + case OpBranchConditional: + { + if (!current_block) + SPIRV_CROSS_THROW("Trying to end a non-existing block."); + + current_block->condition = ops[0]; + current_block->true_block = ops[1]; + current_block->false_block = ops[2]; + + current_block->terminator = SPIRBlock::Select; + current_block = nullptr; + break; + } + + case OpSwitch: + { + if (!current_block) + SPIRV_CROSS_THROW("Trying to end a non-existing block."); + + current_block->terminator = SPIRBlock::MultiSelect; + + current_block->condition = ops[0]; + current_block->default_block = ops[1]; + + for (uint32_t i = 2; i + 2 <= length; i += 2) + current_block->cases.push_back({ ops[i], ops[i + 1] }); + + // If we jump to next block, make it break instead since we're inside a switch case block at that point. 
+ ir.block_meta[current_block->next_block] |= ParsedIR::BLOCK_META_MULTISELECT_MERGE_BIT; + + current_block = nullptr; + break; + } + + case OpKill: + { + if (!current_block) + SPIRV_CROSS_THROW("Trying to end a non-existing block."); + current_block->terminator = SPIRBlock::Kill; + current_block = nullptr; + break; + } + + case OpReturn: + { + if (!current_block) + SPIRV_CROSS_THROW("Trying to end a non-existing block."); + current_block->terminator = SPIRBlock::Return; + current_block = nullptr; + break; + } + + case OpReturnValue: + { + if (!current_block) + SPIRV_CROSS_THROW("Trying to end a non-existing block."); + current_block->terminator = SPIRBlock::Return; + current_block->return_value = ops[0]; + current_block = nullptr; + break; + } + + case OpUnreachable: + { + if (!current_block) + SPIRV_CROSS_THROW("Trying to end a non-existing block."); + current_block->terminator = SPIRBlock::Unreachable; + current_block = nullptr; + break; + } + + case OpSelectionMerge: + { + if (!current_block) + SPIRV_CROSS_THROW("Trying to modify a non-existing block."); + + current_block->next_block = ops[0]; + current_block->merge = SPIRBlock::MergeSelection; + ir.block_meta[current_block->next_block] |= ParsedIR::BLOCK_META_SELECTION_MERGE_BIT; + + if (length >= 2) + { + if (ops[1] & SelectionControlFlattenMask) + current_block->hint = SPIRBlock::HintFlatten; + else if (ops[1] & SelectionControlDontFlattenMask) + current_block->hint = SPIRBlock::HintDontFlatten; + } + break; + } + + case OpLoopMerge: + { + if (!current_block) + SPIRV_CROSS_THROW("Trying to modify a non-existing block."); + + current_block->merge_block = ops[0]; + current_block->continue_block = ops[1]; + current_block->merge = SPIRBlock::MergeLoop; + + ir.block_meta[current_block->self] |= ParsedIR::BLOCK_META_LOOP_HEADER_BIT; + ir.block_meta[current_block->merge_block] |= ParsedIR::BLOCK_META_LOOP_MERGE_BIT; + + ir.continue_block_to_loop_header[current_block->continue_block] = BlockID(current_block->self); 
+ + // Don't add loop headers to continue blocks, + // which would make it impossible to branch into the loop header since + // they are treated as continues. + if (current_block->continue_block != BlockID(current_block->self)) + ir.block_meta[current_block->continue_block] |= ParsedIR::BLOCK_META_CONTINUE_BIT; + + if (length >= 3) + { + if (ops[2] & LoopControlUnrollMask) + current_block->hint = SPIRBlock::HintUnroll; + else if (ops[2] & LoopControlDontUnrollMask) + current_block->hint = SPIRBlock::HintDontUnroll; + } + break; + } + + case OpSpecConstantOp: + { + if (length < 3) + SPIRV_CROSS_THROW("OpSpecConstantOp not enough arguments."); + + uint32_t result_type = ops[0]; + uint32_t id = ops[1]; + auto spec_op = static_cast(ops[2]); + + set(id, result_type, spec_op, ops + 3, length - 3); + break; + } + + case OpLine: + { + // OpLine might come at global scope, but we don't care about those since they will not be declared in any + // meaningful correct order. + // Ignore all OpLine directives which live outside a function. + if (current_block) + current_block->ops.push_back(instruction); + + // Line directives may arrive before first OpLabel. + // Treat this as the line of the function declaration, + // so warnings for arguments can propagate properly. + if (current_function) + { + // Store the first one we find and emit it before creating the function prototype. + if (current_function->entry_line.file_id == 0) + { + current_function->entry_line.file_id = ops[0]; + current_function->entry_line.line_literal = ops[1]; + } + } + break; + } + + case OpNoLine: + { + // OpNoLine might come at global scope. + if (current_block) + current_block->ops.push_back(instruction); + break; + } + + // Actual opcodes. 
+ default: + { + if (!current_block) + SPIRV_CROSS_THROW("Currently no block to insert opcode."); + + current_block->ops.push_back(instruction); + break; + } + } +} + +bool Parser::types_are_logically_equivalent(const SPIRType &a, const SPIRType &b) const +{ + if (a.basetype != b.basetype) + return false; + if (a.width != b.width) + return false; + if (a.vecsize != b.vecsize) + return false; + if (a.columns != b.columns) + return false; + if (a.array.size() != b.array.size()) + return false; + + size_t array_count = a.array.size(); + if (array_count && memcmp(a.array.data(), b.array.data(), array_count * sizeof(uint32_t)) != 0) + return false; + + if (a.basetype == SPIRType::Image || a.basetype == SPIRType::SampledImage) + { + if (memcmp(&a.image, &b.image, sizeof(SPIRType::Image)) != 0) + return false; + } + + if (a.member_types.size() != b.member_types.size()) + return false; + + size_t member_types = a.member_types.size(); + for (size_t i = 0; i < member_types; i++) + { + if (!types_are_logically_equivalent(get(a.member_types[i]), get(b.member_types[i]))) + return false; + } + + return true; +} + +bool Parser::variable_storage_is_aliased(const SPIRVariable &v) const +{ + auto &type = get(v.basetype); + + auto *type_meta = ir.find_meta(type.self); + + bool ssbo = v.storage == StorageClassStorageBuffer || + (type_meta && type_meta->decoration.decoration_flags.get(DecorationBufferBlock)); + bool image = type.basetype == SPIRType::Image; + bool counter = type.basetype == SPIRType::AtomicCounter; + + bool is_restrict; + if (ssbo) + is_restrict = ir.get_buffer_block_flags(v).get(DecorationRestrict); + else + is_restrict = ir.has_decoration(v.self, DecorationRestrict); + + return !is_restrict && (ssbo || image || counter); +} +} // namespace SPIRV_CROSS_NAMESPACE diff --git a/dep/spirv-cross/spirv_parser.hpp b/dep/spirv-cross/spirv_parser.hpp new file mode 100644 index 000000000..190266a4e --- /dev/null +++ b/dep/spirv-cross/spirv_parser.hpp @@ -0,0 +1,101 @@ +/* + * 
Copyright 2018-2020 Arm Limited + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * At your option, you may choose to accept this material under either: + * 1. The Apache License, Version 2.0, found at , or + * 2. The MIT License, found at . + * SPDX-License-Identifier: Apache-2.0 OR MIT. + */ + +#ifndef SPIRV_CROSS_PARSER_HPP +#define SPIRV_CROSS_PARSER_HPP + +#include "spirv_cross_parsed_ir.hpp" +#include + +namespace SPIRV_CROSS_NAMESPACE +{ +class Parser +{ +public: + Parser(const uint32_t *spirv_data, size_t word_count); + Parser(std::vector spirv); + + void parse(); + + ParsedIR &get_parsed_ir() + { + return ir; + } + +private: + ParsedIR ir; + SPIRFunction *current_function = nullptr; + SPIRBlock *current_block = nullptr; + + void parse(const Instruction &instr); + const uint32_t *stream(const Instruction &instr) const; + + template + T &set(uint32_t id, P &&... args) + { + ir.add_typed_id(static_cast(T::type), id); + auto &var = variant_set(ir.ids[id], std::forward

(args)...); + var.self = id; + return var; + } + + template + T &get(uint32_t id) + { + return variant_get(ir.ids[id]); + } + + template + T *maybe_get(uint32_t id) + { + if (ir.ids[id].get_type() == static_cast(T::type)) + return &get(id); + else + return nullptr; + } + + template + const T &get(uint32_t id) const + { + return variant_get(ir.ids[id]); + } + + template + const T *maybe_get(uint32_t id) const + { + if (ir.ids[id].get_type() == T::type) + return &get(id); + else + return nullptr; + } + + // This must be an ordered data structure so we always pick the same type aliases. + SmallVector global_struct_cache; + SmallVector> forward_pointer_fixups; + + bool types_are_logically_equivalent(const SPIRType &a, const SPIRType &b) const; + bool variable_storage_is_aliased(const SPIRVariable &v) const; +}; +} // namespace SPIRV_CROSS_NAMESPACE + +#endif diff --git a/dep/spirv-cross/spirv_reflect.cpp b/dep/spirv-cross/spirv_reflect.cpp new file mode 100644 index 000000000..90dc62bd3 --- /dev/null +++ b/dep/spirv-cross/spirv_reflect.cpp @@ -0,0 +1,706 @@ +/* + * Copyright 2018-2020 Bradley Austin Davis + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * At your option, you may choose to accept this material under either: + * 1. The Apache License, Version 2.0, found at , or + * 2. The MIT License, found at . + * SPDX-License-Identifier: Apache-2.0 OR MIT. 
+ */ + +#include "spirv_reflect.hpp" +#include "spirv_glsl.hpp" +#include + +using namespace spv; +using namespace SPIRV_CROSS_NAMESPACE; +using namespace std; + +namespace simple_json +{ +enum class Type +{ + Object, + Array, +}; + +using State = std::pair; +using Stack = std::stack; + +class Stream +{ + Stack stack; + StringStream<> buffer; + uint32_t indent{ 0 }; + char current_locale_radix_character = '.'; + +public: + void set_current_locale_radix_character(char c) + { + current_locale_radix_character = c; + } + + void begin_json_object(); + void end_json_object(); + void emit_json_key(const std::string &key); + void emit_json_key_value(const std::string &key, const std::string &value); + void emit_json_key_value(const std::string &key, bool value); + void emit_json_key_value(const std::string &key, uint32_t value); + void emit_json_key_value(const std::string &key, int32_t value); + void emit_json_key_value(const std::string &key, float value); + void emit_json_key_object(const std::string &key); + void emit_json_key_array(const std::string &key); + + void begin_json_array(); + void end_json_array(); + void emit_json_array_value(const std::string &value); + void emit_json_array_value(uint32_t value); + void emit_json_array_value(bool value); + + std::string str() const + { + return buffer.str(); + } + +private: + inline void statement_indent() + { + for (uint32_t i = 0; i < indent; i++) + buffer << " "; + } + + template + inline void statement_inner(T &&t) + { + buffer << std::forward(t); + } + + template + inline void statement_inner(T &&t, Ts &&... ts) + { + buffer << std::forward(t); + statement_inner(std::forward(ts)...); + } + + template + inline void statement(Ts &&... ts) + { + statement_indent(); + statement_inner(std::forward(ts)...); + buffer << '\n'; + } + + template + void statement_no_return(Ts &&... 
ts) + { + statement_indent(); + statement_inner(std::forward(ts)...); + } +}; +} // namespace simple_json + +using namespace simple_json; + +// Hackery to emit JSON without using nlohmann/json C++ library (which requires a +// higher level of compiler compliance than is required by SPIRV-Cross) +void Stream::begin_json_array() +{ + if (!stack.empty() && stack.top().second) + { + statement_inner(",\n"); + } + statement("["); + ++indent; + stack.emplace(Type::Array, false); +} + +void Stream::end_json_array() +{ + if (stack.empty() || stack.top().first != Type::Array) + SPIRV_CROSS_THROW("Invalid JSON state"); + if (stack.top().second) + { + statement_inner("\n"); + } + --indent; + statement_no_return("]"); + stack.pop(); + if (!stack.empty()) + { + stack.top().second = true; + } +} + +void Stream::emit_json_array_value(const std::string &value) +{ + if (stack.empty() || stack.top().first != Type::Array) + SPIRV_CROSS_THROW("Invalid JSON state"); + + if (stack.top().second) + statement_inner(",\n"); + + statement_no_return("\"", value, "\""); + stack.top().second = true; +} + +void Stream::emit_json_array_value(uint32_t value) +{ + if (stack.empty() || stack.top().first != Type::Array) + SPIRV_CROSS_THROW("Invalid JSON state"); + if (stack.top().second) + statement_inner(",\n"); + statement_no_return(std::to_string(value)); + stack.top().second = true; +} + +void Stream::emit_json_array_value(bool value) +{ + if (stack.empty() || stack.top().first != Type::Array) + SPIRV_CROSS_THROW("Invalid JSON state"); + if (stack.top().second) + statement_inner(",\n"); + statement_no_return(value ? 
"true" : "false"); + stack.top().second = true; +} + +void Stream::begin_json_object() +{ + if (!stack.empty() && stack.top().second) + { + statement_inner(",\n"); + } + statement("{"); + ++indent; + stack.emplace(Type::Object, false); +} + +void Stream::end_json_object() +{ + if (stack.empty() || stack.top().first != Type::Object) + SPIRV_CROSS_THROW("Invalid JSON state"); + if (stack.top().second) + { + statement_inner("\n"); + } + --indent; + statement_no_return("}"); + stack.pop(); + if (!stack.empty()) + { + stack.top().second = true; + } +} + +void Stream::emit_json_key(const std::string &key) +{ + if (stack.empty() || stack.top().first != Type::Object) + SPIRV_CROSS_THROW("Invalid JSON state"); + + if (stack.top().second) + statement_inner(",\n"); + statement_no_return("\"", key, "\" : "); + stack.top().second = true; +} + +void Stream::emit_json_key_value(const std::string &key, const std::string &value) +{ + emit_json_key(key); + statement_inner("\"", value, "\""); +} + +void Stream::emit_json_key_value(const std::string &key, uint32_t value) +{ + emit_json_key(key); + statement_inner(value); +} + +void Stream::emit_json_key_value(const std::string &key, int32_t value) +{ + emit_json_key(key); + statement_inner(value); +} + +void Stream::emit_json_key_value(const std::string &key, float value) +{ + emit_json_key(key); + statement_inner(convert_to_string(value, current_locale_radix_character)); +} + +void Stream::emit_json_key_value(const std::string &key, bool value) +{ + emit_json_key(key); + statement_inner(value ? 
"true" : "false"); +} + +void Stream::emit_json_key_object(const std::string &key) +{ + emit_json_key(key); + statement_inner("{\n"); + ++indent; + stack.emplace(Type::Object, false); +} + +void Stream::emit_json_key_array(const std::string &key) +{ + emit_json_key(key); + statement_inner("[\n"); + ++indent; + stack.emplace(Type::Array, false); +} + +void CompilerReflection::set_format(const std::string &format) +{ + if (format != "json") + { + SPIRV_CROSS_THROW("Unsupported format"); + } +} + +string CompilerReflection::compile() +{ + json_stream = std::make_shared(); + json_stream->set_current_locale_radix_character(current_locale_radix_character); + json_stream->begin_json_object(); + reorder_type_alias(); + emit_entry_points(); + emit_types(); + emit_resources(); + emit_specialization_constants(); + json_stream->end_json_object(); + return json_stream->str(); +} + +static bool naturally_emit_type(const SPIRType &type) +{ + return type.basetype == SPIRType::Struct && !type.pointer && type.array.empty(); +} + +bool CompilerReflection::type_is_reference(const SPIRType &type) const +{ + // Physical pointers and arrays of physical pointers need to refer to the pointee's type. + return type_is_top_level_physical_pointer(type) || + (!type.array.empty() && type_is_top_level_physical_pointer(get(type.parent_type))); +} + +void CompilerReflection::emit_types() +{ + bool emitted_open_tag = false; + + SmallVector physical_pointee_types; + + // If we have physical pointers or arrays of physical pointers, it's also helpful to emit the pointee type + // and chain the type hierarchy. For POD, arrays can emit the entire type in-place. 
+ ir.for_each_typed_id([&](uint32_t self, SPIRType &type) { + if (naturally_emit_type(type)) + { + emit_type(self, emitted_open_tag); + } + else if (type_is_reference(type)) + { + if (!naturally_emit_type(this->get(type.parent_type)) && + find(physical_pointee_types.begin(), physical_pointee_types.end(), type.parent_type) == + physical_pointee_types.end()) + { + physical_pointee_types.push_back(type.parent_type); + } + } + }); + + for (uint32_t pointee_type : physical_pointee_types) + emit_type(pointee_type, emitted_open_tag); + + if (emitted_open_tag) + { + json_stream->end_json_object(); + } +} + +void CompilerReflection::emit_type(uint32_t type_id, bool &emitted_open_tag) +{ + auto &type = get(type_id); + auto name = type_to_glsl(type); + + if (!emitted_open_tag) + { + json_stream->emit_json_key_object("types"); + emitted_open_tag = true; + } + json_stream->emit_json_key_object("_" + std::to_string(type_id)); + json_stream->emit_json_key_value("name", name); + + if (type_is_top_level_physical_pointer(type)) + { + json_stream->emit_json_key_value("type", "_" + std::to_string(type.parent_type)); + json_stream->emit_json_key_value("physical_pointer", true); + } + else if (!type.array.empty()) + { + emit_type_array(type); + json_stream->emit_json_key_value("type", "_" + std::to_string(type.parent_type)); + json_stream->emit_json_key_value("array_stride", get_decoration(type_id, DecorationArrayStride)); + } + else + { + json_stream->emit_json_key_array("members"); + // FIXME ideally we'd like to emit the size of a structure as a + // convenience to people parsing the reflected JSON. The problem + // is that there's no implicit size for a type. Its final size + // will be determined by the top level declaration in which it's + // included. So there might be one size for the struct if it's + // included in a std140 uniform block and another if it's included + // in a std430 uniform block. 
+ // The solution is to include *all* potential sizes as a map of + // layout type name to integer, but that will probably require + // some additional logic being written in this class, or in the + // parent CompilerGLSL class. + auto size = type.member_types.size(); + for (uint32_t i = 0; i < size; ++i) + { + emit_type_member(type, i); + } + json_stream->end_json_array(); + } + + json_stream->end_json_object(); +} + +void CompilerReflection::emit_type_member(const SPIRType &type, uint32_t index) +{ + auto &membertype = get(type.member_types[index]); + json_stream->begin_json_object(); + auto name = to_member_name(type, index); + // FIXME we'd like to emit the offset of each member, but such offsets are + // context dependent. See the comment above regarding structure sizes + json_stream->emit_json_key_value("name", name); + + if (type_is_reference(membertype)) + { + json_stream->emit_json_key_value("type", "_" + std::to_string(membertype.parent_type)); + } + else if (membertype.basetype == SPIRType::Struct) + { + json_stream->emit_json_key_value("type", "_" + std::to_string(membertype.self)); + } + else + { + json_stream->emit_json_key_value("type", type_to_glsl(membertype)); + } + emit_type_member_qualifiers(type, index); + json_stream->end_json_object(); +} + +void CompilerReflection::emit_type_array(const SPIRType &type) +{ + if (!type_is_top_level_physical_pointer(type) && !type.array.empty()) + { + json_stream->emit_json_key_array("array"); + // Note that we emit the zeros here as a means of identifying + // unbounded arrays. 
This is necessary as otherwise there would + // be no way of differentiating between float[4] and float[4][] + for (const auto &value : type.array) + json_stream->emit_json_array_value(value); + json_stream->end_json_array(); + + json_stream->emit_json_key_array("array_size_is_literal"); + for (const auto &value : type.array_size_literal) + json_stream->emit_json_array_value(value); + json_stream->end_json_array(); + } +} + +void CompilerReflection::emit_type_member_qualifiers(const SPIRType &type, uint32_t index) +{ + auto &membertype = get(type.member_types[index]); + emit_type_array(membertype); + auto &memb = ir.meta[type.self].members; + if (index < memb.size()) + { + auto &dec = memb[index]; + if (dec.decoration_flags.get(DecorationLocation)) + json_stream->emit_json_key_value("location", dec.location); + if (dec.decoration_flags.get(DecorationOffset)) + json_stream->emit_json_key_value("offset", dec.offset); + + // Array stride is a property of the array type, not the struct. + if (has_decoration(type.member_types[index], DecorationArrayStride)) + json_stream->emit_json_key_value("array_stride", + get_decoration(type.member_types[index], DecorationArrayStride)); + + if (dec.decoration_flags.get(DecorationMatrixStride)) + json_stream->emit_json_key_value("matrix_stride", dec.matrix_stride); + if (dec.decoration_flags.get(DecorationRowMajor)) + json_stream->emit_json_key_value("row_major", true); + + if (type_is_top_level_physical_pointer(membertype)) + json_stream->emit_json_key_value("physical_pointer", true); + } +} + +string CompilerReflection::execution_model_to_str(spv::ExecutionModel model) +{ + switch (model) + { + case ExecutionModelVertex: + return "vert"; + case ExecutionModelTessellationControl: + return "tesc"; + case ExecutionModelTessellationEvaluation: + return "tese"; + case ExecutionModelGeometry: + return "geom"; + case ExecutionModelFragment: + return "frag"; + case ExecutionModelGLCompute: + return "comp"; + case 
ExecutionModelRayGenerationNV: + return "rgen"; + case ExecutionModelIntersectionNV: + return "rint"; + case ExecutionModelAnyHitNV: + return "rahit"; + case ExecutionModelClosestHitNV: + return "rchit"; + case ExecutionModelMissNV: + return "rmiss"; + case ExecutionModelCallableNV: + return "rcall"; + default: + return "???"; + } +} + +// FIXME include things like the local_size dimensions, geometry output vertex count, etc +void CompilerReflection::emit_entry_points() +{ + auto entries = get_entry_points_and_stages(); + if (!entries.empty()) + { + // Needed to make output deterministic. + sort(begin(entries), end(entries), [](const EntryPoint &a, const EntryPoint &b) -> bool { + if (a.execution_model < b.execution_model) + return true; + else if (a.execution_model > b.execution_model) + return false; + else + return a.name < b.name; + }); + + json_stream->emit_json_key_array("entryPoints"); + for (auto &e : entries) + { + json_stream->begin_json_object(); + json_stream->emit_json_key_value("name", e.name); + json_stream->emit_json_key_value("mode", execution_model_to_str(e.execution_model)); + if (e.execution_model == ExecutionModelGLCompute) + { + const auto &spv_entry = get_entry_point(e.name, e.execution_model); + + SpecializationConstant spec_x, spec_y, spec_z; + get_work_group_size_specialization_constants(spec_x, spec_y, spec_z); + + json_stream->emit_json_key_array("workgroup_size"); + json_stream->emit_json_array_value(spec_x.id != ID(0) ? spec_x.constant_id : + spv_entry.workgroup_size.x); + json_stream->emit_json_array_value(spec_y.id != ID(0) ? spec_y.constant_id : + spv_entry.workgroup_size.y); + json_stream->emit_json_array_value(spec_z.id != ID(0) ? 
spec_z.constant_id : + spv_entry.workgroup_size.z); + json_stream->end_json_array(); + + json_stream->emit_json_key_array("workgroup_size_is_spec_constant_id"); + json_stream->emit_json_array_value(spec_x.id != ID(0)); + json_stream->emit_json_array_value(spec_y.id != ID(0)); + json_stream->emit_json_array_value(spec_z.id != ID(0)); + json_stream->end_json_array(); + } + json_stream->end_json_object(); + } + json_stream->end_json_array(); + } +} + +void CompilerReflection::emit_resources() +{ + auto res = get_shader_resources(); + emit_resources("subpass_inputs", res.subpass_inputs); + emit_resources("inputs", res.stage_inputs); + emit_resources("outputs", res.stage_outputs); + emit_resources("textures", res.sampled_images); + emit_resources("separate_images", res.separate_images); + emit_resources("separate_samplers", res.separate_samplers); + emit_resources("images", res.storage_images); + emit_resources("ssbos", res.storage_buffers); + emit_resources("ubos", res.uniform_buffers); + emit_resources("push_constants", res.push_constant_buffers); + emit_resources("counters", res.atomic_counters); + emit_resources("acceleration_structures", res.acceleration_structures); +} + +void CompilerReflection::emit_resources(const char *tag, const SmallVector &resources) +{ + if (resources.empty()) + { + return; + } + + json_stream->emit_json_key_array(tag); + for (auto &res : resources) + { + auto &type = get_type(res.type_id); + auto typeflags = ir.meta[type.self].decoration.decoration_flags; + auto &mask = get_decoration_bitset(res.id); + + // If we don't have a name, use the fallback for the type instead of the variable + // for SSBOs and UBOs since those are the only meaningful names to use externally. + // Push constant blocks are still accessed by name and not block name, even though they are technically Blocks. 
+ bool is_push_constant = get_storage_class(res.id) == StorageClassPushConstant; + bool is_block = get_decoration_bitset(type.self).get(DecorationBlock) || + get_decoration_bitset(type.self).get(DecorationBufferBlock); + + ID fallback_id = !is_push_constant && is_block ? ID(res.base_type_id) : ID(res.id); + + json_stream->begin_json_object(); + + if (type.basetype == SPIRType::Struct) + { + json_stream->emit_json_key_value("type", "_" + std::to_string(res.base_type_id)); + } + else + { + json_stream->emit_json_key_value("type", type_to_glsl(type)); + } + + json_stream->emit_json_key_value("name", !res.name.empty() ? res.name : get_fallback_name(fallback_id)); + { + bool ssbo_block = type.storage == StorageClassStorageBuffer || + (type.storage == StorageClassUniform && typeflags.get(DecorationBufferBlock)); + if (ssbo_block) + { + auto buffer_flags = get_buffer_block_flags(res.id); + if (buffer_flags.get(DecorationNonReadable)) + json_stream->emit_json_key_value("writeonly", true); + if (buffer_flags.get(DecorationNonWritable)) + json_stream->emit_json_key_value("readonly", true); + if (buffer_flags.get(DecorationRestrict)) + json_stream->emit_json_key_value("restrict", true); + if (buffer_flags.get(DecorationCoherent)) + json_stream->emit_json_key_value("coherent", true); + } + } + + emit_type_array(type); + + { + bool is_sized_block = is_block && (get_storage_class(res.id) == StorageClassUniform || + get_storage_class(res.id) == StorageClassUniformConstant || + get_storage_class(res.id) == StorageClassStorageBuffer); + if (is_sized_block) + { + uint32_t block_size = uint32_t(get_declared_struct_size(get_type(res.base_type_id))); + json_stream->emit_json_key_value("block_size", block_size); + } + } + + if (type.storage == StorageClassPushConstant) + json_stream->emit_json_key_value("push_constant", true); + if (mask.get(DecorationLocation)) + json_stream->emit_json_key_value("location", get_decoration(res.id, DecorationLocation)); + if 
(mask.get(DecorationRowMajor)) + json_stream->emit_json_key_value("row_major", true); + if (mask.get(DecorationColMajor)) + json_stream->emit_json_key_value("column_major", true); + if (mask.get(DecorationIndex)) + json_stream->emit_json_key_value("index", get_decoration(res.id, DecorationIndex)); + if (type.storage != StorageClassPushConstant && mask.get(DecorationDescriptorSet)) + json_stream->emit_json_key_value("set", get_decoration(res.id, DecorationDescriptorSet)); + if (mask.get(DecorationBinding)) + json_stream->emit_json_key_value("binding", get_decoration(res.id, DecorationBinding)); + if (mask.get(DecorationInputAttachmentIndex)) + json_stream->emit_json_key_value("input_attachment_index", + get_decoration(res.id, DecorationInputAttachmentIndex)); + if (mask.get(DecorationOffset)) + json_stream->emit_json_key_value("offset", get_decoration(res.id, DecorationOffset)); + + // For images, the type itself adds a layout qualifier. + // Only emit the format for storage images. + if (type.basetype == SPIRType::Image && type.image.sampled == 2) + { + const char *fmt = format_to_glsl(type.image.format); + if (fmt != nullptr) + json_stream->emit_json_key_value("format", std::string(fmt)); + } + json_stream->end_json_object(); + } + json_stream->end_json_array(); +} + +void CompilerReflection::emit_specialization_constants() +{ + auto specialization_constants = get_specialization_constants(); + if (specialization_constants.empty()) + return; + + json_stream->emit_json_key_array("specialization_constants"); + for (const auto &spec_const : specialization_constants) + { + auto &c = get(spec_const.id); + auto type = get(c.constant_type); + json_stream->begin_json_object(); + json_stream->emit_json_key_value("name", get_name(spec_const.id)); + json_stream->emit_json_key_value("id", spec_const.constant_id); + json_stream->emit_json_key_value("type", type_to_glsl(type)); + json_stream->emit_json_key_value("variable_id", spec_const.id); + switch (type.basetype) + { + case 
SPIRType::UInt: + json_stream->emit_json_key_value("default_value", c.scalar()); + break; + + case SPIRType::Int: + json_stream->emit_json_key_value("default_value", c.scalar_i32()); + break; + + case SPIRType::Float: + json_stream->emit_json_key_value("default_value", c.scalar_f32()); + break; + + case SPIRType::Boolean: + json_stream->emit_json_key_value("default_value", c.scalar() != 0); + break; + + default: + break; + } + json_stream->end_json_object(); + } + json_stream->end_json_array(); +} + +string CompilerReflection::to_member_name(const SPIRType &type, uint32_t index) const +{ + auto *type_meta = ir.find_meta(type.self); + + if (type_meta) + { + auto &memb = type_meta->members; + if (index < memb.size() && !memb[index].alias.empty()) + return memb[index].alias; + else + return join("_m", index); + } + else + return join("_m", index); +} diff --git a/dep/spirv-cross/spirv_reflect.hpp b/dep/spirv-cross/spirv_reflect.hpp new file mode 100644 index 000000000..d82c0de7d --- /dev/null +++ b/dep/spirv-cross/spirv_reflect.hpp @@ -0,0 +1,91 @@ +/* + * Copyright 2018-2020 Bradley Austin Davis + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * At your option, you may choose to accept this material under either: + * 1. The Apache License, Version 2.0, found at , or + * 2. The MIT License, found at . + * SPDX-License-Identifier: Apache-2.0 OR MIT. 
+ */
+
+#ifndef SPIRV_CROSS_REFLECT_HPP
+#define SPIRV_CROSS_REFLECT_HPP
+
+#include "spirv_glsl.hpp"
+#include <utility>
+
+namespace simple_json
+{
+class Stream;
+}
+
+namespace SPIRV_CROSS_NAMESPACE
+{
+// CompilerGLSL subclass that owns a simple_json::Stream (json_stream) and
+// declares the emit_* helpers that write to it. Every constructor forwards its
+// arguments to the parent and then sets options.vulkan_semantics to true.
+class CompilerReflection : public CompilerGLSL
+{
+	using Parent = CompilerGLSL;
+
+public:
+	// Takes the module by value and moves it into the parent.
+	explicit CompilerReflection(std::vector<uint32_t> spirv_)
+	    : Parent(std::move(spirv_))
+	{
+		options.vulkan_semantics = true;
+	}
+
+	// Forwards a pointer to 32-bit words plus a word count to the parent.
+	CompilerReflection(const uint32_t *ir_, size_t word_count)
+	    : Parent(ir_, word_count)
+	{
+		options.vulkan_semantics = true;
+	}
+
+	// Forwards an existing ParsedIR to the parent by const reference.
+	explicit CompilerReflection(const ParsedIR &ir_)
+	    : Parent(ir_)
+	{
+		options.vulkan_semantics = true;
+	}
+
+	// Forwards an existing ParsedIR to the parent as an rvalue.
+	explicit CompilerReflection(ParsedIR &&ir_)
+	    : Parent(std::move(ir_))
+	{
+		options.vulkan_semantics = true;
+	}
+
+	void set_format(const std::string &format);
+	std::string compile() override;
+
+private:
+	static std::string execution_model_to_str(spv::ExecutionModel model);
+
+	// One emitter per top-level section of the output.
+	void emit_entry_points();
+	void emit_types();
+	void emit_resources();
+	void emit_specialization_constants();
+
+	// Helpers used by the section emitters above.
+	void emit_type(uint32_t type_id, bool &emitted_open_tag);
+	void emit_type_member(const SPIRType &type, uint32_t index);
+	void emit_type_member_qualifiers(const SPIRType &type, uint32_t index);
+	void emit_type_array(const SPIRType &type);
+	void emit_resources(const char *tag, const SmallVector<Resource> &resources);
+	bool type_is_reference(const SPIRType &type) const;
+
+	std::string to_member_name(const SPIRType &type, uint32_t index) const;
+
+	// Output stream shared by all emit_* helpers.
+	std::shared_ptr<simple_json::Stream> json_stream;
+};
+
+} // namespace SPIRV_CROSS_NAMESPACE
+
+#endif
diff --git a/duckstation.sln b/duckstation.sln
index 4a853475c..097b92513 100644
--- a/duckstation.sln
+++ b/duckstation.sln
@@ -61,6 +61,8 @@ Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "updater", "src\updater\upda
 EndProject
 Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "vixl", "dep\vixl\vixl.vcxproj", "{8906836E-F06E-46E8-B11A-74E5E8C7B8FB}"
 EndProject
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "spirv-cross", "dep\spirv-cross\spirv-cross.vcxproj", "{9D2998E4-8CDB-47F4-B43C-1537ACECF9F2}" +EndProject Global GlobalSection(SolutionConfigurationPlatforms) = preSolution Debug|ARM64 = Debug|ARM64 @@ -729,6 +731,30 @@ Global {8906836E-F06E-46E8-B11A-74E5E8C7B8FB}.ReleaseLTCG|ARM64.Build.0 = ReleaseLTCG|ARM64 {8906836E-F06E-46E8-B11A-74E5E8C7B8FB}.ReleaseLTCG|x64.ActiveCfg = ReleaseLTCG|ARM64 {8906836E-F06E-46E8-B11A-74E5E8C7B8FB}.ReleaseLTCG|x86.ActiveCfg = ReleaseLTCG|ARM64 + {9D2998E4-8CDB-47F4-B43C-1537ACECF9F2}.Debug|ARM64.ActiveCfg = Debug|ARM64 + {9D2998E4-8CDB-47F4-B43C-1537ACECF9F2}.Debug|ARM64.Build.0 = Debug|ARM64 + {9D2998E4-8CDB-47F4-B43C-1537ACECF9F2}.Debug|x64.ActiveCfg = Debug|x64 + {9D2998E4-8CDB-47F4-B43C-1537ACECF9F2}.Debug|x64.Build.0 = Debug|x64 + {9D2998E4-8CDB-47F4-B43C-1537ACECF9F2}.Debug|x86.ActiveCfg = Debug|Win32 + {9D2998E4-8CDB-47F4-B43C-1537ACECF9F2}.Debug|x86.Build.0 = Debug|Win32 + {9D2998E4-8CDB-47F4-B43C-1537ACECF9F2}.DebugFast|ARM64.ActiveCfg = DebugFast|ARM64 + {9D2998E4-8CDB-47F4-B43C-1537ACECF9F2}.DebugFast|ARM64.Build.0 = DebugFast|ARM64 + {9D2998E4-8CDB-47F4-B43C-1537ACECF9F2}.DebugFast|x64.ActiveCfg = DebugFast|x64 + {9D2998E4-8CDB-47F4-B43C-1537ACECF9F2}.DebugFast|x64.Build.0 = DebugFast|x64 + {9D2998E4-8CDB-47F4-B43C-1537ACECF9F2}.DebugFast|x86.ActiveCfg = DebugFast|Win32 + {9D2998E4-8CDB-47F4-B43C-1537ACECF9F2}.DebugFast|x86.Build.0 = DebugFast|Win32 + {9D2998E4-8CDB-47F4-B43C-1537ACECF9F2}.Release|ARM64.ActiveCfg = Release|ARM64 + {9D2998E4-8CDB-47F4-B43C-1537ACECF9F2}.Release|ARM64.Build.0 = Release|ARM64 + {9D2998E4-8CDB-47F4-B43C-1537ACECF9F2}.Release|x64.ActiveCfg = Release|x64 + {9D2998E4-8CDB-47F4-B43C-1537ACECF9F2}.Release|x64.Build.0 = Release|x64 + {9D2998E4-8CDB-47F4-B43C-1537ACECF9F2}.Release|x86.ActiveCfg = Release|Win32 + {9D2998E4-8CDB-47F4-B43C-1537ACECF9F2}.Release|x86.Build.0 = Release|Win32 + 
{9D2998E4-8CDB-47F4-B43C-1537ACECF9F2}.ReleaseLTCG|ARM64.ActiveCfg = ReleaseLTCG|ARM64 + {9D2998E4-8CDB-47F4-B43C-1537ACECF9F2}.ReleaseLTCG|ARM64.Build.0 = ReleaseLTCG|ARM64 + {9D2998E4-8CDB-47F4-B43C-1537ACECF9F2}.ReleaseLTCG|x64.ActiveCfg = ReleaseLTCG|x64 + {9D2998E4-8CDB-47F4-B43C-1537ACECF9F2}.ReleaseLTCG|x64.Build.0 = ReleaseLTCG|x64 + {9D2998E4-8CDB-47F4-B43C-1537ACECF9F2}.ReleaseLTCG|x86.ActiveCfg = ReleaseLTCG|Win32 + {9D2998E4-8CDB-47F4-B43C-1537ACECF9F2}.ReleaseLTCG|x86.Build.0 = ReleaseLTCG|Win32 EndGlobalSection GlobalSection(SolutionProperties) = preSolution HideSolutionNode = FALSE @@ -753,6 +779,7 @@ Global {7F909E29-4808-4BD9-A60C-56C51A3AAEC2} = {BA490C0E-497D-4634-A21E-E65012006385} {9C8DDEB0-2B8F-4F5F-BA86-127CDF27F035} = {BA490C0E-497D-4634-A21E-E65012006385} {8906836E-F06E-46E8-B11A-74E5E8C7B8FB} = {BA490C0E-497D-4634-A21E-E65012006385} + {9D2998E4-8CDB-47F4-B43C-1537ACECF9F2} = {BA490C0E-497D-4634-A21E-E65012006385} EndGlobalSection GlobalSection(ExtensibilityGlobals) = postSolution SolutionGuid = {26E40B32-7C1D-48D0-95F4-1A500E054028}