diff --git a/deps/SPIRV-Cross/CMakeLists.txt b/deps/SPIRV-Cross/CMakeLists.txt index fc3e778397..aa33262ac8 100644 --- a/deps/SPIRV-Cross/CMakeLists.txt +++ b/deps/SPIRV-Cross/CMakeLists.txt @@ -287,7 +287,7 @@ if (SPIRV_CROSS_STATIC) endif() set(spirv-cross-abi-major 0) -set(spirv-cross-abi-minor 14) +set(spirv-cross-abi-minor 16) set(spirv-cross-abi-patch 0) if (SPIRV_CROSS_SHARED) @@ -457,6 +457,10 @@ if (SPIRV_CROSS_CLI) target_link_libraries(spirv-cross-msl-constexpr-test spirv-cross-c) set_target_properties(spirv-cross-msl-constexpr-test PROPERTIES LINK_FLAGS "${spirv-cross-link-flags}") + add_executable(spirv-cross-msl-resource-binding-test tests-other/msl_resource_bindings.cpp) + target_link_libraries(spirv-cross-msl-resource-binding-test spirv-cross-c) + set_target_properties(spirv-cross-msl-resource-binding-test PROPERTIES LINK_FLAGS "${spirv-cross-link-flags}") + if (CMAKE_COMPILER_IS_GNUCXX OR (${CMAKE_CXX_COMPILER_ID} MATCHES "Clang")) target_compile_options(spirv-cross-c-api-test PRIVATE -std=c89 -Wall -Wextra) endif() @@ -469,6 +473,8 @@ if (SPIRV_CROSS_CLI) COMMAND $) add_test(NAME spirv-cross-msl-constexpr-test COMMAND $ ${CMAKE_CURRENT_SOURCE_DIR}/tests-other/msl_constexpr_test.spv) + add_test(NAME spirv-cross-msl-resource-binding-test + COMMAND $ ${CMAKE_CURRENT_SOURCE_DIR}/tests-other/msl_resource_binding.spv) add_test(NAME spirv-cross-test COMMAND ${PYTHON_EXECUTABLE} ${CMAKE_CURRENT_SOURCE_DIR}/test_shaders.py --parallel ${spirv-cross-externals} diff --git a/deps/SPIRV-Cross/main.cpp b/deps/SPIRV-Cross/main.cpp index 5931aef3f7..c441d23581 100644 --- a/deps/SPIRV-Cross/main.cpp +++ b/deps/SPIRV-Cross/main.cpp @@ -514,6 +514,7 @@ struct CLIArguments bool msl_domain_lower_left = false; bool msl_argument_buffers = false; bool msl_texture_buffer_native = false; + bool msl_multiview = false; bool glsl_emit_push_constant_as_ubo = false; bool glsl_emit_ubo_as_plain_uniforms = false; bool emit_line_directives = false; @@ -592,6 +593,7 @@ static void print_help() "\t[--msl-argument-buffers]\n" "\t[--msl-texture-buffer-native]\n" "\t[--msl-discrete-descriptor-set ]\n" + "\t[--msl-multiview]\n" "\t[--hlsl]\n" "\t[--reflect]\n" "\t[--shader-model]\n" @@ -750,6 +752,7 @@ static string compile_iteration(const CLIArguments &args, std::vector msl_opts.tess_domain_origin_lower_left = args.msl_domain_lower_left; msl_opts.argument_buffers = args.msl_argument_buffers; msl_opts.texture_buffer_native = args.msl_texture_buffer_native; + msl_opts.multiview = args.msl_multiview; msl_comp->set_msl_options(msl_opts); for (auto &v : args.msl_discrete_descriptor_sets) msl_comp->add_discrete_descriptor_set(v); @@ -1069,6 +1072,7 @@ static int main_inner(int argc, char *argv[]) cbs.add("--msl-discrete-descriptor-set", [&args](CLIParser &parser) { args.msl_discrete_descriptor_sets.push_back(parser.next_uint()); }); cbs.add("--msl-texture-buffer-native", [&args](CLIParser &) { args.msl_texture_buffer_native = true; }); + cbs.add("--msl-multiview", [&args](CLIParser &) { args.msl_multiview = true; }); cbs.add("--extension", [&args](CLIParser &parser) { args.extensions.push_back(parser.next_string()); }); cbs.add("--rename-entry-point", [&args](CLIParser &parser) { auto old_name = parser.next_string(); diff --git a/deps/SPIRV-Cross/reference/opt/shaders-msl/vulkan/comp/subgroups.nocompat.invalid.vk.msl21.comp b/deps/SPIRV-Cross/reference/opt/shaders-msl/vulkan/comp/subgroups.nocompat.invalid.vk.msl21.comp index 908fde0457..948806db4e 100644 --- a/deps/SPIRV-Cross/reference/opt/shaders-msl/vulkan/comp/subgroups.nocompat.invalid.vk.msl21.comp +++ b/deps/SPIRV-Cross/reference/opt/shaders-msl/vulkan/comp/subgroups.nocompat.invalid.vk.msl21.comp @@ -66,7 +66,7 @@ inline bool spvSubgroupAllEqual(bool value) kernel void main0(device SSBO& _9 [[buffer(0)]], uint gl_NumSubgroups [[simdgroups_per_threadgroup]], uint gl_SubgroupID [[simdgroup_index_in_threadgroup]], uint gl_SubgroupSize [[thread_execution_width]], uint gl_SubgroupInvocationID [[thread_index_in_simdgroup]]) { - uint4 gl_SubgroupEqMask = 27 > 32 ? uint4(0, (1 << (gl_SubgroupInvocationID - 32)), uint2(0)) : uint4(1 << gl_SubgroupInvocationID, uint3(0)); + uint4 gl_SubgroupEqMask = gl_SubgroupInvocationID > 32 ? uint4(0, (1 << (gl_SubgroupInvocationID - 32)), uint2(0)) : uint4(1 << gl_SubgroupInvocationID, uint3(0)); uint4 gl_SubgroupGeMask = uint4(extract_bits(0xFFFFFFFF, min(gl_SubgroupInvocationID, 32u), (uint)max(min((int)gl_SubgroupSize, 32) - (int)gl_SubgroupInvocationID, 0)), extract_bits(0xFFFFFFFF, (uint)max((int)gl_SubgroupInvocationID - 32, 0), (uint)max((int)gl_SubgroupSize - (int)max(gl_SubgroupInvocationID, 32u), 0)), uint2(0)); uint4 gl_SubgroupGtMask = uint4(extract_bits(0xFFFFFFFF, min(gl_SubgroupInvocationID + 1, 32u), (uint)max(min((int)gl_SubgroupSize, 32) - (int)gl_SubgroupInvocationID - 1, 0)), extract_bits(0xFFFFFFFF, (uint)max((int)gl_SubgroupInvocationID + 1 - 32, 0), (uint)max((int)gl_SubgroupSize - (int)max(gl_SubgroupInvocationID + 1, 32u), 0)), uint2(0)); uint4 gl_SubgroupLeMask = uint4(extract_bits(0xFFFFFFFF, 0, min(gl_SubgroupInvocationID + 1, 32u)), extract_bits(0xFFFFFFFF, 0, (uint)max((int)gl_SubgroupInvocationID + 1 - 32, 0)), uint2(0)); diff --git a/deps/SPIRV-Cross/reference/shaders-hlsl-no-opt/asm/frag/switch-block-case-fallthrough.asm.frag b/deps/SPIRV-Cross/reference/shaders-hlsl-no-opt/asm/frag/switch-block-case-fallthrough.asm.frag index fa4953fdb2..ba66ccf626 100644 --- a/deps/SPIRV-Cross/reference/shaders-hlsl-no-opt/asm/frag/switch-block-case-fallthrough.asm.frag +++ b/deps/SPIRV-Cross/reference/shaders-hlsl-no-opt/asm/frag/switch-block-case-fallthrough.asm.frag @@ -30,16 +30,23 @@ void frag_main() case 0: { _30 = 3; + j = _30; + _31 = 0; + j = _31; + break; } default: { j = _30; _31 = 0; + j = _31; + break; } case 1: case 11: { j = _31; + break; } case 2: { @@ -59,6 +66,8 @@ void frag_main() } case 4: { + i = 0; + break; } case 5: { diff --git a/deps/SPIRV-Cross/reference/shaders-msl/vulkan/comp/subgroups.nocompat.invalid.vk.msl21.comp b/deps/SPIRV-Cross/reference/shaders-msl/vulkan/comp/subgroups.nocompat.invalid.vk.msl21.comp index e52fb209f4..4ebab8c734 100644 --- a/deps/SPIRV-Cross/reference/shaders-msl/vulkan/comp/subgroups.nocompat.invalid.vk.msl21.comp +++ b/deps/SPIRV-Cross/reference/shaders-msl/vulkan/comp/subgroups.nocompat.invalid.vk.msl21.comp @@ -66,7 +66,7 @@ inline bool spvSubgroupAllEqual(bool value) kernel void main0(device SSBO& _9 [[buffer(0)]], uint gl_NumSubgroups [[simdgroups_per_threadgroup]], uint gl_SubgroupID [[simdgroup_index_in_threadgroup]], uint gl_SubgroupSize [[thread_execution_width]], uint gl_SubgroupInvocationID [[thread_index_in_simdgroup]]) { - uint4 gl_SubgroupEqMask = 27 > 32 ? uint4(0, (1 << (gl_SubgroupInvocationID - 32)), uint2(0)) : uint4(1 << gl_SubgroupInvocationID, uint3(0)); + uint4 gl_SubgroupEqMask = gl_SubgroupInvocationID > 32 ? uint4(0, (1 << (gl_SubgroupInvocationID - 32)), uint2(0)) : uint4(1 << gl_SubgroupInvocationID, uint3(0)); uint4 gl_SubgroupGeMask = uint4(extract_bits(0xFFFFFFFF, min(gl_SubgroupInvocationID, 32u), (uint)max(min((int)gl_SubgroupSize, 32) - (int)gl_SubgroupInvocationID, 0)), extract_bits(0xFFFFFFFF, (uint)max((int)gl_SubgroupInvocationID - 32, 0), (uint)max((int)gl_SubgroupSize - (int)max(gl_SubgroupInvocationID, 32u), 0)), uint2(0)); uint4 gl_SubgroupGtMask = uint4(extract_bits(0xFFFFFFFF, min(gl_SubgroupInvocationID + 1, 32u), (uint)max(min((int)gl_SubgroupSize, 32) - (int)gl_SubgroupInvocationID - 1, 0)), extract_bits(0xFFFFFFFF, (uint)max((int)gl_SubgroupInvocationID + 1 - 32, 0), (uint)max((int)gl_SubgroupSize - (int)max(gl_SubgroupInvocationID + 1, 32u), 0)), uint2(0)); uint4 gl_SubgroupLeMask = uint4(extract_bits(0xFFFFFFFF, 0, min(gl_SubgroupInvocationID + 1, 32u)), extract_bits(0xFFFFFFFF, 0, (uint)max((int)gl_SubgroupInvocationID + 1 - 32, 0)), uint2(0)); diff --git a/deps/SPIRV-Cross/spirv_cfg.cpp b/deps/SPIRV-Cross/spirv_cfg.cpp index cc1db1c737..ed31f23679 100644 --- a/deps/SPIRV-Cross/spirv_cfg.cpp +++ b/deps/SPIRV-Cross/spirv_cfg.cpp @@ -74,8 +74,14 @@ bool CFG::is_back_edge(uint32_t to) const // We have a back edge if the visit order is set with the temporary magic value 0. // Crossing edges will have already been recorded with a visit order. auto itr = visit_order.find(to); - assert(itr != end(visit_order)); - return itr->second.get() == 0; + return itr != end(visit_order) && itr->second.get() == 0; +} + +bool CFG::has_visited_forward_edge(uint32_t to) const +{ + // If > 0, we have visited the edge already, and this is not a back edge branch. + auto itr = visit_order.find(to); + return itr != end(visit_order) && itr->second.get() > 0; } bool CFG::post_order_visit(uint32_t block_id) @@ -83,8 +89,10 @@ bool CFG::post_order_visit(uint32_t block_id) // If we have already branched to this block (back edge), stop recursion. // If our branches are back-edges, we do not record them. // We have to record crossing edges however. - if (visit_order[block_id].get() >= 0) - return !is_back_edge(block_id); + if (has_visited_forward_edge(block_id)) + return true; + else if (is_back_edge(block_id)) + return false; // Block back-edges from recursively revisiting ourselves. visit_order[block_id].get() = 0; @@ -123,9 +131,39 @@ bool CFG::post_order_visit(uint32_t block_id) // This is needed to avoid annoying cases with do { ... } while(false) loops often generated by inliners. // To the CFG, this is linear control flow, but we risk picking the do/while scope as our dominating block. // This makes sure that if we are accessing a variable outside the do/while, we choose the loop header as dominator. - if (block.merge == SPIRBlock::MergeLoop) - if (post_order_visit(block.merge_block)) - add_branch(block_id, block.merge_block); + // We could use has_visited_forward_edge, but this break code-gen where the merge block is unreachable in the CFG. + if (block.merge == SPIRBlock::MergeLoop && post_order_visit(block.merge_block)) + add_branch(block_id, block.merge_block); + + // If this is a selection merge, add an implied branch to the merge target. + // This is needed to avoid cases where an inner branch dominates the outer branch. + // This can happen if one of the branches exit early, e.g.: + // if (cond) { ...; break; } else { var = 100 } use_var(var); + // We can use the variable without a Phi since there is only one possible parent here. + // However, in this case, we need to hoist out the inner variable to outside the branch. + // Use same strategy as loops. + if (block.merge == SPIRBlock::MergeSelection && post_order_visit(block.next_block)) + { + // If there is only one preceding edge to the merge block and it's not ourselves, we need a fixup. + // Add a fake branch so any dominator in either the if (), or else () block, or a lone case statement + // will be hoisted out to outside the selection merge. + // If size > 1, the variable will be automatically hoisted, so we should not mess with it. + // Adding fake branches unconditionally breaks parameter preservation analysis, + // which looks at how variables are accessed through the CFG. + auto pred_itr = preceding_edges.find(block.next_block); + if (pred_itr != end(preceding_edges)) + { + auto &pred = pred_itr->second; + if (pred.size() == 1 && *pred.begin() != block_id) + add_branch(block_id, block.next_block); + } + else + { + // If the merge block does not have any preceding edges, i.e. unreachable, hallucinate it. + // We're going to do code-gen for it, and domination analysis requires that we have at least one preceding edge. + add_branch(block_id, block.next_block); + } + } // Then visit ourselves. Start counting at one, to let 0 be a magic value for testing back vs. crossing edges. visit_order[block_id].get() = ++visit_count; diff --git a/deps/SPIRV-Cross/spirv_cfg.hpp b/deps/SPIRV-Cross/spirv_cfg.hpp index 2262eb79b0..7d07d48410 100644 --- a/deps/SPIRV-Cross/spirv_cfg.hpp +++ b/deps/SPIRV-Cross/spirv_cfg.hpp @@ -127,6 +127,7 @@ private: uint32_t visit_count = 0; bool is_back_edge(uint32_t to) const; + bool has_visited_forward_edge(uint32_t to) const; }; class DominatorBuilder diff --git a/deps/SPIRV-Cross/spirv_common.hpp b/deps/SPIRV-Cross/spirv_common.hpp index 1f9bce681e..d7a5f32eca 100644 --- a/deps/SPIRV-Cross/spirv_common.hpp +++ b/deps/SPIRV-Cross/spirv_common.hpp @@ -476,6 +476,7 @@ struct SPIRExtension : IVariant { Unsupported, GLSL, + SPV_debug_info, SPV_AMD_shader_ballot, SPV_AMD_shader_explicit_vertex_parameter, SPV_AMD_shader_trinary_minmax, diff --git a/deps/SPIRV-Cross/spirv_cross.cpp b/deps/SPIRV-Cross/spirv_cross.cpp index 4011c849bf..9fdfd1f00c 100644 --- a/deps/SPIRV-Cross/spirv_cross.cpp +++ b/deps/SPIRV-Cross/spirv_cross.cpp @@ -3079,6 +3079,7 @@ bool Compiler::AnalyzeVariableScopeAccessHandler::handle(spv::Op op, const uint3 case OpArrayLength: case OpLine: + case OpNoLine: // Uses literals, but cannot be a phi variable or temporary, so ignore. break; @@ -4266,6 +4267,7 @@ bool Compiler::instruction_to_result_type(uint32_t &result_type, uint32_t &resul case OpGroupCommitReadPipe: case OpGroupCommitWritePipe: case OpLine: + case OpNoLine: return false; default: diff --git a/deps/SPIRV-Cross/spirv_cross.hpp b/deps/SPIRV-Cross/spirv_cross.hpp index ccd7c04a38..c2dc4ea61b 100644 --- a/deps/SPIRV-Cross/spirv_cross.hpp +++ b/deps/SPIRV-Cross/spirv_cross.hpp @@ -258,7 +258,7 @@ public: size_t get_declared_struct_size_runtime_array(const SPIRType &struct_type, size_t array_size) const; // Returns the effective size of a buffer block struct member. - virtual size_t get_declared_struct_member_size(const SPIRType &struct_type, uint32_t index) const; + size_t get_declared_struct_member_size(const SPIRType &struct_type, uint32_t index) const; // Returns a set of all global variables which are statically accessed // by the control flow graph from the current entry point. diff --git a/deps/SPIRV-Cross/spirv_cross_c.cpp b/deps/SPIRV-Cross/spirv_cross_c.cpp index 8f68a53886..8048274f00 100644 --- a/deps/SPIRV-Cross/spirv_cross_c.cpp +++ b/deps/SPIRV-Cross/spirv_cross_c.cpp @@ -1623,6 +1623,16 @@ spvc_result spvc_compiler_get_declared_struct_size_runtime_array(spvc_compiler c return SPVC_SUCCESS; } +spvc_result spvc_compiler_get_declared_struct_member_size(spvc_compiler compiler, spvc_type struct_type, unsigned index, size_t *size) +{ + SPVC_BEGIN_SAFE_SCOPE + { + *size = compiler->compiler->get_declared_struct_member_size(*static_cast(struct_type), index); + } + SPVC_END_SAFE_SCOPE(compiler->context, SPVC_ERROR_INVALID_ARGUMENT) + return SPVC_SUCCESS; +} + spvc_result spvc_compiler_type_struct_member_offset(spvc_compiler compiler, spvc_type type, unsigned index, unsigned *offset) { SPVC_BEGIN_SAFE_SCOPE diff --git a/deps/SPIRV-Cross/spirv_cross_c.h b/deps/SPIRV-Cross/spirv_cross_c.h index afa5a832da..6efaf8543e 100644 --- a/deps/SPIRV-Cross/spirv_cross_c.h +++ b/deps/SPIRV-Cross/spirv_cross_c.h @@ -33,7 +33,7 @@ extern "C" { /* Bumped if ABI or API breaks backwards compatibility. */ #define SPVC_C_API_VERSION_MAJOR 0 /* Bumped if APIs or enumerations are added in a backwards compatible way. */ -#define SPVC_C_API_VERSION_MINOR 14 +#define SPVC_C_API_VERSION_MINOR 16 /* Bumped if internal implementation details change. */ #define SPVC_C_API_VERSION_PATCH 0 @@ -303,6 +303,7 @@ SPVC_PUBLIC_API void spvc_msl_resource_binding_init(spvc_msl_resource_binding *b #define SPVC_MSL_PUSH_CONSTANT_BINDING (0) #define SPVC_MSL_SWIZZLE_BUFFER_BINDING (~(1u)) #define SPVC_MSL_BUFFER_SIZE_BUFFER_BINDING (~(2u)) +#define SPVC_MSL_ARGUMENT_BUFFER_BINDING (~(3u)) /* Obsolete. Sticks around for backwards compatibility. */ #define SPVC_MSL_AUX_BUFFER_STRUCT_VERSION 1 @@ -659,6 +660,7 @@ SPVC_PUBLIC_API SpvAccessQualifier spvc_type_get_image_access_qualifier(spvc_typ SPVC_PUBLIC_API spvc_result spvc_compiler_get_declared_struct_size(spvc_compiler compiler, spvc_type struct_type, size_t *size); SPVC_PUBLIC_API spvc_result spvc_compiler_get_declared_struct_size_runtime_array(spvc_compiler compiler, spvc_type struct_type, size_t array_size, size_t *size); +SPVC_PUBLIC_API spvc_result spvc_compiler_get_declared_struct_member_size(spvc_compiler compiler, spvc_type type, unsigned index, size_t *size); SPVC_PUBLIC_API spvc_result spvc_compiler_type_struct_member_offset(spvc_compiler compiler, spvc_type type, unsigned index, unsigned *offset); diff --git a/deps/SPIRV-Cross/spirv_glsl.cpp b/deps/SPIRV-Cross/spirv_glsl.cpp index 44ccfc0540..c207eebf4f 100644 --- a/deps/SPIRV-Cross/spirv_glsl.cpp +++ b/deps/SPIRV-Cross/spirv_glsl.cpp @@ -832,8 +832,6 @@ void CompilerGLSL::emit_struct(SPIRType &type) string CompilerGLSL::to_interpolation_qualifiers(const Bitset &flags) { string res; - if (flags.get(DecorationNonUniformEXT)) - res += "nonuniformEXT "; //if (flags & (1ull << DecorationSmooth)) // res += "smooth "; if (flags.get(DecorationFlat)) @@ -4602,7 +4600,7 @@ void CompilerGLSL::emit_texture_op(const Instruction &i) SmallVector inherited_expressions; - uint32_t result_type = ops[0]; + uint32_t result_type_id = ops[0]; uint32_t id = ops[1]; uint32_t img = ops[2]; uint32_t coord = ops[3]; @@ -4613,8 +4611,14 @@ void CompilerGLSL::emit_texture_op(const Instruction &i) bool fetch = false; const uint32_t *opt = nullptr; + auto &result_type = get(result_type_id); + inherited_expressions.push_back(coord); + // Make sure non-uniform decoration is back-propagated to where it needs to be. + if (has_decoration(img, DecorationNonUniformEXT)) + propagate_nonuniform_qualifier(img); + switch (op) { case OpImageSampleDrefImplicitLod: @@ -4771,14 +4775,21 @@ void CompilerGLSL::emit_texture_op(const Instruction &i) image_is_depth = true; if (image_is_depth) - expr = remap_swizzle(get(result_type), 1, expr); + expr = remap_swizzle(result_type, 1, expr); + } + + if (!backend.support_small_type_sampling_result && result_type.width < 32) + { + // Just value cast (narrowing) to expected type since we cannot rely on narrowing to work automatically. + // Hopefully compiler picks this up and converts the texturing instruction to the appropriate precision. + expr = join(type_to_glsl_constructor(result_type), "(", expr, ")"); } // Deals with reads from MSL. We might need to downconvert to fewer components. if (op == OpImageRead) - expr = remap_swizzle(get(result_type), 4, expr); + expr = remap_swizzle(result_type, 4, expr); - emit_op(result_type, id, expr, forward); + emit_op(result_type_id, id, expr, forward); for (auto &inherit : inherited_expressions) inherit_expression_dependencies(id, inherit); @@ -7107,18 +7118,23 @@ string CompilerGLSL::variable_decl_function_local(SPIRVariable &var) return expr; } +void CompilerGLSL::emit_variable_temporary_copies(const SPIRVariable &var) +{ + if (var.allocate_temporary_copy) + { + auto &type = get(var.basetype); + auto &flags = get_decoration_bitset(var.self); + statement(flags_to_qualifiers_glsl(type, flags), variable_decl(type, join("_", var.self, "_copy")), ";"); + } +} + void CompilerGLSL::flush_variable_declaration(uint32_t id) { auto *var = maybe_get(id); if (var && var->deferred_declaration) { statement(variable_decl_function_local(*var), ";"); - if (var->allocate_temporary_copy) - { - auto &type = get(var->basetype); - auto &flags = ir.meta[id].decoration.decoration_flags; - statement(flags_to_qualifiers_glsl(type, flags), variable_decl(type, join("_", id, "_copy")), ";"); - } + emit_variable_temporary_copies(*var); var->deferred_declaration = false; } } @@ -7553,8 +7569,13 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction) unroll_array_from_complex_load(id, ptr, expr); auto &type = get(result_type); - if (has_decoration(id, DecorationNonUniformEXT)) + // Shouldn't need to check for ID, but current glslang codegen requires it in some cases + // when loading Image/Sampler descriptors. It does not hurt to check ID as well. + if (has_decoration(id, DecorationNonUniformEXT) || has_decoration(ptr, DecorationNonUniformEXT)) + { + propagate_nonuniform_qualifier(ptr); convert_non_uniform_expression(type, expr); + } if (ptr_expression) ptr_expression->need_transpose = old_need_transpose; @@ -7636,6 +7657,9 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction) { auto *var = maybe_get(ops[0]); + if (has_decoration(ops[0], DecorationNonUniformEXT)) + propagate_nonuniform_qualifier(ops[0]); + if (var && var->statically_assigned) var->static_expression = ops[1]; else if (var && var->loop_variable && !var->loop_variable_enable) @@ -8018,7 +8042,14 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction) uint32_t rhs = ops[2]; bool pointer = get(result_type).pointer; - if (expression_is_lvalue(rhs) && !pointer) + auto *chain = maybe_get(rhs); + if (chain) + { + // Cannot lower to a SPIRExpression, just copy the object. + auto &e = set(id, *chain); + e.self = id; + } + else if (expression_is_lvalue(rhs) && !pointer) { // Need a copy. // For pointer types, we copy the pointer itself. @@ -8037,6 +8068,11 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction) auto *var = maybe_get_backing_variable(rhs); e.loaded_from = var ? var->self : 0; } + + // If we're copying an access chain, need to inherit the read expressions. + auto *rhs_expr = maybe_get(rhs); + if (rhs_expr) + e.implied_read_expressions = rhs_expr->implied_read_expressions; } break; } @@ -8971,6 +9007,8 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction) uint32_t result_type = ops[0]; uint32_t id = ops[1]; emit_sampled_image_op(result_type, id, ops[2], ops[3]); + inherit_expression_dependencies(id, ops[2]); + inherit_expression_dependencies(id, ops[3]); break; } @@ -9392,6 +9430,10 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction) { emit_spv_amd_gcn_shader_op(ops[0], ops[1], ops[3], &ops[4], length - 4); } + else if (get(extension_set).ext == SPIRExtension::SPV_debug_info) + { + break; // Ignore SPIR-V debug information extended instructions. + } else { statement("// unimplemented ext op ", instruction.op); @@ -9658,6 +9700,9 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction) break; } + case OpNoLine: + break; + default: statement("// unimplemented op ", instruction.op); break; @@ -9873,7 +9918,16 @@ const char *CompilerGLSL::flags_to_qualifiers_glsl(const SPIRType &type, const B const char *CompilerGLSL::to_precision_qualifiers_glsl(uint32_t id) { - return flags_to_qualifiers_glsl(expression_type(id), ir.meta[id].decoration.decoration_flags); + auto &type = expression_type(id); + bool use_precision_qualifiers = backend.allow_precision_qualifiers || options.es; + if (use_precision_qualifiers && (type.basetype == SPIRType::Image || type.basetype == SPIRType::SampledImage)) + { + // Force mediump for the sampler type. We cannot declare 16-bit or smaller image types. + auto &result_type = get(type.image.type); + if (result_type.width < 32) + return "mediump "; + } + return flags_to_qualifiers_glsl(type, ir.meta[id].decoration.decoration_flags); } string CompilerGLSL::to_qualifiers_glsl(uint32_t id) @@ -10089,15 +10143,22 @@ string CompilerGLSL::image_type_glsl(const SPIRType &type, uint32_t id) switch (imagetype.basetype) { case SPIRType::Int: + case SPIRType::Short: + case SPIRType::SByte: res = "i"; break; case SPIRType::UInt: + case SPIRType::UShort: + case SPIRType::UByte: res = "u"; break; default: break; } + // For half image types, we will force mediump for the sampler, and cast to f16 after any sampling operation. + // We cannot express a true half texture type in GLSL. Neither for short integer formats for that matter. + if (type.basetype == SPIRType::Image && type.image.dim == DimSubpassData && options.vulkan_semantics) return res + "subpassInput" + (type.image.ms ? "MS" : ""); @@ -11189,7 +11250,10 @@ bool CompilerGLSL::attempt_emit_loop_header(SPIRBlock &block, SPIRBlock::Method } default: - SPIRV_CROSS_THROW("For/while loop detected, but need while/for loop semantics."); + block.disable_block_optimization = true; + force_recompile(); + begin_scope(); // We'll see an end_scope() later. + return false; } begin_scope(); @@ -11263,7 +11327,10 @@ bool CompilerGLSL::attempt_emit_loop_header(SPIRBlock &block, SPIRBlock::Method } default: - SPIRV_CROSS_THROW("For/while loop detected, but need while/for loop semantics."); + block.disable_block_optimization = true; + force_recompile(); + begin_scope(); // We'll see an end_scope() later. + return false; } begin_scope(); @@ -11325,8 +11392,13 @@ void CompilerGLSL::emit_block_chain(SPIRBlock &block) continue_type = continue_block_type(get(block.continue_block)); // If we have loop variables, stop masking out access to the variable now. - for (auto var : block.loop_variables) - get(var).loop_variable_enable = true; + for (auto var_id : block.loop_variables) + { + auto &var = get(var_id); + var.loop_variable_enable = true; + // We're not going to declare the variable directly, so emit a copy here. + emit_variable_temporary_copies(var); + } // Remember deferred declaration state. We will restore it before returning. SmallVector rearm_dominated_variables(block.dominated_variables.size()); @@ -11667,7 +11739,7 @@ void CompilerGLSL::emit_block_chain(SPIRBlock &block) } auto &case_block = get(target_block); - if (i + 1 < num_blocks && + if (backend.support_case_fallthrough && i + 1 < num_blocks && execution_is_direct_branch(case_block, get(block_declaration_order[i + 1]))) { // We will fall through here, so just terminate the block chain early. @@ -12190,3 +12262,37 @@ void CompilerGLSL::emit_line_directive(uint32_t file_id, uint32_t line_literal) statement_no_indent("#line ", line_literal, " \"", get(file_id).str, "\""); } } + +void CompilerGLSL::propagate_nonuniform_qualifier(uint32_t id) +{ + // SPIR-V might only tag the very last ID with NonUniformEXT, but for codegen, + // we need to know NonUniformEXT a little earlier, when the resource is actually loaded. + // Back-propagate the qualifier based on the expression dependency chain. + + if (!has_decoration(id, DecorationNonUniformEXT)) + { + set_decoration(id, DecorationNonUniformEXT); + force_recompile(); + } + + auto *e = maybe_get(id); + auto *combined = maybe_get(id); + auto *chain = maybe_get(id); + if (e) + { + for (auto &expr : e->expression_dependencies) + propagate_nonuniform_qualifier(expr); + for (auto &expr : e->implied_read_expressions) + propagate_nonuniform_qualifier(expr); + } + else if (combined) + { + propagate_nonuniform_qualifier(combined->image); + propagate_nonuniform_qualifier(combined->sampler); + } + else if (chain) + { + for (auto &expr : chain->implied_read_expressions) + propagate_nonuniform_qualifier(expr); + } +} diff --git a/deps/SPIRV-Cross/spirv_glsl.hpp b/deps/SPIRV-Cross/spirv_glsl.hpp index 45a8654c52..a6fac92662 100644 --- a/deps/SPIRV-Cross/spirv_glsl.hpp +++ b/deps/SPIRV-Cross/spirv_glsl.hpp @@ -404,6 +404,8 @@ protected: bool array_is_value_type = true; bool comparison_image_samples_scalar = false; bool native_pointers = false; + bool support_small_type_sampling_result = false; + bool support_case_fallthrough = true; } backend; void emit_struct(SPIRType &type); @@ -432,6 +434,7 @@ protected: bool flush_phi_required(uint32_t from, uint32_t to); void flush_variable_declaration(uint32_t id); void flush_undeclared_variables(SPIRBlock &block); + void emit_variable_temporary_copies(const SPIRVariable &var); bool should_dereference(uint32_t id); bool should_forward(uint32_t id); @@ -669,6 +672,8 @@ protected: void fixup_type_alias(); void reorder_type_alias(); + void propagate_nonuniform_qualifier(uint32_t id); + private: void init(); }; diff --git a/deps/SPIRV-Cross/spirv_hlsl.cpp b/deps/SPIRV-Cross/spirv_hlsl.cpp index c129966fe4..57bbef8b81 100644 --- a/deps/SPIRV-Cross/spirv_hlsl.cpp +++ b/deps/SPIRV-Cross/spirv_hlsl.cpp @@ -1742,6 +1742,36 @@ void CompilerHLSL::emit_resources() end_scope(); statement(""); } + + if (requires_scalar_reflect) + { + // FP16/FP64? No templates in HLSL. + statement("float SPIRV_Cross_Reflect(float i, float n)"); + begin_scope(); + statement("return i - 2.0 * dot(n, i) * n;"); + end_scope(); + statement(""); + } + + if (requires_scalar_refract) + { + // FP16/FP64? No templates in HLSL. + statement("float SPIRV_Cross_Refract(float i, float n, float eta)"); + begin_scope(); + statement("float NoI = n * i;"); + statement("float NoI2 = NoI * NoI;"); + statement("float k = 1.0 - eta * eta * (1.0 - NoI2);"); + statement("if (k < 0.0)"); + begin_scope(); + statement("return 0.0;"); + end_scope(); + statement("else"); + begin_scope(); + statement("return eta * i - (eta * NoI + sqrt(k)) * n;"); + end_scope(); + end_scope(); + statement(""); + } } string CompilerHLSL::layout_for_member(const SPIRType &type, uint32_t index) @@ -2449,6 +2479,10 @@ void CompilerHLSL::emit_texture_op(const Instruction &i) inherited_expressions.push_back(coord); + // Make sure non-uniform decoration is back-propagated to where it needs to be. + if (has_decoration(img, DecorationNonUniformEXT)) + propagate_nonuniform_qualifier(img); + switch (op) { case OpImageSampleDrefImplicitLod: @@ -3245,6 +3279,45 @@ void CompilerHLSL::emit_glsl_op(uint32_t result_type, uint32_t id, uint32_t eop, break; } + case GLSLstd450Normalize: + // HLSL does not support scalar versions here. + if (expression_type(args[0]).vecsize == 1) + { + // Returns -1 or 1 for valid input, sign() does the job. + emit_unary_func_op(result_type, id, args[0], "sign"); + } + else + CompilerGLSL::emit_glsl_op(result_type, id, eop, args, count); + break; + + case GLSLstd450Reflect: + if (get(result_type).vecsize == 1) + { + if (!requires_scalar_reflect) + { + requires_scalar_reflect = true; + force_recompile(); + } + emit_binary_func_op(result_type, id, args[0], args[1], "SPIRV_Cross_Reflect"); + } + else + CompilerGLSL::emit_glsl_op(result_type, id, eop, args, count); + break; + + case GLSLstd450Refract: + if (get(result_type).vecsize == 1) + { + if (!requires_scalar_refract) + { + requires_scalar_refract = true; + force_recompile(); + } + emit_trinary_func_op(result_type, id, args[0], args[1], args[2], "SPIRV_Cross_Refract"); + } + else + CompilerGLSL::emit_glsl_op(result_type, id, eop, args, count); + break; + default: CompilerGLSL::emit_glsl_op(result_type, id, eop, args, count); break; @@ -3389,6 +3462,9 @@ void CompilerHLSL::emit_load(const Instruction &instruction) uint32_t id = ops[1]; uint32_t ptr = ops[2]; + if (has_decoration(ptr, DecorationNonUniformEXT)) + propagate_nonuniform_qualifier(ptr); + auto load_expr = read_access_chain(*chain); bool forward = should_forward(ptr) && forced_temporaries.find(id) == end(forced_temporaries); @@ -3422,6 +3498,9 @@ void CompilerHLSL::write_access_chain(const SPIRAccessChain &chain, uint32_t val // Make sure we trigger a read of the constituents in the access chain. track_expression_read(chain.self); + if (has_decoration(chain.self, DecorationNonUniformEXT)) + propagate_nonuniform_qualifier(chain.self); + SPIRType target_type; target_type.basetype = SPIRType::UInt; target_type.vecsize = type.vecsize; @@ -3954,6 +4033,31 @@ void CompilerHLSL::emit_instruction(const Instruction &instruction) break; } + case OpOuterProduct: + { + uint32_t result_type = ops[0]; + uint32_t id = ops[1]; + uint32_t a = ops[2]; + uint32_t b = ops[3]; + + auto &type = get(result_type); + string expr = type_to_glsl_constructor(type); + expr += "("; + for (uint32_t col = 0; col < type.columns; col++) + { + expr += to_enclosed_expression(a); + expr += " * "; + expr += to_extract_component_expression(b, col); + if (col + 1 < type.columns) + expr += ", "; + } + expr += ")"; + emit_op(result_type, id, expr, should_forward(a) && should_forward(b)); + inherit_expression_dependencies(id, a); + inherit_expression_dependencies(id, b); + break; + } + case OpFMod: { if (!requires_op_fmod) @@ -4700,6 +4804,7 @@ string CompilerHLSL::compile() backend.can_declare_arrays_inline = false; backend.can_return_array = false; backend.nonuniform_qualifier = "NonUniformResourceIndex"; + backend.support_case_fallthrough = false; fixup_type_alias(); reorder_type_alias(); diff --git a/deps/SPIRV-Cross/spirv_hlsl.hpp b/deps/SPIRV-Cross/spirv_hlsl.hpp index d96c911f83..796f502c5d 100644 --- a/deps/SPIRV-Cross/spirv_hlsl.hpp +++ b/deps/SPIRV-Cross/spirv_hlsl.hpp @@ -167,6 +167,8 @@ private: void replace_illegal_names() override; Options hlsl_options; + + // TODO: Refactor this to be more similar to MSL, maybe have some common system in place? bool requires_op_fmod = false; bool requires_fp16_packing = false; bool requires_explicit_fp16_packing = false; @@ -179,6 +181,8 @@ private: bool requires_inverse_2x2 = false; bool requires_inverse_3x3 = false; bool requires_inverse_4x4 = false; + bool requires_scalar_reflect = false; + bool requires_scalar_refract = false; uint64_t required_textureSizeVariants = 0; void require_texture_query_variant(const SPIRType &type); diff --git a/deps/SPIRV-Cross/spirv_msl.cpp b/deps/SPIRV-Cross/spirv_msl.cpp index b3097ab826..e73d7cc47d 100644 --- a/deps/SPIRV-Cross/spirv_msl.cpp +++ b/deps/SPIRV-Cross/spirv_msl.cpp @@ -105,8 +105,10 @@ void CompilerMSL::build_implicit_builtins() active_input_builtins.get(BuiltInSubgroupLtMask); bool need_subgroup_ge_mask = !msl_options.is_ios() && (active_input_builtins.get(BuiltInSubgroupGeMask) || active_input_builtins.get(BuiltInSubgroupGtMask)); + bool need_multiview = get_execution_model() == ExecutionModelVertex && + (msl_options.multiview || active_input_builtins.get(BuiltInViewIndex)); if (need_subpass_input || need_sample_pos || need_subgroup_mask || need_vertex_params || need_tesc_params || - needs_subgroup_invocation_id) + need_multiview || needs_subgroup_invocation_id) { bool has_frag_coord = false; bool has_sample_id = false; @@ -118,6 +120,7 @@ void CompilerMSL::build_implicit_builtins() bool has_primitive_id = false; bool has_subgroup_invocation_id = false; bool has_subgroup_size = false; + bool has_view_idx = false; ir.for_each_typed_id([&](uint32_t, SPIRVariable &var) { if (var.storage != StorageClassInput || !ir.meta[var.self].decoration.builtin) @@ -189,6 +192,22 @@ void CompilerMSL::build_implicit_builtins() builtin_subgroup_size_id = var.self; has_subgroup_size = true; } + + if (need_multiview) + { + if (builtin == BuiltInInstanceIndex) + { + // The view index here is derived from the instance index. + builtin_instance_idx_id = var.self; + has_instance_idx = true; + } + + if (builtin == BuiltInViewIndex) + { + builtin_view_idx_id = var.self; + has_view_idx = true; + } + } }); if (!has_frag_coord && need_subpass_input) @@ -246,7 +265,8 @@ void CompilerMSL::build_implicit_builtins() mark_implicit_builtin(StorageClassInput, BuiltInSampleId, var_id); } - if (need_vertex_params && (!has_vertex_idx || !has_base_vertex || !has_instance_idx || !has_base_instance)) + if ((need_vertex_params && (!has_vertex_idx || !has_base_vertex || !has_instance_idx || !has_base_instance)) || + (need_multiview && (!has_instance_idx || !has_view_idx))) { uint32_t offset = ir.increase_bound_by(2); uint32_t type_id = offset; @@ -265,7 +285,7 @@ void CompilerMSL::build_implicit_builtins() auto &ptr_type = set(type_ptr_id, uint_type_ptr); ptr_type.self = type_id; - if (!has_vertex_idx) + if (need_vertex_params && !has_vertex_idx) { uint32_t var_id = ir.increase_bound_by(1); @@ -276,7 +296,7 @@ void CompilerMSL::build_implicit_builtins() mark_implicit_builtin(StorageClassInput, BuiltInVertexIndex, var_id); } - if (!has_base_vertex) + if (need_vertex_params && !has_base_vertex) { uint32_t var_id = ir.increase_bound_by(1); @@ -287,7 +307,7 @@ void CompilerMSL::build_implicit_builtins() mark_implicit_builtin(StorageClassInput, BuiltInBaseVertex, var_id); } - if (!has_instance_idx) + if (!has_instance_idx) // Needed by both multiview and tessellation { uint32_t var_id = ir.increase_bound_by(1); @@ -296,9 +316,30 @@ void CompilerMSL::build_implicit_builtins() set_decoration(var_id, DecorationBuiltIn, BuiltInInstanceIndex); builtin_instance_idx_id = var_id; mark_implicit_builtin(StorageClassInput, BuiltInInstanceIndex, var_id); + + if (need_multiview) + { + // Multiview shaders are not allowed to write to gl_Layer, ostensibly because + // it is implicitly written from gl_ViewIndex, but we have to do that explicitly. + // Note that we can't just abuse gl_ViewIndex for this purpose: it's an input, but + // gl_Layer is an output in vertex-pipeline shaders. + uint32_t type_ptr_out_id = ir.increase_bound_by(2); + SPIRType uint_type_ptr_out; + uint_type_ptr_out = uint_type; + uint_type_ptr_out.pointer = true; + uint_type_ptr_out.parent_type = type_id; + uint_type_ptr_out.storage = StorageClassOutput; + auto &ptr_out_type = set(type_ptr_out_id, uint_type_ptr_out); + ptr_out_type.self = type_id; + var_id = type_ptr_out_id + 1; + set(var_id, type_ptr_out_id, StorageClassOutput); + set_decoration(var_id, DecorationBuiltIn, BuiltInLayer); + builtin_layer_id = var_id; + mark_implicit_builtin(StorageClassOutput, BuiltInLayer, var_id); + } } - if (!has_base_instance) + if (need_vertex_params && !has_base_instance) { uint32_t var_id = ir.increase_bound_by(1); @@ -308,6 +349,17 @@ void CompilerMSL::build_implicit_builtins() builtin_base_instance_id = var_id; mark_implicit_builtin(StorageClassInput, BuiltInBaseInstance, var_id); } + + if (need_multiview && !has_view_idx) + { + uint32_t var_id = ir.increase_bound_by(1); + + // Create gl_ViewIndex. + set(var_id, type_ptr_id, StorageClassInput); + set_decoration(var_id, DecorationBuiltIn, BuiltInViewIndex); + builtin_view_idx_id = var_id; + mark_implicit_builtin(StorageClassInput, BuiltInViewIndex, var_id); + } } if (need_tesc_params && (!has_invocation_id || !has_primitive_id)) @@ -428,6 +480,17 @@ void CompilerMSL::build_implicit_builtins() set_extended_decoration(var_id, SPIRVCrossDecorationResourceIndexPrimary, msl_options.buffer_size_buffer_index); buffer_size_buffer_id = var_id; } + + if (needs_view_mask_buffer()) + { + uint32_t var_id = build_constant_uint_array_pointer(); + set_name(var_id, "spvViewMask"); + // This should never match anything. + set_decoration(var_id, DecorationDescriptorSet, ~(4u)); + set_decoration(var_id, DecorationBinding, msl_options.view_mask_buffer_index); + set_extended_decoration(var_id, SPIRVCrossDecorationResourceIndexPrimary, msl_options.view_mask_buffer_index); + view_mask_buffer_id = var_id; + } } void CompilerMSL::mark_implicit_builtin(StorageClass storage, BuiltIn builtin, uint32_t id) @@ -708,10 +771,15 @@ string CompilerMSL::compile() backend.comparison_image_samples_scalar = true; backend.native_pointers = true; backend.nonuniform_qualifier = ""; + backend.support_small_type_sampling_result = true; capture_output_to_buffer = msl_options.capture_output_to_buffer; is_rasterization_disabled = msl_options.disable_rasterization || capture_output_to_buffer; + // Initialize array here rather than constructor, MSVC 2013 workaround. + for (auto &id : next_metal_resource_ids) + id = 0; + fixup_type_alias(); replace_illegal_names(); @@ -731,6 +799,10 @@ string CompilerMSL::compile() active_interface_variables.insert(swizzle_buffer_id); if (buffer_size_buffer_id) active_interface_variables.insert(buffer_size_buffer_id); + if (view_mask_buffer_id) + active_interface_variables.insert(view_mask_buffer_id); + if (builtin_layer_id) + active_interface_variables.insert(builtin_layer_id); // Create structs to hold input, output and uniform variables. // Do output first to ensure out. is declared at top of entry function. @@ -1020,7 +1092,6 @@ void CompilerMSL::extract_global_variables_from_function(uint32_t func_id, std:: added_out = true; } type_id = get(arg_id).basetype; - p_type = &get(type_id); uint32_t next_id = ir.increase_bound_by(1); func.add_parameter(type_id, next_id, true); set(next_id, type_id, StorageClassFunction, 0, arg_id); @@ -2370,7 +2441,7 @@ void CompilerMSL::align_struct(SPIRType &ib_type) // Increment the current offset to be positioned immediately after the current member. // Don't do this for the last member since it can be unsized, and it is not relevant for padding purposes here. if (mbr_idx + 1 < mbr_cnt) - curr_offset = mbr_offset + uint32_t(get_declared_struct_member_size(ib_type, mbr_idx)); + curr_offset = mbr_offset + uint32_t(get_declared_struct_member_size_msl(ib_type, mbr_idx)); } } @@ -3122,6 +3193,36 @@ void CompilerMSL::emit_custom_functions() statement(""); break; + case SPVFuncImplReflectScalar: + // Metal does not support scalar versions of these functions. + statement("template"); + statement("inline T spvReflect(T i, T n)"); + begin_scope(); + statement("return i - T(2) * i * n * n;"); + end_scope(); + statement(""); + break; + + case SPVFuncImplRefractScalar: + // Metal does not support scalar versions of these functions. + statement("template"); + statement("inline T spvRefract(T i, T n, T eta)"); + begin_scope(); + statement("T NoI = n * i;"); + statement("T NoI2 = NoI * NoI;"); + statement("T k = T(1) - eta * eta * (T(1) - NoI2);"); + statement("if (k < T(0))"); + begin_scope(); + statement("return T(0);"); + end_scope(); + statement("else"); + begin_scope(); + statement("return eta * i - (eta * NoI + sqrt(k)) * n;"); + end_scope(); + end_scope(); + statement(""); + break; + default: break; } @@ -4096,7 +4197,30 @@ void CompilerMSL::emit_instruction(const Instruction &instruction) break; } - // OpOuterProduct + case OpOuterProduct: + { + uint32_t result_type = ops[0]; + uint32_t id = ops[1]; + uint32_t a = ops[2]; + uint32_t b = ops[3]; + + auto &type = get(result_type); + string expr = type_to_glsl_constructor(type); + expr += "("; + for (uint32_t col = 0; col < type.columns; col++) + { + expr += to_enclosed_expression(a); + expr += " * "; + expr += to_extract_component_expression(b, col); + if (col + 1 < type.columns) + expr += ", "; + } + expr += ")"; + emit_op(result_type, id, expr, should_forward(a) && should_forward(b)); + inherit_expression_dependencies(id, a); + inherit_expression_dependencies(id, b); + break; + } case OpIAddCarry: case OpISubBorrow: @@ -4582,6 +4706,57 @@ void CompilerMSL::emit_glsl_op(uint32_t result_type, uint32_t id, uint32_t eop, // GLSLstd450InterpolateAtSample (sample_no_perspective qualifier) // GLSLstd450InterpolateAtOffset + case GLSLstd450Distance: + // MSL does not support scalar versions here. + if (expression_type(args[0]).vecsize == 1) + { + // Equivalent to length(a - b) -> abs(a - b). + emit_op(result_type, id, + join("abs(", to_unpacked_expression(args[0]), " - ", to_unpacked_expression(args[1]), ")"), + should_forward(args[0]) && should_forward(args[1])); + inherit_expression_dependencies(id, args[0]); + inherit_expression_dependencies(id, args[1]); + } + else + CompilerGLSL::emit_glsl_op(result_type, id, eop, args, count); + break; + + case GLSLstd450Length: + // MSL does not support scalar versions here. + if (expression_type(args[0]).vecsize == 1) + { + // Equivalent to abs(). + emit_unary_func_op(result_type, id, args[0], "abs"); + } + else + CompilerGLSL::emit_glsl_op(result_type, id, eop, args, count); + break; + + case GLSLstd450Normalize: + // MSL does not support scalar versions here. + if (expression_type(args[0]).vecsize == 1) + { + // Returns -1 or 1 for valid input, sign() does the job. + emit_unary_func_op(result_type, id, args[0], "sign"); + } + else + CompilerGLSL::emit_glsl_op(result_type, id, eop, args, count); + break; + + case GLSLstd450Reflect: + if (get(result_type).vecsize == 1) + emit_binary_func_op(result_type, id, args[0], args[1], "spvReflect"); + else + CompilerGLSL::emit_glsl_op(result_type, id, eop, args, count); + break; + + case GLSLstd450Refract: + if (get(result_type).vecsize == 1) + emit_trinary_func_op(result_type, id, args[0], args[1], args[2], "spvRefract"); + else + CompilerGLSL::emit_glsl_op(result_type, id, eop, args, count); + break; + default: CompilerGLSL::emit_glsl_op(result_type, id, eop, args, count); break; @@ -4766,6 +4941,21 @@ string CompilerMSL::to_function_name(uint32_t img, const SPIRType &imgtype, bool return fname; } +string CompilerMSL::convert_to_f32(const string &expr, uint32_t components) +{ + SPIRType t; + t.basetype = SPIRType::Float; + t.vecsize = components; + t.columns = 1; + return join(type_to_glsl_constructor(t), "(", expr, ")"); +} + +static inline bool sampling_type_needs_f32_conversion(const SPIRType &type) +{ + // Double is not supported to begin with, but doesn't hurt to check for completion. + return type.basetype == SPIRType::Half || type.basetype == SPIRType::Double; +} + // Returns the function args for a texture sampling function for the specified image and sampling characteristics. string CompilerMSL::to_function_args(uint32_t img, const SPIRType &imgtype, bool is_fetch, bool is_gather, bool is_proj, uint32_t coord, uint32_t, uint32_t dref, uint32_t grad_x, uint32_t grad_y, @@ -4804,6 +4994,8 @@ string CompilerMSL::to_function_args(uint32_t img, const SPIRType &imgtype, bool if (is_fetch) tex_coords = "uint(" + round_fp_tex_coords(tex_coords, coord_is_fp) + ")"; + else if (sampling_type_needs_f32_conversion(coord_type)) + tex_coords = convert_to_f32(tex_coords, 1); alt_coord_component = 1; break; @@ -4839,6 +5031,8 @@ string CompilerMSL::to_function_args(uint32_t img, const SPIRType &imgtype, bool if (is_fetch) tex_coords = "uint2(" + round_fp_tex_coords(tex_coords, coord_is_fp) + ")"; + else if (sampling_type_needs_f32_conversion(coord_type)) + tex_coords = convert_to_f32(tex_coords, 2); alt_coord_component = 2; break; @@ -4849,6 +5043,8 @@ string CompilerMSL::to_function_args(uint32_t img, const SPIRType &imgtype, bool if (is_fetch) tex_coords = "uint3(" + round_fp_tex_coords(tex_coords, coord_is_fp) + ")"; + else if (sampling_type_needs_f32_conversion(coord_type)) + tex_coords = convert_to_f32(tex_coords, 3); alt_coord_component = 3; break; @@ -4866,6 +5062,9 @@ string CompilerMSL::to_function_args(uint32_t img, const SPIRType &imgtype, bool tex_coords = enclose_expression(tex_coords) + ".xyz"; } + if (sampling_type_needs_f32_conversion(coord_type)) + tex_coords = convert_to_f32(tex_coords, 3); + alt_coord_component = 3; break; @@ -4896,7 +5095,12 @@ string CompilerMSL::to_function_args(uint32_t img, const SPIRType &imgtype, bool // If projection, use alt coord as divisor if (is_proj) - tex_coords += " / " + to_extract_component_expression(coord, alt_coord_component); + { + if (sampling_type_needs_f32_conversion(coord_type)) + tex_coords += " / " + convert_to_f32(to_extract_component_expression(coord, alt_coord_component), 1); + else + tex_coords += " / " + to_extract_component_expression(coord, alt_coord_component); + } if (!farg_str.empty()) farg_str += ", "; @@ -4930,11 +5134,19 @@ string CompilerMSL::to_function_args(uint32_t img, const SPIRType &imgtype, bool forward = forward && should_forward(dref); farg_str += ", "; + auto &dref_type = expression_type(dref); + + string dref_expr; if (is_proj) - farg_str += - to_enclosed_expression(dref) + " / " + to_extract_component_expression(coord, alt_coord_component); + dref_expr = + join(to_enclosed_expression(dref), " / ", to_extract_component_expression(coord, alt_coord_component)); else - farg_str += to_expression(dref); + dref_expr = to_expression(dref); + + if (sampling_type_needs_f32_conversion(dref_type)) + dref_expr = convert_to_f32(dref_expr, 1); + + farg_str += dref_expr; if (msl_options.is_macos() && (grad_x || grad_y)) { @@ -5626,6 +5838,10 @@ string CompilerMSL::member_attribute_qualifier(const SPIRType &type, uint32_t in { switch (builtin) { + case BuiltInViewIndex: + if (!msl_options.multiview) + break; + /* fallthrough */ case BuiltInFrontFacing: case BuiltInPointCoord: case BuiltInFragCoord: @@ -6013,7 +6229,12 @@ void CompilerMSL::entry_point_args_builtin(string &ep_args) bi_type != BuiltInClipDistance && bi_type != BuiltInCullDistance && bi_type != BuiltInSubgroupEqMask && bi_type != BuiltInBaryCoordNV && bi_type != BuiltInBaryCoordNoPerspNV && bi_type != BuiltInSubgroupGeMask && bi_type != BuiltInSubgroupGtMask && - bi_type != BuiltInSubgroupLeMask && bi_type != BuiltInSubgroupLtMask) + bi_type != BuiltInSubgroupLeMask && bi_type != BuiltInSubgroupLtMask && + ((get_execution_model() == ExecutionModelFragment && msl_options.multiview) || + bi_type != BuiltInViewIndex) && + (get_execution_model() == ExecutionModelGLCompute || + (get_execution_model() == ExecutionModelFragment && msl_options.supports_msl_version(2, 2)) || + (bi_type != BuiltInSubgroupLocalInvocationId && bi_type != BuiltInSubgroupSize))) { if (!ep_args.empty()) ep_args += ", "; @@ -6090,6 +6311,7 @@ void CompilerMSL::entry_point_args_builtin(string &ep_args) string CompilerMSL::entry_point_args_argument_buffer(bool append_comma) { string ep_args = entry_point_arg_stage_in(); + Bitset claimed_bindings; for (uint32_t i = 0; i < kMaxArgumentBuffers; i++) { @@ -6104,12 +6326,30 @@ string CompilerMSL::entry_point_args_argument_buffer(bool append_comma) if (!ep_args.empty()) ep_args += ", "; - ep_args += get_argument_address_space(var) + " " + type_to_glsl(type) + "& " + to_name(id); - ep_args += " [[buffer(" + convert_to_string(i) + ")]]"; + // Check if the argument buffer binding itself has been remapped. + uint32_t buffer_binding; + auto itr = resource_bindings.find({ get_entry_point().model, i, kArgumentBufferBinding }); + if (itr != end(resource_bindings)) + { + buffer_binding = itr->second.first.msl_buffer; + itr->second.second = true; + } + else + { + // As a fallback, directly map desc set <-> binding. + // If that was taken, take the next buffer binding. + if (claimed_bindings.get(i)) + buffer_binding = next_metal_resource_index_buffer; + else + buffer_binding = i; + } - // Makes it more practical for testing, since the push constant block can occupy the first available - // buffer slot if it's not bound explicitly. - next_metal_resource_index_buffer = i + 1; + claimed_bindings.set(buffer_binding); + + ep_args += get_argument_address_space(var) + " " + type_to_glsl(type) + "& " + to_name(id); + ep_args += " [[buffer(" + convert_to_string(buffer_binding) + ")]]"; + + next_metal_resource_index_buffer = max(next_metal_resource_index_buffer, buffer_binding + 1); } entry_point_args_discrete_descriptors(ep_args); @@ -6401,6 +6641,50 @@ void CompilerMSL::fix_up_shader_inputs_outputs() entry_func.fixup_hooks_in.push_back([=]() { statement(tc, ".y = 1.0 - ", tc, ".y;"); }); } break; + case BuiltInSubgroupLocalInvocationId: + // This is natively supported in compute shaders. + if (get_execution_model() == ExecutionModelGLCompute) + break; + + // This is natively supported in fragment shaders in MSL 2.2. + if (get_execution_model() == ExecutionModelFragment && msl_options.supports_msl_version(2, 2)) + break; + + if (msl_options.is_ios()) + SPIRV_CROSS_THROW( + "SubgroupLocalInvocationId cannot be used outside of compute shaders before MSL 2.2 on iOS."); + + if (!msl_options.supports_msl_version(2, 1)) + SPIRV_CROSS_THROW( + "SubgroupLocalInvocationId cannot be used outside of compute shaders before MSL 2.1."); + + // Shaders other than compute shaders don't support the SIMD-group + // builtins directly, but we can emulate them using the SIMD-group + // functions. This might break if some of the subgroup terminated + // before reaching the entry point. + entry_func.fixup_hooks_in.push_back([=]() { + statement(builtin_type_decl(bi_type), " ", to_expression(var_id), + " = simd_prefix_exclusive_sum(1);"); + }); + break; + case BuiltInSubgroupSize: + // This is natively supported in compute shaders. + if (get_execution_model() == ExecutionModelGLCompute) + break; + + // This is natively supported in fragment shaders in MSL 2.2. + if (get_execution_model() == ExecutionModelFragment && msl_options.supports_msl_version(2, 2)) + break; + + if (msl_options.is_ios()) + SPIRV_CROSS_THROW("SubgroupSize cannot be used outside of compute shaders on iOS."); + + if (!msl_options.supports_msl_version(2, 1)) + SPIRV_CROSS_THROW("SubgroupSize cannot be used outside of compute shaders before Metal 2.1."); + + entry_func.fixup_hooks_in.push_back( + [=]() { statement(builtin_type_decl(bi_type), " ", to_expression(var_id), " = simd_sum(1);"); }); + break; case BuiltInSubgroupEqMask: if (msl_options.is_ios()) SPIRV_CROSS_THROW("Subgroup ballot functionality is unavailable on iOS."); @@ -6408,7 +6692,7 @@ void CompilerMSL::fix_up_shader_inputs_outputs() SPIRV_CROSS_THROW("Subgroup ballot functionality requires Metal 2.1."); entry_func.fixup_hooks_in.push_back([=]() { statement(builtin_type_decl(bi_type), " ", to_expression(var_id), " = ", - builtin_subgroup_invocation_id_id, " > 32 ? uint4(0, (1 << (", + to_expression(builtin_subgroup_invocation_id_id), " > 32 ? uint4(0, (1 << (", to_expression(builtin_subgroup_invocation_id_id), " - 32)), uint2(0)) : uint4(1 << ", to_expression(builtin_subgroup_invocation_id_id), ", uint3(0));"); }); @@ -6489,6 +6773,44 @@ void CompilerMSL::fix_up_shader_inputs_outputs() to_expression(builtin_subgroup_invocation_id_id), " - 32, 0)), uint2(0));"); }); break; + case BuiltInViewIndex: + if (!msl_options.multiview) + { + // According to the Vulkan spec, when not running under a multiview + // render pass, ViewIndex is 0. + entry_func.fixup_hooks_in.push_back([=]() { + statement("const ", builtin_type_decl(bi_type), " ", to_expression(var_id), " = 0;"); + }); + } + else if (get_execution_model() == ExecutionModelFragment) + { + // Because we adjusted the view index in the vertex shader, we have to + // adjust it back here. + entry_func.fixup_hooks_in.push_back([=]() { + statement(to_expression(var_id), " += ", to_expression(view_mask_buffer_id), "[0];"); + }); + } + else if (get_execution_model() == ExecutionModelVertex) + { + // Metal provides no special support for multiview, so we smuggle + // the view index in the instance index. + entry_func.fixup_hooks_in.push_back([=]() { + statement(builtin_type_decl(bi_type), " ", to_expression(var_id), " = ", + to_expression(view_mask_buffer_id), "[0] + ", to_expression(builtin_instance_idx_id), + " % ", to_expression(view_mask_buffer_id), "[1];"); + statement(to_expression(builtin_instance_idx_id), " /= ", to_expression(view_mask_buffer_id), + "[1];"); + }); + // In addition to setting the variable itself, we also need to + // set the render_target_array_index with it on output. We have to + // offset this by the base view index, because Metal isn't in on + // our little game here. + entry_func.fixup_hooks_out.push_back([=]() { + statement(to_expression(builtin_layer_id), " = ", to_expression(var_id), " - ", + to_expression(view_mask_buffer_id), "[0];"); + }); + } + break; default: break; } @@ -7790,6 +8112,12 @@ string CompilerMSL::builtin_qualifier(BuiltIn builtin) case BuiltInSamplePosition: // Shouldn't be reached. SPIRV_CROSS_THROW("Sample position is retrieved by a function in MSL."); + case BuiltInViewIndex: + if (execution.model != ExecutionModelFragment) + SPIRV_CROSS_THROW("ViewIndex is handled specially outside fragment shaders."); + // The ViewIndex was implicitly used in the prior stages to set the render_target_array_index, + // so we can get it from there. + return "render_target_array_index"; // Fragment function out case BuiltInFragDepth: @@ -7820,7 +8148,18 @@ string CompilerMSL::builtin_qualifier(BuiltIn builtin) return "thread_index_in_threadgroup"; case BuiltInSubgroupSize: - return "thread_execution_width"; + if (execution.model == ExecutionModelFragment) + { + if (!msl_options.supports_msl_version(2, 2)) + SPIRV_CROSS_THROW("threads_per_simdgroup requires Metal 2.2 in fragment shaders."); + return "threads_per_simdgroup"; + } + else + { + // thread_execution_width is an alias for threads_per_simdgroup, and it's only available since 1.0, + // but not in fragment. + return "thread_execution_width"; + } case BuiltInNumSubgroups: if (!msl_options.supports_msl_version(2)) @@ -7833,9 +8172,18 @@ string CompilerMSL::builtin_qualifier(BuiltIn builtin) return msl_options.is_ios() ? "quadgroup_index_in_threadgroup" : "simdgroup_index_in_threadgroup"; case BuiltInSubgroupLocalInvocationId: - if (!msl_options.supports_msl_version(2)) - SPIRV_CROSS_THROW("Subgroup builtins require Metal 2.0."); - return msl_options.is_ios() ? "thread_index_in_quadgroup" : "thread_index_in_simdgroup"; + if (execution.model == ExecutionModelFragment) + { + if (!msl_options.supports_msl_version(2, 2)) + SPIRV_CROSS_THROW("thread_index_in_simdgroup requires Metal 2.2 in fragment shaders."); + return "thread_index_in_simdgroup"; + } + else + { + if (!msl_options.supports_msl_version(2)) + SPIRV_CROSS_THROW("Subgroup builtins require Metal 2.0."); + return msl_options.is_ios() ? "thread_index_in_quadgroup" : "thread_index_in_simdgroup"; + } case BuiltInSubgroupEqMask: case BuiltInSubgroupGeMask: @@ -7937,6 +8285,8 @@ string CompilerMSL::builtin_type_decl(BuiltIn builtin, uint32_t id) return "uint"; case BuiltInSamplePosition: return "float2"; + case BuiltInViewIndex: + return "uint"; // Fragment function out case BuiltInFragDepth: @@ -7992,7 +8342,7 @@ string CompilerMSL::built_in_func_arg(BuiltIn builtin, bool prefix_comma) } // Returns the byte size of a struct member. -size_t CompilerMSL::get_declared_struct_member_size(const SPIRType &struct_type, uint32_t index) const +size_t CompilerMSL::get_declared_struct_member_size_msl(const SPIRType &struct_type, uint32_t index) const { auto &type = get(struct_type.member_types[index]); @@ -8358,7 +8708,7 @@ CompilerMSL::SPVFuncImpl CompilerMSL::OpCodePreprocessor::get_spv_func_impl(Op o uint32_t extension_set = args[2]; if (compiler.get(extension_set).ext == SPIRExtension::GLSL) { - GLSLstd450 op_450 = static_cast(args[3]); + auto op_450 = static_cast(args[3]); switch (op_450) { case GLSLstd450Radians: @@ -8373,6 +8723,22 @@ CompilerMSL::SPVFuncImpl CompilerMSL::OpCodePreprocessor::get_spv_func_impl(Op o return SPVFuncImplFindUMsb; case GLSLstd450SSign: return SPVFuncImplSSign; + case GLSLstd450Reflect: + { + auto &type = compiler.get(args[0]); + if (type.vecsize == 1) + return SPVFuncImplReflectScalar; + else + return SPVFuncImplNone; + } + case GLSLstd450Refract: + { + auto &type = compiler.get(args[0]); + if (type.vecsize == 1) + return SPVFuncImplRefractScalar; + else + return SPVFuncImplNone; + } case GLSLstd450MatrixInverse: { auto &mat_type = compiler.get(args[0]); @@ -8519,6 +8885,9 @@ void CompilerMSL::bitcast_from_builtin_load(uint32_t source_id, std::string &exp case BuiltInViewportIndex: case BuiltInFragStencilRefEXT: case BuiltInPrimitiveId: + case BuiltInSubgroupSize: + case BuiltInSubgroupLocalInvocationId: + case BuiltInViewIndex: expected_type = SPIRType::UInt; break; @@ -8561,6 +8930,7 @@ void CompilerMSL::bitcast_to_builtin_store(uint32_t target_id, std::string &expr case BuiltInViewportIndex: case BuiltInFragStencilRefEXT: case BuiltInPrimitiveId: + case BuiltInViewIndex: expected_type = SPIRType::UInt; break; diff --git a/deps/SPIRV-Cross/spirv_msl.hpp b/deps/SPIRV-Cross/spirv_msl.hpp index 5a059235d6..13ed36ef9a 100644 --- a/deps/SPIRV-Cross/spirv_msl.hpp +++ b/deps/SPIRV-Cross/spirv_msl.hpp @@ -161,6 +161,12 @@ static const uint32_t kSwizzleBufferBinding = ~(1u); // element to indicate the buffer binding for buffer size buffers to support OpArrayLength. static const uint32_t kBufferSizeBufferBinding = ~(2u); +// Special constant used in a MSLResourceBinding binding +// element to indicate the buffer binding used for the argument buffer itself. +// This buffer binding should be kept as small as possible as all automatic bindings for buffers +// will start at max(kArgumentBufferBinding) + 1. +static const uint32_t kArgumentBufferBinding = ~(3u); + static const uint32_t kMaxArgumentBuffers = 8; // Decompiles SPIR-V to Metal Shading Language @@ -185,12 +191,14 @@ public: uint32_t shader_patch_output_buffer_index = 27; uint32_t shader_tess_factor_buffer_index = 26; uint32_t buffer_size_buffer_index = 25; + uint32_t view_mask_buffer_index = 24; uint32_t shader_input_wg_index = 0; bool enable_point_size_builtin = true; bool disable_rasterization = false; bool capture_output_to_buffer = false; bool swizzle_texture_samples = false; bool tess_domain_origin_lower_left = false; + bool multiview = false; // Enable use of MSL 2.0 indirect argument buffers. // MSL 2.0 must also be enabled. @@ -262,6 +270,13 @@ public: return !buffers_requiring_array_length.empty(); } + // Provide feedback to calling API to allow it to pass a buffer + // containing the view mask for the current multiview subpass. + bool needs_view_mask_buffer() const + { + return msl_options.multiview; + } + // Provide feedback to calling API to allow it to pass an output // buffer if the shader needs it. bool needs_output_buffer() const @@ -386,6 +401,8 @@ protected: SPVFuncImplSubgroupBallotFindMSB, SPVFuncImplSubgroupBallotBitCount, SPVFuncImplSubgroupAllEqual, + SPVFuncImplReflectScalar, + SPVFuncImplRefractScalar, SPVFuncImplArrayCopyMultidimMax = 6 }; @@ -406,7 +423,7 @@ protected: std::string image_type_glsl(const SPIRType &type, uint32_t id = 0) override; std::string sampler_type(const SPIRType &type); std::string builtin_to_glsl(spv::BuiltIn builtin, spv::StorageClass storage) override; - size_t get_declared_struct_member_size(const SPIRType &struct_type, uint32_t index) const override; + size_t get_declared_struct_member_size_msl(const SPIRType &struct_type, uint32_t index) const; std::string to_func_call_arg(uint32_t id) override; std::string to_name(uint32_t id, bool allow_alias = true) const override; std::string to_function_name(uint32_t img, const SPIRType &imgtype, bool is_fetch, bool is_gather, bool is_proj, @@ -520,12 +537,15 @@ protected: uint32_t builtin_base_vertex_id = 0; uint32_t builtin_instance_idx_id = 0; uint32_t builtin_base_instance_id = 0; + uint32_t builtin_view_idx_id = 0; + uint32_t builtin_layer_id = 0; uint32_t builtin_invocation_id_id = 0; uint32_t builtin_primitive_id_id = 0; uint32_t builtin_subgroup_invocation_id_id = 0; uint32_t builtin_subgroup_size_id = 0; uint32_t swizzle_buffer_id = 0; uint32_t buffer_size_buffer_id = 0; + uint32_t view_mask_buffer_id = 0; void bitcast_to_builtin_store(uint32_t target_id, std::string &expr, const SPIRType &expr_type) override; void bitcast_from_builtin_load(uint32_t source_id, std::string &expr, const SPIRType &expr_type) override; @@ -538,6 +558,8 @@ protected: void mark_implicit_builtin(spv::StorageClass storage, spv::BuiltIn builtin, uint32_t id); + std::string convert_to_f32(const std::string &expr, uint32_t components); + Options msl_options; std::set spv_function_implementations; std::unordered_map vtx_attrs_by_location; @@ -571,10 +593,12 @@ protected: }; std::unordered_map, InternalHasher> resource_bindings; + uint32_t next_metal_resource_index_buffer = 0; uint32_t next_metal_resource_index_texture = 0; uint32_t next_metal_resource_index_sampler = 0; - uint32_t next_metal_resource_ids[kMaxArgumentBuffers] = {}; + // Intentionally uninitialized, works around MSVC 2013 bug. + uint32_t next_metal_resource_ids[kMaxArgumentBuffers]; uint32_t stage_in_var_id = 0; uint32_t stage_out_var_id = 0; diff --git a/deps/SPIRV-Cross/spirv_parser.cpp b/deps/SPIRV-Cross/spirv_parser.cpp index af0b2b35d7..a271eadff3 100644 --- a/deps/SPIRV-Cross/spirv_parser.cpp +++ b/deps/SPIRV-Cross/spirv_parser.cpp @@ -162,7 +162,6 @@ void Parser::parse(const Instruction &instruction) case OpSourceContinued: case OpSourceExtension: case OpNop: - case OpNoLine: case OpModuleProcessed: break; @@ -244,6 +243,8 @@ void Parser::parse(const Instruction &instruction) auto ext = extract_string(ir.spirv, instruction.offset + 1); if (ext == "GLSL.std.450") set(id, SPIRExtension::GLSL); + else if (ext == "DebugInfo") + set(id, SPIRExtension::SPV_debug_info); else if (ext == "SPV_AMD_shader_ballot") set(id, SPIRExtension::SPV_AMD_shader_ballot); else if (ext == "SPV_AMD_shader_explicit_vertex_parameter") @@ -260,6 +261,14 @@ void Parser::parse(const Instruction &instruction) break; } + case OpExtInst: + { + // The SPIR-V debug information extended instructions might come at global scope. + if (current_block) + current_block->ops.push_back(instruction); + break; + } + case OpEntryPoint: { auto itr = @@ -1057,6 +1066,14 @@ void Parser::parse(const Instruction &instruction) break; } + case OpNoLine: + { + // OpNoLine might come at global scope. + if (current_block) + current_block->ops.push_back(instruction); + break; + } + // Actual opcodes. default: { diff --git a/deps/SPIRV-Cross/test_shaders.py b/deps/SPIRV-Cross/test_shaders.py index 2df8e59378..d2f75e8015 100755 --- a/deps/SPIRV-Cross/test_shaders.py +++ b/deps/SPIRV-Cross/test_shaders.py @@ -203,6 +203,8 @@ def cross_compile_msl(shader, spirv, opt, iterations, paths): msl_args.append('3') if '.line.' in shader: msl_args.append('--emit-line-directives') + if '.multiview.' in shader: + msl_args.append('--msl-multiview') subprocess.check_call(msl_args) diff --git a/reference/opt/shaders-hlsl/asm/frag/texture-sampling-fp16.asm.frag b/reference/opt/shaders-hlsl/asm/frag/texture-sampling-fp16.asm.frag new file mode 100644 index 0000000000..322c86bed3 --- /dev/null +++ b/reference/opt/shaders-hlsl/asm/frag/texture-sampling-fp16.asm.frag @@ -0,0 +1,29 @@ +Texture2D uTexture : register(t0); +SamplerState _uTexture_sampler : register(s0); + +static min16float4 FragColor; +static min16float2 UV; + +struct SPIRV_Cross_Input +{ + min16float2 UV : TEXCOORD0; +}; + +struct SPIRV_Cross_Output +{ + min16float4 FragColor : SV_Target0; +}; + +void frag_main() +{ + FragColor = min16float4(uTexture.Sample(_uTexture_sampler, UV)); +} + +SPIRV_Cross_Output main(SPIRV_Cross_Input stage_input) +{ + UV = stage_input.UV; + frag_main(); + SPIRV_Cross_Output stage_output; + stage_output.FragColor = FragColor; + return stage_output; +} diff --git a/reference/opt/shaders-hlsl/comp/outer-product.comp b/reference/opt/shaders-hlsl/comp/outer-product.comp new file mode 100644 index 0000000000..71613d4f15 --- /dev/null +++ b/reference/opt/shaders-hlsl/comp/outer-product.comp @@ -0,0 +1,48 @@ +RWByteAddressBuffer _21 : register(u0); +ByteAddressBuffer _26 : register(t1); + +void comp_main() +{ + float2x2 _32 = float2x2(asfloat(_26.Load2(0)) * asfloat(_26.Load2(0)).x, asfloat(_26.Load2(0)) * asfloat(_26.Load2(0)).y); + _21.Store2(0, asuint(_32[0])); + _21.Store2(8, asuint(_32[1])); + float2x3 _41 = float2x3(asfloat(_26.Load3(16)) * asfloat(_26.Load2(0)).x, asfloat(_26.Load3(16)) * asfloat(_26.Load2(0)).y); + _21.Store3(16, asuint(_41[0])); + _21.Store3(32, asuint(_41[1])); + float2x4 _50 = float2x4(asfloat(_26.Load4(32)) * asfloat(_26.Load2(0)).x, asfloat(_26.Load4(32)) * asfloat(_26.Load2(0)).y); + _21.Store4(48, asuint(_50[0])); + _21.Store4(64, asuint(_50[1])); + float3x2 _58 = float3x2(asfloat(_26.Load2(0)) * asfloat(_26.Load3(16)).x, asfloat(_26.Load2(0)) * asfloat(_26.Load3(16)).y, asfloat(_26.Load2(0)) * asfloat(_26.Load3(16)).z); + _21.Store2(80, asuint(_58[0])); + _21.Store2(88, asuint(_58[1])); + _21.Store2(96, asuint(_58[2])); + float3x3 _66 = float3x3(asfloat(_26.Load3(16)) * asfloat(_26.Load3(16)).x, asfloat(_26.Load3(16)) * asfloat(_26.Load3(16)).y, asfloat(_26.Load3(16)) * asfloat(_26.Load3(16)).z); + _21.Store3(112, asuint(_66[0])); + _21.Store3(128, asuint(_66[1])); + _21.Store3(144, asuint(_66[2])); + float3x4 _74 = float3x4(asfloat(_26.Load4(32)) * asfloat(_26.Load3(16)).x, asfloat(_26.Load4(32)) * asfloat(_26.Load3(16)).y, asfloat(_26.Load4(32)) * asfloat(_26.Load3(16)).z); + _21.Store4(160, asuint(_74[0])); + _21.Store4(176, asuint(_74[1])); + _21.Store4(192, asuint(_74[2])); + float4x2 _82 = float4x2(asfloat(_26.Load2(0)) * asfloat(_26.Load4(32)).x, asfloat(_26.Load2(0)) * asfloat(_26.Load4(32)).y, asfloat(_26.Load2(0)) * asfloat(_26.Load4(32)).z, asfloat(_26.Load2(0)) * asfloat(_26.Load4(32)).w); + _21.Store2(208, asuint(_82[0])); + _21.Store2(216, asuint(_82[1])); + _21.Store2(224, asuint(_82[2])); + _21.Store2(232, asuint(_82[3])); + float4x3 _90 = float4x3(asfloat(_26.Load3(16)) * asfloat(_26.Load4(32)).x, asfloat(_26.Load3(16)) * asfloat(_26.Load4(32)).y, asfloat(_26.Load3(16)) * asfloat(_26.Load4(32)).z, asfloat(_26.Load3(16)) * asfloat(_26.Load4(32)).w); + _21.Store3(240, asuint(_90[0])); + _21.Store3(256, asuint(_90[1])); + _21.Store3(272, asuint(_90[2])); + _21.Store3(288, asuint(_90[3])); + float4x4 _98 = float4x4(asfloat(_26.Load4(32)) * asfloat(_26.Load4(32)).x, asfloat(_26.Load4(32)) * asfloat(_26.Load4(32)).y, asfloat(_26.Load4(32)) * asfloat(_26.Load4(32)).z, asfloat(_26.Load4(32)) * asfloat(_26.Load4(32)).w); + _21.Store4(304, asuint(_98[0])); + _21.Store4(320, asuint(_98[1])); + _21.Store4(336, asuint(_98[2])); + _21.Store4(352, asuint(_98[3])); +} + +[numthreads(1, 1, 1)] +void main() +{ + comp_main(); +} diff --git a/reference/opt/shaders-hlsl/comp/scalar-std450-distance-length-normalize.comp b/reference/opt/shaders-hlsl/comp/scalar-std450-distance-length-normalize.comp new file mode 100644 index 0000000000..47f2fe4107 --- /dev/null +++ b/reference/opt/shaders-hlsl/comp/scalar-std450-distance-length-normalize.comp @@ -0,0 +1,14 @@ +RWByteAddressBuffer _9 : register(u0); + +void comp_main() +{ + _9.Store(8, asuint(distance(asfloat(_9.Load(0)), asfloat(_9.Load(4))))); + _9.Store(12, asuint(length(asfloat(_9.Load(0))))); + _9.Store(16, asuint(sign(asfloat(_9.Load(0))))); +} + +[numthreads(1, 1, 1)] +void main() +{ + comp_main(); +} diff --git a/reference/opt/shaders-hlsl/frag/for-loop-continue-control-flow.frag b/reference/opt/shaders-hlsl/frag/for-loop-continue-control-flow.frag new file mode 100644 index 0000000000..f37c1fcc4b --- /dev/null +++ b/reference/opt/shaders-hlsl/frag/for-loop-continue-control-flow.frag @@ -0,0 +1,25 @@ +static float4 FragColor; + +struct SPIRV_Cross_Output +{ + float4 FragColor : SV_Target0; +}; + +void frag_main() +{ + FragColor = 0.0f.xxxx; + for (int _43 = 0; _43 < 3; ) + { + FragColor[_43] += float(_43); + _43++; + continue; + } +} + +SPIRV_Cross_Output main() +{ + frag_main(); + SPIRV_Cross_Output stage_output; + stage_output.FragColor = FragColor; + return stage_output; +} diff --git a/reference/opt/shaders-hlsl/frag/scalar-refract-reflect.frag b/reference/opt/shaders-hlsl/frag/scalar-refract-reflect.frag new file mode 100644 index 0000000000..0fb694c543 --- /dev/null +++ b/reference/opt/shaders-hlsl/frag/scalar-refract-reflect.frag @@ -0,0 +1,49 @@ +static float FragColor; +static float3 vRefract; + +struct SPIRV_Cross_Input +{ + float3 vRefract : TEXCOORD0; +}; + +struct SPIRV_Cross_Output +{ + float FragColor : SV_Target0; +}; + +float SPIRV_Cross_Reflect(float i, float n) +{ + return i - 2.0 * dot(n, i) * n; +} + +float SPIRV_Cross_Refract(float i, float n, float eta) +{ + float NoI = n * i; + float NoI2 = NoI * NoI; + float k = 1.0 - eta * eta * (1.0 - NoI2); + if (k < 0.0) + { + return 0.0; + } + else + { + return eta * i - (eta * NoI + sqrt(k)) * n; + } +} + +void frag_main() +{ + FragColor = SPIRV_Cross_Refract(vRefract.x, vRefract.y, vRefract.z); + FragColor += SPIRV_Cross_Reflect(vRefract.x, vRefract.y); + FragColor += refract(vRefract.xy, vRefract.yz, vRefract.z).y; + FragColor += reflect(vRefract.xy, vRefract.zy).y; +} + +SPIRV_Cross_Output main(SPIRV_Cross_Input stage_input) +{ + vRefract = stage_input.vRefract; + frag_main(); + SPIRV_Cross_Output stage_output; + stage_output.FragColor = FragColor; + return stage_output; +} diff --git a/reference/opt/shaders-msl/asm/frag/texture-sampling-fp16.asm.frag b/reference/opt/shaders-msl/asm/frag/texture-sampling-fp16.asm.frag new file mode 100644 index 0000000000..97345820b6 --- /dev/null +++ b/reference/opt/shaders-msl/asm/frag/texture-sampling-fp16.asm.frag @@ -0,0 +1,22 @@ +#include +#include + +using namespace metal; + +struct main0_out +{ + half4 FragColor [[color(0)]]; +}; + +struct main0_in +{ + half2 UV [[user(locn0)]]; +}; + +fragment main0_out main0(main0_in in [[stage_in]], texture2d uTexture [[texture(0)]], sampler uTextureSmplr [[sampler(0)]]) +{ + main0_out out = {}; + out.FragColor = half4(uTexture.sample(uTextureSmplr, float2(in.UV))); + return out; +} + diff --git a/reference/opt/shaders-msl/comp/outer-product.comp b/reference/opt/shaders-msl/comp/outer-product.comp new file mode 100644 index 0000000000..8e32db392e --- /dev/null +++ b/reference/opt/shaders-msl/comp/outer-product.comp @@ -0,0 +1,38 @@ +#include +#include + +using namespace metal; + +struct SSBO +{ + float2x2 m22; + float2x3 m23; + float2x4 m24; + float3x2 m32; + float3x3 m33; + float3x4 m34; + float4x2 m42; + float4x3 m43; + float4x4 m44; +}; + +struct ReadSSBO +{ + float2 v2; + float3 v3; + float4 v4; +}; + +kernel void main0(device SSBO& _21 [[buffer(0)]], const device ReadSSBO& _26 [[buffer(1)]]) +{ + _21.m22 = float2x2(_26.v2 * _26.v2.x, _26.v2 * _26.v2.y); + _21.m23 = float2x3(_26.v3 * _26.v2.x, _26.v3 * _26.v2.y); + _21.m24 = float2x4(_26.v4 * _26.v2.x, _26.v4 * _26.v2.y); + _21.m32 = float3x2(_26.v2 * _26.v3.x, _26.v2 * _26.v3.y, _26.v2 * _26.v3.z); + _21.m33 = float3x3(_26.v3 * _26.v3.x, _26.v3 * _26.v3.y, _26.v3 * _26.v3.z); + _21.m34 = float3x4(_26.v4 * _26.v3.x, _26.v4 * _26.v3.y, _26.v4 * _26.v3.z); + _21.m42 = float4x2(_26.v2 * _26.v4.x, _26.v2 * _26.v4.y, _26.v2 * _26.v4.z, _26.v2 * _26.v4.w); + _21.m43 = float4x3(_26.v3 * _26.v4.x, _26.v3 * _26.v4.y, _26.v3 * _26.v4.z, _26.v3 * _26.v4.w); + _21.m44 = float4x4(_26.v4 * _26.v4.x, _26.v4 * _26.v4.y, _26.v4 * _26.v4.z, _26.v4 * _26.v4.w); +} + diff --git a/reference/opt/shaders-msl/comp/scalar-std450-distance-length-normalize.comp b/reference/opt/shaders-msl/comp/scalar-std450-distance-length-normalize.comp new file mode 100644 index 0000000000..312a6f9453 --- /dev/null +++ b/reference/opt/shaders-msl/comp/scalar-std450-distance-length-normalize.comp @@ -0,0 +1,21 @@ +#include +#include + +using namespace metal; + +struct SSBO +{ + float a; + float b; + float c; + float d; + float e; +}; + +kernel void main0(device SSBO& _9 [[buffer(0)]]) +{ + _9.c = abs(_9.a - _9.b); + _9.d = abs(_9.a); + _9.e = sign(_9.a); +} + diff --git a/reference/opt/shaders-msl/frag/for-loop-continue-control-flow.frag b/reference/opt/shaders-msl/frag/for-loop-continue-control-flow.frag new file mode 100644 index 0000000000..b4cd2085e3 --- /dev/null +++ b/reference/opt/shaders-msl/frag/for-loop-continue-control-flow.frag @@ -0,0 +1,23 @@ +#include +#include + +using namespace metal; + +struct main0_out +{ + float4 FragColor [[color(0)]]; +}; + +fragment main0_out main0() +{ + main0_out out = {}; + out.FragColor = float4(0.0); + for (int _43 = 0; _43 < 3; ) + { + out.FragColor[_43] += float(_43); + _43++; + continue; + } + return out; +} + diff --git a/reference/opt/shaders-msl/frag/scalar-refract-reflect.frag b/reference/opt/shaders-msl/frag/scalar-refract-reflect.frag new file mode 100644 index 0000000000..592d445810 --- /dev/null +++ b/reference/opt/shaders-msl/frag/scalar-refract-reflect.frag @@ -0,0 +1,49 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" + +#include +#include + +using namespace metal; + +struct main0_out +{ + float FragColor [[color(0)]]; +}; + +struct main0_in +{ + float3 vRefract [[user(locn0)]]; +}; + +template +inline T spvReflect(T i, T n) +{ + return i - T(2) * i * n * n; +} + +template +inline T spvRefract(T i, T n, T eta) +{ + T NoI = n * i; + T NoI2 = NoI * NoI; + T k = T(1) - eta * eta * (T(1) - NoI2); + if (k < T(0)) + { + return T(0); + } + else + { + return eta * i - (eta * NoI + sqrt(k)) * n; + } +} + +fragment main0_out main0(main0_in in [[stage_in]]) +{ + main0_out out = {}; + out.FragColor = spvRefract(in.vRefract.x, in.vRefract.y, in.vRefract.z); + out.FragColor += spvReflect(in.vRefract.x, in.vRefract.y); + out.FragColor += refract(in.vRefract.xy, in.vRefract.yz, in.vRefract.z).y; + out.FragColor += reflect(in.vRefract.xy, in.vRefract.zy).y; + return out; +} + diff --git a/reference/opt/shaders-msl/frag/subgroup-builtins.msl22.frag b/reference/opt/shaders-msl/frag/subgroup-builtins.msl22.frag new file mode 100644 index 0000000000..03a536f0c6 --- /dev/null +++ b/reference/opt/shaders-msl/frag/subgroup-builtins.msl22.frag @@ -0,0 +1,18 @@ +#include +#include + +using namespace metal; + +struct main0_out +{ + uint2 FragColor [[color(0)]]; +}; + +fragment main0_out main0(uint gl_SubgroupSize [[threads_per_simdgroup]], uint gl_SubgroupInvocationID [[thread_index_in_simdgroup]]) +{ + main0_out out = {}; + out.FragColor.x = gl_SubgroupSize; + out.FragColor.y = gl_SubgroupInvocationID; + return out; +} + diff --git a/reference/opt/shaders-msl/vulkan/frag/basic.multiview.nocompat.vk.frag b/reference/opt/shaders-msl/vulkan/frag/basic.multiview.nocompat.vk.frag new file mode 100644 index 0000000000..23c554940b --- /dev/null +++ b/reference/opt/shaders-msl/vulkan/frag/basic.multiview.nocompat.vk.frag @@ -0,0 +1,32 @@ +#include +#include + +using namespace metal; + +struct main0_out +{ + float4 FragColor [[color(0)]]; +}; + +struct main0_in +{ + float4 vColor [[user(locn0)]]; + float2 vTex_0 [[user(locn1)]]; + float2 vTex_1 [[user(locn2)]]; + float2 vTex_2 [[user(locn3)]]; + float2 vTex_3 [[user(locn4)]]; +}; + +fragment main0_out main0(main0_in in [[stage_in]], constant uint* spvViewMask [[buffer(24)]], texture2d uTex [[texture(0)]], sampler uTexSmplr [[sampler(0)]], uint gl_ViewIndex [[render_target_array_index]]) +{ + main0_out out = {}; + float2 vTex[4] = {}; + vTex[0] = in.vTex_0; + vTex[1] = in.vTex_1; + vTex[2] = in.vTex_2; + vTex[3] = in.vTex_3; + gl_ViewIndex += spvViewMask[0]; + out.FragColor = in.vColor * uTex.sample(uTexSmplr, vTex[int(gl_ViewIndex)]); + return out; +} + diff --git a/reference/opt/shaders-msl/vulkan/frag/subgroups.nocompat.invalid.vk.msl21.frag b/reference/opt/shaders-msl/vulkan/frag/subgroups.nocompat.invalid.vk.msl21.frag new file mode 100644 index 0000000000..fc9c4fcdd1 --- /dev/null +++ b/reference/opt/shaders-msl/vulkan/frag/subgroups.nocompat.invalid.vk.msl21.frag @@ -0,0 +1,89 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" + +#include +#include + +using namespace metal; + +struct main0_out +{ + float FragColor [[color(0)]]; +}; + +inline uint4 spvSubgroupBallot(bool value) +{ + simd_vote vote = simd_ballot(value); + // simd_ballot() returns a 64-bit integer-like object, but + // SPIR-V callers expect a uint4. We must convert. + // FIXME: This won't include higher bits if Apple ever supports + // 128 lanes in an SIMD-group. + return uint4((uint)((simd_vote::vote_t)vote & 0xFFFFFFFF), (uint)(((simd_vote::vote_t)vote >> 32) & 0xFFFFFFFF), 0, 0); +} + +inline bool spvSubgroupBallotBitExtract(uint4 ballot, uint bit) +{ + return !!extract_bits(ballot[bit / 32], bit % 32, 1); +} + +inline uint spvSubgroupBallotFindLSB(uint4 ballot) +{ + return select(ctz(ballot.x), select(32 + ctz(ballot.y), select(64 + ctz(ballot.z), select(96 + ctz(ballot.w), uint(-1), ballot.w == 0), ballot.z == 0), ballot.y == 0), ballot.x == 0); +} + +inline uint spvSubgroupBallotFindMSB(uint4 ballot) +{ + return select(128 - (clz(ballot.w) + 1), select(96 - (clz(ballot.z) + 1), select(64 - (clz(ballot.y) + 1), select(32 - (clz(ballot.x) + 1), uint(-1), ballot.x == 0), ballot.y == 0), ballot.z == 0), ballot.w == 0); +} + +inline uint spvSubgroupBallotBitCount(uint4 ballot) +{ + return popcount(ballot.x) + popcount(ballot.y) + popcount(ballot.z) + popcount(ballot.w); +} + +inline uint spvSubgroupBallotInclusiveBitCount(uint4 ballot, uint gl_SubgroupInvocationID) +{ + uint4 mask = uint4(extract_bits(0xFFFFFFFF, 0, min(gl_SubgroupInvocationID + 1, 32u)), extract_bits(0xFFFFFFFF, 0, (uint)max((int)gl_SubgroupInvocationID + 1 - 32, 0)), uint2(0)); + return spvSubgroupBallotBitCount(ballot & mask); +} + +inline uint spvSubgroupBallotExclusiveBitCount(uint4 ballot, uint gl_SubgroupInvocationID) +{ + uint4 mask = uint4(extract_bits(0xFFFFFFFF, 0, min(gl_SubgroupInvocationID, 32u)), extract_bits(0xFFFFFFFF, 0, (uint)max((int)gl_SubgroupInvocationID - 32, 0)), uint2(0)); + return spvSubgroupBallotBitCount(ballot & mask); +} + +template +inline bool spvSubgroupAllEqual(T value) +{ + return simd_all(value == simd_broadcast_first(value)); +} + +template<> +inline bool spvSubgroupAllEqual(bool value) +{ + return simd_all(value) || !simd_any(value); +} + +fragment main0_out main0() +{ + main0_out out = {}; + uint gl_SubgroupSize = simd_sum(1); + uint gl_SubgroupInvocationID = simd_prefix_exclusive_sum(1); + uint4 gl_SubgroupEqMask = gl_SubgroupInvocationID > 32 ? uint4(0, (1 << (gl_SubgroupInvocationID - 32)), uint2(0)) : uint4(1 << gl_SubgroupInvocationID, uint3(0)); + uint4 gl_SubgroupGeMask = uint4(extract_bits(0xFFFFFFFF, min(gl_SubgroupInvocationID, 32u), (uint)max(min((int)gl_SubgroupSize, 32) - (int)gl_SubgroupInvocationID, 0)), extract_bits(0xFFFFFFFF, (uint)max((int)gl_SubgroupInvocationID - 32, 0), (uint)max((int)gl_SubgroupSize - (int)max(gl_SubgroupInvocationID, 32u), 0)), uint2(0)); + uint4 gl_SubgroupGtMask = uint4(extract_bits(0xFFFFFFFF, min(gl_SubgroupInvocationID + 1, 32u), (uint)max(min((int)gl_SubgroupSize, 32) - (int)gl_SubgroupInvocationID - 1, 0)), extract_bits(0xFFFFFFFF, (uint)max((int)gl_SubgroupInvocationID + 1 - 32, 0), (uint)max((int)gl_SubgroupSize - (int)max(gl_SubgroupInvocationID + 1, 32u), 0)), uint2(0)); + uint4 gl_SubgroupLeMask = uint4(extract_bits(0xFFFFFFFF, 0, min(gl_SubgroupInvocationID + 1, 32u)), extract_bits(0xFFFFFFFF, 0, (uint)max((int)gl_SubgroupInvocationID + 1 - 32, 0)), uint2(0)); + uint4 gl_SubgroupLtMask = uint4(extract_bits(0xFFFFFFFF, 0, min(gl_SubgroupInvocationID, 32u)), extract_bits(0xFFFFFFFF, 0, (uint)max((int)gl_SubgroupInvocationID - 32, 0)), uint2(0)); + out.FragColor = float(gl_SubgroupSize); + out.FragColor = float(gl_SubgroupInvocationID); + out.FragColor = float4(gl_SubgroupEqMask).x; + out.FragColor = float4(gl_SubgroupGeMask).x; + out.FragColor = float4(gl_SubgroupGtMask).x; + out.FragColor = float4(gl_SubgroupLeMask).x; + out.FragColor = float4(gl_SubgroupLtMask).x; + uint4 _63 = spvSubgroupBallot(true); + float4 _147 = simd_prefix_inclusive_product(simd_product(float4(20.0))); + int4 _149 = simd_prefix_inclusive_product(simd_product(int4(20))); + return out; +} + diff --git a/reference/opt/shaders-msl/vulkan/vert/multiview.multiview.nocompat.vk.vert b/reference/opt/shaders-msl/vulkan/vert/multiview.multiview.nocompat.vk.vert new file mode 100644 index 0000000000..c42e67211e --- /dev/null +++ b/reference/opt/shaders-msl/vulkan/vert/multiview.multiview.nocompat.vk.vert @@ -0,0 +1,31 @@ +#include +#include + +using namespace metal; + +struct MVPs +{ + float4x4 MVP[2]; +}; + +struct main0_out +{ + float4 gl_Position [[position]]; + uint gl_Layer [[render_target_array_index]]; +}; + +struct main0_in +{ + float4 Position [[attribute(0)]]; +}; + +vertex main0_out main0(main0_in in [[stage_in]], constant uint* spvViewMask [[buffer(24)]], constant MVPs& _19 [[buffer(0)]], uint gl_InstanceIndex [[instance_id]]) +{ + main0_out out = {}; + uint gl_ViewIndex = spvViewMask[0] + gl_InstanceIndex % spvViewMask[1]; + gl_InstanceIndex /= spvViewMask[1]; + out.gl_Position = _19.MVP[int(gl_ViewIndex)] * in.Position; + out.gl_Layer = gl_ViewIndex - spvViewMask[0]; + return out; +} + diff --git a/reference/opt/shaders-msl/vulkan/vert/multiview.nocompat.vk.vert b/reference/opt/shaders-msl/vulkan/vert/multiview.nocompat.vk.vert new file mode 100644 index 0000000000..f87d2a11ad --- /dev/null +++ b/reference/opt/shaders-msl/vulkan/vert/multiview.nocompat.vk.vert @@ -0,0 +1,29 @@ +#include +#include + +using namespace metal; + +struct MVPs +{ + float4x4 MVP[2]; +}; + +struct main0_out +{ + float4 gl_Position [[position]]; + uint gl_Layer [[render_target_array_index]]; +}; + +struct main0_in +{ + float4 Position [[attribute(0)]]; +}; + +vertex main0_out main0(main0_in in [[stage_in]], constant MVPs& _19 [[buffer(0)]], uint gl_InstanceIndex [[instance_id]]) +{ + main0_out out = {}; + const uint gl_ViewIndex = 0; + out.gl_Position = _19.MVP[int(gl_ViewIndex)] * in.Position; + return out; +} + diff --git a/reference/opt/shaders/asm/comp/phi-temporary-copy-loop-variable.asm.comp b/reference/opt/shaders/asm/comp/phi-temporary-copy-loop-variable.asm.comp new file mode 100644 index 0000000000..9ae8d6fd7f --- /dev/null +++ b/reference/opt/shaders/asm/comp/phi-temporary-copy-loop-variable.asm.comp @@ -0,0 +1,25 @@ +#version 450 +layout(local_size_x = 8, local_size_y = 8, local_size_z = 1) in; + +layout(binding = 1, rgba32f) uniform writeonly image2D outImageTexture; + +void main() +{ + int _30; + _30 = 7; + int _27_copy; + for (int _27 = 7; _27 >= 0; _27_copy = _27, _27--, _30 = _27_copy) + { + if (5.0 > float(_27)) + { + break; + } + else + { + continue; + } + continue; + } + imageStore(outImageTexture, ivec2(gl_GlobalInvocationID.xy), vec4(float(_30 - 1), float(_30), 1.0, 1.0)); +} + diff --git a/reference/opt/shaders/asm/extended-debug-extinst.invalid.asm.comp b/reference/opt/shaders/asm/extended-debug-extinst.invalid.asm.comp new file mode 100644 index 0000000000..7755593f57 --- /dev/null +++ b/reference/opt/shaders/asm/extended-debug-extinst.invalid.asm.comp @@ -0,0 +1,18 @@ +#version 430 +layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in; + +layout(binding = 0, std430) buffer _8_9 +{ + float _m0[]; +} _9; + +layout(binding = 1, std430) buffer _8_10 +{ + float _m0[]; +} _10; + +void main() +{ + _10._m0[gl_GlobalInvocationID.x] = -_9._m0[gl_GlobalInvocationID.x]; +} + diff --git a/reference/opt/shaders/asm/frag/texture-sampling-fp16.asm.vk.frag b/reference/opt/shaders/asm/frag/texture-sampling-fp16.asm.vk.frag new file mode 100644 index 0000000000..e0feb49327 --- /dev/null +++ b/reference/opt/shaders/asm/frag/texture-sampling-fp16.asm.vk.frag @@ -0,0 +1,19 @@ +#version 450 +#if defined(GL_AMD_gpu_shader_half_float) +#extension GL_AMD_gpu_shader_half_float : require +#elif defined(GL_NV_gpu_shader5) +#extension GL_NV_gpu_shader5 : require +#else +#error No extension available for FP16. +#endif + +layout(binding = 0) uniform sampler2D uTexture; + +layout(location = 0) out f16vec4 FragColor; +layout(location = 0) in f16vec2 UV; + +void main() +{ + FragColor = f16vec4(texture(uTexture, UV)); +} + diff --git a/reference/opt/shaders/asm/frag/texture-sampling-fp16.asm.vk.frag.vk b/reference/opt/shaders/asm/frag/texture-sampling-fp16.asm.vk.frag.vk new file mode 100644 index 0000000000..e13e4254ea --- /dev/null +++ b/reference/opt/shaders/asm/frag/texture-sampling-fp16.asm.vk.frag.vk @@ -0,0 +1,20 @@ +#version 450 +#if defined(GL_AMD_gpu_shader_half_float) +#extension GL_AMD_gpu_shader_half_float : require +#elif defined(GL_EXT_shader_explicit_arithmetic_types_float16) +#extension GL_EXT_shader_explicit_arithmetic_types_float16 : require +#else +#error No extension available for FP16. +#endif +#extension GL_EXT_shader_16bit_storage : require + +layout(set = 0, binding = 0) uniform sampler2D uTexture; + +layout(location = 0) out f16vec4 FragColor; +layout(location = 0) in f16vec2 UV; + +void main() +{ + FragColor = f16vec4(texture(uTexture, UV)); +} + diff --git a/reference/opt/shaders/comp/outer-product.comp b/reference/opt/shaders/comp/outer-product.comp new file mode 100644 index 0000000000..d31dad3b2c --- /dev/null +++ b/reference/opt/shaders/comp/outer-product.comp @@ -0,0 +1,36 @@ +#version 450 +layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in; + +layout(binding = 0, std430) writeonly buffer SSBO +{ + mat2 m22; + mat2x3 m23; + mat2x4 m24; + mat3x2 m32; + mat3 m33; + mat3x4 m34; + mat4x2 m42; + mat4x3 m43; + mat4 m44; +} _21; + +layout(binding = 1, std430) readonly buffer ReadSSBO +{ + vec2 v2; + vec3 v3; + vec4 v4; +} _26; + +void main() +{ + _21.m22 = outerProduct(_26.v2, _26.v2); + _21.m23 = outerProduct(_26.v3, _26.v2); + _21.m24 = outerProduct(_26.v4, _26.v2); + _21.m32 = outerProduct(_26.v2, _26.v3); + _21.m33 = outerProduct(_26.v3, _26.v3); + _21.m34 = outerProduct(_26.v4, _26.v3); + _21.m42 = outerProduct(_26.v2, _26.v4); + _21.m43 = outerProduct(_26.v3, _26.v4); + _21.m44 = outerProduct(_26.v4, _26.v4); +} + diff --git a/reference/opt/shaders/comp/scalar-std450-distance-length-normalize.comp b/reference/opt/shaders/comp/scalar-std450-distance-length-normalize.comp new file mode 100644 index 0000000000..c28face28a --- /dev/null +++ b/reference/opt/shaders/comp/scalar-std450-distance-length-normalize.comp @@ -0,0 +1,19 @@ +#version 450 +layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in; + +layout(binding = 0, std430) buffer SSBO +{ + float a; + float b; + float c; + float d; + float e; +} _9; + +void main() +{ + _9.c = distance(_9.a, _9.b); + _9.d = length(_9.a); + _9.e = normalize(_9.a); +} + diff --git a/reference/opt/shaders/frag/for-loop-continue-control-flow.frag b/reference/opt/shaders/frag/for-loop-continue-control-flow.frag new file mode 100644 index 0000000000..f49b906099 --- /dev/null +++ b/reference/opt/shaders/frag/for-loop-continue-control-flow.frag @@ -0,0 +1,15 @@ +#version 450 + +layout(location = 0) out vec4 FragColor; + +void main() +{ + FragColor = vec4(0.0); + for (int _43 = 0; _43 < 3; ) + { + FragColor[_43] += float(_43); + _43++; + continue; + } +} + diff --git a/reference/opt/shaders/frag/scalar-refract-reflect.frag b/reference/opt/shaders/frag/scalar-refract-reflect.frag new file mode 100644 index 0000000000..94d671b5a9 --- /dev/null +++ b/reference/opt/shaders/frag/scalar-refract-reflect.frag @@ -0,0 +1,13 @@ +#version 450 + +layout(location = 0) out float FragColor; +layout(location = 0) in vec3 vRefract; + +void main() +{ + FragColor = refract(vRefract.x, vRefract.y, vRefract.z); + FragColor += reflect(vRefract.x, vRefract.y); + FragColor += refract(vRefract.xy, vRefract.yz, vRefract.z).y; + FragColor += reflect(vRefract.xy, vRefract.zy).y; +} + diff --git a/reference/opt/shaders/frag/selection-block-dominator.frag b/reference/opt/shaders/frag/selection-block-dominator.frag new file mode 100644 index 0000000000..f737f48917 --- /dev/null +++ b/reference/opt/shaders/frag/selection-block-dominator.frag @@ -0,0 +1,19 @@ +#version 450 + +layout(location = 0) flat in int vIndex; +layout(location = 0) out vec4 FragColor; + +void main() +{ + for (;;) + { + if (vIndex != 1) + { + FragColor = vec4(1.0); + break; + } + FragColor = vec4(10.0); + break; + } +} + diff --git a/reference/shaders-hlsl-no-opt/asm/frag/nonuniform-qualifier-propagation.nonuniformresource.sm51.asm.frag b/reference/shaders-hlsl-no-opt/asm/frag/nonuniform-qualifier-propagation.nonuniformresource.sm51.asm.frag new file mode 100644 index 0000000000..44cc8ab221 --- /dev/null +++ b/reference/shaders-hlsl-no-opt/asm/frag/nonuniform-qualifier-propagation.nonuniformresource.sm51.asm.frag @@ -0,0 +1,52 @@ +struct UBO_1_1 +{ + float4 v[64]; +}; + +ConstantBuffer ubos[] : register(b0, space2); +ByteAddressBuffer ssbos[] : register(t0, space3); +Texture2D uSamplers[] : register(t0, space0); +SamplerState uSamps[] : register(s0, space1); +Texture2D uCombinedSamplers[] : register(t4, space0); +SamplerState _uCombinedSamplers_sampler[] : register(s4, space0); + +static int vIndex; +static float4 FragColor; +static float2 vUV; + +struct SPIRV_Cross_Input +{ + nointerpolation int vIndex : TEXCOORD0; + float2 vUV : TEXCOORD1; +}; + +struct SPIRV_Cross_Output +{ + float4 FragColor : SV_Target0; +}; + +void frag_main() +{ + int i = vIndex; + int _59 = i + 10; + int _64 = i + 40; + FragColor = uSamplers[NonUniformResourceIndex(_59)].Sample(uSamps[NonUniformResourceIndex(_64)], vUV); + int _71 = i + 10; + FragColor = uCombinedSamplers[NonUniformResourceIndex(_71)].Sample(_uCombinedSamplers_sampler[NonUniformResourceIndex(_71)], vUV); + int _77 = i + 20; + int _80 = i + 40; + FragColor += ubos[NonUniformResourceIndex(_77)].v[_80]; + int _87 = i + 50; + int _90 = i + 60; + FragColor += asfloat(ssbos[NonUniformResourceIndex(_87)].Load4(_90 * 16 + 0)); +} + +SPIRV_Cross_Output main(SPIRV_Cross_Input stage_input) +{ + vIndex = stage_input.vIndex; + vUV = stage_input.vUV; + frag_main(); + SPIRV_Cross_Output stage_output; + stage_output.FragColor = FragColor; + return stage_output; +} diff --git a/reference/shaders-hlsl/asm/frag/texture-sampling-fp16.asm.frag b/reference/shaders-hlsl/asm/frag/texture-sampling-fp16.asm.frag new file mode 100644 index 0000000000..322c86bed3 --- /dev/null +++ b/reference/shaders-hlsl/asm/frag/texture-sampling-fp16.asm.frag @@ -0,0 +1,29 @@ +Texture2D uTexture : register(t0); +SamplerState _uTexture_sampler : register(s0); + +static min16float4 FragColor; +static min16float2 UV; + +struct SPIRV_Cross_Input +{ + min16float2 UV : TEXCOORD0; +}; + +struct SPIRV_Cross_Output +{ + min16float4 FragColor : SV_Target0; +}; + +void frag_main() +{ + FragColor = min16float4(uTexture.Sample(_uTexture_sampler, UV)); +} + +SPIRV_Cross_Output main(SPIRV_Cross_Input stage_input) +{ + UV = stage_input.UV; + frag_main(); + SPIRV_Cross_Output stage_output; + stage_output.FragColor = FragColor; + return stage_output; +} diff --git a/reference/shaders-hlsl/comp/outer-product.comp b/reference/shaders-hlsl/comp/outer-product.comp new file mode 100644 index 0000000000..71613d4f15 --- /dev/null +++ b/reference/shaders-hlsl/comp/outer-product.comp @@ -0,0 +1,48 @@ +RWByteAddressBuffer _21 : register(u0); +ByteAddressBuffer _26 : register(t1); + +void comp_main() +{ + float2x2 _32 = float2x2(asfloat(_26.Load2(0)) * asfloat(_26.Load2(0)).x, asfloat(_26.Load2(0)) * asfloat(_26.Load2(0)).y); + _21.Store2(0, asuint(_32[0])); + _21.Store2(8, asuint(_32[1])); + float2x3 _41 = float2x3(asfloat(_26.Load3(16)) * asfloat(_26.Load2(0)).x, asfloat(_26.Load3(16)) * asfloat(_26.Load2(0)).y); + _21.Store3(16, asuint(_41[0])); + _21.Store3(32, asuint(_41[1])); + float2x4 _50 = float2x4(asfloat(_26.Load4(32)) * asfloat(_26.Load2(0)).x, asfloat(_26.Load4(32)) * asfloat(_26.Load2(0)).y); + _21.Store4(48, asuint(_50[0])); + _21.Store4(64, asuint(_50[1])); + float3x2 _58 = float3x2(asfloat(_26.Load2(0)) * asfloat(_26.Load3(16)).x, asfloat(_26.Load2(0)) * asfloat(_26.Load3(16)).y, asfloat(_26.Load2(0)) * asfloat(_26.Load3(16)).z); + _21.Store2(80, asuint(_58[0])); + _21.Store2(88, asuint(_58[1])); + _21.Store2(96, asuint(_58[2])); + float3x3 _66 = float3x3(asfloat(_26.Load3(16)) * asfloat(_26.Load3(16)).x, asfloat(_26.Load3(16)) * asfloat(_26.Load3(16)).y, asfloat(_26.Load3(16)) * asfloat(_26.Load3(16)).z); + _21.Store3(112, asuint(_66[0])); + _21.Store3(128, asuint(_66[1])); + _21.Store3(144, asuint(_66[2])); + float3x4 _74 = float3x4(asfloat(_26.Load4(32)) * asfloat(_26.Load3(16)).x, asfloat(_26.Load4(32)) * asfloat(_26.Load3(16)).y, asfloat(_26.Load4(32)) * asfloat(_26.Load3(16)).z); + _21.Store4(160, asuint(_74[0])); + _21.Store4(176, asuint(_74[1])); + _21.Store4(192, asuint(_74[2])); + float4x2 _82 = float4x2(asfloat(_26.Load2(0)) * asfloat(_26.Load4(32)).x, asfloat(_26.Load2(0)) * asfloat(_26.Load4(32)).y, asfloat(_26.Load2(0)) * asfloat(_26.Load4(32)).z, asfloat(_26.Load2(0)) * asfloat(_26.Load4(32)).w); + _21.Store2(208, asuint(_82[0])); + _21.Store2(216, asuint(_82[1])); + _21.Store2(224, asuint(_82[2])); + _21.Store2(232, asuint(_82[3])); + float4x3 _90 = float4x3(asfloat(_26.Load3(16)) * asfloat(_26.Load4(32)).x, asfloat(_26.Load3(16)) * asfloat(_26.Load4(32)).y, asfloat(_26.Load3(16)) * asfloat(_26.Load4(32)).z, asfloat(_26.Load3(16)) * asfloat(_26.Load4(32)).w); + _21.Store3(240, asuint(_90[0])); + _21.Store3(256, asuint(_90[1])); + _21.Store3(272, asuint(_90[2])); + _21.Store3(288, asuint(_90[3])); + float4x4 _98 = float4x4(asfloat(_26.Load4(32)) * asfloat(_26.Load4(32)).x, asfloat(_26.Load4(32)) * asfloat(_26.Load4(32)).y, asfloat(_26.Load4(32)) * asfloat(_26.Load4(32)).z, asfloat(_26.Load4(32)) * asfloat(_26.Load4(32)).w); + _21.Store4(304, asuint(_98[0])); + _21.Store4(320, asuint(_98[1])); + _21.Store4(336, asuint(_98[2])); + _21.Store4(352, asuint(_98[3])); +} + +[numthreads(1, 1, 1)] +void main() +{ + comp_main(); +} diff --git a/reference/shaders-hlsl/comp/scalar-std450-distance-length-normalize.comp b/reference/shaders-hlsl/comp/scalar-std450-distance-length-normalize.comp new file mode 100644 index 0000000000..47f2fe4107 --- /dev/null +++ b/reference/shaders-hlsl/comp/scalar-std450-distance-length-normalize.comp @@ -0,0 +1,14 @@ +RWByteAddressBuffer _9 : register(u0); + +void comp_main() +{ + _9.Store(8, asuint(distance(asfloat(_9.Load(0)), asfloat(_9.Load(4))))); + _9.Store(12, asuint(length(asfloat(_9.Load(0))))); + _9.Store(16, asuint(sign(asfloat(_9.Load(0))))); +} + +[numthreads(1, 1, 1)] +void main() +{ + comp_main(); +} diff --git a/reference/shaders-hlsl/frag/for-loop-continue-control-flow.frag b/reference/shaders-hlsl/frag/for-loop-continue-control-flow.frag new file mode 100644 index 0000000000..12e1e7f5fd --- /dev/null +++ b/reference/shaders-hlsl/frag/for-loop-continue-control-flow.frag @@ -0,0 +1,44 @@ +static float4 FragColor; + +struct SPIRV_Cross_Output +{ + float4 FragColor : SV_Target0; +}; + +void frag_main() +{ + FragColor = 0.0f.xxxx; + int i = 0; + int _36; + for (;;) + { + if (i < 3) + { + int a = i; + FragColor[a] += float(i); + if (false) + { + _36 = 1; + } + else + { + int _41 = i; + i = _41 + 1; + _36 = _41; + } + continue; + } + else + { + break; + } + } +} + +SPIRV_Cross_Output main() +{ + frag_main(); + SPIRV_Cross_Output stage_output; + stage_output.FragColor = FragColor; + return stage_output; +} diff --git a/reference/shaders-hlsl/frag/scalar-refract-reflect.frag b/reference/shaders-hlsl/frag/scalar-refract-reflect.frag new file mode 100644 index 0000000000..0fb694c543 --- /dev/null +++ b/reference/shaders-hlsl/frag/scalar-refract-reflect.frag @@ -0,0 +1,49 @@ +static float FragColor; +static float3 vRefract; + +struct SPIRV_Cross_Input +{ + float3 vRefract : TEXCOORD0; +}; + +struct SPIRV_Cross_Output +{ + float FragColor : SV_Target0; +}; + +float SPIRV_Cross_Reflect(float i, float n) +{ + return i - 2.0 * dot(n, i) * n; +} + +float SPIRV_Cross_Refract(float i, float n, float eta) +{ + float NoI = n * i; + float NoI2 = NoI * NoI; + float k = 1.0 - eta * eta * (1.0 - NoI2); + if (k < 0.0) + { + return 0.0; + } + else + { + return eta * i - (eta * NoI + sqrt(k)) * n; + } +} + +void frag_main() +{ + FragColor = SPIRV_Cross_Refract(vRefract.x, vRefract.y, vRefract.z); + FragColor += SPIRV_Cross_Reflect(vRefract.x, vRefract.y); + FragColor += refract(vRefract.xy, vRefract.yz, vRefract.z).y; + FragColor += reflect(vRefract.xy, vRefract.zy).y; +} + +SPIRV_Cross_Output main(SPIRV_Cross_Input stage_input) +{ + vRefract = stage_input.vRefract; + frag_main(); + SPIRV_Cross_Output stage_output; + stage_output.FragColor = FragColor; + return stage_output; +} diff --git a/reference/shaders-msl/asm/frag/texture-sampling-fp16.asm.frag b/reference/shaders-msl/asm/frag/texture-sampling-fp16.asm.frag new file mode 100644 index 0000000000..97345820b6 --- /dev/null +++ b/reference/shaders-msl/asm/frag/texture-sampling-fp16.asm.frag @@ -0,0 +1,22 @@ +#include +#include + +using namespace metal; + +struct main0_out +{ + half4 FragColor [[color(0)]]; +}; + +struct main0_in +{ + half2 UV [[user(locn0)]]; +}; + +fragment main0_out main0(main0_in in [[stage_in]], texture2d uTexture [[texture(0)]], sampler uTextureSmplr [[sampler(0)]]) +{ + main0_out out = {}; + out.FragColor = half4(uTexture.sample(uTextureSmplr, float2(in.UV))); + return out; +} + diff --git a/reference/shaders-msl/comp/outer-product.comp b/reference/shaders-msl/comp/outer-product.comp new file mode 100644 index 0000000000..8e32db392e --- /dev/null +++ b/reference/shaders-msl/comp/outer-product.comp @@ -0,0 +1,38 @@ +#include +#include + +using namespace metal; + +struct SSBO +{ + float2x2 m22; + float2x3 m23; + float2x4 m24; + float3x2 m32; + float3x3 m33; + float3x4 m34; + float4x2 m42; + float4x3 m43; + float4x4 m44; +}; + +struct ReadSSBO +{ + float2 v2; + float3 v3; + float4 v4; +}; + +kernel void main0(device SSBO& _21 [[buffer(0)]], const device ReadSSBO& _26 [[buffer(1)]]) +{ + _21.m22 = float2x2(_26.v2 * _26.v2.x, _26.v2 * _26.v2.y); + _21.m23 = float2x3(_26.v3 * _26.v2.x, _26.v3 * _26.v2.y); + _21.m24 = float2x4(_26.v4 * _26.v2.x, _26.v4 * _26.v2.y); + _21.m32 = float3x2(_26.v2 * _26.v3.x, _26.v2 * _26.v3.y, _26.v2 * _26.v3.z); + _21.m33 = float3x3(_26.v3 * _26.v3.x, _26.v3 * _26.v3.y, _26.v3 * _26.v3.z); + _21.m34 = float3x4(_26.v4 * _26.v3.x, _26.v4 * _26.v3.y, _26.v4 * _26.v3.z); + _21.m42 = float4x2(_26.v2 * _26.v4.x, _26.v2 * _26.v4.y, _26.v2 * _26.v4.z, _26.v2 * _26.v4.w); + _21.m43 = float4x3(_26.v3 * _26.v4.x, _26.v3 * _26.v4.y, _26.v3 * _26.v4.z, _26.v3 * _26.v4.w); + _21.m44 = float4x4(_26.v4 * _26.v4.x, _26.v4 * _26.v4.y, _26.v4 * _26.v4.z, _26.v4 * _26.v4.w); +} + diff --git a/reference/shaders-msl/comp/scalar-std450-distance-length-normalize.comp b/reference/shaders-msl/comp/scalar-std450-distance-length-normalize.comp new file mode 100644 index 0000000000..312a6f9453 --- /dev/null +++ b/reference/shaders-msl/comp/scalar-std450-distance-length-normalize.comp @@ -0,0 +1,21 @@ +#include +#include + +using namespace metal; + +struct SSBO +{ + float a; + float b; + float c; + float d; + float e; +}; + +kernel void main0(device SSBO& _9 [[buffer(0)]]) +{ + _9.c = abs(_9.a - _9.b); + _9.d = abs(_9.a); + _9.e = sign(_9.a); +} + diff --git a/reference/shaders-msl/frag/for-loop-continue-control-flow.frag b/reference/shaders-msl/frag/for-loop-continue-control-flow.frag new file mode 100644 index 0000000000..847ec6532f --- /dev/null +++ b/reference/shaders-msl/frag/for-loop-continue-control-flow.frag @@ -0,0 +1,42 @@ +#include +#include + +using namespace metal; + +struct main0_out +{ + float4 FragColor [[color(0)]]; +}; + +fragment main0_out main0() +{ + main0_out out = {}; + out.FragColor = float4(0.0); + int i = 0; + int _36; + for (;;) + { + if (i < 3) + { + int a = i; + out.FragColor[a] += float(i); + if (false) + { + _36 = 1; + } + else + { + int _41 = i; + i = _41 + 1; + _36 = _41; + } + continue; + } + else + { + break; + } + } + return out; +} + diff --git a/reference/shaders-msl/frag/scalar-refract-reflect.frag b/reference/shaders-msl/frag/scalar-refract-reflect.frag new file mode 100644 index 0000000000..592d445810 --- /dev/null +++ b/reference/shaders-msl/frag/scalar-refract-reflect.frag @@ -0,0 +1,49 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" + +#include +#include + +using namespace metal; + +struct main0_out +{ + float FragColor [[color(0)]]; +}; + +struct main0_in +{ + float3 vRefract [[user(locn0)]]; +}; + +template +inline T spvReflect(T i, T n) +{ + return i - T(2) * i * n * n; +} + +template +inline T spvRefract(T i, T n, T eta) +{ + T NoI = n * i; + T NoI2 = NoI * NoI; + T k = T(1) - eta * eta * (T(1) - NoI2); + if (k < T(0)) + { + return T(0); + } + else + { + return eta * i - (eta * NoI + sqrt(k)) * n; + } +} + +fragment main0_out main0(main0_in in [[stage_in]]) +{ + main0_out out = {}; + out.FragColor = spvRefract(in.vRefract.x, in.vRefract.y, in.vRefract.z); + out.FragColor += spvReflect(in.vRefract.x, in.vRefract.y); + out.FragColor += refract(in.vRefract.xy, in.vRefract.yz, in.vRefract.z).y; + out.FragColor += reflect(in.vRefract.xy, in.vRefract.zy).y; + return out; +} + diff --git a/reference/shaders-msl/frag/subgroup-builtins.msl22.frag b/reference/shaders-msl/frag/subgroup-builtins.msl22.frag new file mode 100644 index 0000000000..03a536f0c6 --- /dev/null +++ b/reference/shaders-msl/frag/subgroup-builtins.msl22.frag @@ -0,0 +1,18 @@ +#include +#include + +using namespace metal; + +struct main0_out +{ + uint2 FragColor [[color(0)]]; +}; + +fragment main0_out main0(uint gl_SubgroupSize [[threads_per_simdgroup]], uint gl_SubgroupInvocationID [[thread_index_in_simdgroup]]) +{ + main0_out out = {}; + out.FragColor.x = gl_SubgroupSize; + out.FragColor.y = gl_SubgroupInvocationID; + return out; +} + diff --git a/reference/shaders-msl/vulkan/frag/basic.multiview.nocompat.vk.frag b/reference/shaders-msl/vulkan/frag/basic.multiview.nocompat.vk.frag new file mode 100644 index 0000000000..23c554940b --- /dev/null +++ b/reference/shaders-msl/vulkan/frag/basic.multiview.nocompat.vk.frag @@ -0,0 +1,32 @@ +#include +#include + +using namespace metal; + +struct main0_out +{ + float4 FragColor [[color(0)]]; +}; + +struct main0_in +{ + float4 vColor [[user(locn0)]]; + float2 vTex_0 [[user(locn1)]]; + float2 vTex_1 [[user(locn2)]]; + float2 vTex_2 [[user(locn3)]]; + float2 vTex_3 [[user(locn4)]]; +}; + +fragment main0_out main0(main0_in in [[stage_in]], constant uint* spvViewMask [[buffer(24)]], texture2d uTex [[texture(0)]], sampler uTexSmplr [[sampler(0)]], uint gl_ViewIndex [[render_target_array_index]]) +{ + main0_out out = {}; + float2 vTex[4] = {}; + vTex[0] = in.vTex_0; + vTex[1] = in.vTex_1; + vTex[2] = in.vTex_2; + vTex[3] = in.vTex_3; + gl_ViewIndex += spvViewMask[0]; + out.FragColor = in.vColor * uTex.sample(uTexSmplr, vTex[int(gl_ViewIndex)]); + return out; +} + diff --git a/reference/shaders-msl/vulkan/frag/subgroups.nocompat.invalid.vk.msl21.frag b/reference/shaders-msl/vulkan/frag/subgroups.nocompat.invalid.vk.msl21.frag new file mode 100644 index 0000000000..affaf86d54 --- /dev/null +++ b/reference/shaders-msl/vulkan/frag/subgroups.nocompat.invalid.vk.msl21.frag @@ -0,0 +1,143 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" + +#include +#include + +using namespace metal; + +struct main0_out +{ + float FragColor [[color(0)]]; +}; + +inline uint4 spvSubgroupBallot(bool value) +{ + simd_vote vote = simd_ballot(value); + // simd_ballot() returns a 64-bit integer-like object, but + // SPIR-V callers expect a uint4. We must convert. + // FIXME: This won't include higher bits if Apple ever supports + // 128 lanes in an SIMD-group. + return uint4((uint)((simd_vote::vote_t)vote & 0xFFFFFFFF), (uint)(((simd_vote::vote_t)vote >> 32) & 0xFFFFFFFF), 0, 0); +} + +inline bool spvSubgroupBallotBitExtract(uint4 ballot, uint bit) +{ + return !!extract_bits(ballot[bit / 32], bit % 32, 1); +} + +inline uint spvSubgroupBallotFindLSB(uint4 ballot) +{ + return select(ctz(ballot.x), select(32 + ctz(ballot.y), select(64 + ctz(ballot.z), select(96 + ctz(ballot.w), uint(-1), ballot.w == 0), ballot.z == 0), ballot.y == 0), ballot.x == 0); +} + +inline uint spvSubgroupBallotFindMSB(uint4 ballot) +{ + return select(128 - (clz(ballot.w) + 1), select(96 - (clz(ballot.z) + 1), select(64 - (clz(ballot.y) + 1), select(32 - (clz(ballot.x) + 1), uint(-1), ballot.x == 0), ballot.y == 0), ballot.z == 0), ballot.w == 0); +} + +inline uint spvSubgroupBallotBitCount(uint4 ballot) +{ + return popcount(ballot.x) + popcount(ballot.y) + popcount(ballot.z) + popcount(ballot.w); +} + +inline uint spvSubgroupBallotInclusiveBitCount(uint4 ballot, uint gl_SubgroupInvocationID) +{ + uint4 mask = uint4(extract_bits(0xFFFFFFFF, 0, min(gl_SubgroupInvocationID + 1, 32u)), extract_bits(0xFFFFFFFF, 0, (uint)max((int)gl_SubgroupInvocationID + 1 - 32, 0)), uint2(0)); + return spvSubgroupBallotBitCount(ballot & mask); +} + +inline uint spvSubgroupBallotExclusiveBitCount(uint4 ballot, uint gl_SubgroupInvocationID) +{ + uint4 mask = uint4(extract_bits(0xFFFFFFFF, 0, min(gl_SubgroupInvocationID, 32u)), extract_bits(0xFFFFFFFF, 0, (uint)max((int)gl_SubgroupInvocationID - 32, 0)), uint2(0)); + return spvSubgroupBallotBitCount(ballot & mask); +} + +template +inline bool spvSubgroupAllEqual(T value) +{ + return simd_all(value == simd_broadcast_first(value)); +} + +template<> +inline bool spvSubgroupAllEqual(bool value) +{ + return simd_all(value) || !simd_any(value); +} + +fragment main0_out main0() +{ + main0_out out = {}; + uint gl_SubgroupSize = simd_sum(1); + uint gl_SubgroupInvocationID = simd_prefix_exclusive_sum(1); + uint4 gl_SubgroupEqMask = gl_SubgroupInvocationID > 32 ? uint4(0, (1 << (gl_SubgroupInvocationID - 32)), uint2(0)) : uint4(1 << gl_SubgroupInvocationID, uint3(0)); + uint4 gl_SubgroupGeMask = uint4(extract_bits(0xFFFFFFFF, min(gl_SubgroupInvocationID, 32u), (uint)max(min((int)gl_SubgroupSize, 32) - (int)gl_SubgroupInvocationID, 0)), extract_bits(0xFFFFFFFF, (uint)max((int)gl_SubgroupInvocationID - 32, 0), (uint)max((int)gl_SubgroupSize - (int)max(gl_SubgroupInvocationID, 32u), 0)), uint2(0)); + uint4 gl_SubgroupGtMask = uint4(extract_bits(0xFFFFFFFF, min(gl_SubgroupInvocationID + 1, 32u), (uint)max(min((int)gl_SubgroupSize, 32) - (int)gl_SubgroupInvocationID - 1, 0)), extract_bits(0xFFFFFFFF, (uint)max((int)gl_SubgroupInvocationID + 1 - 32, 0), (uint)max((int)gl_SubgroupSize - (int)max(gl_SubgroupInvocationID + 1, 32u), 0)), uint2(0)); + uint4 gl_SubgroupLeMask = uint4(extract_bits(0xFFFFFFFF, 0, min(gl_SubgroupInvocationID + 1, 32u)), extract_bits(0xFFFFFFFF, 0, (uint)max((int)gl_SubgroupInvocationID + 1 - 32, 0)), uint2(0)); + uint4 gl_SubgroupLtMask = uint4(extract_bits(0xFFFFFFFF, 0, min(gl_SubgroupInvocationID, 32u)), extract_bits(0xFFFFFFFF, 0, (uint)max((int)gl_SubgroupInvocationID - 32, 0)), uint2(0)); + out.FragColor = float(gl_SubgroupSize); + out.FragColor = float(gl_SubgroupInvocationID); + bool elected = simd_is_first(); + out.FragColor = float4(gl_SubgroupEqMask).x; + out.FragColor = float4(gl_SubgroupGeMask).x; + out.FragColor = float4(gl_SubgroupGtMask).x; + out.FragColor = float4(gl_SubgroupLeMask).x; + out.FragColor = float4(gl_SubgroupLtMask).x; + float4 broadcasted = simd_broadcast(float4(10.0), 8u); + float3 first = simd_broadcast_first(float3(20.0)); + uint4 ballot_value = spvSubgroupBallot(true); + bool inverse_ballot_value = spvSubgroupBallotBitExtract(ballot_value, gl_SubgroupInvocationID); + bool bit_extracted = spvSubgroupBallotBitExtract(uint4(10u), 8u); + uint bit_count = spvSubgroupBallotBitCount(ballot_value); + uint inclusive_bit_count = spvSubgroupBallotInclusiveBitCount(ballot_value, gl_SubgroupInvocationID); + uint exclusive_bit_count = spvSubgroupBallotExclusiveBitCount(ballot_value, gl_SubgroupInvocationID); + uint lsb = spvSubgroupBallotFindLSB(ballot_value); + uint msb = spvSubgroupBallotFindMSB(ballot_value); + uint shuffled = simd_shuffle(10u, 8u); + uint shuffled_xor = simd_shuffle_xor(30u, 8u); + uint shuffled_up = simd_shuffle_up(20u, 4u); + uint shuffled_down = simd_shuffle_down(20u, 4u); + bool has_all = simd_all(true); + bool has_any = simd_any(true); + bool has_equal = spvSubgroupAllEqual(0); + has_equal = spvSubgroupAllEqual(true); + float4 added = simd_sum(float4(20.0)); + int4 iadded = simd_sum(int4(20)); + float4 multiplied = simd_product(float4(20.0)); + int4 imultiplied = simd_product(int4(20)); + float4 lo = simd_min(float4(20.0)); + float4 hi = simd_max(float4(20.0)); + int4 slo = simd_min(int4(20)); + int4 shi = simd_max(int4(20)); + uint4 ulo = simd_min(uint4(20u)); + uint4 uhi = simd_max(uint4(20u)); + uint4 anded = simd_and(ballot_value); + uint4 ored = simd_or(ballot_value); + uint4 xored = simd_xor(ballot_value); + added = simd_prefix_inclusive_sum(added); + iadded = simd_prefix_inclusive_sum(iadded); + multiplied = simd_prefix_inclusive_product(multiplied); + imultiplied = simd_prefix_inclusive_product(imultiplied); + added = simd_prefix_exclusive_sum(multiplied); + multiplied = simd_prefix_exclusive_product(multiplied); + iadded = simd_prefix_exclusive_sum(imultiplied); + imultiplied = simd_prefix_exclusive_product(imultiplied); + added = quad_sum(added); + multiplied = quad_product(multiplied); + iadded = quad_sum(iadded); + imultiplied = quad_product(imultiplied); + lo = quad_min(lo); + hi = quad_max(hi); + ulo = quad_min(ulo); + uhi = quad_max(uhi); + slo = quad_min(slo); + shi = quad_max(shi); + anded = quad_and(anded); + ored = quad_or(ored); + xored = quad_xor(xored); + float4 swap_horiz = quad_shuffle_xor(float4(20.0), 1u); + float4 swap_vertical = quad_shuffle_xor(float4(20.0), 2u); + float4 swap_diagonal = quad_shuffle_xor(float4(20.0), 3u); + float4 quad_broadcast0 = quad_broadcast(float4(20.0), 3u); + return out; +} + diff --git a/reference/shaders-msl/vulkan/vert/multiview.multiview.nocompat.vk.vert b/reference/shaders-msl/vulkan/vert/multiview.multiview.nocompat.vk.vert new file mode 100644 index 0000000000..c42e67211e --- /dev/null +++ b/reference/shaders-msl/vulkan/vert/multiview.multiview.nocompat.vk.vert @@ -0,0 +1,31 @@ +#include +#include + +using namespace metal; + +struct MVPs +{ + float4x4 MVP[2]; +}; + +struct main0_out +{ + float4 gl_Position [[position]]; + uint gl_Layer [[render_target_array_index]]; +}; + +struct main0_in +{ + float4 Position [[attribute(0)]]; +}; + +vertex main0_out main0(main0_in in [[stage_in]], constant uint* spvViewMask [[buffer(24)]], constant MVPs& _19 [[buffer(0)]], uint gl_InstanceIndex [[instance_id]]) +{ + main0_out out = {}; + uint gl_ViewIndex = spvViewMask[0] + gl_InstanceIndex % spvViewMask[1]; + gl_InstanceIndex /= spvViewMask[1]; + out.gl_Position = _19.MVP[int(gl_ViewIndex)] * in.Position; + out.gl_Layer = gl_ViewIndex - spvViewMask[0]; + return out; +} + diff --git a/reference/shaders-msl/vulkan/vert/multiview.nocompat.vk.vert b/reference/shaders-msl/vulkan/vert/multiview.nocompat.vk.vert new file mode 100644 index 0000000000..f87d2a11ad --- /dev/null +++ b/reference/shaders-msl/vulkan/vert/multiview.nocompat.vk.vert @@ -0,0 +1,29 @@ +#include +#include + +using namespace metal; + +struct MVPs +{ + float4x4 MVP[2]; +}; + +struct main0_out +{ + float4 gl_Position [[position]]; + uint gl_Layer [[render_target_array_index]]; +}; + +struct main0_in +{ + float4 Position [[attribute(0)]]; +}; + +vertex main0_out main0(main0_in in [[stage_in]], constant MVPs& _19 [[buffer(0)]], uint gl_InstanceIndex [[instance_id]]) +{ + main0_out out = {}; + const uint gl_ViewIndex = 0; + out.gl_Position = _19.MVP[int(gl_ViewIndex)] * in.Position; + return out; +} + diff --git a/reference/shaders-no-opt/asm/frag/nonuniform-qualifier-propagation.vk.nocompat.asm.frag.vk b/reference/shaders-no-opt/asm/frag/nonuniform-qualifier-propagation.vk.nocompat.asm.frag.vk new file mode 100644 index 0000000000..5f7ddeee17 --- /dev/null +++ b/reference/shaders-no-opt/asm/frag/nonuniform-qualifier-propagation.vk.nocompat.asm.frag.vk @@ -0,0 +1,37 @@ +#version 450 +#extension GL_EXT_nonuniform_qualifier : require + +layout(set = 0, binding = 2, std140) uniform UBO +{ + vec4 v[64]; +} ubos[]; + +layout(set = 0, binding = 3, std430) readonly buffer SSBO +{ + vec4 v[]; +} ssbos[]; + +layout(set = 0, binding = 0) uniform texture2D uSamplers[]; +layout(set = 0, binding = 1) uniform sampler uSamps[]; +layout(set = 0, binding = 4) uniform sampler2D uCombinedSamplers[]; + +layout(location = 0) flat in int vIndex; +layout(location = 0) out vec4 FragColor; +layout(location = 1) in vec2 vUV; + +void main() +{ + int i = vIndex; + int _59 = i + 10; + int _64 = i + 40; + FragColor = texture(sampler2D(uSamplers[nonuniformEXT(_59)], uSamps[nonuniformEXT(_64)]), vUV); + int _71 = i + 10; + FragColor = texture(uCombinedSamplers[nonuniformEXT(_71)], vUV); + int _77 = i + 20; + int _80 = i + 40; + FragColor += ubos[nonuniformEXT(_77)].v[_80]; + int _87 = i + 50; + int _90 = i + 60; + FragColor += ssbos[nonuniformEXT(_87)].v[_90]; +} + diff --git a/reference/shaders/asm/comp/phi-temporary-copy-loop-variable.asm.comp b/reference/shaders/asm/comp/phi-temporary-copy-loop-variable.asm.comp new file mode 100644 index 0000000000..9ae8d6fd7f --- /dev/null +++ b/reference/shaders/asm/comp/phi-temporary-copy-loop-variable.asm.comp @@ -0,0 +1,25 @@ +#version 450 +layout(local_size_x = 8, local_size_y = 8, local_size_z = 1) in; + +layout(binding = 1, rgba32f) uniform writeonly image2D outImageTexture; + +void main() +{ + int _30; + _30 = 7; + int _27_copy; + for (int _27 = 7; _27 >= 0; _27_copy = _27, _27--, _30 = _27_copy) + { + if (5.0 > float(_27)) + { + break; + } + else + { + continue; + } + continue; + } + imageStore(outImageTexture, ivec2(gl_GlobalInvocationID.xy), vec4(float(_30 - 1), float(_30), 1.0, 1.0)); +} + diff --git a/reference/shaders/asm/extended-debug-extinst.invalid.asm.comp b/reference/shaders/asm/extended-debug-extinst.invalid.asm.comp new file mode 100644 index 0000000000..7755593f57 --- /dev/null +++ b/reference/shaders/asm/extended-debug-extinst.invalid.asm.comp @@ -0,0 +1,18 @@ +#version 430 +layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in; + +layout(binding = 0, std430) buffer _8_9 +{ + float _m0[]; +} _9; + +layout(binding = 1, std430) buffer _8_10 +{ + float _m0[]; +} _10; + +void main() +{ + _10._m0[gl_GlobalInvocationID.x] = -_9._m0[gl_GlobalInvocationID.x]; +} + diff --git a/reference/shaders/asm/frag/texture-sampling-fp16.asm.vk.frag b/reference/shaders/asm/frag/texture-sampling-fp16.asm.vk.frag new file mode 100644 index 0000000000..e0feb49327 --- /dev/null +++ b/reference/shaders/asm/frag/texture-sampling-fp16.asm.vk.frag @@ -0,0 +1,19 @@ +#version 450 +#if defined(GL_AMD_gpu_shader_half_float) +#extension GL_AMD_gpu_shader_half_float : require +#elif defined(GL_NV_gpu_shader5) +#extension GL_NV_gpu_shader5 : require +#else +#error No extension available for FP16. +#endif + +layout(binding = 0) uniform sampler2D uTexture; + +layout(location = 0) out f16vec4 FragColor; +layout(location = 0) in f16vec2 UV; + +void main() +{ + FragColor = f16vec4(texture(uTexture, UV)); +} + diff --git a/reference/shaders/asm/frag/texture-sampling-fp16.asm.vk.frag.vk b/reference/shaders/asm/frag/texture-sampling-fp16.asm.vk.frag.vk new file mode 100644 index 0000000000..e13e4254ea --- /dev/null +++ b/reference/shaders/asm/frag/texture-sampling-fp16.asm.vk.frag.vk @@ -0,0 +1,20 @@ +#version 450 +#if defined(GL_AMD_gpu_shader_half_float) +#extension GL_AMD_gpu_shader_half_float : require +#elif defined(GL_EXT_shader_explicit_arithmetic_types_float16) +#extension GL_EXT_shader_explicit_arithmetic_types_float16 : require +#else +#error No extension available for FP16. +#endif +#extension GL_EXT_shader_16bit_storage : require + +layout(set = 0, binding = 0) uniform sampler2D uTexture; + +layout(location = 0) out f16vec4 FragColor; +layout(location = 0) in f16vec2 UV; + +void main() +{ + FragColor = f16vec4(texture(uTexture, UV)); +} + diff --git a/reference/shaders/comp/outer-product.comp b/reference/shaders/comp/outer-product.comp new file mode 100644 index 0000000000..d31dad3b2c --- /dev/null +++ b/reference/shaders/comp/outer-product.comp @@ -0,0 +1,36 @@ +#version 450 +layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in; + +layout(binding = 0, std430) writeonly buffer SSBO +{ + mat2 m22; + mat2x3 m23; + mat2x4 m24; + mat3x2 m32; + mat3 m33; + mat3x4 m34; + mat4x2 m42; + mat4x3 m43; + mat4 m44; +} _21; + +layout(binding = 1, std430) readonly buffer ReadSSBO +{ + vec2 v2; + vec3 v3; + vec4 v4; +} _26; + +void main() +{ + _21.m22 = outerProduct(_26.v2, _26.v2); + _21.m23 = outerProduct(_26.v3, _26.v2); + _21.m24 = outerProduct(_26.v4, _26.v2); + _21.m32 = outerProduct(_26.v2, _26.v3); + _21.m33 = outerProduct(_26.v3, _26.v3); + _21.m34 = outerProduct(_26.v4, _26.v3); + _21.m42 = outerProduct(_26.v2, _26.v4); + _21.m43 = outerProduct(_26.v3, _26.v4); + _21.m44 = outerProduct(_26.v4, _26.v4); +} + diff --git a/reference/shaders/comp/scalar-std450-distance-length-normalize.comp b/reference/shaders/comp/scalar-std450-distance-length-normalize.comp new file mode 100644 index 0000000000..c28face28a --- /dev/null +++ b/reference/shaders/comp/scalar-std450-distance-length-normalize.comp @@ -0,0 +1,19 @@ +#version 450 +layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in; + +layout(binding = 0, std430) buffer SSBO +{ + float a; + float b; + float c; + float d; + float e; +} _9; + +void main() +{ + _9.c = distance(_9.a, _9.b); + _9.d = length(_9.a); + _9.e = normalize(_9.a); +} + diff --git a/reference/shaders/frag/for-loop-continue-control-flow.frag b/reference/shaders/frag/for-loop-continue-control-flow.frag new file mode 100644 index 0000000000..638d89be6b --- /dev/null +++ b/reference/shaders/frag/for-loop-continue-control-flow.frag @@ -0,0 +1,34 @@ +#version 450 + +layout(location = 0) out vec4 FragColor; + +void main() +{ + FragColor = vec4(0.0); + int i = 0; + int _36; + for (;;) + { + if (i < 3) + { + int a = i; + FragColor[a] += float(i); + if (false) + { + _36 = 1; + } + else + { + int _41 = i; + i = _41 + 1; + _36 = _41; + } + continue; + } + else + { + break; + } + } +} + diff --git a/reference/shaders/frag/scalar-refract-reflect.frag b/reference/shaders/frag/scalar-refract-reflect.frag new file mode 100644 index 0000000000..94d671b5a9 --- /dev/null +++ b/reference/shaders/frag/scalar-refract-reflect.frag @@ -0,0 +1,13 @@ +#version 450 + +layout(location = 0) out float FragColor; +layout(location = 0) in vec3 vRefract; + +void main() +{ + FragColor = refract(vRefract.x, vRefract.y, vRefract.z); + FragColor += reflect(vRefract.x, vRefract.y); + FragColor += refract(vRefract.xy, vRefract.yz, vRefract.z).y; + FragColor += reflect(vRefract.xy, vRefract.zy).y; +} + diff --git a/reference/shaders/frag/selection-block-dominator.frag b/reference/shaders/frag/selection-block-dominator.frag new file mode 100644 index 0000000000..a0242a00b5 --- /dev/null +++ b/reference/shaders/frag/selection-block-dominator.frag @@ -0,0 +1,20 @@ +#version 450 + +layout(location = 0) flat in int vIndex; +layout(location = 0) out vec4 FragColor; + +void main() +{ + int v; + if (vIndex != 1) + { + FragColor = vec4(1.0); + return; + } + else + { + v = 10; + } + FragColor = vec4(float(v)); +} + diff --git a/shaders-hlsl-no-opt/asm/frag/nonuniform-qualifier-propagation.nonuniformresource.sm51.asm.frag b/shaders-hlsl-no-opt/asm/frag/nonuniform-qualifier-propagation.nonuniformresource.sm51.asm.frag new file mode 100644 index 0000000000..5aa68849b1 --- /dev/null +++ b/shaders-hlsl-no-opt/asm/frag/nonuniform-qualifier-propagation.nonuniformresource.sm51.asm.frag @@ -0,0 +1,159 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 7 +; Bound: 93 +; Schema: 0 + OpCapability Shader + OpCapability ShaderNonUniformEXT + OpCapability RuntimeDescriptorArrayEXT + OpCapability UniformBufferArrayNonUniformIndexingEXT + OpCapability SampledImageArrayNonUniformIndexingEXT + OpCapability StorageBufferArrayNonUniformIndexingEXT + OpExtension "SPV_EXT_descriptor_indexing" + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint Fragment %main "main" %vIndex %FragColor %vUV + OpExecutionMode %main OriginUpperLeft + OpSource GLSL 450 + OpSourceExtension "GL_EXT_nonuniform_qualifier" + OpName %main "main" + OpName %i "i" + OpName %vIndex "vIndex" + OpName %FragColor "FragColor" + OpName %uSamplers "uSamplers" + OpName %uSamps "uSamps" + OpName %vUV "vUV" + OpName %uCombinedSamplers "uCombinedSamplers" + OpName %UBO "UBO" + OpMemberName %UBO 0 "v" + OpName %ubos "ubos" + OpName %SSBO "SSBO" + OpMemberName %SSBO 0 "v" + OpName %ssbos "ssbos" + OpDecorate %vIndex Flat + OpDecorate %vIndex Location 0 + OpDecorate %FragColor Location 0 + OpDecorate %uSamplers DescriptorSet 0 + OpDecorate %uSamplers Binding 0 + + OpDecorate %sampled_image NonUniformEXT + OpDecorate %combined_sampler NonUniformEXT + OpDecorate %ubo_ptr_copy NonUniformEXT + OpDecorate %ssbo_ptr_copy NonUniformEXT + + OpDecorate %uSamps DescriptorSet 1 + OpDecorate %uSamps Binding 0 + OpDecorate %vUV Location 1 + OpDecorate %uCombinedSamplers DescriptorSet 0 + OpDecorate %uCombinedSamplers Binding 4 + OpDecorate %_arr_v4float_uint_64 ArrayStride 16 + OpMemberDecorate %UBO 0 Offset 0 + OpDecorate %UBO Block + OpDecorate %ubos DescriptorSet 2 + OpDecorate %ubos Binding 0 + OpDecorate %_runtimearr_v4float ArrayStride 16 + OpMemberDecorate %SSBO 0 NonWritable + OpMemberDecorate %SSBO 0 Offset 0 + OpDecorate %SSBO BufferBlock + OpDecorate %ssbos DescriptorSet 3 + OpDecorate %ssbos Binding 0 + %void = OpTypeVoid + %3 = OpTypeFunction %void + %int = OpTypeInt 32 1 +%_ptr_Function_int = OpTypePointer Function %int +%_ptr_Input_int = OpTypePointer Input %int + %vIndex = OpVariable %_ptr_Input_int Input + %float = OpTypeFloat 32 + %v4float = OpTypeVector %float 4 +%_ptr_Output_v4float = OpTypePointer Output %v4float + %FragColor = OpVariable %_ptr_Output_v4float Output + %16 = OpTypeImage %float 2D 0 0 0 1 Unknown +%_runtimearr_16 = OpTypeRuntimeArray %16 +%_ptr_UniformConstant__runtimearr_16 = OpTypePointer UniformConstant %_runtimearr_16 + %uSamplers = OpVariable %_ptr_UniformConstant__runtimearr_16 UniformConstant + %int_10 = OpConstant %int 10 +%_ptr_UniformConstant_16 = OpTypePointer UniformConstant %16 + %27 = OpTypeSampler +%_runtimearr_27 = OpTypeRuntimeArray %27 +%_ptr_UniformConstant__runtimearr_27 = OpTypePointer UniformConstant %_runtimearr_27 + %uSamps = OpVariable %_ptr_UniformConstant__runtimearr_27 UniformConstant + %int_40 = OpConstant %int 40 +%_ptr_UniformConstant_27 = OpTypePointer UniformConstant %27 + %38 = OpTypeSampledImage %16 + %v2float = OpTypeVector %float 2 +%_ptr_Input_v2float = OpTypePointer Input %v2float + %vUV = OpVariable %_ptr_Input_v2float Input +%_runtimearr_38 = OpTypeRuntimeArray %38 +%_ptr_UniformConstant__runtimearr_38 = OpTypePointer UniformConstant %_runtimearr_38 +%uCombinedSamplers = OpVariable %_ptr_UniformConstant__runtimearr_38 UniformConstant +%_ptr_UniformConstant_38 = OpTypePointer UniformConstant %38 + %uint = OpTypeInt 32 0 + %uint_64 = OpConstant %uint 64 +%_arr_v4float_uint_64 = OpTypeArray %v4float %uint_64 + %UBO = OpTypeStruct %_arr_v4float_uint_64 +%_runtimearr_UBO = OpTypeRuntimeArray %UBO +%_ptr_Uniform__runtimearr_UBO = OpTypePointer Uniform %_runtimearr_UBO + %ubos = OpVariable %_ptr_Uniform__runtimearr_UBO Uniform + %int_20 = OpConstant %int 20 + %int_0 = OpConstant %int 0 +%_ptr_Uniform_v4float = OpTypePointer Uniform %v4float +%_runtimearr_v4float = OpTypeRuntimeArray %v4float + %SSBO = OpTypeStruct %_runtimearr_v4float +%_runtimearr_SSBO = OpTypeRuntimeArray %SSBO +%_ptr_Uniform__runtimearr_SSBO = OpTypePointer Uniform %_runtimearr_SSBO + %ssbos = OpVariable %_ptr_Uniform__runtimearr_SSBO Uniform + %int_50 = OpConstant %int 50 + %int_60 = OpConstant %int 60 + %main = OpFunction %void None %3 + %5 = OpLabel + %i = OpVariable %_ptr_Function_int Function + %11 = OpLoad %int %vIndex + OpStore %i %11 + %20 = OpLoad %int %i + %22 = OpIAdd %int %20 %int_10 + %23 = OpCopyObject %int %22 + %25 = OpAccessChain %_ptr_UniformConstant_16 %uSamplers %23 + %26 = OpLoad %16 %25 + %31 = OpLoad %int %i + %33 = OpIAdd %int %31 %int_40 + %34 = OpCopyObject %int %33 + %36 = OpAccessChain %_ptr_UniformConstant_27 %uSamps %34 + %37 = OpLoad %27 %36 + %sampled_image = OpSampledImage %38 %26 %37 + %43 = OpLoad %v2float %vUV + %44 = OpImageSampleImplicitLod %v4float %sampled_image %43 + OpStore %FragColor %44 + %48 = OpLoad %int %i + %49 = OpIAdd %int %48 %int_10 + %50 = OpCopyObject %int %49 + %52 = OpAccessChain %_ptr_UniformConstant_38 %uCombinedSamplers %50 + %combined_sampler = OpLoad %38 %52 + %54 = OpLoad %v2float %vUV + %55 = OpImageSampleImplicitLod %v4float %combined_sampler %54 + OpStore %FragColor %55 + %63 = OpLoad %int %i + %65 = OpIAdd %int %63 %int_20 + %66 = OpCopyObject %int %65 + %68 = OpLoad %int %i + %69 = OpIAdd %int %68 %int_40 + %70 = OpCopyObject %int %69 + %ubo_ptr = OpAccessChain %_ptr_Uniform_v4float %ubos %66 %int_0 %70 + %ubo_ptr_copy = OpCopyObject %_ptr_Uniform_v4float %ubo_ptr + %73 = OpLoad %v4float %ubo_ptr_copy + %74 = OpLoad %v4float %FragColor + %75 = OpFAdd %v4float %74 %73 + OpStore %FragColor %75 + %81 = OpLoad %int %i + %83 = OpIAdd %int %81 %int_50 + %84 = OpCopyObject %int %83 + %85 = OpLoad %int %i + %87 = OpIAdd %int %85 %int_60 + %88 = OpCopyObject %int %87 + %ssbo_ptr = OpAccessChain %_ptr_Uniform_v4float %ssbos %84 %int_0 %88 + %ssbo_ptr_copy = OpCopyObject %_ptr_Uniform_v4float %ssbo_ptr + %90 = OpLoad %v4float %ssbo_ptr_copy + %91 = OpLoad %v4float %FragColor + %92 = OpFAdd %v4float %91 %90 + OpStore %FragColor %92 + OpReturn + OpFunctionEnd diff --git a/shaders-hlsl/asm/frag/texture-sampling-fp16.asm.frag b/shaders-hlsl/asm/frag/texture-sampling-fp16.asm.frag new file mode 100644 index 0000000000..ae7a972d7b --- /dev/null +++ b/shaders-hlsl/asm/frag/texture-sampling-fp16.asm.frag @@ -0,0 +1,46 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 7 +; Bound: 25 +; Schema: 0 + OpCapability Shader + OpCapability StorageInputOutput16 + OpExtension "SPV_KHR_16bit_storage" + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint Fragment %main "main" %FragColor %UV + OpExecutionMode %main OriginUpperLeft + OpSource GLSL 450 + OpSourceExtension "GL_EXT_shader_explicit_arithmetic_types_float16" + OpName %main "main" + OpName %FragColor "FragColor" + OpName %uTexture "uTexture" + OpName %UV "UV" + OpDecorate %FragColor Location 0 + OpDecorate %uTexture DescriptorSet 0 + OpDecorate %uTexture Binding 0 + OpDecorate %UV Location 0 + %void = OpTypeVoid + %3 = OpTypeFunction %void + %half = OpTypeFloat 16 + %float = OpTypeFloat 32 + %v4half = OpTypeVector %half 4 + %v4float = OpTypeVector %float 4 +%_ptr_Output_v4half = OpTypePointer Output %v4half + %FragColor = OpVariable %_ptr_Output_v4half Output + %11 = OpTypeImage %float 2D 0 0 0 1 Unknown + %12 = OpTypeSampledImage %11 +%_ptr_UniformConstant_12 = OpTypePointer UniformConstant %12 + %uTexture = OpVariable %_ptr_UniformConstant_12 UniformConstant + %v2half = OpTypeVector %half 2 +%_ptr_Input_v2half = OpTypePointer Input %v2half + %UV = OpVariable %_ptr_Input_v2half Input + %main = OpFunction %void None %3 + %5 = OpLabel + %15 = OpLoad %12 %uTexture + %19 = OpLoad %v2half %UV + %23 = OpImageSampleImplicitLod %v4float %15 %19 + %24 = OpFConvert %v4half %23 + OpStore %FragColor %24 + OpReturn + OpFunctionEnd diff --git a/shaders-hlsl/comp/outer-product.comp b/shaders-hlsl/comp/outer-product.comp new file mode 100644 index 0000000000..9aba2a54b7 --- /dev/null +++ b/shaders-hlsl/comp/outer-product.comp @@ -0,0 +1,37 @@ +#version 450 +layout(local_size_x = 1) in; + +layout(set = 0, binding = 0, std430) writeonly buffer SSBO +{ + mat2 m22; + mat2x3 m23; + mat2x4 m24; + mat3x2 m32; + mat3 m33; + mat3x4 m34; + mat4x2 m42; + mat4x3 m43; + mat4 m44; +}; + +layout(set = 0, binding = 1, std430) readonly buffer ReadSSBO +{ + vec2 v2; + vec3 v3; + vec4 v4; +}; + +void main() +{ + m22 = outerProduct(v2, v2); + m23 = outerProduct(v3, v2); + m24 = outerProduct(v4, v2); + + m32 = outerProduct(v2, v3); + m33 = outerProduct(v3, v3); + m34 = outerProduct(v4, v3); + + m42 = outerProduct(v2, v4); + m43 = outerProduct(v3, v4); + m44 = outerProduct(v4, v4); +} diff --git a/shaders-hlsl/comp/scalar-std450-distance-length-normalize.comp b/shaders-hlsl/comp/scalar-std450-distance-length-normalize.comp new file mode 100644 index 0000000000..37414737fd --- /dev/null +++ b/shaders-hlsl/comp/scalar-std450-distance-length-normalize.comp @@ -0,0 +1,18 @@ +#version 450 +layout(local_size_x = 1) in; + +layout(std430, set = 0, binding = 0) buffer SSBO +{ + float a; + float b; + float c; + float d; + float e; +}; + +void main() +{ + c = distance(a, b); + d = length(a); + e = normalize(a); +} diff --git a/shaders-hlsl/frag/for-loop-continue-control-flow.frag b/shaders-hlsl/frag/for-loop-continue-control-flow.frag new file mode 100644 index 0000000000..1f91cca2a4 --- /dev/null +++ b/shaders-hlsl/frag/for-loop-continue-control-flow.frag @@ -0,0 +1,11 @@ +#version 450 +layout(location = 0) out vec4 FragColor; +void main() +{ + FragColor = vec4(0.0); + for (int i = 0; i < 3; (0 > 1) ? 1 : i ++) + { + int a = i; + FragColor[a] += float(i); + } +} diff --git a/shaders-hlsl/frag/scalar-refract-reflect.frag b/shaders-hlsl/frag/scalar-refract-reflect.frag new file mode 100644 index 0000000000..486ed90bd5 --- /dev/null +++ b/shaders-hlsl/frag/scalar-refract-reflect.frag @@ -0,0 +1,11 @@ +#version 450 +layout(location = 0) out float FragColor; +layout(location = 0) in vec3 vRefract; + +void main() +{ + FragColor = refract(vRefract.x, vRefract.y, vRefract.z); + FragColor += reflect(vRefract.x, vRefract.y); + FragColor += refract(vRefract.xy, vRefract.yz, vRefract.z).y; + FragColor += reflect(vRefract.xy, vRefract.zy).y; +} diff --git a/shaders-msl/asm/frag/texture-sampling-fp16.asm.frag b/shaders-msl/asm/frag/texture-sampling-fp16.asm.frag new file mode 100644 index 0000000000..ae7a972d7b --- /dev/null +++ b/shaders-msl/asm/frag/texture-sampling-fp16.asm.frag @@ -0,0 +1,46 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 7 +; Bound: 25 +; Schema: 0 + OpCapability Shader + OpCapability StorageInputOutput16 + OpExtension "SPV_KHR_16bit_storage" + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint Fragment %main "main" %FragColor %UV + OpExecutionMode %main OriginUpperLeft + OpSource GLSL 450 + OpSourceExtension "GL_EXT_shader_explicit_arithmetic_types_float16" + OpName %main "main" + OpName %FragColor "FragColor" + OpName %uTexture "uTexture" + OpName %UV "UV" + OpDecorate %FragColor Location 0 + OpDecorate %uTexture DescriptorSet 0 + OpDecorate %uTexture Binding 0 + OpDecorate %UV Location 0 + %void = OpTypeVoid + %3 = OpTypeFunction %void + %half = OpTypeFloat 16 + %float = OpTypeFloat 32 + %v4half = OpTypeVector %half 4 + %v4float = OpTypeVector %float 4 +%_ptr_Output_v4half = OpTypePointer Output %v4half + %FragColor = OpVariable %_ptr_Output_v4half Output + %11 = OpTypeImage %float 2D 0 0 0 1 Unknown + %12 = OpTypeSampledImage %11 +%_ptr_UniformConstant_12 = OpTypePointer UniformConstant %12 + %uTexture = OpVariable %_ptr_UniformConstant_12 UniformConstant + %v2half = OpTypeVector %half 2 +%_ptr_Input_v2half = OpTypePointer Input %v2half + %UV = OpVariable %_ptr_Input_v2half Input + %main = OpFunction %void None %3 + %5 = OpLabel + %15 = OpLoad %12 %uTexture + %19 = OpLoad %v2half %UV + %23 = OpImageSampleImplicitLod %v4float %15 %19 + %24 = OpFConvert %v4half %23 + OpStore %FragColor %24 + OpReturn + OpFunctionEnd diff --git a/shaders-msl/comp/outer-product.comp b/shaders-msl/comp/outer-product.comp new file mode 100644 index 0000000000..9aba2a54b7 --- /dev/null +++ b/shaders-msl/comp/outer-product.comp @@ -0,0 +1,37 @@ +#version 450 +layout(local_size_x = 1) in; + +layout(set = 0, binding = 0, std430) writeonly buffer SSBO +{ + mat2 m22; + mat2x3 m23; + mat2x4 m24; + mat3x2 m32; + mat3 m33; + mat3x4 m34; + mat4x2 m42; + mat4x3 m43; + mat4 m44; +}; + +layout(set = 0, binding = 1, std430) readonly buffer ReadSSBO +{ + vec2 v2; + vec3 v3; + vec4 v4; +}; + +void main() +{ + m22 = outerProduct(v2, v2); + m23 = outerProduct(v3, v2); + m24 = outerProduct(v4, v2); + + m32 = outerProduct(v2, v3); + m33 = outerProduct(v3, v3); + m34 = outerProduct(v4, v3); + + m42 = outerProduct(v2, v4); + m43 = outerProduct(v3, v4); + m44 = outerProduct(v4, v4); +} diff --git a/shaders-msl/comp/scalar-std450-distance-length-normalize.comp b/shaders-msl/comp/scalar-std450-distance-length-normalize.comp new file mode 100644 index 0000000000..37414737fd --- /dev/null +++ b/shaders-msl/comp/scalar-std450-distance-length-normalize.comp @@ -0,0 +1,18 @@ +#version 450 +layout(local_size_x = 1) in; + +layout(std430, set = 0, binding = 0) buffer SSBO +{ + float a; + float b; + float c; + float d; + float e; +}; + +void main() +{ + c = distance(a, b); + d = length(a); + e = normalize(a); +} diff --git a/shaders-msl/frag/for-loop-continue-control-flow.frag b/shaders-msl/frag/for-loop-continue-control-flow.frag new file mode 100644 index 0000000000..1f91cca2a4 --- /dev/null +++ b/shaders-msl/frag/for-loop-continue-control-flow.frag @@ -0,0 +1,11 @@ +#version 450 +layout(location = 0) out vec4 FragColor; +void main() +{ + FragColor = vec4(0.0); + for (int i = 0; i < 3; (0 > 1) ? 1 : i ++) + { + int a = i; + FragColor[a] += float(i); + } +} diff --git a/shaders-msl/frag/scalar-refract-reflect.frag b/shaders-msl/frag/scalar-refract-reflect.frag new file mode 100644 index 0000000000..486ed90bd5 --- /dev/null +++ b/shaders-msl/frag/scalar-refract-reflect.frag @@ -0,0 +1,11 @@ +#version 450 +layout(location = 0) out float FragColor; +layout(location = 0) in vec3 vRefract; + +void main() +{ + FragColor = refract(vRefract.x, vRefract.y, vRefract.z); + FragColor += reflect(vRefract.x, vRefract.y); + FragColor += refract(vRefract.xy, vRefract.yz, vRefract.z).y; + FragColor += reflect(vRefract.xy, vRefract.zy).y; +} diff --git a/shaders-msl/frag/subgroup-builtins.msl22.frag b/shaders-msl/frag/subgroup-builtins.msl22.frag new file mode 100644 index 0000000000..746438f00f --- /dev/null +++ b/shaders-msl/frag/subgroup-builtins.msl22.frag @@ -0,0 +1,10 @@ +#version 450 +#extension GL_KHR_shader_subgroup_basic : require + +layout(location = 0) out uvec2 FragColor; + +void main() +{ + FragColor.x = gl_SubgroupSize; + FragColor.y = gl_SubgroupInvocationID; +} diff --git a/shaders-msl/vulkan/frag/basic.multiview.nocompat.vk.frag b/shaders-msl/vulkan/frag/basic.multiview.nocompat.vk.frag new file mode 100644 index 0000000000..963493b871 --- /dev/null +++ b/shaders-msl/vulkan/frag/basic.multiview.nocompat.vk.frag @@ -0,0 +1,14 @@ +#version 310 es +#extension GL_EXT_multiview : require +precision mediump float; + +layout(location = 0) in vec4 vColor; +layout(location = 1) in vec2 vTex[4]; +layout(binding = 0) uniform sampler2D uTex; +layout(location = 0) out vec4 FragColor; + +void main() +{ + FragColor = vColor * texture(uTex, vTex[gl_ViewIndex]); +} + diff --git a/shaders-msl/vulkan/frag/subgroups.nocompat.invalid.vk.msl21.frag b/shaders-msl/vulkan/frag/subgroups.nocompat.invalid.vk.msl21.frag new file mode 100644 index 0000000000..3a2cf0234c --- /dev/null +++ b/shaders-msl/vulkan/frag/subgroups.nocompat.invalid.vk.msl21.frag @@ -0,0 +1,119 @@ +#version 450 +#extension GL_KHR_shader_subgroup_basic : require +#extension GL_KHR_shader_subgroup_ballot : require +#extension GL_KHR_shader_subgroup_vote : require +#extension GL_KHR_shader_subgroup_shuffle : require +#extension GL_KHR_shader_subgroup_shuffle_relative : require +#extension GL_KHR_shader_subgroup_arithmetic : require +#extension GL_KHR_shader_subgroup_clustered : require +#extension GL_KHR_shader_subgroup_quad : require + +layout(location = 0) out float FragColor; + +void main() +{ + // basic + FragColor = float(gl_SubgroupSize); + FragColor = float(gl_SubgroupInvocationID); + subgroupBarrier(); + subgroupMemoryBarrier(); + subgroupMemoryBarrierBuffer(); + subgroupMemoryBarrierImage(); + bool elected = subgroupElect(); + + // ballot + FragColor = float(gl_SubgroupEqMask); + FragColor = float(gl_SubgroupGeMask); + FragColor = float(gl_SubgroupGtMask); + FragColor = float(gl_SubgroupLeMask); + FragColor = float(gl_SubgroupLtMask); + vec4 broadcasted = subgroupBroadcast(vec4(10.0), 8u); + vec3 first = subgroupBroadcastFirst(vec3(20.0)); + uvec4 ballot_value = subgroupBallot(true); + bool inverse_ballot_value = subgroupInverseBallot(ballot_value); + bool bit_extracted = subgroupBallotBitExtract(uvec4(10u), 8u); + uint bit_count = subgroupBallotBitCount(ballot_value); + uint inclusive_bit_count = subgroupBallotInclusiveBitCount(ballot_value); + uint exclusive_bit_count = subgroupBallotExclusiveBitCount(ballot_value); + uint lsb = subgroupBallotFindLSB(ballot_value); + uint msb = subgroupBallotFindMSB(ballot_value); + + // shuffle + uint shuffled = subgroupShuffle(10u, 8u); + uint shuffled_xor = subgroupShuffleXor(30u, 8u); + + // shuffle relative + uint shuffled_up = subgroupShuffleUp(20u, 4u); + uint shuffled_down = subgroupShuffleDown(20u, 4u); + + // vote + bool has_all = subgroupAll(true); + bool has_any = subgroupAny(true); + bool has_equal = subgroupAllEqual(0); + has_equal = subgroupAllEqual(true); + + // arithmetic + vec4 added = subgroupAdd(vec4(20.0)); + ivec4 iadded = subgroupAdd(ivec4(20)); + vec4 multiplied = subgroupMul(vec4(20.0)); + ivec4 imultiplied = subgroupMul(ivec4(20)); + vec4 lo = subgroupMin(vec4(20.0)); + vec4 hi = subgroupMax(vec4(20.0)); + ivec4 slo = subgroupMin(ivec4(20)); + ivec4 shi = subgroupMax(ivec4(20)); + uvec4 ulo = subgroupMin(uvec4(20)); + uvec4 uhi = subgroupMax(uvec4(20)); + uvec4 anded = subgroupAnd(ballot_value); + uvec4 ored = subgroupOr(ballot_value); + uvec4 xored = subgroupXor(ballot_value); + + added = subgroupInclusiveAdd(added); + iadded = subgroupInclusiveAdd(iadded); + multiplied = subgroupInclusiveMul(multiplied); + imultiplied = subgroupInclusiveMul(imultiplied); + //lo = subgroupInclusiveMin(lo); // FIXME: Unsupported by Metal + //hi = subgroupInclusiveMax(hi); + //slo = subgroupInclusiveMin(slo); + //shi = subgroupInclusiveMax(shi); + //ulo = subgroupInclusiveMin(ulo); + //uhi = subgroupInclusiveMax(uhi); + //anded = subgroupInclusiveAnd(anded); + //ored = subgroupInclusiveOr(ored); + //xored = subgroupInclusiveXor(ored); + //added = subgroupExclusiveAdd(lo); + + added = subgroupExclusiveAdd(multiplied); + multiplied = subgroupExclusiveMul(multiplied); + iadded = subgroupExclusiveAdd(imultiplied); + imultiplied = subgroupExclusiveMul(imultiplied); + //lo = subgroupExclusiveMin(lo); // FIXME: Unsupported by Metal + //hi = subgroupExclusiveMax(hi); + //ulo = subgroupExclusiveMin(ulo); + //uhi = subgroupExclusiveMax(uhi); + //slo = subgroupExclusiveMin(slo); + //shi = subgroupExclusiveMax(shi); + //anded = subgroupExclusiveAnd(anded); + //ored = subgroupExclusiveOr(ored); + //xored = subgroupExclusiveXor(ored); + + // clustered + added = subgroupClusteredAdd(added, 4u); + multiplied = subgroupClusteredMul(multiplied, 4u); + iadded = subgroupClusteredAdd(iadded, 4u); + imultiplied = subgroupClusteredMul(imultiplied, 4u); + lo = subgroupClusteredMin(lo, 4u); + hi = subgroupClusteredMax(hi, 4u); + ulo = subgroupClusteredMin(ulo, 4u); + uhi = subgroupClusteredMax(uhi, 4u); + slo = subgroupClusteredMin(slo, 4u); + shi = subgroupClusteredMax(shi, 4u); + anded = subgroupClusteredAnd(anded, 4u); + ored = subgroupClusteredOr(ored, 4u); + xored = subgroupClusteredXor(xored, 4u); + + // quad + vec4 swap_horiz = subgroupQuadSwapHorizontal(vec4(20.0)); + vec4 swap_vertical = subgroupQuadSwapVertical(vec4(20.0)); + vec4 swap_diagonal = subgroupQuadSwapDiagonal(vec4(20.0)); + vec4 quad_broadcast = subgroupQuadBroadcast(vec4(20.0), 3u); +} diff --git a/shaders-msl/vulkan/vert/multiview.multiview.nocompat.vk.vert b/shaders-msl/vulkan/vert/multiview.multiview.nocompat.vk.vert new file mode 100644 index 0000000000..eb1bc766f2 --- /dev/null +++ b/shaders-msl/vulkan/vert/multiview.multiview.nocompat.vk.vert @@ -0,0 +1,14 @@ +#version 310 es +#extension GL_EXT_multiview : require + +layout(std140, binding = 0) uniform MVPs +{ + mat4 MVP[2]; +}; + +layout(location = 0) in vec4 Position; + +void main() +{ + gl_Position = MVP[gl_ViewIndex] * Position; +} diff --git a/shaders-msl/vulkan/vert/multiview.nocompat.vk.vert b/shaders-msl/vulkan/vert/multiview.nocompat.vk.vert new file mode 100644 index 0000000000..eb1bc766f2 --- /dev/null +++ b/shaders-msl/vulkan/vert/multiview.nocompat.vk.vert @@ -0,0 +1,14 @@ +#version 310 es +#extension GL_EXT_multiview : require + +layout(std140, binding = 0) uniform MVPs +{ + mat4 MVP[2]; +}; + +layout(location = 0) in vec4 Position; + +void main() +{ + gl_Position = MVP[gl_ViewIndex] * Position; +} diff --git a/shaders-no-opt/asm/frag/nonuniform-qualifier-propagation.vk.nocompat.asm.frag b/shaders-no-opt/asm/frag/nonuniform-qualifier-propagation.vk.nocompat.asm.frag new file mode 100644 index 0000000000..e2d4562a8a --- /dev/null +++ b/shaders-no-opt/asm/frag/nonuniform-qualifier-propagation.vk.nocompat.asm.frag @@ -0,0 +1,159 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 7 +; Bound: 93 +; Schema: 0 + OpCapability Shader + OpCapability ShaderNonUniformEXT + OpCapability RuntimeDescriptorArrayEXT + OpCapability UniformBufferArrayNonUniformIndexingEXT + OpCapability SampledImageArrayNonUniformIndexingEXT + OpCapability StorageBufferArrayNonUniformIndexingEXT + OpExtension "SPV_EXT_descriptor_indexing" + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint Fragment %main "main" %vIndex %FragColor %vUV + OpExecutionMode %main OriginUpperLeft + OpSource GLSL 450 + OpSourceExtension "GL_EXT_nonuniform_qualifier" + OpName %main "main" + OpName %i "i" + OpName %vIndex "vIndex" + OpName %FragColor "FragColor" + OpName %uSamplers "uSamplers" + OpName %uSamps "uSamps" + OpName %vUV "vUV" + OpName %uCombinedSamplers "uCombinedSamplers" + OpName %UBO "UBO" + OpMemberName %UBO 0 "v" + OpName %ubos "ubos" + OpName %SSBO "SSBO" + OpMemberName %SSBO 0 "v" + OpName %ssbos "ssbos" + OpDecorate %vIndex Flat + OpDecorate %vIndex Location 0 + OpDecorate %FragColor Location 0 + OpDecorate %uSamplers DescriptorSet 0 + OpDecorate %uSamplers Binding 0 + + OpDecorate %sampled_image NonUniformEXT + OpDecorate %combined_sampler NonUniformEXT + OpDecorate %ubo_ptr_copy NonUniformEXT + OpDecorate %ssbo_ptr_copy NonUniformEXT + + OpDecorate %uSamps DescriptorSet 0 + OpDecorate %uSamps Binding 1 + OpDecorate %vUV Location 1 + OpDecorate %uCombinedSamplers DescriptorSet 0 + OpDecorate %uCombinedSamplers Binding 4 + OpDecorate %_arr_v4float_uint_64 ArrayStride 16 + OpMemberDecorate %UBO 0 Offset 0 + OpDecorate %UBO Block + OpDecorate %ubos DescriptorSet 0 + OpDecorate %ubos Binding 2 + OpDecorate %_runtimearr_v4float ArrayStride 16 + OpMemberDecorate %SSBO 0 NonWritable + OpMemberDecorate %SSBO 0 Offset 0 + OpDecorate %SSBO BufferBlock + OpDecorate %ssbos DescriptorSet 0 + OpDecorate %ssbos Binding 3 + %void = OpTypeVoid + %3 = OpTypeFunction %void + %int = OpTypeInt 32 1 +%_ptr_Function_int = OpTypePointer Function %int +%_ptr_Input_int = OpTypePointer Input %int + %vIndex = OpVariable %_ptr_Input_int Input + %float = OpTypeFloat 32 + %v4float = OpTypeVector %float 4 +%_ptr_Output_v4float = OpTypePointer Output %v4float + %FragColor = OpVariable %_ptr_Output_v4float Output + %16 = OpTypeImage %float 2D 0 0 0 1 Unknown +%_runtimearr_16 = OpTypeRuntimeArray %16 +%_ptr_UniformConstant__runtimearr_16 = OpTypePointer UniformConstant %_runtimearr_16 + %uSamplers = OpVariable %_ptr_UniformConstant__runtimearr_16 UniformConstant + %int_10 = OpConstant %int 10 +%_ptr_UniformConstant_16 = OpTypePointer UniformConstant %16 + %27 = OpTypeSampler +%_runtimearr_27 = OpTypeRuntimeArray %27 +%_ptr_UniformConstant__runtimearr_27 = OpTypePointer UniformConstant %_runtimearr_27 + %uSamps = OpVariable %_ptr_UniformConstant__runtimearr_27 UniformConstant + %int_40 = OpConstant %int 40 +%_ptr_UniformConstant_27 = OpTypePointer UniformConstant %27 + %38 = OpTypeSampledImage %16 + %v2float = OpTypeVector %float 2 +%_ptr_Input_v2float = OpTypePointer Input %v2float + %vUV = OpVariable %_ptr_Input_v2float Input +%_runtimearr_38 = OpTypeRuntimeArray %38 +%_ptr_UniformConstant__runtimearr_38 = OpTypePointer UniformConstant %_runtimearr_38 +%uCombinedSamplers = OpVariable %_ptr_UniformConstant__runtimearr_38 UniformConstant +%_ptr_UniformConstant_38 = OpTypePointer UniformConstant %38 + %uint = OpTypeInt 32 0 + %uint_64 = OpConstant %uint 64 +%_arr_v4float_uint_64 = OpTypeArray %v4float %uint_64 + %UBO = OpTypeStruct %_arr_v4float_uint_64 +%_runtimearr_UBO = OpTypeRuntimeArray %UBO +%_ptr_Uniform__runtimearr_UBO = OpTypePointer Uniform %_runtimearr_UBO + %ubos = OpVariable %_ptr_Uniform__runtimearr_UBO Uniform + %int_20 = OpConstant %int 20 + %int_0 = OpConstant %int 0 +%_ptr_Uniform_v4float = OpTypePointer Uniform %v4float +%_runtimearr_v4float = OpTypeRuntimeArray %v4float + %SSBO = OpTypeStruct %_runtimearr_v4float +%_runtimearr_SSBO = OpTypeRuntimeArray %SSBO +%_ptr_Uniform__runtimearr_SSBO = OpTypePointer Uniform %_runtimearr_SSBO + %ssbos = OpVariable %_ptr_Uniform__runtimearr_SSBO Uniform + %int_50 = OpConstant %int 50 + %int_60 = OpConstant %int 60 + %main = OpFunction %void None %3 + %5 = OpLabel + %i = OpVariable %_ptr_Function_int Function + %11 = OpLoad %int %vIndex + OpStore %i %11 + %20 = OpLoad %int %i + %22 = OpIAdd %int %20 %int_10 + %23 = OpCopyObject %int %22 + %25 = OpAccessChain %_ptr_UniformConstant_16 %uSamplers %23 + %26 = OpLoad %16 %25 + %31 = OpLoad %int %i + %33 = OpIAdd %int %31 %int_40 + %34 = OpCopyObject %int %33 + %36 = OpAccessChain %_ptr_UniformConstant_27 %uSamps %34 + %37 = OpLoad %27 %36 + %sampled_image = OpSampledImage %38 %26 %37 + %43 = OpLoad %v2float %vUV + %44 = OpImageSampleImplicitLod %v4float %sampled_image %43 + OpStore %FragColor %44 + %48 = OpLoad %int %i + %49 = OpIAdd %int %48 %int_10 + %50 = OpCopyObject %int %49 + %52 = OpAccessChain %_ptr_UniformConstant_38 %uCombinedSamplers %50 + %combined_sampler = OpLoad %38 %52 + %54 = OpLoad %v2float %vUV + %55 = OpImageSampleImplicitLod %v4float %combined_sampler %54 + OpStore %FragColor %55 + %63 = OpLoad %int %i + %65 = OpIAdd %int %63 %int_20 + %66 = OpCopyObject %int %65 + %68 = OpLoad %int %i + %69 = OpIAdd %int %68 %int_40 + %70 = OpCopyObject %int %69 + %ubo_ptr = OpAccessChain %_ptr_Uniform_v4float %ubos %66 %int_0 %70 + %ubo_ptr_copy = OpCopyObject %_ptr_Uniform_v4float %ubo_ptr + %73 = OpLoad %v4float %ubo_ptr_copy + %74 = OpLoad %v4float %FragColor + %75 = OpFAdd %v4float %74 %73 + OpStore %FragColor %75 + %81 = OpLoad %int %i + %83 = OpIAdd %int %81 %int_50 + %84 = OpCopyObject %int %83 + %85 = OpLoad %int %i + %87 = OpIAdd %int %85 %int_60 + %88 = OpCopyObject %int %87 + %ssbo_ptr = OpAccessChain %_ptr_Uniform_v4float %ssbos %84 %int_0 %88 + %ssbo_ptr_copy = OpCopyObject %_ptr_Uniform_v4float %ssbo_ptr + %90 = OpLoad %v4float %ssbo_ptr_copy + %91 = OpLoad %v4float %FragColor + %92 = OpFAdd %v4float %91 %90 + OpStore %FragColor %92 + OpReturn + OpFunctionEnd diff --git a/shaders/asm/comp/phi-temporary-copy-loop-variable.asm.comp b/shaders/asm/comp/phi-temporary-copy-loop-variable.asm.comp new file mode 100644 index 0000000000..611591246c --- /dev/null +++ b/shaders/asm/comp/phi-temporary-copy-loop-variable.asm.comp @@ -0,0 +1,68 @@ +; SPIR-V +; Version: 1.0 +; Generator: Google spiregg; 0 +; Bound: 42 +; Schema: 0 + OpCapability Shader + OpMemoryModel Logical GLSL450 + OpEntryPoint GLCompute %cs_test "main" %gl_GlobalInvocationID %gl_LocalInvocationIndex + OpExecutionMode %cs_test LocalSize 8 8 1 + OpSource HLSL 600 + OpName %type_2d_image "type.2d.image" + OpName %outImageTexture "outImageTexture" + OpName %cs_test "cs_test" + OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId + OpDecorate %gl_LocalInvocationIndex BuiltIn LocalInvocationIndex + OpDecorate %outImageTexture DescriptorSet 0 + OpDecorate %outImageTexture Binding 1 + %float = OpTypeFloat 32 + %float_5 = OpConstant %float 5 + %float_1 = OpConstant %float 1 + %int = OpTypeInt 32 1 + %int_7 = OpConstant %int 7 + %int_0 = OpConstant %int 0 + %int_1 = OpConstant %int 1 +%type_2d_image = OpTypeImage %float 2D 2 0 0 2 Rgba32f +%_ptr_UniformConstant_type_2d_image = OpTypePointer UniformConstant %type_2d_image + %uint = OpTypeInt 32 0 + %v3uint = OpTypeVector %uint 3 +%_ptr_Input_v3uint = OpTypePointer Input %v3uint +%_ptr_Input_uint = OpTypePointer Input %uint + %void = OpTypeVoid + %19 = OpTypeFunction %void + %v2uint = OpTypeVector %uint 2 + %v4float = OpTypeVector %float 4 + %bool = OpTypeBool +%outImageTexture = OpVariable %_ptr_UniformConstant_type_2d_image UniformConstant +%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input +%gl_LocalInvocationIndex = OpVariable %_ptr_Input_uint Input + %cs_test = OpFunction %void None %19 + %23 = OpLabel + %24 = OpLoad %v3uint %gl_GlobalInvocationID + %25 = OpVectorShuffle %v2uint %24 %24 0 1 + OpBranch %26 + %26 = OpLabel + %27 = OpPhi %int %int_7 %23 %28 %29 + %30 = OpPhi %int %int_7 %23 %27 %29 + %31 = OpSGreaterThanEqual %bool %27 %int_0 + OpLoopMerge %32 %29 None + OpBranchConditional %31 %33 %32 + %33 = OpLabel + %34 = OpConvertSToF %float %27 + %35 = OpFOrdGreaterThan %bool %float_5 %34 + OpSelectionMerge %29 None + OpBranchConditional %35 %36 %29 + %36 = OpLabel + OpBranch %32 + %29 = OpLabel + %28 = OpISub %int %27 %int_1 + OpBranch %26 + %32 = OpLabel + %37 = OpISub %int %30 %int_1 + %38 = OpConvertSToF %float %37 + %39 = OpConvertSToF %float %30 + %40 = OpCompositeConstruct %v4float %38 %39 %float_1 %float_1 + %41 = OpLoad %type_2d_image %outImageTexture + OpImageWrite %41 %25 %40 None + OpReturn + OpFunctionEnd diff --git a/shaders/asm/extended-debug-extinst.invalid.asm.comp b/shaders/asm/extended-debug-extinst.invalid.asm.comp new file mode 100644 index 0000000000..5b6a189a71 --- /dev/null +++ b/shaders/asm/extended-debug-extinst.invalid.asm.comp @@ -0,0 +1,67 @@ +; SPIR-V +; Version: 1.3 +; Generator: Khronos SPIR-V Tools Assembler; 0 +; Bound: 37 +; Schema: 0 + OpCapability Shader + %1 = OpExtInstImport "DebugInfo" + OpMemoryModel Logical GLSL450 + OpEntryPoint GLCompute %2 "main" %3 + OpExecutionMode %2 LocalSize 1 1 1 + %4 = OpString "negateInputs.comp" + %5 = OpString "negateInputs" + %6 = OpString "main" + %7 = OpString "" + OpSource GLSL 430 + OpName %2 "main" + OpName %3 "gl_GlobalInvocationID" + OpDecorate %3 BuiltIn GlobalInvocationId + OpDecorate %8 BufferBlock + OpDecorate %9 DescriptorSet 0 + OpDecorate %9 Binding 0 + OpDecorate %10 DescriptorSet 0 + OpDecorate %10 Binding 1 + OpDecorate %11 ArrayStride 4 + OpMemberDecorate %8 0 Offset 0 + OpLine %4 0 0 + %12 = OpTypeBool + %13 = OpTypeVoid + %14 = OpTypeFunction %13 + %15 = OpTypeInt 32 0 + %16 = OpTypeInt 32 1 + %17 = OpTypeFloat 32 + %18 = OpTypeVector %15 3 + %19 = OpTypeVector %17 3 + %20 = OpTypePointer Input %18 + %21 = OpTypePointer Uniform %16 + %22 = OpTypePointer Uniform %17 + %23 = OpTypeRuntimeArray %16 + %11 = OpTypeRuntimeArray %17 + %8 = OpTypeStruct %11 + %24 = OpTypePointer Uniform %8 + %9 = OpVariable %24 Uniform + %10 = OpVariable %24 Uniform + OpLine %4 0 1 + OpLine %5 1 0 + OpLine %4 1000 100000 + %3 = OpVariable %20 Input + %25 = OpConstant %16 0 + OpNoLine + OpLine %4 1 1 + %26 = OpExtInst %13 %1 DebugInfoNone + %27 = OpExtInst %13 %1 DebugTypeFunction %13 + %28 = OpExtInst %13 %1 DebugFunction %6 %27 %4 1 1 %4 %7 FlagIsDefinition|FlagPrototyped|FlagIsOptimized 1 %26 %26 + %2 = OpFunction %13 None %14 + %29 = OpLabel + %30 = OpExtInst %13 %1 DebugScope %28 + OpLine %4 1 1 + %31 = OpLoad %18 %3 + %32 = OpCompositeExtract %15 %31 0 + %33 = OpAccessChain %22 %9 %25 %32 + %34 = OpLoad %17 %33 + %35 = OpFNegate %17 %34 + %36 = OpAccessChain %22 %10 %25 %32 + OpStore %36 %35 + OpNoLine + OpReturn + OpFunctionEnd diff --git a/shaders/asm/frag/texture-sampling-fp16.asm.vk.frag b/shaders/asm/frag/texture-sampling-fp16.asm.vk.frag new file mode 100644 index 0000000000..ae7a972d7b --- /dev/null +++ b/shaders/asm/frag/texture-sampling-fp16.asm.vk.frag @@ -0,0 +1,46 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 7 +; Bound: 25 +; Schema: 0 + OpCapability Shader + OpCapability StorageInputOutput16 + OpExtension "SPV_KHR_16bit_storage" + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint Fragment %main "main" %FragColor %UV + OpExecutionMode %main OriginUpperLeft + OpSource GLSL 450 + OpSourceExtension "GL_EXT_shader_explicit_arithmetic_types_float16" + OpName %main "main" + OpName %FragColor "FragColor" + OpName %uTexture "uTexture" + OpName %UV "UV" + OpDecorate %FragColor Location 0 + OpDecorate %uTexture DescriptorSet 0 + OpDecorate %uTexture Binding 0 + OpDecorate %UV Location 0 + %void = OpTypeVoid + %3 = OpTypeFunction %void + %half = OpTypeFloat 16 + %float = OpTypeFloat 32 + %v4half = OpTypeVector %half 4 + %v4float = OpTypeVector %float 4 +%_ptr_Output_v4half = OpTypePointer Output %v4half + %FragColor = OpVariable %_ptr_Output_v4half Output + %11 = OpTypeImage %float 2D 0 0 0 1 Unknown + %12 = OpTypeSampledImage %11 +%_ptr_UniformConstant_12 = OpTypePointer UniformConstant %12 + %uTexture = OpVariable %_ptr_UniformConstant_12 UniformConstant + %v2half = OpTypeVector %half 2 +%_ptr_Input_v2half = OpTypePointer Input %v2half + %UV = OpVariable %_ptr_Input_v2half Input + %main = OpFunction %void None %3 + %5 = OpLabel + %15 = OpLoad %12 %uTexture + %19 = OpLoad %v2half %UV + %23 = OpImageSampleImplicitLod %v4float %15 %19 + %24 = OpFConvert %v4half %23 + OpStore %FragColor %24 + OpReturn + OpFunctionEnd diff --git a/shaders/comp/outer-product.comp b/shaders/comp/outer-product.comp new file mode 100644 index 0000000000..9aba2a54b7 --- /dev/null +++ b/shaders/comp/outer-product.comp @@ -0,0 +1,37 @@ +#version 450 +layout(local_size_x = 1) in; + +layout(set = 0, binding = 0, std430) writeonly buffer SSBO +{ + mat2 m22; + mat2x3 m23; + mat2x4 m24; + mat3x2 m32; + mat3 m33; + mat3x4 m34; + mat4x2 m42; + mat4x3 m43; + mat4 m44; +}; + +layout(set = 0, binding = 1, std430) readonly buffer ReadSSBO +{ + vec2 v2; + vec3 v3; + vec4 v4; +}; + +void main() +{ + m22 = outerProduct(v2, v2); + m23 = outerProduct(v3, v2); + m24 = outerProduct(v4, v2); + + m32 = outerProduct(v2, v3); + m33 = outerProduct(v3, v3); + m34 = outerProduct(v4, v3); + + m42 = outerProduct(v2, v4); + m43 = outerProduct(v3, v4); + m44 = outerProduct(v4, v4); +} diff --git a/shaders/comp/scalar-std450-distance-length-normalize.comp b/shaders/comp/scalar-std450-distance-length-normalize.comp new file mode 100644 index 0000000000..37414737fd --- /dev/null +++ b/shaders/comp/scalar-std450-distance-length-normalize.comp @@ -0,0 +1,18 @@ +#version 450 +layout(local_size_x = 1) in; + +layout(std430, set = 0, binding = 0) buffer SSBO +{ + float a; + float b; + float c; + float d; + float e; +}; + +void main() +{ + c = distance(a, b); + d = length(a); + e = normalize(a); +} diff --git a/shaders/frag/for-loop-continue-control-flow.frag b/shaders/frag/for-loop-continue-control-flow.frag new file mode 100644 index 0000000000..1f91cca2a4 --- /dev/null +++ b/shaders/frag/for-loop-continue-control-flow.frag @@ -0,0 +1,11 @@ +#version 450 +layout(location = 0) out vec4 FragColor; +void main() +{ + FragColor = vec4(0.0); + for (int i = 0; i < 3; (0 > 1) ? 1 : i ++) + { + int a = i; + FragColor[a] += float(i); + } +} diff --git a/shaders/frag/scalar-refract-reflect.frag b/shaders/frag/scalar-refract-reflect.frag new file mode 100644 index 0000000000..486ed90bd5 --- /dev/null +++ b/shaders/frag/scalar-refract-reflect.frag @@ -0,0 +1,11 @@ +#version 450 +layout(location = 0) out float FragColor; +layout(location = 0) in vec3 vRefract; + +void main() +{ + FragColor = refract(vRefract.x, vRefract.y, vRefract.z); + FragColor += reflect(vRefract.x, vRefract.y); + FragColor += refract(vRefract.xy, vRefract.yz, vRefract.z).y; + FragColor += reflect(vRefract.xy, vRefract.zy).y; +} diff --git a/shaders/frag/selection-block-dominator.frag b/shaders/frag/selection-block-dominator.frag new file mode 100644 index 0000000000..257f4e6934 --- /dev/null +++ b/shaders/frag/selection-block-dominator.frag @@ -0,0 +1,18 @@ +#version 450 +layout(location = 0) out vec4 FragColor; +layout(location = 0) flat in int vIndex; + +void main() +{ + int v; + if (vIndex != 1) + { + FragColor = vec4(1.0); + return; + } + else + { + v = 10; + } + FragColor = vec4(v); +} diff --git a/tests-other/msl_resource_binding.spv b/tests-other/msl_resource_binding.spv new file mode 100644 index 0000000000..1798902467 Binary files /dev/null and b/tests-other/msl_resource_binding.spv differ diff --git a/tests-other/msl_resource_bindings.cpp b/tests-other/msl_resource_bindings.cpp new file mode 100644 index 0000000000..fcb3213e1d --- /dev/null +++ b/tests-other/msl_resource_bindings.cpp @@ -0,0 +1,86 @@ +// Testbench for MSL resource binding APIs. +// It does not validate output at the moment, but it's useful for ad-hoc testing. + +#include +#include +#include +#include + +#define SPVC_CHECKED_CALL(x) do { \ + if ((x) != SPVC_SUCCESS) { \ + fprintf(stderr, "Failed at line %d.\n", __LINE__); \ + exit(1); \ + } \ +} while(0) + +static std::vector read_file(const char *path) +{ + long len; + FILE *file = fopen(path, "rb"); + + if (!file) + return {}; + + fseek(file, 0, SEEK_END); + len = ftell(file); + rewind(file); + + std::vector buffer(len / sizeof(SpvId)); + if (fread(buffer.data(), 1, len, file) != (size_t)len) + { + fclose(file); + return {}; + } + + fclose(file); + return buffer; +} + +int main(int argc, char **argv) +{ + if (argc != 2) + return EXIT_FAILURE; + + auto buffer = read_file(argv[1]); + if (buffer.empty()) + return EXIT_FAILURE; + + spvc_context ctx; + spvc_parsed_ir parsed_ir; + spvc_compiler compiler; + + SPVC_CHECKED_CALL(spvc_context_create(&ctx)); + SPVC_CHECKED_CALL(spvc_context_parse_spirv(ctx, buffer.data(), buffer.size(), &parsed_ir)); + SPVC_CHECKED_CALL(spvc_context_create_compiler(ctx, SPVC_BACKEND_MSL, parsed_ir, SPVC_CAPTURE_MODE_TAKE_OWNERSHIP, &compiler)); + SPVC_CHECKED_CALL(spvc_compiler_msl_add_discrete_descriptor_set(compiler, 3)); + + spvc_compiler_options opts; + SPVC_CHECKED_CALL(spvc_compiler_create_compiler_options(compiler, &opts)); + SPVC_CHECKED_CALL(spvc_compiler_options_set_bool(opts, SPVC_COMPILER_OPTION_MSL_ARGUMENT_BUFFERS, SPVC_TRUE)); + SPVC_CHECKED_CALL(spvc_compiler_options_set_uint(opts, SPVC_COMPILER_OPTION_MSL_VERSION, 20000)); + SPVC_CHECKED_CALL(spvc_compiler_install_compiler_options(compiler, opts)); + + spvc_msl_resource_binding binding; + spvc_msl_resource_binding_init(&binding); + binding.binding = SPVC_MSL_ARGUMENT_BUFFER_BINDING; + binding.stage = SpvExecutionModelFragment; + binding.desc_set = 0; + binding.msl_buffer = 2; + SPVC_CHECKED_CALL(spvc_compiler_msl_add_resource_binding(compiler, &binding)); + + binding.desc_set = 1; + binding.msl_buffer = 3; + SPVC_CHECKED_CALL(spvc_compiler_msl_add_resource_binding(compiler, &binding)); + + const char *str; + SPVC_CHECKED_CALL(spvc_compiler_compile(compiler, &str)); + + fprintf(stderr, "Output:\n%s\n", str); + + if (!spvc_compiler_msl_is_resource_used(compiler, SpvExecutionModelFragment, 0, SPVC_MSL_ARGUMENT_BUFFER_BINDING)) + return EXIT_FAILURE; + + if (!spvc_compiler_msl_is_resource_used(compiler, SpvExecutionModelFragment, 1, SPVC_MSL_ARGUMENT_BUFFER_BINDING)) + return EXIT_FAILURE; +} +