diff --git a/deps/SPIRV-Cross/CMakeLists.txt b/deps/SPIRV-Cross/CMakeLists.txt
index fc3e778397..aa33262ac8 100644
--- a/deps/SPIRV-Cross/CMakeLists.txt
+++ b/deps/SPIRV-Cross/CMakeLists.txt
@@ -287,7 +287,7 @@ if (SPIRV_CROSS_STATIC)
 endif()
 
 set(spirv-cross-abi-major 0)
-set(spirv-cross-abi-minor 14)
+set(spirv-cross-abi-minor 16)
 set(spirv-cross-abi-patch 0)
 
 if (SPIRV_CROSS_SHARED)
@@ -457,6 +457,10 @@ if (SPIRV_CROSS_CLI)
 				target_link_libraries(spirv-cross-msl-constexpr-test spirv-cross-c)
 				set_target_properties(spirv-cross-msl-constexpr-test PROPERTIES LINK_FLAGS "${spirv-cross-link-flags}")
 
+				add_executable(spirv-cross-msl-resource-binding-test tests-other/msl_resource_bindings.cpp)
+				target_link_libraries(spirv-cross-msl-resource-binding-test spirv-cross-c)
+				set_target_properties(spirv-cross-msl-resource-binding-test PROPERTIES LINK_FLAGS "${spirv-cross-link-flags}")
+
 				if (CMAKE_COMPILER_IS_GNUCXX OR (${CMAKE_CXX_COMPILER_ID} MATCHES "Clang"))
 					target_compile_options(spirv-cross-c-api-test PRIVATE -std=c89 -Wall -Wextra)
 				endif()
@@ -469,6 +473,8 @@ if (SPIRV_CROSS_CLI)
 						COMMAND $<TARGET_FILE:spirv-cross-small-vector-test>)
 				add_test(NAME spirv-cross-msl-constexpr-test
 						COMMAND $<TARGET_FILE:spirv-cross-msl-constexpr-test> ${CMAKE_CURRENT_SOURCE_DIR}/tests-other/msl_constexpr_test.spv)
+				add_test(NAME spirv-cross-msl-resource-binding-test
+						COMMAND $<TARGET_FILE:spirv-cross-msl-resource-binding-test> ${CMAKE_CURRENT_SOURCE_DIR}/tests-other/msl_resource_binding.spv)
 				add_test(NAME spirv-cross-test
 						COMMAND ${PYTHON_EXECUTABLE} ${CMAKE_CURRENT_SOURCE_DIR}/test_shaders.py --parallel
 						${spirv-cross-externals}
diff --git a/deps/SPIRV-Cross/main.cpp b/deps/SPIRV-Cross/main.cpp
index 5931aef3f7..c441d23581 100644
--- a/deps/SPIRV-Cross/main.cpp
+++ b/deps/SPIRV-Cross/main.cpp
@@ -514,6 +514,7 @@ struct CLIArguments
 	bool msl_domain_lower_left = false;
 	bool msl_argument_buffers = false;
 	bool msl_texture_buffer_native = false;
+	bool msl_multiview = false;
 	bool glsl_emit_push_constant_as_ubo = false;
 	bool glsl_emit_ubo_as_plain_uniforms = false;
 	bool emit_line_directives = false;
@@ -592,6 +593,7 @@ static void print_help()
 	                "\t[--msl-argument-buffers]\n"
 	                "\t[--msl-texture-buffer-native]\n"
 	                "\t[--msl-discrete-descriptor-set <index>]\n"
+	                "\t[--msl-multiview]\n"
 	                "\t[--hlsl]\n"
 	                "\t[--reflect]\n"
 	                "\t[--shader-model]\n"
@@ -750,6 +752,7 @@ static string compile_iteration(const CLIArguments &args, std::vector<uint32_t>
 		msl_opts.tess_domain_origin_lower_left = args.msl_domain_lower_left;
 		msl_opts.argument_buffers = args.msl_argument_buffers;
 		msl_opts.texture_buffer_native = args.msl_texture_buffer_native;
+		msl_opts.multiview = args.msl_multiview;
 		msl_comp->set_msl_options(msl_opts);
 		for (auto &v : args.msl_discrete_descriptor_sets)
 			msl_comp->add_discrete_descriptor_set(v);
@@ -1069,6 +1072,7 @@ static int main_inner(int argc, char *argv[])
 	cbs.add("--msl-discrete-descriptor-set",
 	        [&args](CLIParser &parser) { args.msl_discrete_descriptor_sets.push_back(parser.next_uint()); });
 	cbs.add("--msl-texture-buffer-native", [&args](CLIParser &) { args.msl_texture_buffer_native = true; });
+	cbs.add("--msl-multiview", [&args](CLIParser &) { args.msl_multiview = true; });
 	cbs.add("--extension", [&args](CLIParser &parser) { args.extensions.push_back(parser.next_string()); });
 	cbs.add("--rename-entry-point", [&args](CLIParser &parser) {
 		auto old_name = parser.next_string();
diff --git a/deps/SPIRV-Cross/reference/opt/shaders-msl/vulkan/comp/subgroups.nocompat.invalid.vk.msl21.comp b/deps/SPIRV-Cross/reference/opt/shaders-msl/vulkan/comp/subgroups.nocompat.invalid.vk.msl21.comp
index 908fde0457..948806db4e 100644
--- a/deps/SPIRV-Cross/reference/opt/shaders-msl/vulkan/comp/subgroups.nocompat.invalid.vk.msl21.comp
+++ b/deps/SPIRV-Cross/reference/opt/shaders-msl/vulkan/comp/subgroups.nocompat.invalid.vk.msl21.comp
@@ -66,7 +66,7 @@ inline bool spvSubgroupAllEqual(bool value)
 
 kernel void main0(device SSBO& _9 [[buffer(0)]], uint gl_NumSubgroups [[simdgroups_per_threadgroup]], uint gl_SubgroupID [[simdgroup_index_in_threadgroup]], uint gl_SubgroupSize [[thread_execution_width]], uint gl_SubgroupInvocationID [[thread_index_in_simdgroup]])
 {
-    uint4 gl_SubgroupEqMask = 27 > 32 ? uint4(0, (1 << (gl_SubgroupInvocationID - 32)), uint2(0)) : uint4(1 << gl_SubgroupInvocationID, uint3(0));
+    uint4 gl_SubgroupEqMask = gl_SubgroupInvocationID > 32 ? uint4(0, (1 << (gl_SubgroupInvocationID - 32)), uint2(0)) : uint4(1 << gl_SubgroupInvocationID, uint3(0));
     uint4 gl_SubgroupGeMask = uint4(extract_bits(0xFFFFFFFF, min(gl_SubgroupInvocationID, 32u), (uint)max(min((int)gl_SubgroupSize, 32) - (int)gl_SubgroupInvocationID, 0)), extract_bits(0xFFFFFFFF, (uint)max((int)gl_SubgroupInvocationID - 32, 0), (uint)max((int)gl_SubgroupSize - (int)max(gl_SubgroupInvocationID, 32u), 0)), uint2(0));
     uint4 gl_SubgroupGtMask = uint4(extract_bits(0xFFFFFFFF, min(gl_SubgroupInvocationID + 1, 32u), (uint)max(min((int)gl_SubgroupSize, 32) - (int)gl_SubgroupInvocationID - 1, 0)), extract_bits(0xFFFFFFFF, (uint)max((int)gl_SubgroupInvocationID + 1 - 32, 0), (uint)max((int)gl_SubgroupSize - (int)max(gl_SubgroupInvocationID + 1, 32u), 0)), uint2(0));
     uint4 gl_SubgroupLeMask = uint4(extract_bits(0xFFFFFFFF, 0, min(gl_SubgroupInvocationID + 1, 32u)), extract_bits(0xFFFFFFFF, 0, (uint)max((int)gl_SubgroupInvocationID + 1 - 32, 0)), uint2(0));
diff --git a/deps/SPIRV-Cross/reference/shaders-hlsl-no-opt/asm/frag/switch-block-case-fallthrough.asm.frag b/deps/SPIRV-Cross/reference/shaders-hlsl-no-opt/asm/frag/switch-block-case-fallthrough.asm.frag
index fa4953fdb2..ba66ccf626 100644
--- a/deps/SPIRV-Cross/reference/shaders-hlsl-no-opt/asm/frag/switch-block-case-fallthrough.asm.frag
+++ b/deps/SPIRV-Cross/reference/shaders-hlsl-no-opt/asm/frag/switch-block-case-fallthrough.asm.frag
@@ -30,16 +30,23 @@ void frag_main()
         case 0:
         {
             _30 = 3;
+            j = _30;
+            _31 = 0;
+            j = _31;
+            break;
         }
         default:
         {
             j = _30;
             _31 = 0;
+            j = _31;
+            break;
         }
         case 1:
         case 11:
         {
             j = _31;
+            break;
         }
         case 2:
         {
@@ -59,6 +66,8 @@ void frag_main()
         }
         case 4:
         {
+            i = 0;
+            break;
         }
         case 5:
         {
diff --git a/deps/SPIRV-Cross/reference/shaders-msl/vulkan/comp/subgroups.nocompat.invalid.vk.msl21.comp b/deps/SPIRV-Cross/reference/shaders-msl/vulkan/comp/subgroups.nocompat.invalid.vk.msl21.comp
index e52fb209f4..4ebab8c734 100644
--- a/deps/SPIRV-Cross/reference/shaders-msl/vulkan/comp/subgroups.nocompat.invalid.vk.msl21.comp
+++ b/deps/SPIRV-Cross/reference/shaders-msl/vulkan/comp/subgroups.nocompat.invalid.vk.msl21.comp
@@ -66,7 +66,7 @@ inline bool spvSubgroupAllEqual(bool value)
 
 kernel void main0(device SSBO& _9 [[buffer(0)]], uint gl_NumSubgroups [[simdgroups_per_threadgroup]], uint gl_SubgroupID [[simdgroup_index_in_threadgroup]], uint gl_SubgroupSize [[thread_execution_width]], uint gl_SubgroupInvocationID [[thread_index_in_simdgroup]])
 {
-    uint4 gl_SubgroupEqMask = 27 > 32 ? uint4(0, (1 << (gl_SubgroupInvocationID - 32)), uint2(0)) : uint4(1 << gl_SubgroupInvocationID, uint3(0));
+    uint4 gl_SubgroupEqMask = gl_SubgroupInvocationID > 32 ? uint4(0, (1 << (gl_SubgroupInvocationID - 32)), uint2(0)) : uint4(1 << gl_SubgroupInvocationID, uint3(0));
     uint4 gl_SubgroupGeMask = uint4(extract_bits(0xFFFFFFFF, min(gl_SubgroupInvocationID, 32u), (uint)max(min((int)gl_SubgroupSize, 32) - (int)gl_SubgroupInvocationID, 0)), extract_bits(0xFFFFFFFF, (uint)max((int)gl_SubgroupInvocationID - 32, 0), (uint)max((int)gl_SubgroupSize - (int)max(gl_SubgroupInvocationID, 32u), 0)), uint2(0));
     uint4 gl_SubgroupGtMask = uint4(extract_bits(0xFFFFFFFF, min(gl_SubgroupInvocationID + 1, 32u), (uint)max(min((int)gl_SubgroupSize, 32) - (int)gl_SubgroupInvocationID - 1, 0)), extract_bits(0xFFFFFFFF, (uint)max((int)gl_SubgroupInvocationID + 1 - 32, 0), (uint)max((int)gl_SubgroupSize - (int)max(gl_SubgroupInvocationID + 1, 32u), 0)), uint2(0));
     uint4 gl_SubgroupLeMask = uint4(extract_bits(0xFFFFFFFF, 0, min(gl_SubgroupInvocationID + 1, 32u)), extract_bits(0xFFFFFFFF, 0, (uint)max((int)gl_SubgroupInvocationID + 1 - 32, 0)), uint2(0));
diff --git a/deps/SPIRV-Cross/spirv_cfg.cpp b/deps/SPIRV-Cross/spirv_cfg.cpp
index cc1db1c737..ed31f23679 100644
--- a/deps/SPIRV-Cross/spirv_cfg.cpp
+++ b/deps/SPIRV-Cross/spirv_cfg.cpp
@@ -74,8 +74,14 @@ bool CFG::is_back_edge(uint32_t to) const
 	// We have a back edge if the visit order is set with the temporary magic value 0.
 	// Crossing edges will have already been recorded with a visit order.
 	auto itr = visit_order.find(to);
-	assert(itr != end(visit_order));
-	return itr->second.get() == 0;
+	return itr != end(visit_order) && itr->second.get() == 0;
+}
+
+bool CFG::has_visited_forward_edge(uint32_t to) const
+{
+	// If > 0, we have visited the edge already, and this is not a back edge branch.
+	auto itr = visit_order.find(to);
+	return itr != end(visit_order) && itr->second.get() > 0;
 }
 
 bool CFG::post_order_visit(uint32_t block_id)
@@ -83,8 +89,10 @@ bool CFG::post_order_visit(uint32_t block_id)
 	// If we have already branched to this block (back edge), stop recursion.
 	// If our branches are back-edges, we do not record them.
 	// We have to record crossing edges however.
-	if (visit_order[block_id].get() >= 0)
-		return !is_back_edge(block_id);
+	if (has_visited_forward_edge(block_id))
+		return true;
+	else if (is_back_edge(block_id))
+		return false;
 
 	// Block back-edges from recursively revisiting ourselves.
 	visit_order[block_id].get() = 0;
@@ -123,9 +131,39 @@ bool CFG::post_order_visit(uint32_t block_id)
 	// This is needed to avoid annoying cases with do { ... } while(false) loops often generated by inliners.
 	// To the CFG, this is linear control flow, but we risk picking the do/while scope as our dominating block.
 	// This makes sure that if we are accessing a variable outside the do/while, we choose the loop header as dominator.
-	if (block.merge == SPIRBlock::MergeLoop)
-		if (post_order_visit(block.merge_block))
-			add_branch(block_id, block.merge_block);
+	// We could use has_visited_forward_edge, but this break code-gen where the merge block is unreachable in the CFG.
+	if (block.merge == SPIRBlock::MergeLoop && post_order_visit(block.merge_block))
+		add_branch(block_id, block.merge_block);
+
+	// If this is a selection merge, add an implied branch to the merge target.
+	// This is needed to avoid cases where an inner branch dominates the outer branch.
+	// This can happen if one of the branches exit early, e.g.:
+	// if (cond) { ...; break; } else { var = 100 } use_var(var);
+	// We can use the variable without a Phi since there is only one possible parent here.
+	// However, in this case, we need to hoist out the inner variable to outside the branch.
+	// Use same strategy as loops.
+	if (block.merge == SPIRBlock::MergeSelection && post_order_visit(block.next_block))
+	{
+		// If there is only one preceding edge to the merge block and it's not ourselves, we need a fixup.
+		// Add a fake branch so any dominator in either the if (), or else () block, or a lone case statement
+		// will be hoisted out to outside the selection merge.
+		// If size > 1, the variable will be automatically hoisted, so we should not mess with it.
+		// Adding fake branches unconditionally breaks parameter preservation analysis,
+		// which looks at how variables are accessed through the CFG.
+		auto pred_itr = preceding_edges.find(block.next_block);
+		if (pred_itr != end(preceding_edges))
+		{
+			auto &pred = pred_itr->second;
+			if (pred.size() == 1 && *pred.begin() != block_id)
+				add_branch(block_id, block.next_block);
+		}
+		else
+		{
+			// If the merge block does not have any preceding edges, i.e. unreachable, hallucinate it.
+			// We're going to do code-gen for it, and domination analysis requires that we have at least one preceding edge.
+			add_branch(block_id, block.next_block);
+		}
+	}
 
 	// Then visit ourselves. Start counting at one, to let 0 be a magic value for testing back vs. crossing edges.
 	visit_order[block_id].get() = ++visit_count;
diff --git a/deps/SPIRV-Cross/spirv_cfg.hpp b/deps/SPIRV-Cross/spirv_cfg.hpp
index 2262eb79b0..7d07d48410 100644
--- a/deps/SPIRV-Cross/spirv_cfg.hpp
+++ b/deps/SPIRV-Cross/spirv_cfg.hpp
@@ -127,6 +127,7 @@ private:
 	uint32_t visit_count = 0;
 
 	bool is_back_edge(uint32_t to) const;
+	bool has_visited_forward_edge(uint32_t to) const;
 };
 
 class DominatorBuilder
diff --git a/deps/SPIRV-Cross/spirv_common.hpp b/deps/SPIRV-Cross/spirv_common.hpp
index 1f9bce681e..d7a5f32eca 100644
--- a/deps/SPIRV-Cross/spirv_common.hpp
+++ b/deps/SPIRV-Cross/spirv_common.hpp
@@ -476,6 +476,7 @@ struct SPIRExtension : IVariant
 	{
 		Unsupported,
 		GLSL,
+		SPV_debug_info,
 		SPV_AMD_shader_ballot,
 		SPV_AMD_shader_explicit_vertex_parameter,
 		SPV_AMD_shader_trinary_minmax,
diff --git a/deps/SPIRV-Cross/spirv_cross.cpp b/deps/SPIRV-Cross/spirv_cross.cpp
index 4011c849bf..9fdfd1f00c 100644
--- a/deps/SPIRV-Cross/spirv_cross.cpp
+++ b/deps/SPIRV-Cross/spirv_cross.cpp
@@ -3079,6 +3079,7 @@ bool Compiler::AnalyzeVariableScopeAccessHandler::handle(spv::Op op, const uint3
 
 	case OpArrayLength:
 	case OpLine:
+	case OpNoLine:
 		// Uses literals, but cannot be a phi variable or temporary, so ignore.
 		break;
 
@@ -4266,6 +4267,7 @@ bool Compiler::instruction_to_result_type(uint32_t &result_type, uint32_t &resul
 	case OpGroupCommitReadPipe:
 	case OpGroupCommitWritePipe:
 	case OpLine:
+	case OpNoLine:
 		return false;
 
 	default:
diff --git a/deps/SPIRV-Cross/spirv_cross.hpp b/deps/SPIRV-Cross/spirv_cross.hpp
index ccd7c04a38..c2dc4ea61b 100644
--- a/deps/SPIRV-Cross/spirv_cross.hpp
+++ b/deps/SPIRV-Cross/spirv_cross.hpp
@@ -258,7 +258,7 @@ public:
 	size_t get_declared_struct_size_runtime_array(const SPIRType &struct_type, size_t array_size) const;
 
 	// Returns the effective size of a buffer block struct member.
-	virtual size_t get_declared_struct_member_size(const SPIRType &struct_type, uint32_t index) const;
+	size_t get_declared_struct_member_size(const SPIRType &struct_type, uint32_t index) const;
 
 	// Returns a set of all global variables which are statically accessed
 	// by the control flow graph from the current entry point.
diff --git a/deps/SPIRV-Cross/spirv_cross_c.cpp b/deps/SPIRV-Cross/spirv_cross_c.cpp
index 8f68a53886..8048274f00 100644
--- a/deps/SPIRV-Cross/spirv_cross_c.cpp
+++ b/deps/SPIRV-Cross/spirv_cross_c.cpp
@@ -1623,6 +1623,16 @@ spvc_result spvc_compiler_get_declared_struct_size_runtime_array(spvc_compiler c
 	return SPVC_SUCCESS;
 }
 
+spvc_result spvc_compiler_get_declared_struct_member_size(spvc_compiler compiler, spvc_type struct_type, unsigned index, size_t *size)
+{
+	SPVC_BEGIN_SAFE_SCOPE
+	{
+		*size = compiler->compiler->get_declared_struct_member_size(*static_cast<const SPIRType *>(struct_type), index);
+	}
+	SPVC_END_SAFE_SCOPE(compiler->context, SPVC_ERROR_INVALID_ARGUMENT)
+	return SPVC_SUCCESS;
+}
+
 spvc_result spvc_compiler_type_struct_member_offset(spvc_compiler compiler, spvc_type type, unsigned index, unsigned *offset)
 {
 	SPVC_BEGIN_SAFE_SCOPE
diff --git a/deps/SPIRV-Cross/spirv_cross_c.h b/deps/SPIRV-Cross/spirv_cross_c.h
index afa5a832da..6efaf8543e 100644
--- a/deps/SPIRV-Cross/spirv_cross_c.h
+++ b/deps/SPIRV-Cross/spirv_cross_c.h
@@ -33,7 +33,7 @@ extern "C" {
 /* Bumped if ABI or API breaks backwards compatibility. */
 #define SPVC_C_API_VERSION_MAJOR 0
 /* Bumped if APIs or enumerations are added in a backwards compatible way. */
-#define SPVC_C_API_VERSION_MINOR 14
+#define SPVC_C_API_VERSION_MINOR 16
 /* Bumped if internal implementation details change. */
 #define SPVC_C_API_VERSION_PATCH 0
 
@@ -303,6 +303,7 @@ SPVC_PUBLIC_API void spvc_msl_resource_binding_init(spvc_msl_resource_binding *b
 #define SPVC_MSL_PUSH_CONSTANT_BINDING (0)
 #define SPVC_MSL_SWIZZLE_BUFFER_BINDING (~(1u))
 #define SPVC_MSL_BUFFER_SIZE_BUFFER_BINDING (~(2u))
+#define SPVC_MSL_ARGUMENT_BUFFER_BINDING (~(3u))
 
 /* Obsolete. Sticks around for backwards compatibility. */
 #define SPVC_MSL_AUX_BUFFER_STRUCT_VERSION 1
@@ -659,6 +660,7 @@ SPVC_PUBLIC_API SpvAccessQualifier spvc_type_get_image_access_qualifier(spvc_typ
 SPVC_PUBLIC_API spvc_result spvc_compiler_get_declared_struct_size(spvc_compiler compiler, spvc_type struct_type, size_t *size);
 SPVC_PUBLIC_API spvc_result spvc_compiler_get_declared_struct_size_runtime_array(spvc_compiler compiler,
                                                                                  spvc_type struct_type, size_t array_size, size_t *size);
+SPVC_PUBLIC_API spvc_result spvc_compiler_get_declared_struct_member_size(spvc_compiler compiler, spvc_type type, unsigned index, size_t *size);
 
 SPVC_PUBLIC_API spvc_result spvc_compiler_type_struct_member_offset(spvc_compiler compiler,
                                                                     spvc_type type, unsigned index, unsigned *offset);
diff --git a/deps/SPIRV-Cross/spirv_glsl.cpp b/deps/SPIRV-Cross/spirv_glsl.cpp
index 44ccfc0540..c207eebf4f 100644
--- a/deps/SPIRV-Cross/spirv_glsl.cpp
+++ b/deps/SPIRV-Cross/spirv_glsl.cpp
@@ -832,8 +832,6 @@ void CompilerGLSL::emit_struct(SPIRType &type)
 string CompilerGLSL::to_interpolation_qualifiers(const Bitset &flags)
 {
 	string res;
-	if (flags.get(DecorationNonUniformEXT))
-		res += "nonuniformEXT ";
 	//if (flags & (1ull << DecorationSmooth))
 	//    res += "smooth ";
 	if (flags.get(DecorationFlat))
@@ -4602,7 +4600,7 @@ void CompilerGLSL::emit_texture_op(const Instruction &i)
 
 	SmallVector<uint32_t> inherited_expressions;
 
-	uint32_t result_type = ops[0];
+	uint32_t result_type_id = ops[0];
 	uint32_t id = ops[1];
 	uint32_t img = ops[2];
 	uint32_t coord = ops[3];
@@ -4613,8 +4611,14 @@ void CompilerGLSL::emit_texture_op(const Instruction &i)
 	bool fetch = false;
 	const uint32_t *opt = nullptr;
 
+	auto &result_type = get<SPIRType>(result_type_id);
+
 	inherited_expressions.push_back(coord);
 
+	// Make sure non-uniform decoration is back-propagated to where it needs to be.
+	if (has_decoration(img, DecorationNonUniformEXT))
+		propagate_nonuniform_qualifier(img);
+
 	switch (op)
 	{
 	case OpImageSampleDrefImplicitLod:
@@ -4771,14 +4775,21 @@ void CompilerGLSL::emit_texture_op(const Instruction &i)
 			image_is_depth = true;
 
 		if (image_is_depth)
-			expr = remap_swizzle(get<SPIRType>(result_type), 1, expr);
+			expr = remap_swizzle(result_type, 1, expr);
+	}
+
+	if (!backend.support_small_type_sampling_result && result_type.width < 32)
+	{
+		// Just value cast (narrowing) to expected type since we cannot rely on narrowing to work automatically.
+		// Hopefully compiler picks this up and converts the texturing instruction to the appropriate precision.
+		expr = join(type_to_glsl_constructor(result_type), "(", expr, ")");
 	}
 
 	// Deals with reads from MSL. We might need to downconvert to fewer components.
 	if (op == OpImageRead)
-		expr = remap_swizzle(get<SPIRType>(result_type), 4, expr);
+		expr = remap_swizzle(result_type, 4, expr);
 
-	emit_op(result_type, id, expr, forward);
+	emit_op(result_type_id, id, expr, forward);
 	for (auto &inherit : inherited_expressions)
 		inherit_expression_dependencies(id, inherit);
 
@@ -7107,18 +7118,23 @@ string CompilerGLSL::variable_decl_function_local(SPIRVariable &var)
 	return expr;
 }
 
+void CompilerGLSL::emit_variable_temporary_copies(const SPIRVariable &var)
+{
+	if (var.allocate_temporary_copy)
+	{
+		auto &type = get<SPIRType>(var.basetype);
+		auto &flags = get_decoration_bitset(var.self);
+		statement(flags_to_qualifiers_glsl(type, flags), variable_decl(type, join("_", var.self, "_copy")), ";");
+	}
+}
+
 void CompilerGLSL::flush_variable_declaration(uint32_t id)
 {
 	auto *var = maybe_get<SPIRVariable>(id);
 	if (var && var->deferred_declaration)
 	{
 		statement(variable_decl_function_local(*var), ";");
-		if (var->allocate_temporary_copy)
-		{
-			auto &type = get<SPIRType>(var->basetype);
-			auto &flags = ir.meta[id].decoration.decoration_flags;
-			statement(flags_to_qualifiers_glsl(type, flags), variable_decl(type, join("_", id, "_copy")), ";");
-		}
+		emit_variable_temporary_copies(*var);
 		var->deferred_declaration = false;
 	}
 }
@@ -7553,8 +7569,13 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction)
 		unroll_array_from_complex_load(id, ptr, expr);
 
 		auto &type = get<SPIRType>(result_type);
-		if (has_decoration(id, DecorationNonUniformEXT))
+		// Shouldn't need to check for ID, but current glslang codegen requires it in some cases
+		// when loading Image/Sampler descriptors. It does not hurt to check ID as well.
+		if (has_decoration(id, DecorationNonUniformEXT) || has_decoration(ptr, DecorationNonUniformEXT))
+		{
+			propagate_nonuniform_qualifier(ptr);
 			convert_non_uniform_expression(type, expr);
+		}
 
 		if (ptr_expression)
 			ptr_expression->need_transpose = old_need_transpose;
@@ -7636,6 +7657,9 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction)
 	{
 		auto *var = maybe_get<SPIRVariable>(ops[0]);
 
+		if (has_decoration(ops[0], DecorationNonUniformEXT))
+			propagate_nonuniform_qualifier(ops[0]);
+
 		if (var && var->statically_assigned)
 			var->static_expression = ops[1];
 		else if (var && var->loop_variable && !var->loop_variable_enable)
@@ -8018,7 +8042,14 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction)
 		uint32_t rhs = ops[2];
 		bool pointer = get<SPIRType>(result_type).pointer;
 
-		if (expression_is_lvalue(rhs) && !pointer)
+		auto *chain = maybe_get<SPIRAccessChain>(rhs);
+		if (chain)
+		{
+			// Cannot lower to a SPIRExpression, just copy the object.
+			auto &e = set<SPIRAccessChain>(id, *chain);
+			e.self = id;
+		}
+		else if (expression_is_lvalue(rhs) && !pointer)
 		{
 			// Need a copy.
 			// For pointer types, we copy the pointer itself.
@@ -8037,6 +8068,11 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction)
 				auto *var = maybe_get_backing_variable(rhs);
 				e.loaded_from = var ? var->self : 0;
 			}
+
+			// If we're copying an access chain, need to inherit the read expressions.
+			auto *rhs_expr = maybe_get<SPIRExpression>(rhs);
+			if (rhs_expr)
+				e.implied_read_expressions = rhs_expr->implied_read_expressions;
 		}
 		break;
 	}
@@ -8971,6 +9007,8 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction)
 		uint32_t result_type = ops[0];
 		uint32_t id = ops[1];
 		emit_sampled_image_op(result_type, id, ops[2], ops[3]);
+		inherit_expression_dependencies(id, ops[2]);
+		inherit_expression_dependencies(id, ops[3]);
 		break;
 	}
 
@@ -9392,6 +9430,10 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction)
 		{
 			emit_spv_amd_gcn_shader_op(ops[0], ops[1], ops[3], &ops[4], length - 4);
 		}
+		else if (get<SPIRExtension>(extension_set).ext == SPIRExtension::SPV_debug_info)
+		{
+			break; // Ignore SPIR-V debug information extended instructions.
+		}
 		else
 		{
 			statement("// unimplemented ext op ", instruction.op);
@@ -9658,6 +9700,9 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction)
 		break;
 	}
 
+	case OpNoLine:
+		break;
+
 	default:
 		statement("// unimplemented op ", instruction.op);
 		break;
@@ -9873,7 +9918,16 @@ const char *CompilerGLSL::flags_to_qualifiers_glsl(const SPIRType &type, const B
 
 const char *CompilerGLSL::to_precision_qualifiers_glsl(uint32_t id)
 {
-	return flags_to_qualifiers_glsl(expression_type(id), ir.meta[id].decoration.decoration_flags);
+	auto &type = expression_type(id);
+	bool use_precision_qualifiers = backend.allow_precision_qualifiers || options.es;
+	if (use_precision_qualifiers && (type.basetype == SPIRType::Image || type.basetype == SPIRType::SampledImage))
+	{
+		// Force mediump for the sampler type. We cannot declare 16-bit or smaller image types.
+		auto &result_type = get<SPIRType>(type.image.type);
+		if (result_type.width < 32)
+			return "mediump ";
+	}
+	return flags_to_qualifiers_glsl(type, ir.meta[id].decoration.decoration_flags);
 }
 
 string CompilerGLSL::to_qualifiers_glsl(uint32_t id)
@@ -10089,15 +10143,22 @@ string CompilerGLSL::image_type_glsl(const SPIRType &type, uint32_t id)
 	switch (imagetype.basetype)
 	{
 	case SPIRType::Int:
+	case SPIRType::Short:
+	case SPIRType::SByte:
 		res = "i";
 		break;
 	case SPIRType::UInt:
+	case SPIRType::UShort:
+	case SPIRType::UByte:
 		res = "u";
 		break;
 	default:
 		break;
 	}
 
+	// For half image types, we will force mediump for the sampler, and cast to f16 after any sampling operation.
+	// We cannot express a true half texture type in GLSL. Neither for short integer formats for that matter.
+
 	if (type.basetype == SPIRType::Image && type.image.dim == DimSubpassData && options.vulkan_semantics)
 		return res + "subpassInput" + (type.image.ms ? "MS" : "");
 
@@ -11189,7 +11250,10 @@ bool CompilerGLSL::attempt_emit_loop_header(SPIRBlock &block, SPIRBlock::Method
 			}
 
 			default:
-				SPIRV_CROSS_THROW("For/while loop detected, but need while/for loop semantics.");
+				block.disable_block_optimization = true;
+				force_recompile();
+				begin_scope(); // We'll see an end_scope() later.
+				return false;
 			}
 
 			begin_scope();
@@ -11263,7 +11327,10 @@ bool CompilerGLSL::attempt_emit_loop_header(SPIRBlock &block, SPIRBlock::Method
 			}
 
 			default:
-				SPIRV_CROSS_THROW("For/while loop detected, but need while/for loop semantics.");
+				block.disable_block_optimization = true;
+				force_recompile();
+				begin_scope(); // We'll see an end_scope() later.
+				return false;
 			}
 
 			begin_scope();
@@ -11325,8 +11392,13 @@ void CompilerGLSL::emit_block_chain(SPIRBlock &block)
 		continue_type = continue_block_type(get<SPIRBlock>(block.continue_block));
 
 	// If we have loop variables, stop masking out access to the variable now.
-	for (auto var : block.loop_variables)
-		get<SPIRVariable>(var).loop_variable_enable = true;
+	for (auto var_id : block.loop_variables)
+	{
+		auto &var = get<SPIRVariable>(var_id);
+		var.loop_variable_enable = true;
+		// We're not going to declare the variable directly, so emit a copy here.
+		emit_variable_temporary_copies(var);
+	}
 
 	// Remember deferred declaration state. We will restore it before returning.
 	SmallVector<bool, 64> rearm_dominated_variables(block.dominated_variables.size());
@@ -11667,7 +11739,7 @@ void CompilerGLSL::emit_block_chain(SPIRBlock &block)
 			}
 
 			auto &case_block = get<SPIRBlock>(target_block);
-			if (i + 1 < num_blocks &&
+			if (backend.support_case_fallthrough && i + 1 < num_blocks &&
 			    execution_is_direct_branch(case_block, get<SPIRBlock>(block_declaration_order[i + 1])))
 			{
 				// We will fall through here, so just terminate the block chain early.
@@ -12190,3 +12262,37 @@ void CompilerGLSL::emit_line_directive(uint32_t file_id, uint32_t line_literal)
 		statement_no_indent("#line ", line_literal, " \"", get<SPIRString>(file_id).str, "\"");
 	}
 }
+
+void CompilerGLSL::propagate_nonuniform_qualifier(uint32_t id)
+{
+	// SPIR-V might only tag the very last ID with NonUniformEXT, but for codegen,
+	// we need to know NonUniformEXT a little earlier, when the resource is actually loaded.
+	// Back-propagate the qualifier based on the expression dependency chain.
+
+	if (!has_decoration(id, DecorationNonUniformEXT))
+	{
+		set_decoration(id, DecorationNonUniformEXT);
+		force_recompile();
+	}
+
+	auto *e = maybe_get<SPIRExpression>(id);
+	auto *combined = maybe_get<SPIRCombinedImageSampler>(id);
+	auto *chain = maybe_get<SPIRAccessChain>(id);
+	if (e)
+	{
+		for (auto &expr : e->expression_dependencies)
+			propagate_nonuniform_qualifier(expr);
+		for (auto &expr : e->implied_read_expressions)
+			propagate_nonuniform_qualifier(expr);
+	}
+	else if (combined)
+	{
+		propagate_nonuniform_qualifier(combined->image);
+		propagate_nonuniform_qualifier(combined->sampler);
+	}
+	else if (chain)
+	{
+		for (auto &expr : chain->implied_read_expressions)
+			propagate_nonuniform_qualifier(expr);
+	}
+}
diff --git a/deps/SPIRV-Cross/spirv_glsl.hpp b/deps/SPIRV-Cross/spirv_glsl.hpp
index 45a8654c52..a6fac92662 100644
--- a/deps/SPIRV-Cross/spirv_glsl.hpp
+++ b/deps/SPIRV-Cross/spirv_glsl.hpp
@@ -404,6 +404,8 @@ protected:
 		bool array_is_value_type = true;
 		bool comparison_image_samples_scalar = false;
 		bool native_pointers = false;
+		bool support_small_type_sampling_result = false;
+		bool support_case_fallthrough = true;
 	} backend;
 
 	void emit_struct(SPIRType &type);
@@ -432,6 +434,7 @@ protected:
 	bool flush_phi_required(uint32_t from, uint32_t to);
 	void flush_variable_declaration(uint32_t id);
 	void flush_undeclared_variables(SPIRBlock &block);
+	void emit_variable_temporary_copies(const SPIRVariable &var);
 
 	bool should_dereference(uint32_t id);
 	bool should_forward(uint32_t id);
@@ -669,6 +672,8 @@ protected:
 	void fixup_type_alias();
 	void reorder_type_alias();
 
+	void propagate_nonuniform_qualifier(uint32_t id);
+
 private:
 	void init();
 };
diff --git a/deps/SPIRV-Cross/spirv_hlsl.cpp b/deps/SPIRV-Cross/spirv_hlsl.cpp
index c129966fe4..57bbef8b81 100644
--- a/deps/SPIRV-Cross/spirv_hlsl.cpp
+++ b/deps/SPIRV-Cross/spirv_hlsl.cpp
@@ -1742,6 +1742,36 @@ void CompilerHLSL::emit_resources()
 		end_scope();
 		statement("");
 	}
+
+	if (requires_scalar_reflect)
+	{
+		// FP16/FP64? No templates in HLSL.
+		statement("float SPIRV_Cross_Reflect(float i, float n)");
+		begin_scope();
+		statement("return i - 2.0 * dot(n, i) * n;");
+		end_scope();
+		statement("");
+	}
+
+	if (requires_scalar_refract)
+	{
+		// FP16/FP64? No templates in HLSL.
+		statement("float SPIRV_Cross_Refract(float i, float n, float eta)");
+		begin_scope();
+		statement("float NoI = n * i;");
+		statement("float NoI2 = NoI * NoI;");
+		statement("float k = 1.0 - eta * eta * (1.0 - NoI2);");
+		statement("if (k < 0.0)");
+		begin_scope();
+		statement("return 0.0;");
+		end_scope();
+		statement("else");
+		begin_scope();
+		statement("return eta * i - (eta * NoI + sqrt(k)) * n;");
+		end_scope();
+		end_scope();
+		statement("");
+	}
 }
 
 string CompilerHLSL::layout_for_member(const SPIRType &type, uint32_t index)
@@ -2449,6 +2479,10 @@ void CompilerHLSL::emit_texture_op(const Instruction &i)
 
 	inherited_expressions.push_back(coord);
 
+	// Make sure non-uniform decoration is back-propagated to where it needs to be.
+	if (has_decoration(img, DecorationNonUniformEXT))
+		propagate_nonuniform_qualifier(img);
+
 	switch (op)
 	{
 	case OpImageSampleDrefImplicitLod:
@@ -3245,6 +3279,45 @@ void CompilerHLSL::emit_glsl_op(uint32_t result_type, uint32_t id, uint32_t eop,
 		break;
 	}
 
+	case GLSLstd450Normalize:
+		// HLSL does not support scalar versions here.
+		if (expression_type(args[0]).vecsize == 1)
+		{
+			// Returns -1 or 1 for valid input, sign() does the job.
+			emit_unary_func_op(result_type, id, args[0], "sign");
+		}
+		else
+			CompilerGLSL::emit_glsl_op(result_type, id, eop, args, count);
+		break;
+
+	case GLSLstd450Reflect:
+		if (get<SPIRType>(result_type).vecsize == 1)
+		{
+			if (!requires_scalar_reflect)
+			{
+				requires_scalar_reflect = true;
+				force_recompile();
+			}
+			emit_binary_func_op(result_type, id, args[0], args[1], "SPIRV_Cross_Reflect");
+		}
+		else
+			CompilerGLSL::emit_glsl_op(result_type, id, eop, args, count);
+		break;
+
+	case GLSLstd450Refract:
+		if (get<SPIRType>(result_type).vecsize == 1)
+		{
+			if (!requires_scalar_refract)
+			{
+				requires_scalar_refract = true;
+				force_recompile();
+			}
+			emit_trinary_func_op(result_type, id, args[0], args[1], args[2], "SPIRV_Cross_Refract");
+		}
+		else
+			CompilerGLSL::emit_glsl_op(result_type, id, eop, args, count);
+		break;
+
 	default:
 		CompilerGLSL::emit_glsl_op(result_type, id, eop, args, count);
 		break;
@@ -3389,6 +3462,9 @@ void CompilerHLSL::emit_load(const Instruction &instruction)
 		uint32_t id = ops[1];
 		uint32_t ptr = ops[2];
 
+		if (has_decoration(ptr, DecorationNonUniformEXT))
+			propagate_nonuniform_qualifier(ptr);
+
 		auto load_expr = read_access_chain(*chain);
 
 		bool forward = should_forward(ptr) && forced_temporaries.find(id) == end(forced_temporaries);
@@ -3422,6 +3498,9 @@ void CompilerHLSL::write_access_chain(const SPIRAccessChain &chain, uint32_t val
 	// Make sure we trigger a read of the constituents in the access chain.
 	track_expression_read(chain.self);
 
+	if (has_decoration(chain.self, DecorationNonUniformEXT))
+		propagate_nonuniform_qualifier(chain.self);
+
 	SPIRType target_type;
 	target_type.basetype = SPIRType::UInt;
 	target_type.vecsize = type.vecsize;
@@ -3954,6 +4033,31 @@ void CompilerHLSL::emit_instruction(const Instruction &instruction)
 		break;
 	}
 
+	case OpOuterProduct:
+	{
+		uint32_t result_type = ops[0];
+		uint32_t id = ops[1];
+		uint32_t a = ops[2];
+		uint32_t b = ops[3];
+
+		auto &type = get<SPIRType>(result_type);
+		string expr = type_to_glsl_constructor(type);
+		expr += "(";
+		for (uint32_t col = 0; col < type.columns; col++)
+		{
+			expr += to_enclosed_expression(a);
+			expr += " * ";
+			expr += to_extract_component_expression(b, col);
+			if (col + 1 < type.columns)
+				expr += ", ";
+		}
+		expr += ")";
+		emit_op(result_type, id, expr, should_forward(a) && should_forward(b));
+		inherit_expression_dependencies(id, a);
+		inherit_expression_dependencies(id, b);
+		break;
+	}
+
 	case OpFMod:
 	{
 		if (!requires_op_fmod)
@@ -4700,6 +4804,7 @@ string CompilerHLSL::compile()
 	backend.can_declare_arrays_inline = false;
 	backend.can_return_array = false;
 	backend.nonuniform_qualifier = "NonUniformResourceIndex";
+	backend.support_case_fallthrough = false;
 
 	fixup_type_alias();
 	reorder_type_alias();
diff --git a/deps/SPIRV-Cross/spirv_hlsl.hpp b/deps/SPIRV-Cross/spirv_hlsl.hpp
index d96c911f83..796f502c5d 100644
--- a/deps/SPIRV-Cross/spirv_hlsl.hpp
+++ b/deps/SPIRV-Cross/spirv_hlsl.hpp
@@ -167,6 +167,8 @@ private:
 	void replace_illegal_names() override;
 
 	Options hlsl_options;
+
+	// TODO: Refactor this to be more similar to MSL, maybe have some common system in place?
 	bool requires_op_fmod = false;
 	bool requires_fp16_packing = false;
 	bool requires_explicit_fp16_packing = false;
@@ -179,6 +181,8 @@ private:
 	bool requires_inverse_2x2 = false;
 	bool requires_inverse_3x3 = false;
 	bool requires_inverse_4x4 = false;
+	bool requires_scalar_reflect = false;
+	bool requires_scalar_refract = false;
 	uint64_t required_textureSizeVariants = 0;
 	void require_texture_query_variant(const SPIRType &type);
 
diff --git a/deps/SPIRV-Cross/spirv_msl.cpp b/deps/SPIRV-Cross/spirv_msl.cpp
index b3097ab826..e73d7cc47d 100644
--- a/deps/SPIRV-Cross/spirv_msl.cpp
+++ b/deps/SPIRV-Cross/spirv_msl.cpp
@@ -105,8 +105,10 @@ void CompilerMSL::build_implicit_builtins()
 	    active_input_builtins.get(BuiltInSubgroupLtMask);
 	bool need_subgroup_ge_mask = !msl_options.is_ios() && (active_input_builtins.get(BuiltInSubgroupGeMask) ||
 	                                                       active_input_builtins.get(BuiltInSubgroupGtMask));
+	bool need_multiview = get_execution_model() == ExecutionModelVertex &&
+	                      (msl_options.multiview || active_input_builtins.get(BuiltInViewIndex));
 	if (need_subpass_input || need_sample_pos || need_subgroup_mask || need_vertex_params || need_tesc_params ||
-	    needs_subgroup_invocation_id)
+	    need_multiview || needs_subgroup_invocation_id)
 	{
 		bool has_frag_coord = false;
 		bool has_sample_id = false;
@@ -118,6 +120,7 @@ void CompilerMSL::build_implicit_builtins()
 		bool has_primitive_id = false;
 		bool has_subgroup_invocation_id = false;
 		bool has_subgroup_size = false;
+		bool has_view_idx = false;
 
 		ir.for_each_typed_id<SPIRVariable>([&](uint32_t, SPIRVariable &var) {
 			if (var.storage != StorageClassInput || !ir.meta[var.self].decoration.builtin)
@@ -189,6 +192,22 @@ void CompilerMSL::build_implicit_builtins()
 				builtin_subgroup_size_id = var.self;
 				has_subgroup_size = true;
 			}
+
+			if (need_multiview)
+			{
+				if (builtin == BuiltInInstanceIndex)
+				{
+					// The view index here is derived from the instance index.
+					builtin_instance_idx_id = var.self;
+					has_instance_idx = true;
+				}
+
+				if (builtin == BuiltInViewIndex)
+				{
+					builtin_view_idx_id = var.self;
+					has_view_idx = true;
+				}
+			}
 		});
 
 		if (!has_frag_coord && need_subpass_input)
@@ -246,7 +265,8 @@ void CompilerMSL::build_implicit_builtins()
 			mark_implicit_builtin(StorageClassInput, BuiltInSampleId, var_id);
 		}
 
-		if (need_vertex_params && (!has_vertex_idx || !has_base_vertex || !has_instance_idx || !has_base_instance))
+		if ((need_vertex_params && (!has_vertex_idx || !has_base_vertex || !has_instance_idx || !has_base_instance)) ||
+		    (need_multiview && (!has_instance_idx || !has_view_idx)))
 		{
 			uint32_t offset = ir.increase_bound_by(2);
 			uint32_t type_id = offset;
@@ -265,7 +285,7 @@ void CompilerMSL::build_implicit_builtins()
 			auto &ptr_type = set<SPIRType>(type_ptr_id, uint_type_ptr);
 			ptr_type.self = type_id;
 
-			if (!has_vertex_idx)
+			if (need_vertex_params && !has_vertex_idx)
 			{
 				uint32_t var_id = ir.increase_bound_by(1);
 
@@ -276,7 +296,7 @@ void CompilerMSL::build_implicit_builtins()
 				mark_implicit_builtin(StorageClassInput, BuiltInVertexIndex, var_id);
 			}
 
-			if (!has_base_vertex)
+			if (need_vertex_params && !has_base_vertex)
 			{
 				uint32_t var_id = ir.increase_bound_by(1);
 
@@ -287,7 +307,7 @@ void CompilerMSL::build_implicit_builtins()
 				mark_implicit_builtin(StorageClassInput, BuiltInBaseVertex, var_id);
 			}
 
-			if (!has_instance_idx)
+			if (!has_instance_idx) // Needed by both multiview and tessellation
 			{
 				uint32_t var_id = ir.increase_bound_by(1);
 
@@ -296,9 +316,30 @@ void CompilerMSL::build_implicit_builtins()
 				set_decoration(var_id, DecorationBuiltIn, BuiltInInstanceIndex);
 				builtin_instance_idx_id = var_id;
 				mark_implicit_builtin(StorageClassInput, BuiltInInstanceIndex, var_id);
+
+				if (need_multiview)
+				{
+					// Multiview shaders are not allowed to write to gl_Layer, ostensibly because
+					// it is implicitly written from gl_ViewIndex, but we have to do that explicitly.
+					// Note that we can't just abuse gl_ViewIndex for this purpose: it's an input, but
+					// gl_Layer is an output in vertex-pipeline shaders.
+					uint32_t type_ptr_out_id = ir.increase_bound_by(2);
+					SPIRType uint_type_ptr_out;
+					uint_type_ptr_out = uint_type;
+					uint_type_ptr_out.pointer = true;
+					uint_type_ptr_out.parent_type = type_id;
+					uint_type_ptr_out.storage = StorageClassOutput;
+					auto &ptr_out_type = set<SPIRType>(type_ptr_out_id, uint_type_ptr_out);
+					ptr_out_type.self = type_id;
+					var_id = type_ptr_out_id + 1;
+					set<SPIRVariable>(var_id, type_ptr_out_id, StorageClassOutput);
+					set_decoration(var_id, DecorationBuiltIn, BuiltInLayer);
+					builtin_layer_id = var_id;
+					mark_implicit_builtin(StorageClassOutput, BuiltInLayer, var_id);
+				}
 			}
 
-			if (!has_base_instance)
+			if (need_vertex_params && !has_base_instance)
 			{
 				uint32_t var_id = ir.increase_bound_by(1);
 
@@ -308,6 +349,17 @@ void CompilerMSL::build_implicit_builtins()
 				builtin_base_instance_id = var_id;
 				mark_implicit_builtin(StorageClassInput, BuiltInBaseInstance, var_id);
 			}
+
+			if (need_multiview && !has_view_idx)
+			{
+				uint32_t var_id = ir.increase_bound_by(1);
+
+				// Create gl_ViewIndex.
+				set<SPIRVariable>(var_id, type_ptr_id, StorageClassInput);
+				set_decoration(var_id, DecorationBuiltIn, BuiltInViewIndex);
+				builtin_view_idx_id = var_id;
+				mark_implicit_builtin(StorageClassInput, BuiltInViewIndex, var_id);
+			}
 		}
 
 		if (need_tesc_params && (!has_invocation_id || !has_primitive_id))
@@ -428,6 +480,17 @@ void CompilerMSL::build_implicit_builtins()
 		set_extended_decoration(var_id, SPIRVCrossDecorationResourceIndexPrimary, msl_options.buffer_size_buffer_index);
 		buffer_size_buffer_id = var_id;
 	}
+
+	if (needs_view_mask_buffer())
+	{
+		uint32_t var_id = build_constant_uint_array_pointer();
+		set_name(var_id, "spvViewMask");
+		// This should never match anything.
+		set_decoration(var_id, DecorationDescriptorSet, ~(4u));
+		set_decoration(var_id, DecorationBinding, msl_options.view_mask_buffer_index);
+		set_extended_decoration(var_id, SPIRVCrossDecorationResourceIndexPrimary, msl_options.view_mask_buffer_index);
+		view_mask_buffer_id = var_id;
+	}
 }
 
 void CompilerMSL::mark_implicit_builtin(StorageClass storage, BuiltIn builtin, uint32_t id)
@@ -708,10 +771,15 @@ string CompilerMSL::compile()
 	backend.comparison_image_samples_scalar = true;
 	backend.native_pointers = true;
 	backend.nonuniform_qualifier = "";
+	backend.support_small_type_sampling_result = true;
 
 	capture_output_to_buffer = msl_options.capture_output_to_buffer;
 	is_rasterization_disabled = msl_options.disable_rasterization || capture_output_to_buffer;
 
+	// Initialize array here rather than constructor, MSVC 2013 workaround.
+	for (auto &id : next_metal_resource_ids)
+		id = 0;
+
 	fixup_type_alias();
 	replace_illegal_names();
 
@@ -731,6 +799,10 @@ string CompilerMSL::compile()
 		active_interface_variables.insert(swizzle_buffer_id);
 	if (buffer_size_buffer_id)
 		active_interface_variables.insert(buffer_size_buffer_id);
+	if (view_mask_buffer_id)
+		active_interface_variables.insert(view_mask_buffer_id);
+	if (builtin_layer_id)
+		active_interface_variables.insert(builtin_layer_id);
 
 	// Create structs to hold input, output and uniform variables.
 	// Do output first to ensure out. is declared at top of entry function.
@@ -1020,7 +1092,6 @@ void CompilerMSL::extract_global_variables_from_function(uint32_t func_id, std::
 					added_out = true;
 				}
 				type_id = get<SPIRVariable>(arg_id).basetype;
-				p_type = &get<SPIRType>(type_id);
 				uint32_t next_id = ir.increase_bound_by(1);
 				func.add_parameter(type_id, next_id, true);
 				set<SPIRVariable>(next_id, type_id, StorageClassFunction, 0, arg_id);
@@ -2370,7 +2441,7 @@ void CompilerMSL::align_struct(SPIRType &ib_type)
 		// Increment the current offset to be positioned immediately after the current member.
 		// Don't do this for the last member since it can be unsized, and it is not relevant for padding purposes here.
 		if (mbr_idx + 1 < mbr_cnt)
-			curr_offset = mbr_offset + uint32_t(get_declared_struct_member_size(ib_type, mbr_idx));
+			curr_offset = mbr_offset + uint32_t(get_declared_struct_member_size_msl(ib_type, mbr_idx));
 	}
 }
 
@@ -3122,6 +3193,36 @@ void CompilerMSL::emit_custom_functions()
 			statement("");
 			break;
 
+		case SPVFuncImplReflectScalar:
+			// Metal does not support scalar versions of these functions.
+			statement("template<typename T>");
+			statement("inline T spvReflect(T i, T n)");
+			begin_scope();
+			statement("return i - T(2) * i * n * n;");
+			end_scope();
+			statement("");
+			break;
+
+		case SPVFuncImplRefractScalar:
+			// Metal does not support scalar versions of these functions.
+			statement("template<typename T>");
+			statement("inline T spvRefract(T i, T n, T eta)");
+			begin_scope();
+			statement("T NoI = n * i;");
+			statement("T NoI2 = NoI * NoI;");
+			statement("T k = T(1) - eta * eta * (T(1) - NoI2);");
+			statement("if (k < T(0))");
+			begin_scope();
+			statement("return T(0);");
+			end_scope();
+			statement("else");
+			begin_scope();
+			statement("return eta * i - (eta * NoI + sqrt(k)) * n;");
+			end_scope();
+			end_scope();
+			statement("");
+			break;
+
 		default:
 			break;
 		}
@@ -4096,7 +4197,30 @@ void CompilerMSL::emit_instruction(const Instruction &instruction)
 		break;
 	}
 
-		// OpOuterProduct
+	case OpOuterProduct:
+	{
+		uint32_t result_type = ops[0];
+		uint32_t id = ops[1];
+		uint32_t a = ops[2];
+		uint32_t b = ops[3];
+
+		auto &type = get<SPIRType>(result_type);
+		string expr = type_to_glsl_constructor(type);
+		expr += "(";
+		for (uint32_t col = 0; col < type.columns; col++)
+		{
+			expr += to_enclosed_expression(a);
+			expr += " * ";
+			expr += to_extract_component_expression(b, col);
+			if (col + 1 < type.columns)
+				expr += ", ";
+		}
+		expr += ")";
+		emit_op(result_type, id, expr, should_forward(a) && should_forward(b));
+		inherit_expression_dependencies(id, a);
+		inherit_expression_dependencies(id, b);
+		break;
+	}
 
 	case OpIAddCarry:
 	case OpISubBorrow:
@@ -4582,6 +4706,57 @@ void CompilerMSL::emit_glsl_op(uint32_t result_type, uint32_t id, uint32_t eop,
 		//        GLSLstd450InterpolateAtSample (sample_no_perspective qualifier)
 		//        GLSLstd450InterpolateAtOffset
 
+	case GLSLstd450Distance:
+		// MSL does not support scalar versions here.
+		if (expression_type(args[0]).vecsize == 1)
+		{
+			// Equivalent to length(a - b) -> abs(a - b).
+			emit_op(result_type, id,
+			        join("abs(", to_unpacked_expression(args[0]), " - ", to_unpacked_expression(args[1]), ")"),
+			        should_forward(args[0]) && should_forward(args[1]));
+			inherit_expression_dependencies(id, args[0]);
+			inherit_expression_dependencies(id, args[1]);
+		}
+		else
+			CompilerGLSL::emit_glsl_op(result_type, id, eop, args, count);
+		break;
+
+	case GLSLstd450Length:
+		// MSL does not support scalar versions here.
+		if (expression_type(args[0]).vecsize == 1)
+		{
+			// Equivalent to abs().
+			emit_unary_func_op(result_type, id, args[0], "abs");
+		}
+		else
+			CompilerGLSL::emit_glsl_op(result_type, id, eop, args, count);
+		break;
+
+	case GLSLstd450Normalize:
+		// MSL does not support scalar versions here.
+		if (expression_type(args[0]).vecsize == 1)
+		{
+			// Returns -1 or 1 for valid input, sign() does the job.
+			emit_unary_func_op(result_type, id, args[0], "sign");
+		}
+		else
+			CompilerGLSL::emit_glsl_op(result_type, id, eop, args, count);
+		break;
+
+	case GLSLstd450Reflect:
+		if (get<SPIRType>(result_type).vecsize == 1)
+			emit_binary_func_op(result_type, id, args[0], args[1], "spvReflect");
+		else
+			CompilerGLSL::emit_glsl_op(result_type, id, eop, args, count);
+		break;
+
+	case GLSLstd450Refract:
+		if (get<SPIRType>(result_type).vecsize == 1)
+			emit_trinary_func_op(result_type, id, args[0], args[1], args[2], "spvRefract");
+		else
+			CompilerGLSL::emit_glsl_op(result_type, id, eop, args, count);
+		break;
+
 	default:
 		CompilerGLSL::emit_glsl_op(result_type, id, eop, args, count);
 		break;
@@ -4766,6 +4941,21 @@ string CompilerMSL::to_function_name(uint32_t img, const SPIRType &imgtype, bool
 	return fname;
 }
 
+string CompilerMSL::convert_to_f32(const string &expr, uint32_t components)
+{
+	SPIRType t;
+	t.basetype = SPIRType::Float;
+	t.vecsize = components;
+	t.columns = 1;
+	return join(type_to_glsl_constructor(t), "(", expr, ")");
+}
+
+static inline bool sampling_type_needs_f32_conversion(const SPIRType &type)
+{
+	// Double is not supported to begin with, but doesn't hurt to check for completion.
+	return type.basetype == SPIRType::Half || type.basetype == SPIRType::Double;
+}
+
 // Returns the function args for a texture sampling function for the specified image and sampling characteristics.
 string CompilerMSL::to_function_args(uint32_t img, const SPIRType &imgtype, bool is_fetch, bool is_gather, bool is_proj,
                                      uint32_t coord, uint32_t, uint32_t dref, uint32_t grad_x, uint32_t grad_y,
@@ -4804,6 +4994,8 @@ string CompilerMSL::to_function_args(uint32_t img, const SPIRType &imgtype, bool
 
 		if (is_fetch)
 			tex_coords = "uint(" + round_fp_tex_coords(tex_coords, coord_is_fp) + ")";
+		else if (sampling_type_needs_f32_conversion(coord_type))
+			tex_coords = convert_to_f32(tex_coords, 1);
 
 		alt_coord_component = 1;
 		break;
@@ -4839,6 +5031,8 @@ string CompilerMSL::to_function_args(uint32_t img, const SPIRType &imgtype, bool
 
 		if (is_fetch)
 			tex_coords = "uint2(" + round_fp_tex_coords(tex_coords, coord_is_fp) + ")";
+		else if (sampling_type_needs_f32_conversion(coord_type))
+			tex_coords = convert_to_f32(tex_coords, 2);
 
 		alt_coord_component = 2;
 		break;
@@ -4849,6 +5043,8 @@ string CompilerMSL::to_function_args(uint32_t img, const SPIRType &imgtype, bool
 
 		if (is_fetch)
 			tex_coords = "uint3(" + round_fp_tex_coords(tex_coords, coord_is_fp) + ")";
+		else if (sampling_type_needs_f32_conversion(coord_type))
+			tex_coords = convert_to_f32(tex_coords, 3);
 
 		alt_coord_component = 3;
 		break;
@@ -4866,6 +5062,9 @@ string CompilerMSL::to_function_args(uint32_t img, const SPIRType &imgtype, bool
 				tex_coords = enclose_expression(tex_coords) + ".xyz";
 		}
 
+		if (sampling_type_needs_f32_conversion(coord_type))
+			tex_coords = convert_to_f32(tex_coords, 3);
+
 		alt_coord_component = 3;
 		break;
 
@@ -4896,7 +5095,12 @@ string CompilerMSL::to_function_args(uint32_t img, const SPIRType &imgtype, bool
 
 	// If projection, use alt coord as divisor
 	if (is_proj)
-		tex_coords += " / " + to_extract_component_expression(coord, alt_coord_component);
+	{
+		if (sampling_type_needs_f32_conversion(coord_type))
+			tex_coords += " / " + convert_to_f32(to_extract_component_expression(coord, alt_coord_component), 1);
+		else
+			tex_coords += " / " + to_extract_component_expression(coord, alt_coord_component);
+	}
 
 	if (!farg_str.empty())
 		farg_str += ", ";
@@ -4930,11 +5134,19 @@ string CompilerMSL::to_function_args(uint32_t img, const SPIRType &imgtype, bool
 		forward = forward && should_forward(dref);
 		farg_str += ", ";
 
+		auto &dref_type = expression_type(dref);
+
+		string dref_expr;
 		if (is_proj)
-			farg_str +=
-			    to_enclosed_expression(dref) + " / " + to_extract_component_expression(coord, alt_coord_component);
+			dref_expr =
+			    join(to_enclosed_expression(dref), " / ", to_extract_component_expression(coord, alt_coord_component));
 		else
-			farg_str += to_expression(dref);
+			dref_expr = to_expression(dref);
+
+		if (sampling_type_needs_f32_conversion(dref_type))
+			dref_expr = convert_to_f32(dref_expr, 1);
+
+		farg_str += dref_expr;
 
 		if (msl_options.is_macos() && (grad_x || grad_y))
 		{
@@ -5626,6 +5838,10 @@ string CompilerMSL::member_attribute_qualifier(const SPIRType &type, uint32_t in
 		{
 			switch (builtin)
 			{
+			case BuiltInViewIndex:
+				if (!msl_options.multiview)
+					break;
+				/* fallthrough */
 			case BuiltInFrontFacing:
 			case BuiltInPointCoord:
 			case BuiltInFragCoord:
@@ -6013,7 +6229,12 @@ void CompilerMSL::entry_point_args_builtin(string &ep_args)
 			    bi_type != BuiltInClipDistance && bi_type != BuiltInCullDistance && bi_type != BuiltInSubgroupEqMask &&
 			    bi_type != BuiltInBaryCoordNV && bi_type != BuiltInBaryCoordNoPerspNV &&
 			    bi_type != BuiltInSubgroupGeMask && bi_type != BuiltInSubgroupGtMask &&
-			    bi_type != BuiltInSubgroupLeMask && bi_type != BuiltInSubgroupLtMask)
+			    bi_type != BuiltInSubgroupLeMask && bi_type != BuiltInSubgroupLtMask &&
+			    ((get_execution_model() == ExecutionModelFragment && msl_options.multiview) ||
+			     bi_type != BuiltInViewIndex) &&
+			    (get_execution_model() == ExecutionModelGLCompute ||
+			     (get_execution_model() == ExecutionModelFragment && msl_options.supports_msl_version(2, 2)) ||
+			     (bi_type != BuiltInSubgroupLocalInvocationId && bi_type != BuiltInSubgroupSize)))
 			{
 				if (!ep_args.empty())
 					ep_args += ", ";
@@ -6090,6 +6311,7 @@ void CompilerMSL::entry_point_args_builtin(string &ep_args)
 string CompilerMSL::entry_point_args_argument_buffer(bool append_comma)
 {
 	string ep_args = entry_point_arg_stage_in();
+	Bitset claimed_bindings;
 
 	for (uint32_t i = 0; i < kMaxArgumentBuffers; i++)
 	{
@@ -6104,12 +6326,30 @@ string CompilerMSL::entry_point_args_argument_buffer(bool append_comma)
 		if (!ep_args.empty())
 			ep_args += ", ";
 
-		ep_args += get_argument_address_space(var) + " " + type_to_glsl(type) + "& " + to_name(id);
-		ep_args += " [[buffer(" + convert_to_string(i) + ")]]";
+		// Check if the argument buffer binding itself has been remapped.
+		uint32_t buffer_binding;
+		auto itr = resource_bindings.find({ get_entry_point().model, i, kArgumentBufferBinding });
+		if (itr != end(resource_bindings))
+		{
+			buffer_binding = itr->second.first.msl_buffer;
+			itr->second.second = true;
+		}
+		else
+		{
+			// As a fallback, directly map desc set <-> binding.
+			// If that was taken, take the next buffer binding.
+			if (claimed_bindings.get(i))
+				buffer_binding = next_metal_resource_index_buffer;
+			else
+				buffer_binding = i;
+		}
 
-		// Makes it more practical for testing, since the push constant block can occupy the first available
-		// buffer slot if it's not bound explicitly.
-		next_metal_resource_index_buffer = i + 1;
+		claimed_bindings.set(buffer_binding);
+
+		ep_args += get_argument_address_space(var) + " " + type_to_glsl(type) + "& " + to_name(id);
+		ep_args += " [[buffer(" + convert_to_string(buffer_binding) + ")]]";
+
+		next_metal_resource_index_buffer = max(next_metal_resource_index_buffer, buffer_binding + 1);
 	}
 
 	entry_point_args_discrete_descriptors(ep_args);
@@ -6401,6 +6641,50 @@ void CompilerMSL::fix_up_shader_inputs_outputs()
 					entry_func.fixup_hooks_in.push_back([=]() { statement(tc, ".y = 1.0 - ", tc, ".y;"); });
 				}
 				break;
+			case BuiltInSubgroupLocalInvocationId:
+				// This is natively supported in compute shaders.
+				if (get_execution_model() == ExecutionModelGLCompute)
+					break;
+
+				// This is natively supported in fragment shaders in MSL 2.2.
+				if (get_execution_model() == ExecutionModelFragment && msl_options.supports_msl_version(2, 2))
+					break;
+
+				if (msl_options.is_ios())
+					SPIRV_CROSS_THROW(
+					    "SubgroupLocalInvocationId cannot be used outside of compute shaders before MSL 2.2 on iOS.");
+
+				if (!msl_options.supports_msl_version(2, 1))
+					SPIRV_CROSS_THROW(
+					    "SubgroupLocalInvocationId cannot be used outside of compute shaders before MSL 2.1.");
+
+				// Shaders other than compute shaders don't support the SIMD-group
+				// builtins directly, but we can emulate them using the SIMD-group
+				// functions. This might break if some of the subgroup terminated
+				// before reaching the entry point.
+				entry_func.fixup_hooks_in.push_back([=]() {
+					statement(builtin_type_decl(bi_type), " ", to_expression(var_id),
+					          " = simd_prefix_exclusive_sum(1);");
+				});
+				break;
+			case BuiltInSubgroupSize:
+				// This is natively supported in compute shaders.
+				if (get_execution_model() == ExecutionModelGLCompute)
+					break;
+
+				// This is natively supported in fragment shaders in MSL 2.2.
+				if (get_execution_model() == ExecutionModelFragment && msl_options.supports_msl_version(2, 2))
+					break;
+
+				if (msl_options.is_ios())
+					SPIRV_CROSS_THROW("SubgroupSize cannot be used outside of compute shaders on iOS.");
+
+				if (!msl_options.supports_msl_version(2, 1))
+					SPIRV_CROSS_THROW("SubgroupSize cannot be used outside of compute shaders before Metal 2.1.");
+
+				entry_func.fixup_hooks_in.push_back(
+				    [=]() { statement(builtin_type_decl(bi_type), " ", to_expression(var_id), " = simd_sum(1);"); });
+				break;
 			case BuiltInSubgroupEqMask:
 				if (msl_options.is_ios())
 					SPIRV_CROSS_THROW("Subgroup ballot functionality is unavailable on iOS.");
@@ -6408,7 +6692,7 @@ void CompilerMSL::fix_up_shader_inputs_outputs()
 					SPIRV_CROSS_THROW("Subgroup ballot functionality requires Metal 2.1.");
 				entry_func.fixup_hooks_in.push_back([=]() {
 					statement(builtin_type_decl(bi_type), " ", to_expression(var_id), " = ",
-					          builtin_subgroup_invocation_id_id, " > 32 ? uint4(0, (1 << (",
+					          to_expression(builtin_subgroup_invocation_id_id), " > 32 ? uint4(0, (1 << (",
 					          to_expression(builtin_subgroup_invocation_id_id), " - 32)), uint2(0)) : uint4(1 << ",
 					          to_expression(builtin_subgroup_invocation_id_id), ", uint3(0));");
 				});
@@ -6489,6 +6773,44 @@ void CompilerMSL::fix_up_shader_inputs_outputs()
 					          to_expression(builtin_subgroup_invocation_id_id), " - 32, 0)), uint2(0));");
 				});
 				break;
+			case BuiltInViewIndex:
+				if (!msl_options.multiview)
+				{
+					// According to the Vulkan spec, when not running under a multiview
+					// render pass, ViewIndex is 0.
+					entry_func.fixup_hooks_in.push_back([=]() {
+						statement("const ", builtin_type_decl(bi_type), " ", to_expression(var_id), " = 0;");
+					});
+				}
+				else if (get_execution_model() == ExecutionModelFragment)
+				{
+					// Because we adjusted the view index in the vertex shader, we have to
+					// adjust it back here.
+					entry_func.fixup_hooks_in.push_back([=]() {
+						statement(to_expression(var_id), " += ", to_expression(view_mask_buffer_id), "[0];");
+					});
+				}
+				else if (get_execution_model() == ExecutionModelVertex)
+				{
+					// Metal provides no special support for multiview, so we smuggle
+					// the view index in the instance index.
+					entry_func.fixup_hooks_in.push_back([=]() {
+						statement(builtin_type_decl(bi_type), " ", to_expression(var_id), " = ",
+						          to_expression(view_mask_buffer_id), "[0] + ", to_expression(builtin_instance_idx_id),
+						          " % ", to_expression(view_mask_buffer_id), "[1];");
+						statement(to_expression(builtin_instance_idx_id), " /= ", to_expression(view_mask_buffer_id),
+						          "[1];");
+					});
+					// In addition to setting the variable itself, we also need to
+					// set the render_target_array_index with it on output. We have to
+					// offset this by the base view index, because Metal isn't in on
+					// our little game here.
+					entry_func.fixup_hooks_out.push_back([=]() {
+						statement(to_expression(builtin_layer_id), " = ", to_expression(var_id), " - ",
+						          to_expression(view_mask_buffer_id), "[0];");
+					});
+				}
+				break;
 			default:
 				break;
 			}
@@ -7790,6 +8112,12 @@ string CompilerMSL::builtin_qualifier(BuiltIn builtin)
 	case BuiltInSamplePosition:
 		// Shouldn't be reached.
 		SPIRV_CROSS_THROW("Sample position is retrieved by a function in MSL.");
+	case BuiltInViewIndex:
+		if (execution.model != ExecutionModelFragment)
+			SPIRV_CROSS_THROW("ViewIndex is handled specially outside fragment shaders.");
+		// The ViewIndex was implicitly used in the prior stages to set the render_target_array_index,
+		// so we can get it from there.
+		return "render_target_array_index";
 
 	// Fragment function out
 	case BuiltInFragDepth:
@@ -7820,7 +8148,18 @@ string CompilerMSL::builtin_qualifier(BuiltIn builtin)
 		return "thread_index_in_threadgroup";
 
 	case BuiltInSubgroupSize:
-		return "thread_execution_width";
+		if (execution.model == ExecutionModelFragment)
+		{
+			if (!msl_options.supports_msl_version(2, 2))
+				SPIRV_CROSS_THROW("threads_per_simdgroup requires Metal 2.2 in fragment shaders.");
+			return "threads_per_simdgroup";
+		}
+		else
+		{
+			// thread_execution_width is an alias for threads_per_simdgroup, and it's only available since 1.0,
+			// but not in fragment.
+			return "thread_execution_width";
+		}
 
 	case BuiltInNumSubgroups:
 		if (!msl_options.supports_msl_version(2))
@@ -7833,9 +8172,18 @@ string CompilerMSL::builtin_qualifier(BuiltIn builtin)
 		return msl_options.is_ios() ? "quadgroup_index_in_threadgroup" : "simdgroup_index_in_threadgroup";
 
 	case BuiltInSubgroupLocalInvocationId:
-		if (!msl_options.supports_msl_version(2))
-			SPIRV_CROSS_THROW("Subgroup builtins require Metal 2.0.");
-		return msl_options.is_ios() ? "thread_index_in_quadgroup" : "thread_index_in_simdgroup";
+		if (execution.model == ExecutionModelFragment)
+		{
+			if (!msl_options.supports_msl_version(2, 2))
+				SPIRV_CROSS_THROW("thread_index_in_simdgroup requires Metal 2.2 in fragment shaders.");
+			return "thread_index_in_simdgroup";
+		}
+		else
+		{
+			if (!msl_options.supports_msl_version(2))
+				SPIRV_CROSS_THROW("Subgroup builtins require Metal 2.0.");
+			return msl_options.is_ios() ? "thread_index_in_quadgroup" : "thread_index_in_simdgroup";
+		}
 
 	case BuiltInSubgroupEqMask:
 	case BuiltInSubgroupGeMask:
@@ -7937,6 +8285,8 @@ string CompilerMSL::builtin_type_decl(BuiltIn builtin, uint32_t id)
 		return "uint";
 	case BuiltInSamplePosition:
 		return "float2";
+	case BuiltInViewIndex:
+		return "uint";
 
 	// Fragment function out
 	case BuiltInFragDepth:
@@ -7992,7 +8342,7 @@ string CompilerMSL::built_in_func_arg(BuiltIn builtin, bool prefix_comma)
 }
 
 // Returns the byte size of a struct member.
-size_t CompilerMSL::get_declared_struct_member_size(const SPIRType &struct_type, uint32_t index) const
+size_t CompilerMSL::get_declared_struct_member_size_msl(const SPIRType &struct_type, uint32_t index) const
 {
 	auto &type = get<SPIRType>(struct_type.member_types[index]);
 
@@ -8358,7 +8708,7 @@ CompilerMSL::SPVFuncImpl CompilerMSL::OpCodePreprocessor::get_spv_func_impl(Op o
 		uint32_t extension_set = args[2];
 		if (compiler.get<SPIRExtension>(extension_set).ext == SPIRExtension::GLSL)
 		{
-			GLSLstd450 op_450 = static_cast<GLSLstd450>(args[3]);
+			auto op_450 = static_cast<GLSLstd450>(args[3]);
 			switch (op_450)
 			{
 			case GLSLstd450Radians:
@@ -8373,6 +8723,22 @@ CompilerMSL::SPVFuncImpl CompilerMSL::OpCodePreprocessor::get_spv_func_impl(Op o
 				return SPVFuncImplFindUMsb;
 			case GLSLstd450SSign:
 				return SPVFuncImplSSign;
+			case GLSLstd450Reflect:
+			{
+				auto &type = compiler.get<SPIRType>(args[0]);
+				if (type.vecsize == 1)
+					return SPVFuncImplReflectScalar;
+				else
+					return SPVFuncImplNone;
+			}
+			case GLSLstd450Refract:
+			{
+				auto &type = compiler.get<SPIRType>(args[0]);
+				if (type.vecsize == 1)
+					return SPVFuncImplRefractScalar;
+				else
+					return SPVFuncImplNone;
+			}
 			case GLSLstd450MatrixInverse:
 			{
 				auto &mat_type = compiler.get<SPIRType>(args[0]);
@@ -8519,6 +8885,9 @@ void CompilerMSL::bitcast_from_builtin_load(uint32_t source_id, std::string &exp
 	case BuiltInViewportIndex:
 	case BuiltInFragStencilRefEXT:
 	case BuiltInPrimitiveId:
+	case BuiltInSubgroupSize:
+	case BuiltInSubgroupLocalInvocationId:
+	case BuiltInViewIndex:
 		expected_type = SPIRType::UInt;
 		break;
 
@@ -8561,6 +8930,7 @@ void CompilerMSL::bitcast_to_builtin_store(uint32_t target_id, std::string &expr
 	case BuiltInViewportIndex:
 	case BuiltInFragStencilRefEXT:
 	case BuiltInPrimitiveId:
+	case BuiltInViewIndex:
 		expected_type = SPIRType::UInt;
 		break;
 
diff --git a/deps/SPIRV-Cross/spirv_msl.hpp b/deps/SPIRV-Cross/spirv_msl.hpp
index 5a059235d6..13ed36ef9a 100644
--- a/deps/SPIRV-Cross/spirv_msl.hpp
+++ b/deps/SPIRV-Cross/spirv_msl.hpp
@@ -161,6 +161,12 @@ static const uint32_t kSwizzleBufferBinding = ~(1u);
 // element to indicate the buffer binding for buffer size buffers to support OpArrayLength.
 static const uint32_t kBufferSizeBufferBinding = ~(2u);
 
+// Special constant used in a MSLResourceBinding binding
+// element to indicate the buffer binding used for the argument buffer itself.
+// This buffer binding should be kept as small as possible as all automatic bindings for buffers
+// will start at max(kArgumentBufferBinding) + 1.
+static const uint32_t kArgumentBufferBinding = ~(3u);
+
 static const uint32_t kMaxArgumentBuffers = 8;
 
 // Decompiles SPIR-V to Metal Shading Language
@@ -185,12 +191,14 @@ public:
 		uint32_t shader_patch_output_buffer_index = 27;
 		uint32_t shader_tess_factor_buffer_index = 26;
 		uint32_t buffer_size_buffer_index = 25;
+		uint32_t view_mask_buffer_index = 24;
 		uint32_t shader_input_wg_index = 0;
 		bool enable_point_size_builtin = true;
 		bool disable_rasterization = false;
 		bool capture_output_to_buffer = false;
 		bool swizzle_texture_samples = false;
 		bool tess_domain_origin_lower_left = false;
+		bool multiview = false;
 
 		// Enable use of MSL 2.0 indirect argument buffers.
 		// MSL 2.0 must also be enabled.
@@ -262,6 +270,13 @@ public:
 		return !buffers_requiring_array_length.empty();
 	}
 
+	// Provide feedback to calling API to allow it to pass a buffer
+	// containing the view mask for the current multiview subpass.
+	bool needs_view_mask_buffer() const
+	{
+		return msl_options.multiview;
+	}
+
 	// Provide feedback to calling API to allow it to pass an output
 	// buffer if the shader needs it.
 	bool needs_output_buffer() const
@@ -386,6 +401,8 @@ protected:
 		SPVFuncImplSubgroupBallotFindMSB,
 		SPVFuncImplSubgroupBallotBitCount,
 		SPVFuncImplSubgroupAllEqual,
+		SPVFuncImplReflectScalar,
+		SPVFuncImplRefractScalar,
 		SPVFuncImplArrayCopyMultidimMax = 6
 	};
 
@@ -406,7 +423,7 @@ protected:
 	std::string image_type_glsl(const SPIRType &type, uint32_t id = 0) override;
 	std::string sampler_type(const SPIRType &type);
 	std::string builtin_to_glsl(spv::BuiltIn builtin, spv::StorageClass storage) override;
-	size_t get_declared_struct_member_size(const SPIRType &struct_type, uint32_t index) const override;
+	size_t get_declared_struct_member_size_msl(const SPIRType &struct_type, uint32_t index) const;
 	std::string to_func_call_arg(uint32_t id) override;
 	std::string to_name(uint32_t id, bool allow_alias = true) const override;
 	std::string to_function_name(uint32_t img, const SPIRType &imgtype, bool is_fetch, bool is_gather, bool is_proj,
@@ -520,12 +537,15 @@ protected:
 	uint32_t builtin_base_vertex_id = 0;
 	uint32_t builtin_instance_idx_id = 0;
 	uint32_t builtin_base_instance_id = 0;
+	uint32_t builtin_view_idx_id = 0;
+	uint32_t builtin_layer_id = 0;
 	uint32_t builtin_invocation_id_id = 0;
 	uint32_t builtin_primitive_id_id = 0;
 	uint32_t builtin_subgroup_invocation_id_id = 0;
 	uint32_t builtin_subgroup_size_id = 0;
 	uint32_t swizzle_buffer_id = 0;
 	uint32_t buffer_size_buffer_id = 0;
+	uint32_t view_mask_buffer_id = 0;
 
 	void bitcast_to_builtin_store(uint32_t target_id, std::string &expr, const SPIRType &expr_type) override;
 	void bitcast_from_builtin_load(uint32_t source_id, std::string &expr, const SPIRType &expr_type) override;
@@ -538,6 +558,8 @@ protected:
 
 	void mark_implicit_builtin(spv::StorageClass storage, spv::BuiltIn builtin, uint32_t id);
 
+	std::string convert_to_f32(const std::string &expr, uint32_t components);
+
 	Options msl_options;
 	std::set<SPVFuncImpl> spv_function_implementations;
 	std::unordered_map<uint32_t, MSLVertexAttr> vtx_attrs_by_location;
@@ -571,10 +593,12 @@ protected:
 	};
 
 	std::unordered_map<StageSetBinding, std::pair<MSLResourceBinding, bool>, InternalHasher> resource_bindings;
+
 	uint32_t next_metal_resource_index_buffer = 0;
 	uint32_t next_metal_resource_index_texture = 0;
 	uint32_t next_metal_resource_index_sampler = 0;
-	uint32_t next_metal_resource_ids[kMaxArgumentBuffers] = {};
+	// Intentionally uninitialized, works around MSVC 2013 bug.
+	uint32_t next_metal_resource_ids[kMaxArgumentBuffers];
 
 	uint32_t stage_in_var_id = 0;
 	uint32_t stage_out_var_id = 0;
diff --git a/deps/SPIRV-Cross/spirv_parser.cpp b/deps/SPIRV-Cross/spirv_parser.cpp
index af0b2b35d7..a271eadff3 100644
--- a/deps/SPIRV-Cross/spirv_parser.cpp
+++ b/deps/SPIRV-Cross/spirv_parser.cpp
@@ -162,7 +162,6 @@ void Parser::parse(const Instruction &instruction)
 	case OpSourceContinued:
 	case OpSourceExtension:
 	case OpNop:
-	case OpNoLine:
 	case OpModuleProcessed:
 		break;
 
@@ -244,6 +243,8 @@ void Parser::parse(const Instruction &instruction)
 		auto ext = extract_string(ir.spirv, instruction.offset + 1);
 		if (ext == "GLSL.std.450")
 			set<SPIRExtension>(id, SPIRExtension::GLSL);
+		else if (ext == "DebugInfo")
+			set<SPIRExtension>(id, SPIRExtension::SPV_debug_info);
 		else if (ext == "SPV_AMD_shader_ballot")
 			set<SPIRExtension>(id, SPIRExtension::SPV_AMD_shader_ballot);
 		else if (ext == "SPV_AMD_shader_explicit_vertex_parameter")
@@ -260,6 +261,14 @@ void Parser::parse(const Instruction &instruction)
 		break;
 	}
 
+	case OpExtInst:
+	{
+		// The SPIR-V debug information extended instructions might come at global scope.
+		if (current_block)
+			current_block->ops.push_back(instruction);
+		break;
+	}
+
 	case OpEntryPoint:
 	{
 		auto itr =
@@ -1057,6 +1066,14 @@ void Parser::parse(const Instruction &instruction)
 		break;
 	}
 
+	case OpNoLine:
+	{
+		// OpNoLine might come at global scope.
+		if (current_block)
+			current_block->ops.push_back(instruction);
+		break;
+	}
+
 	// Actual opcodes.
 	default:
 	{
diff --git a/deps/SPIRV-Cross/test_shaders.py b/deps/SPIRV-Cross/test_shaders.py
index 2df8e59378..d2f75e8015 100755
--- a/deps/SPIRV-Cross/test_shaders.py
+++ b/deps/SPIRV-Cross/test_shaders.py
@@ -203,6 +203,8 @@ def cross_compile_msl(shader, spirv, opt, iterations, paths):
         msl_args.append('3')
     if '.line.' in shader:
         msl_args.append('--emit-line-directives')
+    if '.multiview.' in shader:
+        msl_args.append('--msl-multiview')
 
     subprocess.check_call(msl_args)
 
diff --git a/reference/opt/shaders-hlsl/asm/frag/texture-sampling-fp16.asm.frag b/reference/opt/shaders-hlsl/asm/frag/texture-sampling-fp16.asm.frag
new file mode 100644
index 0000000000..322c86bed3
--- /dev/null
+++ b/reference/opt/shaders-hlsl/asm/frag/texture-sampling-fp16.asm.frag
@@ -0,0 +1,29 @@
+Texture2D<float4> uTexture : register(t0);
+SamplerState _uTexture_sampler : register(s0);
+
+static min16float4 FragColor;
+static min16float2 UV;
+
+struct SPIRV_Cross_Input
+{
+    min16float2 UV : TEXCOORD0;
+};
+
+struct SPIRV_Cross_Output
+{
+    min16float4 FragColor : SV_Target0;
+};
+
+void frag_main()
+{
+    FragColor = min16float4(uTexture.Sample(_uTexture_sampler, UV));
+}
+
+SPIRV_Cross_Output main(SPIRV_Cross_Input stage_input)
+{
+    UV = stage_input.UV;
+    frag_main();
+    SPIRV_Cross_Output stage_output;
+    stage_output.FragColor = FragColor;
+    return stage_output;
+}
diff --git a/reference/opt/shaders-hlsl/comp/outer-product.comp b/reference/opt/shaders-hlsl/comp/outer-product.comp
new file mode 100644
index 0000000000..71613d4f15
--- /dev/null
+++ b/reference/opt/shaders-hlsl/comp/outer-product.comp
@@ -0,0 +1,48 @@
+RWByteAddressBuffer _21 : register(u0);
+ByteAddressBuffer _26 : register(t1);
+
+void comp_main()
+{
+    float2x2 _32 = float2x2(asfloat(_26.Load2(0)) * asfloat(_26.Load2(0)).x, asfloat(_26.Load2(0)) * asfloat(_26.Load2(0)).y);
+    _21.Store2(0, asuint(_32[0]));
+    _21.Store2(8, asuint(_32[1]));
+    float2x3 _41 = float2x3(asfloat(_26.Load3(16)) * asfloat(_26.Load2(0)).x, asfloat(_26.Load3(16)) * asfloat(_26.Load2(0)).y);
+    _21.Store3(16, asuint(_41[0]));
+    _21.Store3(32, asuint(_41[1]));
+    float2x4 _50 = float2x4(asfloat(_26.Load4(32)) * asfloat(_26.Load2(0)).x, asfloat(_26.Load4(32)) * asfloat(_26.Load2(0)).y);
+    _21.Store4(48, asuint(_50[0]));
+    _21.Store4(64, asuint(_50[1]));
+    float3x2 _58 = float3x2(asfloat(_26.Load2(0)) * asfloat(_26.Load3(16)).x, asfloat(_26.Load2(0)) * asfloat(_26.Load3(16)).y, asfloat(_26.Load2(0)) * asfloat(_26.Load3(16)).z);
+    _21.Store2(80, asuint(_58[0]));
+    _21.Store2(88, asuint(_58[1]));
+    _21.Store2(96, asuint(_58[2]));
+    float3x3 _66 = float3x3(asfloat(_26.Load3(16)) * asfloat(_26.Load3(16)).x, asfloat(_26.Load3(16)) * asfloat(_26.Load3(16)).y, asfloat(_26.Load3(16)) * asfloat(_26.Load3(16)).z);
+    _21.Store3(112, asuint(_66[0]));
+    _21.Store3(128, asuint(_66[1]));
+    _21.Store3(144, asuint(_66[2]));
+    float3x4 _74 = float3x4(asfloat(_26.Load4(32)) * asfloat(_26.Load3(16)).x, asfloat(_26.Load4(32)) * asfloat(_26.Load3(16)).y, asfloat(_26.Load4(32)) * asfloat(_26.Load3(16)).z);
+    _21.Store4(160, asuint(_74[0]));
+    _21.Store4(176, asuint(_74[1]));
+    _21.Store4(192, asuint(_74[2]));
+    float4x2 _82 = float4x2(asfloat(_26.Load2(0)) * asfloat(_26.Load4(32)).x, asfloat(_26.Load2(0)) * asfloat(_26.Load4(32)).y, asfloat(_26.Load2(0)) * asfloat(_26.Load4(32)).z, asfloat(_26.Load2(0)) * asfloat(_26.Load4(32)).w);
+    _21.Store2(208, asuint(_82[0]));
+    _21.Store2(216, asuint(_82[1]));
+    _21.Store2(224, asuint(_82[2]));
+    _21.Store2(232, asuint(_82[3]));
+    float4x3 _90 = float4x3(asfloat(_26.Load3(16)) * asfloat(_26.Load4(32)).x, asfloat(_26.Load3(16)) * asfloat(_26.Load4(32)).y, asfloat(_26.Load3(16)) * asfloat(_26.Load4(32)).z, asfloat(_26.Load3(16)) * asfloat(_26.Load4(32)).w);
+    _21.Store3(240, asuint(_90[0]));
+    _21.Store3(256, asuint(_90[1]));
+    _21.Store3(272, asuint(_90[2]));
+    _21.Store3(288, asuint(_90[3]));
+    float4x4 _98 = float4x4(asfloat(_26.Load4(32)) * asfloat(_26.Load4(32)).x, asfloat(_26.Load4(32)) * asfloat(_26.Load4(32)).y, asfloat(_26.Load4(32)) * asfloat(_26.Load4(32)).z, asfloat(_26.Load4(32)) * asfloat(_26.Load4(32)).w);
+    _21.Store4(304, asuint(_98[0]));
+    _21.Store4(320, asuint(_98[1]));
+    _21.Store4(336, asuint(_98[2]));
+    _21.Store4(352, asuint(_98[3]));
+}
+
+[numthreads(1, 1, 1)]
+void main()
+{
+    comp_main();
+}
diff --git a/reference/opt/shaders-hlsl/comp/scalar-std450-distance-length-normalize.comp b/reference/opt/shaders-hlsl/comp/scalar-std450-distance-length-normalize.comp
new file mode 100644
index 0000000000..47f2fe4107
--- /dev/null
+++ b/reference/opt/shaders-hlsl/comp/scalar-std450-distance-length-normalize.comp
@@ -0,0 +1,14 @@
+RWByteAddressBuffer _9 : register(u0);
+
+void comp_main()
+{
+    _9.Store(8, asuint(distance(asfloat(_9.Load(0)), asfloat(_9.Load(4)))));
+    _9.Store(12, asuint(length(asfloat(_9.Load(0)))));
+    _9.Store(16, asuint(sign(asfloat(_9.Load(0)))));
+}
+
+[numthreads(1, 1, 1)]
+void main()
+{
+    comp_main();
+}
diff --git a/reference/opt/shaders-hlsl/frag/for-loop-continue-control-flow.frag b/reference/opt/shaders-hlsl/frag/for-loop-continue-control-flow.frag
new file mode 100644
index 0000000000..f37c1fcc4b
--- /dev/null
+++ b/reference/opt/shaders-hlsl/frag/for-loop-continue-control-flow.frag
@@ -0,0 +1,25 @@
+static float4 FragColor;
+
+struct SPIRV_Cross_Output
+{
+    float4 FragColor : SV_Target0;
+};
+
+void frag_main()
+{
+    FragColor = 0.0f.xxxx;
+    for (int _43 = 0; _43 < 3; )
+    {
+        FragColor[_43] += float(_43);
+        _43++;
+        continue;
+    }
+}
+
+SPIRV_Cross_Output main()
+{
+    frag_main();
+    SPIRV_Cross_Output stage_output;
+    stage_output.FragColor = FragColor;
+    return stage_output;
+}
diff --git a/reference/opt/shaders-hlsl/frag/scalar-refract-reflect.frag b/reference/opt/shaders-hlsl/frag/scalar-refract-reflect.frag
new file mode 100644
index 0000000000..0fb694c543
--- /dev/null
+++ b/reference/opt/shaders-hlsl/frag/scalar-refract-reflect.frag
@@ -0,0 +1,49 @@
+static float FragColor;
+static float3 vRefract;
+
+struct SPIRV_Cross_Input
+{
+    float3 vRefract : TEXCOORD0;
+};
+
+struct SPIRV_Cross_Output
+{
+    float FragColor : SV_Target0;
+};
+
+float SPIRV_Cross_Reflect(float i, float n)
+{
+    return i - 2.0 * dot(n, i) * n;
+}
+
+float SPIRV_Cross_Refract(float i, float n, float eta)
+{
+    float NoI = n * i;
+    float NoI2 = NoI * NoI;
+    float k = 1.0 - eta * eta * (1.0 - NoI2);
+    if (k < 0.0)
+    {
+        return 0.0;
+    }
+    else
+    {
+        return eta * i - (eta * NoI + sqrt(k)) * n;
+    }
+}
+
+void frag_main()
+{
+    FragColor = SPIRV_Cross_Refract(vRefract.x, vRefract.y, vRefract.z);
+    FragColor += SPIRV_Cross_Reflect(vRefract.x, vRefract.y);
+    FragColor += refract(vRefract.xy, vRefract.yz, vRefract.z).y;
+    FragColor += reflect(vRefract.xy, vRefract.zy).y;
+}
+
+SPIRV_Cross_Output main(SPIRV_Cross_Input stage_input)
+{
+    vRefract = stage_input.vRefract;
+    frag_main();
+    SPIRV_Cross_Output stage_output;
+    stage_output.FragColor = FragColor;
+    return stage_output;
+}
diff --git a/reference/opt/shaders-msl/asm/frag/texture-sampling-fp16.asm.frag b/reference/opt/shaders-msl/asm/frag/texture-sampling-fp16.asm.frag
new file mode 100644
index 0000000000..97345820b6
--- /dev/null
+++ b/reference/opt/shaders-msl/asm/frag/texture-sampling-fp16.asm.frag
@@ -0,0 +1,22 @@
+#include <metal_stdlib>
+#include <simd/simd.h>
+
+using namespace metal;
+
+struct main0_out
+{
+    half4 FragColor [[color(0)]];
+};
+
+struct main0_in
+{
+    half2 UV [[user(locn0)]];
+};
+
+fragment main0_out main0(main0_in in [[stage_in]], texture2d<float> uTexture [[texture(0)]], sampler uTextureSmplr [[sampler(0)]])
+{
+    main0_out out = {};
+    out.FragColor = half4(uTexture.sample(uTextureSmplr, float2(in.UV)));
+    return out;
+}
+
diff --git a/reference/opt/shaders-msl/comp/outer-product.comp b/reference/opt/shaders-msl/comp/outer-product.comp
new file mode 100644
index 0000000000..8e32db392e
--- /dev/null
+++ b/reference/opt/shaders-msl/comp/outer-product.comp
@@ -0,0 +1,38 @@
+#include <metal_stdlib>
+#include <simd/simd.h>
+
+using namespace metal;
+
+struct SSBO
+{
+    float2x2 m22;
+    float2x3 m23;
+    float2x4 m24;
+    float3x2 m32;
+    float3x3 m33;
+    float3x4 m34;
+    float4x2 m42;
+    float4x3 m43;
+    float4x4 m44;
+};
+
+struct ReadSSBO
+{
+    float2 v2;
+    float3 v3;
+    float4 v4;
+};
+
+kernel void main0(device SSBO& _21 [[buffer(0)]], const device ReadSSBO& _26 [[buffer(1)]])
+{
+    _21.m22 = float2x2(_26.v2 * _26.v2.x, _26.v2 * _26.v2.y);
+    _21.m23 = float2x3(_26.v3 * _26.v2.x, _26.v3 * _26.v2.y);
+    _21.m24 = float2x4(_26.v4 * _26.v2.x, _26.v4 * _26.v2.y);
+    _21.m32 = float3x2(_26.v2 * _26.v3.x, _26.v2 * _26.v3.y, _26.v2 * _26.v3.z);
+    _21.m33 = float3x3(_26.v3 * _26.v3.x, _26.v3 * _26.v3.y, _26.v3 * _26.v3.z);
+    _21.m34 = float3x4(_26.v4 * _26.v3.x, _26.v4 * _26.v3.y, _26.v4 * _26.v3.z);
+    _21.m42 = float4x2(_26.v2 * _26.v4.x, _26.v2 * _26.v4.y, _26.v2 * _26.v4.z, _26.v2 * _26.v4.w);
+    _21.m43 = float4x3(_26.v3 * _26.v4.x, _26.v3 * _26.v4.y, _26.v3 * _26.v4.z, _26.v3 * _26.v4.w);
+    _21.m44 = float4x4(_26.v4 * _26.v4.x, _26.v4 * _26.v4.y, _26.v4 * _26.v4.z, _26.v4 * _26.v4.w);
+}
+
diff --git a/reference/opt/shaders-msl/comp/scalar-std450-distance-length-normalize.comp b/reference/opt/shaders-msl/comp/scalar-std450-distance-length-normalize.comp
new file mode 100644
index 0000000000..312a6f9453
--- /dev/null
+++ b/reference/opt/shaders-msl/comp/scalar-std450-distance-length-normalize.comp
@@ -0,0 +1,21 @@
+#include <metal_stdlib>
+#include <simd/simd.h>
+
+using namespace metal;
+
+struct SSBO
+{
+    float a;
+    float b;
+    float c;
+    float d;
+    float e;
+};
+
+kernel void main0(device SSBO& _9 [[buffer(0)]])
+{
+    _9.c = abs(_9.a - _9.b);
+    _9.d = abs(_9.a);
+    _9.e = sign(_9.a);
+}
+
diff --git a/reference/opt/shaders-msl/frag/for-loop-continue-control-flow.frag b/reference/opt/shaders-msl/frag/for-loop-continue-control-flow.frag
new file mode 100644
index 0000000000..b4cd2085e3
--- /dev/null
+++ b/reference/opt/shaders-msl/frag/for-loop-continue-control-flow.frag
@@ -0,0 +1,23 @@
+#include <metal_stdlib>
+#include <simd/simd.h>
+
+using namespace metal;
+
+struct main0_out
+{
+    float4 FragColor [[color(0)]];
+};
+
+fragment main0_out main0()
+{
+    main0_out out = {};
+    out.FragColor = float4(0.0);
+    for (int _43 = 0; _43 < 3; )
+    {
+        out.FragColor[_43] += float(_43);
+        _43++;
+        continue;
+    }
+    return out;
+}
+
diff --git a/reference/opt/shaders-msl/frag/scalar-refract-reflect.frag b/reference/opt/shaders-msl/frag/scalar-refract-reflect.frag
new file mode 100644
index 0000000000..592d445810
--- /dev/null
+++ b/reference/opt/shaders-msl/frag/scalar-refract-reflect.frag
@@ -0,0 +1,49 @@
+#pragma clang diagnostic ignored "-Wmissing-prototypes"
+
+#include <metal_stdlib>
+#include <simd/simd.h>
+
+using namespace metal;
+
+struct main0_out
+{
+    float FragColor [[color(0)]];
+};
+
+struct main0_in
+{
+    float3 vRefract [[user(locn0)]];
+};
+
+template<typename T>
+inline T spvReflect(T i, T n)
+{
+    return i - T(2) * i * n * n;
+}
+
+template<typename T>
+inline T spvRefract(T i, T n, T eta)
+{
+    T NoI = n * i;
+    T NoI2 = NoI * NoI;
+    T k = T(1) - eta * eta * (T(1) - NoI2);
+    if (k < T(0))
+    {
+        return T(0);
+    }
+    else
+    {
+        return eta * i - (eta * NoI + sqrt(k)) * n;
+    }
+}
+
+fragment main0_out main0(main0_in in [[stage_in]])
+{
+    main0_out out = {};
+    out.FragColor = spvRefract(in.vRefract.x, in.vRefract.y, in.vRefract.z);
+    out.FragColor += spvReflect(in.vRefract.x, in.vRefract.y);
+    out.FragColor += refract(in.vRefract.xy, in.vRefract.yz, in.vRefract.z).y;
+    out.FragColor += reflect(in.vRefract.xy, in.vRefract.zy).y;
+    return out;
+}
+
diff --git a/reference/opt/shaders-msl/frag/subgroup-builtins.msl22.frag b/reference/opt/shaders-msl/frag/subgroup-builtins.msl22.frag
new file mode 100644
index 0000000000..03a536f0c6
--- /dev/null
+++ b/reference/opt/shaders-msl/frag/subgroup-builtins.msl22.frag
@@ -0,0 +1,18 @@
+#include <metal_stdlib>
+#include <simd/simd.h>
+
+using namespace metal;
+
+struct main0_out
+{
+    uint2 FragColor [[color(0)]];
+};
+
+fragment main0_out main0(uint gl_SubgroupSize [[threads_per_simdgroup]], uint gl_SubgroupInvocationID [[thread_index_in_simdgroup]])
+{
+    main0_out out = {};
+    out.FragColor.x = gl_SubgroupSize;
+    out.FragColor.y = gl_SubgroupInvocationID;
+    return out;
+}
+
diff --git a/reference/opt/shaders-msl/vulkan/frag/basic.multiview.nocompat.vk.frag b/reference/opt/shaders-msl/vulkan/frag/basic.multiview.nocompat.vk.frag
new file mode 100644
index 0000000000..23c554940b
--- /dev/null
+++ b/reference/opt/shaders-msl/vulkan/frag/basic.multiview.nocompat.vk.frag
@@ -0,0 +1,32 @@
+#include <metal_stdlib>
+#include <simd/simd.h>
+
+using namespace metal;
+
+struct main0_out
+{
+    float4 FragColor [[color(0)]];
+};
+
+struct main0_in
+{
+    float4 vColor [[user(locn0)]];
+    float2 vTex_0 [[user(locn1)]];
+    float2 vTex_1 [[user(locn2)]];
+    float2 vTex_2 [[user(locn3)]];
+    float2 vTex_3 [[user(locn4)]];
+};
+
+fragment main0_out main0(main0_in in [[stage_in]], constant uint* spvViewMask [[buffer(24)]], texture2d<float> uTex [[texture(0)]], sampler uTexSmplr [[sampler(0)]], uint gl_ViewIndex [[render_target_array_index]])
+{
+    main0_out out = {};
+    float2 vTex[4] = {};
+    vTex[0] = in.vTex_0;
+    vTex[1] = in.vTex_1;
+    vTex[2] = in.vTex_2;
+    vTex[3] = in.vTex_3;
+    gl_ViewIndex += spvViewMask[0];
+    out.FragColor = in.vColor * uTex.sample(uTexSmplr, vTex[int(gl_ViewIndex)]);
+    return out;
+}
+
diff --git a/reference/opt/shaders-msl/vulkan/frag/subgroups.nocompat.invalid.vk.msl21.frag b/reference/opt/shaders-msl/vulkan/frag/subgroups.nocompat.invalid.vk.msl21.frag
new file mode 100644
index 0000000000..fc9c4fcdd1
--- /dev/null
+++ b/reference/opt/shaders-msl/vulkan/frag/subgroups.nocompat.invalid.vk.msl21.frag
@@ -0,0 +1,89 @@
+#pragma clang diagnostic ignored "-Wmissing-prototypes"
+
+#include <metal_stdlib>
+#include <simd/simd.h>
+
+using namespace metal;
+
+struct main0_out
+{
+    float FragColor [[color(0)]];
+};
+
+inline uint4 spvSubgroupBallot(bool value)
+{
+    simd_vote vote = simd_ballot(value);
+    // simd_ballot() returns a 64-bit integer-like object, but
+    // SPIR-V callers expect a uint4. We must convert.
+    // FIXME: This won't include higher bits if Apple ever supports
+    // 128 lanes in an SIMD-group.
+    return uint4((uint)((simd_vote::vote_t)vote & 0xFFFFFFFF), (uint)(((simd_vote::vote_t)vote >> 32) & 0xFFFFFFFF), 0, 0);
+}
+
+inline bool spvSubgroupBallotBitExtract(uint4 ballot, uint bit)
+{
+    return !!extract_bits(ballot[bit / 32], bit % 32, 1);
+}
+
+inline uint spvSubgroupBallotFindLSB(uint4 ballot)
+{
+    return select(ctz(ballot.x), select(32 + ctz(ballot.y), select(64 + ctz(ballot.z), select(96 + ctz(ballot.w), uint(-1), ballot.w == 0), ballot.z == 0), ballot.y == 0), ballot.x == 0);
+}
+
+inline uint spvSubgroupBallotFindMSB(uint4 ballot)
+{
+    return select(128 - (clz(ballot.w) + 1), select(96 - (clz(ballot.z) + 1), select(64 - (clz(ballot.y) + 1), select(32 - (clz(ballot.x) + 1), uint(-1), ballot.x == 0), ballot.y == 0), ballot.z == 0), ballot.w == 0);
+}
+
+inline uint spvSubgroupBallotBitCount(uint4 ballot)
+{
+    return popcount(ballot.x) + popcount(ballot.y) + popcount(ballot.z) + popcount(ballot.w);
+}
+
+inline uint spvSubgroupBallotInclusiveBitCount(uint4 ballot, uint gl_SubgroupInvocationID)
+{
+    uint4 mask = uint4(extract_bits(0xFFFFFFFF, 0, min(gl_SubgroupInvocationID + 1, 32u)), extract_bits(0xFFFFFFFF, 0, (uint)max((int)gl_SubgroupInvocationID + 1 - 32, 0)), uint2(0));
+    return spvSubgroupBallotBitCount(ballot & mask);
+}
+
+inline uint spvSubgroupBallotExclusiveBitCount(uint4 ballot, uint gl_SubgroupInvocationID)
+{
+    uint4 mask = uint4(extract_bits(0xFFFFFFFF, 0, min(gl_SubgroupInvocationID, 32u)), extract_bits(0xFFFFFFFF, 0, (uint)max((int)gl_SubgroupInvocationID - 32, 0)), uint2(0));
+    return spvSubgroupBallotBitCount(ballot & mask);
+}
+
+template<typename T>
+inline bool spvSubgroupAllEqual(T value)
+{
+    return simd_all(value == simd_broadcast_first(value));
+}
+
+template<>
+inline bool spvSubgroupAllEqual(bool value)
+{
+    return simd_all(value) || !simd_any(value);
+}
+
+fragment main0_out main0()
+{
+    main0_out out = {};
+    uint gl_SubgroupSize = simd_sum(1);
+    uint gl_SubgroupInvocationID = simd_prefix_exclusive_sum(1);
+    uint4 gl_SubgroupEqMask = gl_SubgroupInvocationID > 32 ? uint4(0, (1 << (gl_SubgroupInvocationID - 32)), uint2(0)) : uint4(1 << gl_SubgroupInvocationID, uint3(0));
+    uint4 gl_SubgroupGeMask = uint4(extract_bits(0xFFFFFFFF, min(gl_SubgroupInvocationID, 32u), (uint)max(min((int)gl_SubgroupSize, 32) - (int)gl_SubgroupInvocationID, 0)), extract_bits(0xFFFFFFFF, (uint)max((int)gl_SubgroupInvocationID - 32, 0), (uint)max((int)gl_SubgroupSize - (int)max(gl_SubgroupInvocationID, 32u), 0)), uint2(0));
+    uint4 gl_SubgroupGtMask = uint4(extract_bits(0xFFFFFFFF, min(gl_SubgroupInvocationID + 1, 32u), (uint)max(min((int)gl_SubgroupSize, 32) - (int)gl_SubgroupInvocationID - 1, 0)), extract_bits(0xFFFFFFFF, (uint)max((int)gl_SubgroupInvocationID + 1 - 32, 0), (uint)max((int)gl_SubgroupSize - (int)max(gl_SubgroupInvocationID + 1, 32u), 0)), uint2(0));
+    uint4 gl_SubgroupLeMask = uint4(extract_bits(0xFFFFFFFF, 0, min(gl_SubgroupInvocationID + 1, 32u)), extract_bits(0xFFFFFFFF, 0, (uint)max((int)gl_SubgroupInvocationID + 1 - 32, 0)), uint2(0));
+    uint4 gl_SubgroupLtMask = uint4(extract_bits(0xFFFFFFFF, 0, min(gl_SubgroupInvocationID, 32u)), extract_bits(0xFFFFFFFF, 0, (uint)max((int)gl_SubgroupInvocationID - 32, 0)), uint2(0));
+    out.FragColor = float(gl_SubgroupSize);
+    out.FragColor = float(gl_SubgroupInvocationID);
+    out.FragColor = float4(gl_SubgroupEqMask).x;
+    out.FragColor = float4(gl_SubgroupGeMask).x;
+    out.FragColor = float4(gl_SubgroupGtMask).x;
+    out.FragColor = float4(gl_SubgroupLeMask).x;
+    out.FragColor = float4(gl_SubgroupLtMask).x;
+    uint4 _63 = spvSubgroupBallot(true);
+    float4 _147 = simd_prefix_inclusive_product(simd_product(float4(20.0)));
+    int4 _149 = simd_prefix_inclusive_product(simd_product(int4(20)));
+    return out;
+}
+
diff --git a/reference/opt/shaders-msl/vulkan/vert/multiview.multiview.nocompat.vk.vert b/reference/opt/shaders-msl/vulkan/vert/multiview.multiview.nocompat.vk.vert
new file mode 100644
index 0000000000..c42e67211e
--- /dev/null
+++ b/reference/opt/shaders-msl/vulkan/vert/multiview.multiview.nocompat.vk.vert
@@ -0,0 +1,31 @@
+#include <metal_stdlib>
+#include <simd/simd.h>
+
+using namespace metal;
+
+struct MVPs
+{
+    float4x4 MVP[2];
+};
+
+struct main0_out
+{
+    float4 gl_Position [[position]];
+    uint gl_Layer [[render_target_array_index]];
+};
+
+struct main0_in
+{
+    float4 Position [[attribute(0)]];
+};
+
+vertex main0_out main0(main0_in in [[stage_in]], constant uint* spvViewMask [[buffer(24)]], constant MVPs& _19 [[buffer(0)]], uint gl_InstanceIndex [[instance_id]])
+{
+    main0_out out = {};
+    uint gl_ViewIndex = spvViewMask[0] + gl_InstanceIndex % spvViewMask[1];
+    gl_InstanceIndex /= spvViewMask[1];
+    out.gl_Position = _19.MVP[int(gl_ViewIndex)] * in.Position;
+    out.gl_Layer = gl_ViewIndex - spvViewMask[0];
+    return out;
+}
+
diff --git a/reference/opt/shaders-msl/vulkan/vert/multiview.nocompat.vk.vert b/reference/opt/shaders-msl/vulkan/vert/multiview.nocompat.vk.vert
new file mode 100644
index 0000000000..f87d2a11ad
--- /dev/null
+++ b/reference/opt/shaders-msl/vulkan/vert/multiview.nocompat.vk.vert
@@ -0,0 +1,29 @@
+#include <metal_stdlib>
+#include <simd/simd.h>
+
+using namespace metal;
+
+struct MVPs
+{
+    float4x4 MVP[2];
+};
+
+struct main0_out
+{
+    float4 gl_Position [[position]];
+    uint gl_Layer [[render_target_array_index]];
+};
+
+struct main0_in
+{
+    float4 Position [[attribute(0)]];
+};
+
+vertex main0_out main0(main0_in in [[stage_in]], constant MVPs& _19 [[buffer(0)]], uint gl_InstanceIndex [[instance_id]])
+{
+    main0_out out = {};
+    const uint gl_ViewIndex = 0;
+    out.gl_Position = _19.MVP[int(gl_ViewIndex)] * in.Position;
+    return out;
+}
+
diff --git a/reference/opt/shaders/asm/comp/phi-temporary-copy-loop-variable.asm.comp b/reference/opt/shaders/asm/comp/phi-temporary-copy-loop-variable.asm.comp
new file mode 100644
index 0000000000..9ae8d6fd7f
--- /dev/null
+++ b/reference/opt/shaders/asm/comp/phi-temporary-copy-loop-variable.asm.comp
@@ -0,0 +1,25 @@
+#version 450
+layout(local_size_x = 8, local_size_y = 8, local_size_z = 1) in;
+
+layout(binding = 1, rgba32f) uniform writeonly image2D outImageTexture;
+
+void main()
+{
+    int _30;
+    _30 = 7;
+    int _27_copy;
+    for (int _27 = 7; _27 >= 0; _27_copy = _27, _27--, _30 = _27_copy)
+    {
+        if (5.0 > float(_27))
+        {
+            break;
+        }
+        else
+        {
+            continue;
+        }
+        continue;
+    }
+    imageStore(outImageTexture, ivec2(gl_GlobalInvocationID.xy), vec4(float(_30 - 1), float(_30), 1.0, 1.0));
+}
+
diff --git a/reference/opt/shaders/asm/extended-debug-extinst.invalid.asm.comp b/reference/opt/shaders/asm/extended-debug-extinst.invalid.asm.comp
new file mode 100644
index 0000000000..7755593f57
--- /dev/null
+++ b/reference/opt/shaders/asm/extended-debug-extinst.invalid.asm.comp
@@ -0,0 +1,18 @@
+#version 430
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+
+layout(binding = 0, std430) buffer _8_9
+{
+    float _m0[];
+} _9;
+
+layout(binding = 1, std430) buffer _8_10
+{
+    float _m0[];
+} _10;
+
+void main()
+{
+    _10._m0[gl_GlobalInvocationID.x] = -_9._m0[gl_GlobalInvocationID.x];
+}
+
diff --git a/reference/opt/shaders/asm/frag/texture-sampling-fp16.asm.vk.frag b/reference/opt/shaders/asm/frag/texture-sampling-fp16.asm.vk.frag
new file mode 100644
index 0000000000..e0feb49327
--- /dev/null
+++ b/reference/opt/shaders/asm/frag/texture-sampling-fp16.asm.vk.frag
@@ -0,0 +1,19 @@
+#version 450
+#if defined(GL_AMD_gpu_shader_half_float)
+#extension GL_AMD_gpu_shader_half_float : require
+#elif defined(GL_NV_gpu_shader5)
+#extension GL_NV_gpu_shader5 : require
+#else
+#error No extension available for FP16.
+#endif
+
+layout(binding = 0) uniform sampler2D uTexture;
+
+layout(location = 0) out f16vec4 FragColor;
+layout(location = 0) in f16vec2 UV;
+
+void main()
+{
+    FragColor = f16vec4(texture(uTexture, UV));
+}
+
diff --git a/reference/opt/shaders/asm/frag/texture-sampling-fp16.asm.vk.frag.vk b/reference/opt/shaders/asm/frag/texture-sampling-fp16.asm.vk.frag.vk
new file mode 100644
index 0000000000..e13e4254ea
--- /dev/null
+++ b/reference/opt/shaders/asm/frag/texture-sampling-fp16.asm.vk.frag.vk
@@ -0,0 +1,20 @@
+#version 450
+#if defined(GL_AMD_gpu_shader_half_float)
+#extension GL_AMD_gpu_shader_half_float : require
+#elif defined(GL_EXT_shader_explicit_arithmetic_types_float16)
+#extension GL_EXT_shader_explicit_arithmetic_types_float16 : require
+#else
+#error No extension available for FP16.
+#endif
+#extension GL_EXT_shader_16bit_storage : require
+
+layout(set = 0, binding = 0) uniform sampler2D uTexture;
+
+layout(location = 0) out f16vec4 FragColor;
+layout(location = 0) in f16vec2 UV;
+
+void main()
+{
+    FragColor = f16vec4(texture(uTexture, UV));
+}
+
diff --git a/reference/opt/shaders/comp/outer-product.comp b/reference/opt/shaders/comp/outer-product.comp
new file mode 100644
index 0000000000..d31dad3b2c
--- /dev/null
+++ b/reference/opt/shaders/comp/outer-product.comp
@@ -0,0 +1,36 @@
+#version 450
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+
+layout(binding = 0, std430) writeonly buffer SSBO
+{
+    mat2 m22;
+    mat2x3 m23;
+    mat2x4 m24;
+    mat3x2 m32;
+    mat3 m33;
+    mat3x4 m34;
+    mat4x2 m42;
+    mat4x3 m43;
+    mat4 m44;
+} _21;
+
+layout(binding = 1, std430) readonly buffer ReadSSBO
+{
+    vec2 v2;
+    vec3 v3;
+    vec4 v4;
+} _26;
+
+void main()
+{
+    _21.m22 = outerProduct(_26.v2, _26.v2);
+    _21.m23 = outerProduct(_26.v3, _26.v2);
+    _21.m24 = outerProduct(_26.v4, _26.v2);
+    _21.m32 = outerProduct(_26.v2, _26.v3);
+    _21.m33 = outerProduct(_26.v3, _26.v3);
+    _21.m34 = outerProduct(_26.v4, _26.v3);
+    _21.m42 = outerProduct(_26.v2, _26.v4);
+    _21.m43 = outerProduct(_26.v3, _26.v4);
+    _21.m44 = outerProduct(_26.v4, _26.v4);
+}
+
diff --git a/reference/opt/shaders/comp/scalar-std450-distance-length-normalize.comp b/reference/opt/shaders/comp/scalar-std450-distance-length-normalize.comp
new file mode 100644
index 0000000000..c28face28a
--- /dev/null
+++ b/reference/opt/shaders/comp/scalar-std450-distance-length-normalize.comp
@@ -0,0 +1,19 @@
+#version 450
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+
+layout(binding = 0, std430) buffer SSBO
+{
+    float a;
+    float b;
+    float c;
+    float d;
+    float e;
+} _9;
+
+void main()
+{
+    _9.c = distance(_9.a, _9.b);
+    _9.d = length(_9.a);
+    _9.e = normalize(_9.a);
+}
+
diff --git a/reference/opt/shaders/frag/for-loop-continue-control-flow.frag b/reference/opt/shaders/frag/for-loop-continue-control-flow.frag
new file mode 100644
index 0000000000..f49b906099
--- /dev/null
+++ b/reference/opt/shaders/frag/for-loop-continue-control-flow.frag
@@ -0,0 +1,15 @@
+#version 450
+
+layout(location = 0) out vec4 FragColor;
+
+void main()
+{
+    FragColor = vec4(0.0);
+    for (int _43 = 0; _43 < 3; )
+    {
+        FragColor[_43] += float(_43);
+        _43++;
+        continue;
+    }
+}
+
diff --git a/reference/opt/shaders/frag/scalar-refract-reflect.frag b/reference/opt/shaders/frag/scalar-refract-reflect.frag
new file mode 100644
index 0000000000..94d671b5a9
--- /dev/null
+++ b/reference/opt/shaders/frag/scalar-refract-reflect.frag
@@ -0,0 +1,13 @@
+#version 450
+
+layout(location = 0) out float FragColor;
+layout(location = 0) in vec3 vRefract;
+
+void main()
+{
+    FragColor = refract(vRefract.x, vRefract.y, vRefract.z);
+    FragColor += reflect(vRefract.x, vRefract.y);
+    FragColor += refract(vRefract.xy, vRefract.yz, vRefract.z).y;
+    FragColor += reflect(vRefract.xy, vRefract.zy).y;
+}
+
diff --git a/reference/opt/shaders/frag/selection-block-dominator.frag b/reference/opt/shaders/frag/selection-block-dominator.frag
new file mode 100644
index 0000000000..f737f48917
--- /dev/null
+++ b/reference/opt/shaders/frag/selection-block-dominator.frag
@@ -0,0 +1,19 @@
+#version 450
+
+layout(location = 0) flat in int vIndex;
+layout(location = 0) out vec4 FragColor;
+
+void main()
+{
+    for (;;)
+    {
+        if (vIndex != 1)
+        {
+            FragColor = vec4(1.0);
+            break;
+        }
+        FragColor = vec4(10.0);
+        break;
+    }
+}
+
diff --git a/reference/shaders-hlsl-no-opt/asm/frag/nonuniform-qualifier-propagation.nonuniformresource.sm51.asm.frag b/reference/shaders-hlsl-no-opt/asm/frag/nonuniform-qualifier-propagation.nonuniformresource.sm51.asm.frag
new file mode 100644
index 0000000000..44cc8ab221
--- /dev/null
+++ b/reference/shaders-hlsl-no-opt/asm/frag/nonuniform-qualifier-propagation.nonuniformresource.sm51.asm.frag
@@ -0,0 +1,52 @@
+struct UBO_1_1
+{
+    float4 v[64];
+};
+
+ConstantBuffer<UBO_1_1> ubos[] : register(b0, space2);
+ByteAddressBuffer ssbos[] : register(t0, space3);
+Texture2D<float4> uSamplers[] : register(t0, space0);
+SamplerState uSamps[] : register(s0, space1);
+Texture2D<float4> uCombinedSamplers[] : register(t4, space0);
+SamplerState _uCombinedSamplers_sampler[] : register(s4, space0);
+
+static int vIndex;
+static float4 FragColor;
+static float2 vUV;
+
+struct SPIRV_Cross_Input
+{
+    nointerpolation int vIndex : TEXCOORD0;
+    float2 vUV : TEXCOORD1;
+};
+
+struct SPIRV_Cross_Output
+{
+    float4 FragColor : SV_Target0;
+};
+
+void frag_main()
+{
+    int i = vIndex;
+    int _59 = i + 10;
+    int _64 = i + 40;
+    FragColor = uSamplers[NonUniformResourceIndex(_59)].Sample(uSamps[NonUniformResourceIndex(_64)], vUV);
+    int _71 = i + 10;
+    FragColor = uCombinedSamplers[NonUniformResourceIndex(_71)].Sample(_uCombinedSamplers_sampler[NonUniformResourceIndex(_71)], vUV);
+    int _77 = i + 20;
+    int _80 = i + 40;
+    FragColor += ubos[NonUniformResourceIndex(_77)].v[_80];
+    int _87 = i + 50;
+    int _90 = i + 60;
+    FragColor += asfloat(ssbos[NonUniformResourceIndex(_87)].Load4(_90 * 16 + 0));
+}
+
+SPIRV_Cross_Output main(SPIRV_Cross_Input stage_input)
+{
+    vIndex = stage_input.vIndex;
+    vUV = stage_input.vUV;
+    frag_main();
+    SPIRV_Cross_Output stage_output;
+    stage_output.FragColor = FragColor;
+    return stage_output;
+}
diff --git a/reference/shaders-hlsl/asm/frag/texture-sampling-fp16.asm.frag b/reference/shaders-hlsl/asm/frag/texture-sampling-fp16.asm.frag
new file mode 100644
index 0000000000..322c86bed3
--- /dev/null
+++ b/reference/shaders-hlsl/asm/frag/texture-sampling-fp16.asm.frag
@@ -0,0 +1,29 @@
+Texture2D<float4> uTexture : register(t0);
+SamplerState _uTexture_sampler : register(s0);
+
+static min16float4 FragColor;
+static min16float2 UV;
+
+struct SPIRV_Cross_Input
+{
+    min16float2 UV : TEXCOORD0;
+};
+
+struct SPIRV_Cross_Output
+{
+    min16float4 FragColor : SV_Target0;
+};
+
+void frag_main()
+{
+    FragColor = min16float4(uTexture.Sample(_uTexture_sampler, UV));
+}
+
+SPIRV_Cross_Output main(SPIRV_Cross_Input stage_input)
+{
+    UV = stage_input.UV;
+    frag_main();
+    SPIRV_Cross_Output stage_output;
+    stage_output.FragColor = FragColor;
+    return stage_output;
+}
diff --git a/reference/shaders-hlsl/comp/outer-product.comp b/reference/shaders-hlsl/comp/outer-product.comp
new file mode 100644
index 0000000000..71613d4f15
--- /dev/null
+++ b/reference/shaders-hlsl/comp/outer-product.comp
@@ -0,0 +1,48 @@
+RWByteAddressBuffer _21 : register(u0);
+ByteAddressBuffer _26 : register(t1);
+
+void comp_main()
+{
+    float2x2 _32 = float2x2(asfloat(_26.Load2(0)) * asfloat(_26.Load2(0)).x, asfloat(_26.Load2(0)) * asfloat(_26.Load2(0)).y);
+    _21.Store2(0, asuint(_32[0]));
+    _21.Store2(8, asuint(_32[1]));
+    float2x3 _41 = float2x3(asfloat(_26.Load3(16)) * asfloat(_26.Load2(0)).x, asfloat(_26.Load3(16)) * asfloat(_26.Load2(0)).y);
+    _21.Store3(16, asuint(_41[0]));
+    _21.Store3(32, asuint(_41[1]));
+    float2x4 _50 = float2x4(asfloat(_26.Load4(32)) * asfloat(_26.Load2(0)).x, asfloat(_26.Load4(32)) * asfloat(_26.Load2(0)).y);
+    _21.Store4(48, asuint(_50[0]));
+    _21.Store4(64, asuint(_50[1]));
+    float3x2 _58 = float3x2(asfloat(_26.Load2(0)) * asfloat(_26.Load3(16)).x, asfloat(_26.Load2(0)) * asfloat(_26.Load3(16)).y, asfloat(_26.Load2(0)) * asfloat(_26.Load3(16)).z);
+    _21.Store2(80, asuint(_58[0]));
+    _21.Store2(88, asuint(_58[1]));
+    _21.Store2(96, asuint(_58[2]));
+    float3x3 _66 = float3x3(asfloat(_26.Load3(16)) * asfloat(_26.Load3(16)).x, asfloat(_26.Load3(16)) * asfloat(_26.Load3(16)).y, asfloat(_26.Load3(16)) * asfloat(_26.Load3(16)).z);
+    _21.Store3(112, asuint(_66[0]));
+    _21.Store3(128, asuint(_66[1]));
+    _21.Store3(144, asuint(_66[2]));
+    float3x4 _74 = float3x4(asfloat(_26.Load4(32)) * asfloat(_26.Load3(16)).x, asfloat(_26.Load4(32)) * asfloat(_26.Load3(16)).y, asfloat(_26.Load4(32)) * asfloat(_26.Load3(16)).z);
+    _21.Store4(160, asuint(_74[0]));
+    _21.Store4(176, asuint(_74[1]));
+    _21.Store4(192, asuint(_74[2]));
+    float4x2 _82 = float4x2(asfloat(_26.Load2(0)) * asfloat(_26.Load4(32)).x, asfloat(_26.Load2(0)) * asfloat(_26.Load4(32)).y, asfloat(_26.Load2(0)) * asfloat(_26.Load4(32)).z, asfloat(_26.Load2(0)) * asfloat(_26.Load4(32)).w);
+    _21.Store2(208, asuint(_82[0]));
+    _21.Store2(216, asuint(_82[1]));
+    _21.Store2(224, asuint(_82[2]));
+    _21.Store2(232, asuint(_82[3]));
+    float4x3 _90 = float4x3(asfloat(_26.Load3(16)) * asfloat(_26.Load4(32)).x, asfloat(_26.Load3(16)) * asfloat(_26.Load4(32)).y, asfloat(_26.Load3(16)) * asfloat(_26.Load4(32)).z, asfloat(_26.Load3(16)) * asfloat(_26.Load4(32)).w);
+    _21.Store3(240, asuint(_90[0]));
+    _21.Store3(256, asuint(_90[1]));
+    _21.Store3(272, asuint(_90[2]));
+    _21.Store3(288, asuint(_90[3]));
+    float4x4 _98 = float4x4(asfloat(_26.Load4(32)) * asfloat(_26.Load4(32)).x, asfloat(_26.Load4(32)) * asfloat(_26.Load4(32)).y, asfloat(_26.Load4(32)) * asfloat(_26.Load4(32)).z, asfloat(_26.Load4(32)) * asfloat(_26.Load4(32)).w);
+    _21.Store4(304, asuint(_98[0]));
+    _21.Store4(320, asuint(_98[1]));
+    _21.Store4(336, asuint(_98[2]));
+    _21.Store4(352, asuint(_98[3]));
+}
+
+[numthreads(1, 1, 1)]
+void main()
+{
+    comp_main();
+}
diff --git a/reference/shaders-hlsl/comp/scalar-std450-distance-length-normalize.comp b/reference/shaders-hlsl/comp/scalar-std450-distance-length-normalize.comp
new file mode 100644
index 0000000000..47f2fe4107
--- /dev/null
+++ b/reference/shaders-hlsl/comp/scalar-std450-distance-length-normalize.comp
@@ -0,0 +1,14 @@
+RWByteAddressBuffer _9 : register(u0);
+
+void comp_main()
+{
+    _9.Store(8, asuint(distance(asfloat(_9.Load(0)), asfloat(_9.Load(4)))));
+    _9.Store(12, asuint(length(asfloat(_9.Load(0)))));
+    _9.Store(16, asuint(sign(asfloat(_9.Load(0)))));
+}
+
+[numthreads(1, 1, 1)]
+void main()
+{
+    comp_main();
+}
diff --git a/reference/shaders-hlsl/frag/for-loop-continue-control-flow.frag b/reference/shaders-hlsl/frag/for-loop-continue-control-flow.frag
new file mode 100644
index 0000000000..12e1e7f5fd
--- /dev/null
+++ b/reference/shaders-hlsl/frag/for-loop-continue-control-flow.frag
@@ -0,0 +1,44 @@
+static float4 FragColor;
+
+struct SPIRV_Cross_Output
+{
+    float4 FragColor : SV_Target0;
+};
+
+void frag_main()
+{
+    FragColor = 0.0f.xxxx;
+    int i = 0;
+    int _36;
+    for (;;)
+    {
+        if (i < 3)
+        {
+            int a = i;
+            FragColor[a] += float(i);
+            if (false)
+            {
+                _36 = 1;
+            }
+            else
+            {
+                int _41 = i;
+                i = _41 + 1;
+                _36 = _41;
+            }
+            continue;
+        }
+        else
+        {
+            break;
+        }
+    }
+}
+
+SPIRV_Cross_Output main()
+{
+    frag_main();
+    SPIRV_Cross_Output stage_output;
+    stage_output.FragColor = FragColor;
+    return stage_output;
+}
diff --git a/reference/shaders-hlsl/frag/scalar-refract-reflect.frag b/reference/shaders-hlsl/frag/scalar-refract-reflect.frag
new file mode 100644
index 0000000000..0fb694c543
--- /dev/null
+++ b/reference/shaders-hlsl/frag/scalar-refract-reflect.frag
@@ -0,0 +1,49 @@
+static float FragColor;
+static float3 vRefract;
+
+struct SPIRV_Cross_Input
+{
+    float3 vRefract : TEXCOORD0;
+};
+
+struct SPIRV_Cross_Output
+{
+    float FragColor : SV_Target0;
+};
+
+float SPIRV_Cross_Reflect(float i, float n)
+{
+    return i - 2.0 * dot(n, i) * n;
+}
+
+float SPIRV_Cross_Refract(float i, float n, float eta)
+{
+    float NoI = n * i;
+    float NoI2 = NoI * NoI;
+    float k = 1.0 - eta * eta * (1.0 - NoI2);
+    if (k < 0.0)
+    {
+        return 0.0;
+    }
+    else
+    {
+        return eta * i - (eta * NoI + sqrt(k)) * n;
+    }
+}
+
+void frag_main()
+{
+    FragColor = SPIRV_Cross_Refract(vRefract.x, vRefract.y, vRefract.z);
+    FragColor += SPIRV_Cross_Reflect(vRefract.x, vRefract.y);
+    FragColor += refract(vRefract.xy, vRefract.yz, vRefract.z).y;
+    FragColor += reflect(vRefract.xy, vRefract.zy).y;
+}
+
+SPIRV_Cross_Output main(SPIRV_Cross_Input stage_input)
+{
+    vRefract = stage_input.vRefract;
+    frag_main();
+    SPIRV_Cross_Output stage_output;
+    stage_output.FragColor = FragColor;
+    return stage_output;
+}
diff --git a/reference/shaders-msl/asm/frag/texture-sampling-fp16.asm.frag b/reference/shaders-msl/asm/frag/texture-sampling-fp16.asm.frag
new file mode 100644
index 0000000000..97345820b6
--- /dev/null
+++ b/reference/shaders-msl/asm/frag/texture-sampling-fp16.asm.frag
@@ -0,0 +1,22 @@
+#include <metal_stdlib>
+#include <simd/simd.h>
+
+using namespace metal;
+
+struct main0_out
+{
+    half4 FragColor [[color(0)]];
+};
+
+struct main0_in
+{
+    half2 UV [[user(locn0)]];
+};
+
+fragment main0_out main0(main0_in in [[stage_in]], texture2d<float> uTexture [[texture(0)]], sampler uTextureSmplr [[sampler(0)]])
+{
+    main0_out out = {};
+    out.FragColor = half4(uTexture.sample(uTextureSmplr, float2(in.UV)));
+    return out;
+}
+
diff --git a/reference/shaders-msl/comp/outer-product.comp b/reference/shaders-msl/comp/outer-product.comp
new file mode 100644
index 0000000000..8e32db392e
--- /dev/null
+++ b/reference/shaders-msl/comp/outer-product.comp
@@ -0,0 +1,38 @@
+#include <metal_stdlib>
+#include <simd/simd.h>
+
+using namespace metal;
+
+struct SSBO
+{
+    float2x2 m22;
+    float2x3 m23;
+    float2x4 m24;
+    float3x2 m32;
+    float3x3 m33;
+    float3x4 m34;
+    float4x2 m42;
+    float4x3 m43;
+    float4x4 m44;
+};
+
+struct ReadSSBO
+{
+    float2 v2;
+    float3 v3;
+    float4 v4;
+};
+
+kernel void main0(device SSBO& _21 [[buffer(0)]], const device ReadSSBO& _26 [[buffer(1)]])
+{
+    _21.m22 = float2x2(_26.v2 * _26.v2.x, _26.v2 * _26.v2.y);
+    _21.m23 = float2x3(_26.v3 * _26.v2.x, _26.v3 * _26.v2.y);
+    _21.m24 = float2x4(_26.v4 * _26.v2.x, _26.v4 * _26.v2.y);
+    _21.m32 = float3x2(_26.v2 * _26.v3.x, _26.v2 * _26.v3.y, _26.v2 * _26.v3.z);
+    _21.m33 = float3x3(_26.v3 * _26.v3.x, _26.v3 * _26.v3.y, _26.v3 * _26.v3.z);
+    _21.m34 = float3x4(_26.v4 * _26.v3.x, _26.v4 * _26.v3.y, _26.v4 * _26.v3.z);
+    _21.m42 = float4x2(_26.v2 * _26.v4.x, _26.v2 * _26.v4.y, _26.v2 * _26.v4.z, _26.v2 * _26.v4.w);
+    _21.m43 = float4x3(_26.v3 * _26.v4.x, _26.v3 * _26.v4.y, _26.v3 * _26.v4.z, _26.v3 * _26.v4.w);
+    _21.m44 = float4x4(_26.v4 * _26.v4.x, _26.v4 * _26.v4.y, _26.v4 * _26.v4.z, _26.v4 * _26.v4.w);
+}
+
diff --git a/reference/shaders-msl/comp/scalar-std450-distance-length-normalize.comp b/reference/shaders-msl/comp/scalar-std450-distance-length-normalize.comp
new file mode 100644
index 0000000000..312a6f9453
--- /dev/null
+++ b/reference/shaders-msl/comp/scalar-std450-distance-length-normalize.comp
@@ -0,0 +1,21 @@
+#include <metal_stdlib>
+#include <simd/simd.h>
+
+using namespace metal;
+
+struct SSBO
+{
+    float a;
+    float b;
+    float c;
+    float d;
+    float e;
+};
+
+kernel void main0(device SSBO& _9 [[buffer(0)]])
+{
+    _9.c = abs(_9.a - _9.b);
+    _9.d = abs(_9.a);
+    _9.e = sign(_9.a);
+}
+
diff --git a/reference/shaders-msl/frag/for-loop-continue-control-flow.frag b/reference/shaders-msl/frag/for-loop-continue-control-flow.frag
new file mode 100644
index 0000000000..847ec6532f
--- /dev/null
+++ b/reference/shaders-msl/frag/for-loop-continue-control-flow.frag
@@ -0,0 +1,42 @@
+#include <metal_stdlib>
+#include <simd/simd.h>
+
+using namespace metal;
+
+struct main0_out
+{
+    float4 FragColor [[color(0)]];
+};
+
+fragment main0_out main0()
+{
+    main0_out out = {};
+    out.FragColor = float4(0.0);
+    int i = 0;
+    int _36;
+    for (;;)
+    {
+        if (i < 3)
+        {
+            int a = i;
+            out.FragColor[a] += float(i);
+            if (false)
+            {
+                _36 = 1;
+            }
+            else
+            {
+                int _41 = i;
+                i = _41 + 1;
+                _36 = _41;
+            }
+            continue;
+        }
+        else
+        {
+            break;
+        }
+    }
+    return out;
+}
+
diff --git a/reference/shaders-msl/frag/scalar-refract-reflect.frag b/reference/shaders-msl/frag/scalar-refract-reflect.frag
new file mode 100644
index 0000000000..592d445810
--- /dev/null
+++ b/reference/shaders-msl/frag/scalar-refract-reflect.frag
@@ -0,0 +1,49 @@
+#pragma clang diagnostic ignored "-Wmissing-prototypes"
+
+#include <metal_stdlib>
+#include <simd/simd.h>
+
+using namespace metal;
+
+struct main0_out
+{
+    float FragColor [[color(0)]];
+};
+
+struct main0_in
+{
+    float3 vRefract [[user(locn0)]];
+};
+
+template<typename T>
+inline T spvReflect(T i, T n)
+{
+    return i - T(2) * i * n * n;
+}
+
+template<typename T>
+inline T spvRefract(T i, T n, T eta)
+{
+    T NoI = n * i;
+    T NoI2 = NoI * NoI;
+    T k = T(1) - eta * eta * (T(1) - NoI2);
+    if (k < T(0))
+    {
+        return T(0);
+    }
+    else
+    {
+        return eta * i - (eta * NoI + sqrt(k)) * n;
+    }
+}
+
+fragment main0_out main0(main0_in in [[stage_in]])
+{
+    main0_out out = {};
+    out.FragColor = spvRefract(in.vRefract.x, in.vRefract.y, in.vRefract.z);
+    out.FragColor += spvReflect(in.vRefract.x, in.vRefract.y);
+    out.FragColor += refract(in.vRefract.xy, in.vRefract.yz, in.vRefract.z).y;
+    out.FragColor += reflect(in.vRefract.xy, in.vRefract.zy).y;
+    return out;
+}
+
diff --git a/reference/shaders-msl/frag/subgroup-builtins.msl22.frag b/reference/shaders-msl/frag/subgroup-builtins.msl22.frag
new file mode 100644
index 0000000000..03a536f0c6
--- /dev/null
+++ b/reference/shaders-msl/frag/subgroup-builtins.msl22.frag
@@ -0,0 +1,18 @@
+#include <metal_stdlib>
+#include <simd/simd.h>
+
+using namespace metal;
+
+struct main0_out
+{
+    uint2 FragColor [[color(0)]];
+};
+
+fragment main0_out main0(uint gl_SubgroupSize [[threads_per_simdgroup]], uint gl_SubgroupInvocationID [[thread_index_in_simdgroup]])
+{
+    main0_out out = {};
+    out.FragColor.x = gl_SubgroupSize;
+    out.FragColor.y = gl_SubgroupInvocationID;
+    return out;
+}
+
diff --git a/reference/shaders-msl/vulkan/frag/basic.multiview.nocompat.vk.frag b/reference/shaders-msl/vulkan/frag/basic.multiview.nocompat.vk.frag
new file mode 100644
index 0000000000..23c554940b
--- /dev/null
+++ b/reference/shaders-msl/vulkan/frag/basic.multiview.nocompat.vk.frag
@@ -0,0 +1,32 @@
+#include <metal_stdlib>
+#include <simd/simd.h>
+
+using namespace metal;
+
+struct main0_out
+{
+    float4 FragColor [[color(0)]];
+};
+
+struct main0_in
+{
+    float4 vColor [[user(locn0)]];
+    float2 vTex_0 [[user(locn1)]];
+    float2 vTex_1 [[user(locn2)]];
+    float2 vTex_2 [[user(locn3)]];
+    float2 vTex_3 [[user(locn4)]];
+};
+
+fragment main0_out main0(main0_in in [[stage_in]], constant uint* spvViewMask [[buffer(24)]], texture2d<float> uTex [[texture(0)]], sampler uTexSmplr [[sampler(0)]], uint gl_ViewIndex [[render_target_array_index]])
+{
+    main0_out out = {};
+    float2 vTex[4] = {};
+    vTex[0] = in.vTex_0;
+    vTex[1] = in.vTex_1;
+    vTex[2] = in.vTex_2;
+    vTex[3] = in.vTex_3;
+    gl_ViewIndex += spvViewMask[0];
+    out.FragColor = in.vColor * uTex.sample(uTexSmplr, vTex[int(gl_ViewIndex)]);
+    return out;
+}
+
diff --git a/reference/shaders-msl/vulkan/frag/subgroups.nocompat.invalid.vk.msl21.frag b/reference/shaders-msl/vulkan/frag/subgroups.nocompat.invalid.vk.msl21.frag
new file mode 100644
index 0000000000..affaf86d54
--- /dev/null
+++ b/reference/shaders-msl/vulkan/frag/subgroups.nocompat.invalid.vk.msl21.frag
@@ -0,0 +1,143 @@
+#pragma clang diagnostic ignored "-Wmissing-prototypes"
+
+#include <metal_stdlib>
+#include <simd/simd.h>
+
+using namespace metal;
+
+struct main0_out
+{
+    float FragColor [[color(0)]];
+};
+
+inline uint4 spvSubgroupBallot(bool value)
+{
+    simd_vote vote = simd_ballot(value);
+    // simd_ballot() returns a 64-bit integer-like object, but
+    // SPIR-V callers expect a uint4. We must convert.
+    // FIXME: This won't include higher bits if Apple ever supports
+    // 128 lanes in an SIMD-group.
+    return uint4((uint)((simd_vote::vote_t)vote & 0xFFFFFFFF), (uint)(((simd_vote::vote_t)vote >> 32) & 0xFFFFFFFF), 0, 0);
+}
+
+inline bool spvSubgroupBallotBitExtract(uint4 ballot, uint bit)
+{
+    return !!extract_bits(ballot[bit / 32], bit % 32, 1);
+}
+
+inline uint spvSubgroupBallotFindLSB(uint4 ballot)
+{
+    return select(ctz(ballot.x), select(32 + ctz(ballot.y), select(64 + ctz(ballot.z), select(96 + ctz(ballot.w), uint(-1), ballot.w == 0), ballot.z == 0), ballot.y == 0), ballot.x == 0);
+}
+
+inline uint spvSubgroupBallotFindMSB(uint4 ballot)
+{
+    return select(128 - (clz(ballot.w) + 1), select(96 - (clz(ballot.z) + 1), select(64 - (clz(ballot.y) + 1), select(32 - (clz(ballot.x) + 1), uint(-1), ballot.x == 0), ballot.y == 0), ballot.z == 0), ballot.w == 0);
+}
+
+inline uint spvSubgroupBallotBitCount(uint4 ballot)
+{
+    return popcount(ballot.x) + popcount(ballot.y) + popcount(ballot.z) + popcount(ballot.w);
+}
+
+inline uint spvSubgroupBallotInclusiveBitCount(uint4 ballot, uint gl_SubgroupInvocationID)
+{
+    uint4 mask = uint4(extract_bits(0xFFFFFFFF, 0, min(gl_SubgroupInvocationID + 1, 32u)), extract_bits(0xFFFFFFFF, 0, (uint)max((int)gl_SubgroupInvocationID + 1 - 32, 0)), uint2(0));
+    return spvSubgroupBallotBitCount(ballot & mask);
+}
+
+inline uint spvSubgroupBallotExclusiveBitCount(uint4 ballot, uint gl_SubgroupInvocationID)
+{
+    uint4 mask = uint4(extract_bits(0xFFFFFFFF, 0, min(gl_SubgroupInvocationID, 32u)), extract_bits(0xFFFFFFFF, 0, (uint)max((int)gl_SubgroupInvocationID - 32, 0)), uint2(0));
+    return spvSubgroupBallotBitCount(ballot & mask);
+}
+
+template<typename T>
+inline bool spvSubgroupAllEqual(T value)
+{
+    return simd_all(value == simd_broadcast_first(value));
+}
+
+template<>
+inline bool spvSubgroupAllEqual(bool value)
+{
+    return simd_all(value) || !simd_any(value);
+}
+
+fragment main0_out main0()
+{
+    main0_out out = {};
+    uint gl_SubgroupSize = simd_sum(1);
+    uint gl_SubgroupInvocationID = simd_prefix_exclusive_sum(1);
+    uint4 gl_SubgroupEqMask = gl_SubgroupInvocationID > 32 ? uint4(0, (1 << (gl_SubgroupInvocationID - 32)), uint2(0)) : uint4(1 << gl_SubgroupInvocationID, uint3(0));
+    uint4 gl_SubgroupGeMask = uint4(extract_bits(0xFFFFFFFF, min(gl_SubgroupInvocationID, 32u), (uint)max(min((int)gl_SubgroupSize, 32) - (int)gl_SubgroupInvocationID, 0)), extract_bits(0xFFFFFFFF, (uint)max((int)gl_SubgroupInvocationID - 32, 0), (uint)max((int)gl_SubgroupSize - (int)max(gl_SubgroupInvocationID, 32u), 0)), uint2(0));
+    uint4 gl_SubgroupGtMask = uint4(extract_bits(0xFFFFFFFF, min(gl_SubgroupInvocationID + 1, 32u), (uint)max(min((int)gl_SubgroupSize, 32) - (int)gl_SubgroupInvocationID - 1, 0)), extract_bits(0xFFFFFFFF, (uint)max((int)gl_SubgroupInvocationID + 1 - 32, 0), (uint)max((int)gl_SubgroupSize - (int)max(gl_SubgroupInvocationID + 1, 32u), 0)), uint2(0));
+    uint4 gl_SubgroupLeMask = uint4(extract_bits(0xFFFFFFFF, 0, min(gl_SubgroupInvocationID + 1, 32u)), extract_bits(0xFFFFFFFF, 0, (uint)max((int)gl_SubgroupInvocationID + 1 - 32, 0)), uint2(0));
+    uint4 gl_SubgroupLtMask = uint4(extract_bits(0xFFFFFFFF, 0, min(gl_SubgroupInvocationID, 32u)), extract_bits(0xFFFFFFFF, 0, (uint)max((int)gl_SubgroupInvocationID - 32, 0)), uint2(0));
+    out.FragColor = float(gl_SubgroupSize);
+    out.FragColor = float(gl_SubgroupInvocationID);
+    bool elected = simd_is_first();
+    out.FragColor = float4(gl_SubgroupEqMask).x;
+    out.FragColor = float4(gl_SubgroupGeMask).x;
+    out.FragColor = float4(gl_SubgroupGtMask).x;
+    out.FragColor = float4(gl_SubgroupLeMask).x;
+    out.FragColor = float4(gl_SubgroupLtMask).x;
+    float4 broadcasted = simd_broadcast(float4(10.0), 8u);
+    float3 first = simd_broadcast_first(float3(20.0));
+    uint4 ballot_value = spvSubgroupBallot(true);
+    bool inverse_ballot_value = spvSubgroupBallotBitExtract(ballot_value, gl_SubgroupInvocationID);
+    bool bit_extracted = spvSubgroupBallotBitExtract(uint4(10u), 8u);
+    uint bit_count = spvSubgroupBallotBitCount(ballot_value);
+    uint inclusive_bit_count = spvSubgroupBallotInclusiveBitCount(ballot_value, gl_SubgroupInvocationID);
+    uint exclusive_bit_count = spvSubgroupBallotExclusiveBitCount(ballot_value, gl_SubgroupInvocationID);
+    uint lsb = spvSubgroupBallotFindLSB(ballot_value);
+    uint msb = spvSubgroupBallotFindMSB(ballot_value);
+    uint shuffled = simd_shuffle(10u, 8u);
+    uint shuffled_xor = simd_shuffle_xor(30u, 8u);
+    uint shuffled_up = simd_shuffle_up(20u, 4u);
+    uint shuffled_down = simd_shuffle_down(20u, 4u);
+    bool has_all = simd_all(true);
+    bool has_any = simd_any(true);
+    bool has_equal = spvSubgroupAllEqual(0);
+    has_equal = spvSubgroupAllEqual(true);
+    float4 added = simd_sum(float4(20.0));
+    int4 iadded = simd_sum(int4(20));
+    float4 multiplied = simd_product(float4(20.0));
+    int4 imultiplied = simd_product(int4(20));
+    float4 lo = simd_min(float4(20.0));
+    float4 hi = simd_max(float4(20.0));
+    int4 slo = simd_min(int4(20));
+    int4 shi = simd_max(int4(20));
+    uint4 ulo = simd_min(uint4(20u));
+    uint4 uhi = simd_max(uint4(20u));
+    uint4 anded = simd_and(ballot_value);
+    uint4 ored = simd_or(ballot_value);
+    uint4 xored = simd_xor(ballot_value);
+    added = simd_prefix_inclusive_sum(added);
+    iadded = simd_prefix_inclusive_sum(iadded);
+    multiplied = simd_prefix_inclusive_product(multiplied);
+    imultiplied = simd_prefix_inclusive_product(imultiplied);
+    added = simd_prefix_exclusive_sum(multiplied);
+    multiplied = simd_prefix_exclusive_product(multiplied);
+    iadded = simd_prefix_exclusive_sum(imultiplied);
+    imultiplied = simd_prefix_exclusive_product(imultiplied);
+    added = quad_sum(added);
+    multiplied = quad_product(multiplied);
+    iadded = quad_sum(iadded);
+    imultiplied = quad_product(imultiplied);
+    lo = quad_min(lo);
+    hi = quad_max(hi);
+    ulo = quad_min(ulo);
+    uhi = quad_max(uhi);
+    slo = quad_min(slo);
+    shi = quad_max(shi);
+    anded = quad_and(anded);
+    ored = quad_or(ored);
+    xored = quad_xor(xored);
+    float4 swap_horiz = quad_shuffle_xor(float4(20.0), 1u);
+    float4 swap_vertical = quad_shuffle_xor(float4(20.0), 2u);
+    float4 swap_diagonal = quad_shuffle_xor(float4(20.0), 3u);
+    float4 quad_broadcast0 = quad_broadcast(float4(20.0), 3u);
+    return out;
+}
+
diff --git a/reference/shaders-msl/vulkan/vert/multiview.multiview.nocompat.vk.vert b/reference/shaders-msl/vulkan/vert/multiview.multiview.nocompat.vk.vert
new file mode 100644
index 0000000000..c42e67211e
--- /dev/null
+++ b/reference/shaders-msl/vulkan/vert/multiview.multiview.nocompat.vk.vert
@@ -0,0 +1,31 @@
+#include <metal_stdlib>
+#include <simd/simd.h>
+
+using namespace metal;
+
+struct MVPs
+{
+    float4x4 MVP[2];
+};
+
+struct main0_out
+{
+    float4 gl_Position [[position]];
+    uint gl_Layer [[render_target_array_index]];
+};
+
+struct main0_in
+{
+    float4 Position [[attribute(0)]];
+};
+
+vertex main0_out main0(main0_in in [[stage_in]], constant uint* spvViewMask [[buffer(24)]], constant MVPs& _19 [[buffer(0)]], uint gl_InstanceIndex [[instance_id]])
+{
+    main0_out out = {};
+    uint gl_ViewIndex = spvViewMask[0] + gl_InstanceIndex % spvViewMask[1];
+    gl_InstanceIndex /= spvViewMask[1];
+    out.gl_Position = _19.MVP[int(gl_ViewIndex)] * in.Position;
+    out.gl_Layer = gl_ViewIndex - spvViewMask[0];
+    return out;
+}
+
diff --git a/reference/shaders-msl/vulkan/vert/multiview.nocompat.vk.vert b/reference/shaders-msl/vulkan/vert/multiview.nocompat.vk.vert
new file mode 100644
index 0000000000..f87d2a11ad
--- /dev/null
+++ b/reference/shaders-msl/vulkan/vert/multiview.nocompat.vk.vert
@@ -0,0 +1,29 @@
+#include <metal_stdlib>
+#include <simd/simd.h>
+
+using namespace metal;
+
+struct MVPs
+{
+    float4x4 MVP[2];
+};
+
+struct main0_out
+{
+    float4 gl_Position [[position]];
+    uint gl_Layer [[render_target_array_index]];
+};
+
+struct main0_in
+{
+    float4 Position [[attribute(0)]];
+};
+
+vertex main0_out main0(main0_in in [[stage_in]], constant MVPs& _19 [[buffer(0)]], uint gl_InstanceIndex [[instance_id]])
+{
+    main0_out out = {};
+    const uint gl_ViewIndex = 0;
+    out.gl_Position = _19.MVP[int(gl_ViewIndex)] * in.Position;
+    return out;
+}
+
diff --git a/reference/shaders-no-opt/asm/frag/nonuniform-qualifier-propagation.vk.nocompat.asm.frag.vk b/reference/shaders-no-opt/asm/frag/nonuniform-qualifier-propagation.vk.nocompat.asm.frag.vk
new file mode 100644
index 0000000000..5f7ddeee17
--- /dev/null
+++ b/reference/shaders-no-opt/asm/frag/nonuniform-qualifier-propagation.vk.nocompat.asm.frag.vk
@@ -0,0 +1,37 @@
+#version 450
+#extension GL_EXT_nonuniform_qualifier : require
+
+layout(set = 0, binding = 2, std140) uniform UBO
+{
+    vec4 v[64];
+} ubos[];
+
+layout(set = 0, binding = 3, std430) readonly buffer SSBO
+{
+    vec4 v[];
+} ssbos[];
+
+layout(set = 0, binding = 0) uniform texture2D uSamplers[];
+layout(set = 0, binding = 1) uniform sampler uSamps[];
+layout(set = 0, binding = 4) uniform sampler2D uCombinedSamplers[];
+
+layout(location = 0) flat in int vIndex;
+layout(location = 0) out vec4 FragColor;
+layout(location = 1) in vec2 vUV;
+
+void main()
+{
+    int i = vIndex;
+    int _59 = i + 10;
+    int _64 = i + 40;
+    FragColor = texture(sampler2D(uSamplers[nonuniformEXT(_59)], uSamps[nonuniformEXT(_64)]), vUV);
+    int _71 = i + 10;
+    FragColor = texture(uCombinedSamplers[nonuniformEXT(_71)], vUV);
+    int _77 = i + 20;
+    int _80 = i + 40;
+    FragColor += ubos[nonuniformEXT(_77)].v[_80];
+    int _87 = i + 50;
+    int _90 = i + 60;
+    FragColor += ssbos[nonuniformEXT(_87)].v[_90];
+}
+
diff --git a/reference/shaders/asm/comp/phi-temporary-copy-loop-variable.asm.comp b/reference/shaders/asm/comp/phi-temporary-copy-loop-variable.asm.comp
new file mode 100644
index 0000000000..9ae8d6fd7f
--- /dev/null
+++ b/reference/shaders/asm/comp/phi-temporary-copy-loop-variable.asm.comp
@@ -0,0 +1,25 @@
+#version 450
+layout(local_size_x = 8, local_size_y = 8, local_size_z = 1) in;
+
+layout(binding = 1, rgba32f) uniform writeonly image2D outImageTexture;
+
+void main()
+{
+    int _30;
+    _30 = 7;
+    int _27_copy;
+    for (int _27 = 7; _27 >= 0; _27_copy = _27, _27--, _30 = _27_copy)
+    {
+        if (5.0 > float(_27))
+        {
+            break;
+        }
+        else
+        {
+            continue;
+        }
+        continue;
+    }
+    imageStore(outImageTexture, ivec2(gl_GlobalInvocationID.xy), vec4(float(_30 - 1), float(_30), 1.0, 1.0));
+}
+
diff --git a/reference/shaders/asm/extended-debug-extinst.invalid.asm.comp b/reference/shaders/asm/extended-debug-extinst.invalid.asm.comp
new file mode 100644
index 0000000000..7755593f57
--- /dev/null
+++ b/reference/shaders/asm/extended-debug-extinst.invalid.asm.comp
@@ -0,0 +1,18 @@
+#version 430
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+
+layout(binding = 0, std430) buffer _8_9
+{
+    float _m0[];
+} _9;
+
+layout(binding = 1, std430) buffer _8_10
+{
+    float _m0[];
+} _10;
+
+void main()
+{
+    _10._m0[gl_GlobalInvocationID.x] = -_9._m0[gl_GlobalInvocationID.x];
+}
+
diff --git a/reference/shaders/asm/frag/texture-sampling-fp16.asm.vk.frag b/reference/shaders/asm/frag/texture-sampling-fp16.asm.vk.frag
new file mode 100644
index 0000000000..e0feb49327
--- /dev/null
+++ b/reference/shaders/asm/frag/texture-sampling-fp16.asm.vk.frag
@@ -0,0 +1,19 @@
+#version 450
+#if defined(GL_AMD_gpu_shader_half_float)
+#extension GL_AMD_gpu_shader_half_float : require
+#elif defined(GL_NV_gpu_shader5)
+#extension GL_NV_gpu_shader5 : require
+#else
+#error No extension available for FP16.
+#endif
+
+layout(binding = 0) uniform sampler2D uTexture;
+
+layout(location = 0) out f16vec4 FragColor;
+layout(location = 0) in f16vec2 UV;
+
+void main()
+{
+    FragColor = f16vec4(texture(uTexture, UV));
+}
+
diff --git a/reference/shaders/asm/frag/texture-sampling-fp16.asm.vk.frag.vk b/reference/shaders/asm/frag/texture-sampling-fp16.asm.vk.frag.vk
new file mode 100644
index 0000000000..e13e4254ea
--- /dev/null
+++ b/reference/shaders/asm/frag/texture-sampling-fp16.asm.vk.frag.vk
@@ -0,0 +1,20 @@
+#version 450
+#if defined(GL_AMD_gpu_shader_half_float)
+#extension GL_AMD_gpu_shader_half_float : require
+#elif defined(GL_EXT_shader_explicit_arithmetic_types_float16)
+#extension GL_EXT_shader_explicit_arithmetic_types_float16 : require
+#else
+#error No extension available for FP16.
+#endif
+#extension GL_EXT_shader_16bit_storage : require
+
+layout(set = 0, binding = 0) uniform sampler2D uTexture;
+
+layout(location = 0) out f16vec4 FragColor;
+layout(location = 0) in f16vec2 UV;
+
+void main()
+{
+    FragColor = f16vec4(texture(uTexture, UV));
+}
+
diff --git a/reference/shaders/comp/outer-product.comp b/reference/shaders/comp/outer-product.comp
new file mode 100644
index 0000000000..d31dad3b2c
--- /dev/null
+++ b/reference/shaders/comp/outer-product.comp
@@ -0,0 +1,36 @@
+#version 450
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+
+layout(binding = 0, std430) writeonly buffer SSBO
+{
+    mat2 m22;
+    mat2x3 m23;
+    mat2x4 m24;
+    mat3x2 m32;
+    mat3 m33;
+    mat3x4 m34;
+    mat4x2 m42;
+    mat4x3 m43;
+    mat4 m44;
+} _21;
+
+layout(binding = 1, std430) readonly buffer ReadSSBO
+{
+    vec2 v2;
+    vec3 v3;
+    vec4 v4;
+} _26;
+
+void main()
+{
+    _21.m22 = outerProduct(_26.v2, _26.v2);
+    _21.m23 = outerProduct(_26.v3, _26.v2);
+    _21.m24 = outerProduct(_26.v4, _26.v2);
+    _21.m32 = outerProduct(_26.v2, _26.v3);
+    _21.m33 = outerProduct(_26.v3, _26.v3);
+    _21.m34 = outerProduct(_26.v4, _26.v3);
+    _21.m42 = outerProduct(_26.v2, _26.v4);
+    _21.m43 = outerProduct(_26.v3, _26.v4);
+    _21.m44 = outerProduct(_26.v4, _26.v4);
+}
+
diff --git a/reference/shaders/comp/scalar-std450-distance-length-normalize.comp b/reference/shaders/comp/scalar-std450-distance-length-normalize.comp
new file mode 100644
index 0000000000..c28face28a
--- /dev/null
+++ b/reference/shaders/comp/scalar-std450-distance-length-normalize.comp
@@ -0,0 +1,19 @@
+#version 450
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+
+layout(binding = 0, std430) buffer SSBO
+{
+    float a;
+    float b;
+    float c;
+    float d;
+    float e;
+} _9;
+
+void main()
+{
+    _9.c = distance(_9.a, _9.b);
+    _9.d = length(_9.a);
+    _9.e = normalize(_9.a);
+}
+
diff --git a/reference/shaders/frag/for-loop-continue-control-flow.frag b/reference/shaders/frag/for-loop-continue-control-flow.frag
new file mode 100644
index 0000000000..638d89be6b
--- /dev/null
+++ b/reference/shaders/frag/for-loop-continue-control-flow.frag
@@ -0,0 +1,34 @@
+#version 450
+
+layout(location = 0) out vec4 FragColor;
+
+void main()
+{
+    FragColor = vec4(0.0);
+    int i = 0;
+    int _36;
+    for (;;)
+    {
+        if (i < 3)
+        {
+            int a = i;
+            FragColor[a] += float(i);
+            if (false)
+            {
+                _36 = 1;
+            }
+            else
+            {
+                int _41 = i;
+                i = _41 + 1;
+                _36 = _41;
+            }
+            continue;
+        }
+        else
+        {
+            break;
+        }
+    }
+}
+
diff --git a/reference/shaders/frag/scalar-refract-reflect.frag b/reference/shaders/frag/scalar-refract-reflect.frag
new file mode 100644
index 0000000000..94d671b5a9
--- /dev/null
+++ b/reference/shaders/frag/scalar-refract-reflect.frag
@@ -0,0 +1,13 @@
+#version 450
+
+layout(location = 0) out float FragColor;
+layout(location = 0) in vec3 vRefract;
+
+void main()
+{
+    FragColor = refract(vRefract.x, vRefract.y, vRefract.z);
+    FragColor += reflect(vRefract.x, vRefract.y);
+    FragColor += refract(vRefract.xy, vRefract.yz, vRefract.z).y;
+    FragColor += reflect(vRefract.xy, vRefract.zy).y;
+}
+
diff --git a/reference/shaders/frag/selection-block-dominator.frag b/reference/shaders/frag/selection-block-dominator.frag
new file mode 100644
index 0000000000..a0242a00b5
--- /dev/null
+++ b/reference/shaders/frag/selection-block-dominator.frag
@@ -0,0 +1,20 @@
+#version 450
+
+layout(location = 0) flat in int vIndex;
+layout(location = 0) out vec4 FragColor;
+
+void main()
+{
+    int v;
+    if (vIndex != 1)
+    {
+        FragColor = vec4(1.0);
+        return;
+    }
+    else
+    {
+        v = 10;
+    }
+    FragColor = vec4(float(v));
+}
+
diff --git a/shaders-hlsl-no-opt/asm/frag/nonuniform-qualifier-propagation.nonuniformresource.sm51.asm.frag b/shaders-hlsl-no-opt/asm/frag/nonuniform-qualifier-propagation.nonuniformresource.sm51.asm.frag
new file mode 100644
index 0000000000..5aa68849b1
--- /dev/null
+++ b/shaders-hlsl-no-opt/asm/frag/nonuniform-qualifier-propagation.nonuniformresource.sm51.asm.frag
@@ -0,0 +1,159 @@
+; SPIR-V
+; Version: 1.0
+; Generator: Khronos Glslang Reference Front End; 7
+; Bound: 93
+; Schema: 0
+               OpCapability Shader
+               OpCapability ShaderNonUniformEXT
+               OpCapability RuntimeDescriptorArrayEXT
+               OpCapability UniformBufferArrayNonUniformIndexingEXT
+               OpCapability SampledImageArrayNonUniformIndexingEXT
+               OpCapability StorageBufferArrayNonUniformIndexingEXT
+               OpExtension "SPV_EXT_descriptor_indexing"
+          %1 = OpExtInstImport "GLSL.std.450"
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint Fragment %main "main" %vIndex %FragColor %vUV
+               OpExecutionMode %main OriginUpperLeft
+               OpSource GLSL 450
+               OpSourceExtension "GL_EXT_nonuniform_qualifier"
+               OpName %main "main"
+               OpName %i "i"
+               OpName %vIndex "vIndex"
+               OpName %FragColor "FragColor"
+               OpName %uSamplers "uSamplers"
+               OpName %uSamps "uSamps"
+               OpName %vUV "vUV"
+               OpName %uCombinedSamplers "uCombinedSamplers"
+               OpName %UBO "UBO"
+               OpMemberName %UBO 0 "v"
+               OpName %ubos "ubos"
+               OpName %SSBO "SSBO"
+               OpMemberName %SSBO 0 "v"
+               OpName %ssbos "ssbos"
+               OpDecorate %vIndex Flat
+               OpDecorate %vIndex Location 0
+               OpDecorate %FragColor Location 0
+               OpDecorate %uSamplers DescriptorSet 0
+               OpDecorate %uSamplers Binding 0
+
+               OpDecorate %sampled_image NonUniformEXT
+               OpDecorate %combined_sampler NonUniformEXT
+               OpDecorate %ubo_ptr_copy NonUniformEXT
+               OpDecorate %ssbo_ptr_copy NonUniformEXT
+
+               OpDecorate %uSamps DescriptorSet 1
+               OpDecorate %uSamps Binding 0
+               OpDecorate %vUV Location 1
+               OpDecorate %uCombinedSamplers DescriptorSet 0
+               OpDecorate %uCombinedSamplers Binding 4
+               OpDecorate %_arr_v4float_uint_64 ArrayStride 16
+               OpMemberDecorate %UBO 0 Offset 0
+               OpDecorate %UBO Block
+               OpDecorate %ubos DescriptorSet 2
+               OpDecorate %ubos Binding 0
+               OpDecorate %_runtimearr_v4float ArrayStride 16
+               OpMemberDecorate %SSBO 0 NonWritable
+               OpMemberDecorate %SSBO 0 Offset 0
+               OpDecorate %SSBO BufferBlock
+               OpDecorate %ssbos DescriptorSet 3
+               OpDecorate %ssbos Binding 0
+       %void = OpTypeVoid
+          %3 = OpTypeFunction %void
+        %int = OpTypeInt 32 1
+%_ptr_Function_int = OpTypePointer Function %int
+%_ptr_Input_int = OpTypePointer Input %int
+     %vIndex = OpVariable %_ptr_Input_int Input
+      %float = OpTypeFloat 32
+    %v4float = OpTypeVector %float 4
+%_ptr_Output_v4float = OpTypePointer Output %v4float
+  %FragColor = OpVariable %_ptr_Output_v4float Output
+         %16 = OpTypeImage %float 2D 0 0 0 1 Unknown
+%_runtimearr_16 = OpTypeRuntimeArray %16
+%_ptr_UniformConstant__runtimearr_16 = OpTypePointer UniformConstant %_runtimearr_16
+  %uSamplers = OpVariable %_ptr_UniformConstant__runtimearr_16 UniformConstant
+     %int_10 = OpConstant %int 10
+%_ptr_UniformConstant_16 = OpTypePointer UniformConstant %16
+         %27 = OpTypeSampler
+%_runtimearr_27 = OpTypeRuntimeArray %27
+%_ptr_UniformConstant__runtimearr_27 = OpTypePointer UniformConstant %_runtimearr_27
+     %uSamps = OpVariable %_ptr_UniformConstant__runtimearr_27 UniformConstant
+     %int_40 = OpConstant %int 40
+%_ptr_UniformConstant_27 = OpTypePointer UniformConstant %27
+         %38 = OpTypeSampledImage %16
+    %v2float = OpTypeVector %float 2
+%_ptr_Input_v2float = OpTypePointer Input %v2float
+        %vUV = OpVariable %_ptr_Input_v2float Input
+%_runtimearr_38 = OpTypeRuntimeArray %38
+%_ptr_UniformConstant__runtimearr_38 = OpTypePointer UniformConstant %_runtimearr_38
+%uCombinedSamplers = OpVariable %_ptr_UniformConstant__runtimearr_38 UniformConstant
+%_ptr_UniformConstant_38 = OpTypePointer UniformConstant %38
+       %uint = OpTypeInt 32 0
+    %uint_64 = OpConstant %uint 64
+%_arr_v4float_uint_64 = OpTypeArray %v4float %uint_64
+        %UBO = OpTypeStruct %_arr_v4float_uint_64
+%_runtimearr_UBO = OpTypeRuntimeArray %UBO
+%_ptr_Uniform__runtimearr_UBO = OpTypePointer Uniform %_runtimearr_UBO
+       %ubos = OpVariable %_ptr_Uniform__runtimearr_UBO Uniform
+     %int_20 = OpConstant %int 20
+      %int_0 = OpConstant %int 0
+%_ptr_Uniform_v4float = OpTypePointer Uniform %v4float
+%_runtimearr_v4float = OpTypeRuntimeArray %v4float
+       %SSBO = OpTypeStruct %_runtimearr_v4float
+%_runtimearr_SSBO = OpTypeRuntimeArray %SSBO
+%_ptr_Uniform__runtimearr_SSBO = OpTypePointer Uniform %_runtimearr_SSBO
+      %ssbos = OpVariable %_ptr_Uniform__runtimearr_SSBO Uniform
+     %int_50 = OpConstant %int 50
+     %int_60 = OpConstant %int 60
+       %main = OpFunction %void None %3
+          %5 = OpLabel
+          %i = OpVariable %_ptr_Function_int Function
+         %11 = OpLoad %int %vIndex
+               OpStore %i %11
+         %20 = OpLoad %int %i
+         %22 = OpIAdd %int %20 %int_10
+         %23 = OpCopyObject %int %22
+         %25 = OpAccessChain %_ptr_UniformConstant_16 %uSamplers %23
+         %26 = OpLoad %16 %25
+         %31 = OpLoad %int %i
+         %33 = OpIAdd %int %31 %int_40
+         %34 = OpCopyObject %int %33
+         %36 = OpAccessChain %_ptr_UniformConstant_27 %uSamps %34
+         %37 = OpLoad %27 %36
+         %sampled_image = OpSampledImage %38 %26 %37
+         %43 = OpLoad %v2float %vUV
+         %44 = OpImageSampleImplicitLod %v4float %sampled_image %43
+               OpStore %FragColor %44
+         %48 = OpLoad %int %i
+         %49 = OpIAdd %int %48 %int_10
+         %50 = OpCopyObject %int %49
+         %52 = OpAccessChain %_ptr_UniformConstant_38 %uCombinedSamplers %50
+         %combined_sampler = OpLoad %38 %52
+         %54 = OpLoad %v2float %vUV
+         %55 = OpImageSampleImplicitLod %v4float %combined_sampler %54
+               OpStore %FragColor %55
+         %63 = OpLoad %int %i
+         %65 = OpIAdd %int %63 %int_20
+         %66 = OpCopyObject %int %65
+         %68 = OpLoad %int %i
+         %69 = OpIAdd %int %68 %int_40
+         %70 = OpCopyObject %int %69
+         %ubo_ptr = OpAccessChain %_ptr_Uniform_v4float %ubos %66 %int_0 %70
+         %ubo_ptr_copy = OpCopyObject %_ptr_Uniform_v4float %ubo_ptr
+         %73 = OpLoad %v4float %ubo_ptr_copy
+         %74 = OpLoad %v4float %FragColor
+         %75 = OpFAdd %v4float %74 %73
+               OpStore %FragColor %75
+         %81 = OpLoad %int %i
+         %83 = OpIAdd %int %81 %int_50
+         %84 = OpCopyObject %int %83
+         %85 = OpLoad %int %i
+         %87 = OpIAdd %int %85 %int_60
+         %88 = OpCopyObject %int %87
+         %ssbo_ptr = OpAccessChain %_ptr_Uniform_v4float %ssbos %84 %int_0 %88
+         %ssbo_ptr_copy = OpCopyObject %_ptr_Uniform_v4float %ssbo_ptr
+         %90 = OpLoad %v4float %ssbo_ptr_copy
+         %91 = OpLoad %v4float %FragColor
+         %92 = OpFAdd %v4float %91 %90
+               OpStore %FragColor %92
+               OpReturn
+               OpFunctionEnd
diff --git a/shaders-hlsl/asm/frag/texture-sampling-fp16.asm.frag b/shaders-hlsl/asm/frag/texture-sampling-fp16.asm.frag
new file mode 100644
index 0000000000..ae7a972d7b
--- /dev/null
+++ b/shaders-hlsl/asm/frag/texture-sampling-fp16.asm.frag
@@ -0,0 +1,46 @@
+; SPIR-V
+; Version: 1.0
+; Generator: Khronos Glslang Reference Front End; 7
+; Bound: 25
+; Schema: 0
+               OpCapability Shader
+               OpCapability StorageInputOutput16
+               OpExtension "SPV_KHR_16bit_storage"
+          %1 = OpExtInstImport "GLSL.std.450"
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint Fragment %main "main" %FragColor %UV
+               OpExecutionMode %main OriginUpperLeft
+               OpSource GLSL 450
+               OpSourceExtension "GL_EXT_shader_explicit_arithmetic_types_float16"
+               OpName %main "main"
+               OpName %FragColor "FragColor"
+               OpName %uTexture "uTexture"
+               OpName %UV "UV"
+               OpDecorate %FragColor Location 0
+               OpDecorate %uTexture DescriptorSet 0
+               OpDecorate %uTexture Binding 0
+               OpDecorate %UV Location 0
+       %void = OpTypeVoid
+          %3 = OpTypeFunction %void
+       %half = OpTypeFloat 16
+       %float = OpTypeFloat 32
+     %v4half = OpTypeVector %half 4
+     %v4float = OpTypeVector %float 4
+%_ptr_Output_v4half = OpTypePointer Output %v4half
+  %FragColor = OpVariable %_ptr_Output_v4half Output
+         %11 = OpTypeImage %float 2D 0 0 0 1 Unknown
+         %12 = OpTypeSampledImage %11
+%_ptr_UniformConstant_12 = OpTypePointer UniformConstant %12
+   %uTexture = OpVariable %_ptr_UniformConstant_12 UniformConstant
+     %v2half = OpTypeVector %half 2
+%_ptr_Input_v2half = OpTypePointer Input %v2half
+         %UV = OpVariable %_ptr_Input_v2half Input
+       %main = OpFunction %void None %3
+          %5 = OpLabel
+         %15 = OpLoad %12 %uTexture
+         %19 = OpLoad %v2half %UV
+         %23 = OpImageSampleImplicitLod %v4float %15 %19
+		 %24 = OpFConvert %v4half %23
+               OpStore %FragColor %24
+               OpReturn
+               OpFunctionEnd
diff --git a/shaders-hlsl/comp/outer-product.comp b/shaders-hlsl/comp/outer-product.comp
new file mode 100644
index 0000000000..9aba2a54b7
--- /dev/null
+++ b/shaders-hlsl/comp/outer-product.comp
@@ -0,0 +1,37 @@
+#version 450
+layout(local_size_x = 1) in;
+
+layout(set = 0, binding = 0, std430) writeonly buffer SSBO
+{
+	mat2 m22;
+	mat2x3 m23;
+	mat2x4 m24;
+	mat3x2 m32;
+	mat3 m33;
+	mat3x4 m34;
+	mat4x2 m42;
+	mat4x3 m43;
+	mat4 m44;
+};
+
+layout(set = 0, binding = 1, std430) readonly buffer ReadSSBO
+{
+	vec2 v2;
+	vec3 v3;
+	vec4 v4;
+};
+
+void main()
+{
+	m22 = outerProduct(v2, v2);
+	m23 = outerProduct(v3, v2);
+	m24 = outerProduct(v4, v2);
+
+	m32 = outerProduct(v2, v3);
+	m33 = outerProduct(v3, v3);
+	m34 = outerProduct(v4, v3);
+
+	m42 = outerProduct(v2, v4);
+	m43 = outerProduct(v3, v4);
+	m44 = outerProduct(v4, v4);
+}
diff --git a/shaders-hlsl/comp/scalar-std450-distance-length-normalize.comp b/shaders-hlsl/comp/scalar-std450-distance-length-normalize.comp
new file mode 100644
index 0000000000..37414737fd
--- /dev/null
+++ b/shaders-hlsl/comp/scalar-std450-distance-length-normalize.comp
@@ -0,0 +1,18 @@
+#version 450
+layout(local_size_x = 1) in;
+
+layout(std430, set = 0, binding = 0) buffer SSBO
+{
+	float a;
+	float b;
+	float c;
+	float d;
+	float e;
+};
+
+void main()
+{
+	c = distance(a, b);
+	d = length(a);
+	e = normalize(a);
+}
diff --git a/shaders-hlsl/frag/for-loop-continue-control-flow.frag b/shaders-hlsl/frag/for-loop-continue-control-flow.frag
new file mode 100644
index 0000000000..1f91cca2a4
--- /dev/null
+++ b/shaders-hlsl/frag/for-loop-continue-control-flow.frag
@@ -0,0 +1,11 @@
+#version 450
+layout(location = 0) out vec4 FragColor;
+void main()
+{
+	FragColor = vec4(0.0);
+	for (int i = 0; i < 3; (0 > 1) ? 1 : i ++)
+	{
+		int a = i;
+		FragColor[a] += float(i);
+	}
+}
diff --git a/shaders-hlsl/frag/scalar-refract-reflect.frag b/shaders-hlsl/frag/scalar-refract-reflect.frag
new file mode 100644
index 0000000000..486ed90bd5
--- /dev/null
+++ b/shaders-hlsl/frag/scalar-refract-reflect.frag
@@ -0,0 +1,11 @@
+#version 450
+layout(location = 0) out float FragColor;
+layout(location = 0) in vec3 vRefract; 
+
+void main()
+{
+	FragColor = refract(vRefract.x, vRefract.y, vRefract.z);
+	FragColor += reflect(vRefract.x, vRefract.y);
+	FragColor += refract(vRefract.xy, vRefract.yz, vRefract.z).y;
+	FragColor += reflect(vRefract.xy, vRefract.zy).y;
+}
diff --git a/shaders-msl/asm/frag/texture-sampling-fp16.asm.frag b/shaders-msl/asm/frag/texture-sampling-fp16.asm.frag
new file mode 100644
index 0000000000..ae7a972d7b
--- /dev/null
+++ b/shaders-msl/asm/frag/texture-sampling-fp16.asm.frag
@@ -0,0 +1,46 @@
+; SPIR-V
+; Version: 1.0
+; Generator: Khronos Glslang Reference Front End; 7
+; Bound: 25
+; Schema: 0
+               OpCapability Shader
+               OpCapability StorageInputOutput16
+               OpExtension "SPV_KHR_16bit_storage"
+          %1 = OpExtInstImport "GLSL.std.450"
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint Fragment %main "main" %FragColor %UV
+               OpExecutionMode %main OriginUpperLeft
+               OpSource GLSL 450
+               OpSourceExtension "GL_EXT_shader_explicit_arithmetic_types_float16"
+               OpName %main "main"
+               OpName %FragColor "FragColor"
+               OpName %uTexture "uTexture"
+               OpName %UV "UV"
+               OpDecorate %FragColor Location 0
+               OpDecorate %uTexture DescriptorSet 0
+               OpDecorate %uTexture Binding 0
+               OpDecorate %UV Location 0
+       %void = OpTypeVoid
+          %3 = OpTypeFunction %void
+       %half = OpTypeFloat 16
+       %float = OpTypeFloat 32
+     %v4half = OpTypeVector %half 4
+     %v4float = OpTypeVector %float 4
+%_ptr_Output_v4half = OpTypePointer Output %v4half
+  %FragColor = OpVariable %_ptr_Output_v4half Output
+         %11 = OpTypeImage %float 2D 0 0 0 1 Unknown
+         %12 = OpTypeSampledImage %11
+%_ptr_UniformConstant_12 = OpTypePointer UniformConstant %12
+   %uTexture = OpVariable %_ptr_UniformConstant_12 UniformConstant
+     %v2half = OpTypeVector %half 2
+%_ptr_Input_v2half = OpTypePointer Input %v2half
+         %UV = OpVariable %_ptr_Input_v2half Input
+       %main = OpFunction %void None %3
+          %5 = OpLabel
+         %15 = OpLoad %12 %uTexture
+         %19 = OpLoad %v2half %UV
+         %23 = OpImageSampleImplicitLod %v4float %15 %19
+		 %24 = OpFConvert %v4half %23
+               OpStore %FragColor %24
+               OpReturn
+               OpFunctionEnd
diff --git a/shaders-msl/comp/outer-product.comp b/shaders-msl/comp/outer-product.comp
new file mode 100644
index 0000000000..9aba2a54b7
--- /dev/null
+++ b/shaders-msl/comp/outer-product.comp
@@ -0,0 +1,37 @@
+#version 450
+layout(local_size_x = 1) in;
+
+layout(set = 0, binding = 0, std430) writeonly buffer SSBO
+{
+	mat2 m22;
+	mat2x3 m23;
+	mat2x4 m24;
+	mat3x2 m32;
+	mat3 m33;
+	mat3x4 m34;
+	mat4x2 m42;
+	mat4x3 m43;
+	mat4 m44;
+};
+
+layout(set = 0, binding = 1, std430) readonly buffer ReadSSBO
+{
+	vec2 v2;
+	vec3 v3;
+	vec4 v4;
+};
+
+void main()
+{
+	m22 = outerProduct(v2, v2);
+	m23 = outerProduct(v3, v2);
+	m24 = outerProduct(v4, v2);
+
+	m32 = outerProduct(v2, v3);
+	m33 = outerProduct(v3, v3);
+	m34 = outerProduct(v4, v3);
+
+	m42 = outerProduct(v2, v4);
+	m43 = outerProduct(v3, v4);
+	m44 = outerProduct(v4, v4);
+}
diff --git a/shaders-msl/comp/scalar-std450-distance-length-normalize.comp b/shaders-msl/comp/scalar-std450-distance-length-normalize.comp
new file mode 100644
index 0000000000..37414737fd
--- /dev/null
+++ b/shaders-msl/comp/scalar-std450-distance-length-normalize.comp
@@ -0,0 +1,18 @@
+#version 450
+layout(local_size_x = 1) in;
+
+layout(std430, set = 0, binding = 0) buffer SSBO
+{
+	float a;
+	float b;
+	float c;
+	float d;
+	float e;
+};
+
+void main()
+{
+	c = distance(a, b);
+	d = length(a);
+	e = normalize(a);
+}
diff --git a/shaders-msl/frag/for-loop-continue-control-flow.frag b/shaders-msl/frag/for-loop-continue-control-flow.frag
new file mode 100644
index 0000000000..1f91cca2a4
--- /dev/null
+++ b/shaders-msl/frag/for-loop-continue-control-flow.frag
@@ -0,0 +1,11 @@
+#version 450
+layout(location = 0) out vec4 FragColor;
+void main()
+{
+	FragColor = vec4(0.0);
+	for (int i = 0; i < 3; (0 > 1) ? 1 : i ++)
+	{
+		int a = i;
+		FragColor[a] += float(i);
+	}
+}
diff --git a/shaders-msl/frag/scalar-refract-reflect.frag b/shaders-msl/frag/scalar-refract-reflect.frag
new file mode 100644
index 0000000000..486ed90bd5
--- /dev/null
+++ b/shaders-msl/frag/scalar-refract-reflect.frag
@@ -0,0 +1,11 @@
+#version 450
+layout(location = 0) out float FragColor;
+layout(location = 0) in vec3 vRefract; 
+
+void main()
+{
+	FragColor = refract(vRefract.x, vRefract.y, vRefract.z);
+	FragColor += reflect(vRefract.x, vRefract.y);
+	FragColor += refract(vRefract.xy, vRefract.yz, vRefract.z).y;
+	FragColor += reflect(vRefract.xy, vRefract.zy).y;
+}
diff --git a/shaders-msl/frag/subgroup-builtins.msl22.frag b/shaders-msl/frag/subgroup-builtins.msl22.frag
new file mode 100644
index 0000000000..746438f00f
--- /dev/null
+++ b/shaders-msl/frag/subgroup-builtins.msl22.frag
@@ -0,0 +1,10 @@
+#version 450
+#extension GL_KHR_shader_subgroup_basic : require
+
+layout(location = 0) out uvec2 FragColor;
+
+void main()
+{
+	FragColor.x = gl_SubgroupSize;
+	FragColor.y = gl_SubgroupInvocationID;
+}
diff --git a/shaders-msl/vulkan/frag/basic.multiview.nocompat.vk.frag b/shaders-msl/vulkan/frag/basic.multiview.nocompat.vk.frag
new file mode 100644
index 0000000000..963493b871
--- /dev/null
+++ b/shaders-msl/vulkan/frag/basic.multiview.nocompat.vk.frag
@@ -0,0 +1,14 @@
+#version 310 es
+#extension GL_EXT_multiview : require
+precision mediump float;
+
+layout(location = 0) in vec4 vColor;
+layout(location = 1) in vec2 vTex[4];
+layout(binding = 0) uniform sampler2D uTex;
+layout(location = 0) out vec4 FragColor;
+
+void main()
+{
+    FragColor = vColor * texture(uTex, vTex[gl_ViewIndex]);
+}
+
diff --git a/shaders-msl/vulkan/frag/subgroups.nocompat.invalid.vk.msl21.frag b/shaders-msl/vulkan/frag/subgroups.nocompat.invalid.vk.msl21.frag
new file mode 100644
index 0000000000..3a2cf0234c
--- /dev/null
+++ b/shaders-msl/vulkan/frag/subgroups.nocompat.invalid.vk.msl21.frag
@@ -0,0 +1,119 @@
+#version 450
+#extension GL_KHR_shader_subgroup_basic : require
+#extension GL_KHR_shader_subgroup_ballot : require
+#extension GL_KHR_shader_subgroup_vote : require
+#extension GL_KHR_shader_subgroup_shuffle : require
+#extension GL_KHR_shader_subgroup_shuffle_relative : require
+#extension GL_KHR_shader_subgroup_arithmetic : require
+#extension GL_KHR_shader_subgroup_clustered : require
+#extension GL_KHR_shader_subgroup_quad : require
+
+layout(location = 0) out float FragColor;
+
+void main()
+{
+	// basic
+	FragColor = float(gl_SubgroupSize);
+	FragColor = float(gl_SubgroupInvocationID);
+	subgroupBarrier();
+	subgroupMemoryBarrier();
+	subgroupMemoryBarrierBuffer();
+	subgroupMemoryBarrierImage();
+	bool elected = subgroupElect();
+
+	// ballot
+	FragColor = float(gl_SubgroupEqMask);
+	FragColor = float(gl_SubgroupGeMask);
+	FragColor = float(gl_SubgroupGtMask);
+	FragColor = float(gl_SubgroupLeMask);
+	FragColor = float(gl_SubgroupLtMask);
+	vec4 broadcasted = subgroupBroadcast(vec4(10.0), 8u);
+	vec3 first = subgroupBroadcastFirst(vec3(20.0));
+	uvec4 ballot_value = subgroupBallot(true);
+	bool inverse_ballot_value = subgroupInverseBallot(ballot_value);
+	bool bit_extracted = subgroupBallotBitExtract(uvec4(10u), 8u);
+	uint bit_count = subgroupBallotBitCount(ballot_value);
+	uint inclusive_bit_count = subgroupBallotInclusiveBitCount(ballot_value);
+	uint exclusive_bit_count = subgroupBallotExclusiveBitCount(ballot_value);
+	uint lsb = subgroupBallotFindLSB(ballot_value);
+	uint msb = subgroupBallotFindMSB(ballot_value);
+
+	// shuffle
+	uint shuffled = subgroupShuffle(10u, 8u);
+	uint shuffled_xor = subgroupShuffleXor(30u, 8u);
+
+	// shuffle relative 
+	uint shuffled_up = subgroupShuffleUp(20u, 4u);
+	uint shuffled_down = subgroupShuffleDown(20u, 4u);
+
+	// vote
+	bool has_all = subgroupAll(true);
+	bool has_any = subgroupAny(true);
+	bool has_equal = subgroupAllEqual(0);
+	has_equal = subgroupAllEqual(true);
+
+	// arithmetic
+	vec4 added = subgroupAdd(vec4(20.0));
+	ivec4 iadded = subgroupAdd(ivec4(20));
+	vec4 multiplied = subgroupMul(vec4(20.0));
+	ivec4 imultiplied = subgroupMul(ivec4(20));
+	vec4 lo = subgroupMin(vec4(20.0));
+	vec4 hi = subgroupMax(vec4(20.0));
+	ivec4 slo = subgroupMin(ivec4(20));
+	ivec4 shi = subgroupMax(ivec4(20));
+	uvec4 ulo = subgroupMin(uvec4(20));
+	uvec4 uhi = subgroupMax(uvec4(20));
+	uvec4 anded = subgroupAnd(ballot_value);
+	uvec4 ored = subgroupOr(ballot_value);
+	uvec4 xored = subgroupXor(ballot_value);
+
+	added = subgroupInclusiveAdd(added);
+	iadded = subgroupInclusiveAdd(iadded);
+	multiplied = subgroupInclusiveMul(multiplied);
+	imultiplied = subgroupInclusiveMul(imultiplied);
+	//lo = subgroupInclusiveMin(lo);  // FIXME: Unsupported by Metal
+	//hi = subgroupInclusiveMax(hi);
+	//slo = subgroupInclusiveMin(slo);
+	//shi = subgroupInclusiveMax(shi);
+	//ulo = subgroupInclusiveMin(ulo);
+	//uhi = subgroupInclusiveMax(uhi);
+	//anded = subgroupInclusiveAnd(anded);
+	//ored = subgroupInclusiveOr(ored);
+	//xored = subgroupInclusiveXor(ored);
+	//added = subgroupExclusiveAdd(lo);
+
+	added = subgroupExclusiveAdd(multiplied);
+	multiplied = subgroupExclusiveMul(multiplied);
+	iadded = subgroupExclusiveAdd(imultiplied);
+	imultiplied = subgroupExclusiveMul(imultiplied);
+	//lo = subgroupExclusiveMin(lo);  // FIXME: Unsupported by Metal
+	//hi = subgroupExclusiveMax(hi);
+	//ulo = subgroupExclusiveMin(ulo);
+	//uhi = subgroupExclusiveMax(uhi);
+	//slo = subgroupExclusiveMin(slo);
+	//shi = subgroupExclusiveMax(shi);
+	//anded = subgroupExclusiveAnd(anded);
+	//ored = subgroupExclusiveOr(ored);
+	//xored = subgroupExclusiveXor(ored);
+
+	// clustered
+	added = subgroupClusteredAdd(added, 4u);
+	multiplied = subgroupClusteredMul(multiplied, 4u);
+	iadded = subgroupClusteredAdd(iadded, 4u);
+	imultiplied = subgroupClusteredMul(imultiplied, 4u);
+	lo = subgroupClusteredMin(lo, 4u);
+	hi = subgroupClusteredMax(hi, 4u);
+	ulo = subgroupClusteredMin(ulo, 4u);
+	uhi = subgroupClusteredMax(uhi, 4u);
+	slo = subgroupClusteredMin(slo, 4u);
+	shi = subgroupClusteredMax(shi, 4u);
+	anded = subgroupClusteredAnd(anded, 4u);
+	ored = subgroupClusteredOr(ored, 4u);
+	xored = subgroupClusteredXor(xored, 4u);
+
+	// quad
+	vec4 swap_horiz = subgroupQuadSwapHorizontal(vec4(20.0));
+	vec4 swap_vertical = subgroupQuadSwapVertical(vec4(20.0));
+	vec4 swap_diagonal = subgroupQuadSwapDiagonal(vec4(20.0));
+	vec4 quad_broadcast = subgroupQuadBroadcast(vec4(20.0), 3u);
+}
diff --git a/shaders-msl/vulkan/vert/multiview.multiview.nocompat.vk.vert b/shaders-msl/vulkan/vert/multiview.multiview.nocompat.vk.vert
new file mode 100644
index 0000000000..eb1bc766f2
--- /dev/null
+++ b/shaders-msl/vulkan/vert/multiview.multiview.nocompat.vk.vert
@@ -0,0 +1,14 @@
+#version 310 es
+#extension GL_EXT_multiview : require
+
+layout(std140, binding = 0) uniform MVPs
+{
+	mat4 MVP[2];
+};
+
+layout(location = 0) in vec4 Position;
+
+void main()
+{
+	gl_Position = MVP[gl_ViewIndex] * Position;
+}
diff --git a/shaders-msl/vulkan/vert/multiview.nocompat.vk.vert b/shaders-msl/vulkan/vert/multiview.nocompat.vk.vert
new file mode 100644
index 0000000000..eb1bc766f2
--- /dev/null
+++ b/shaders-msl/vulkan/vert/multiview.nocompat.vk.vert
@@ -0,0 +1,14 @@
+#version 310 es
+#extension GL_EXT_multiview : require
+
+layout(std140, binding = 0) uniform MVPs
+{
+	mat4 MVP[2];
+};
+
+layout(location = 0) in vec4 Position;
+
+void main()
+{
+	gl_Position = MVP[gl_ViewIndex] * Position;
+}
diff --git a/shaders-no-opt/asm/frag/nonuniform-qualifier-propagation.vk.nocompat.asm.frag b/shaders-no-opt/asm/frag/nonuniform-qualifier-propagation.vk.nocompat.asm.frag
new file mode 100644
index 0000000000..e2d4562a8a
--- /dev/null
+++ b/shaders-no-opt/asm/frag/nonuniform-qualifier-propagation.vk.nocompat.asm.frag
@@ -0,0 +1,159 @@
+; SPIR-V
+; Version: 1.0
+; Generator: Khronos Glslang Reference Front End; 7
+; Bound: 93
+; Schema: 0
+               OpCapability Shader
+               OpCapability ShaderNonUniformEXT
+               OpCapability RuntimeDescriptorArrayEXT
+               OpCapability UniformBufferArrayNonUniformIndexingEXT
+               OpCapability SampledImageArrayNonUniformIndexingEXT
+               OpCapability StorageBufferArrayNonUniformIndexingEXT
+               OpExtension "SPV_EXT_descriptor_indexing"
+          %1 = OpExtInstImport "GLSL.std.450"
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint Fragment %main "main" %vIndex %FragColor %vUV
+               OpExecutionMode %main OriginUpperLeft
+               OpSource GLSL 450
+               OpSourceExtension "GL_EXT_nonuniform_qualifier"
+               OpName %main "main"
+               OpName %i "i"
+               OpName %vIndex "vIndex"
+               OpName %FragColor "FragColor"
+               OpName %uSamplers "uSamplers"
+               OpName %uSamps "uSamps"
+               OpName %vUV "vUV"
+               OpName %uCombinedSamplers "uCombinedSamplers"
+               OpName %UBO "UBO"
+               OpMemberName %UBO 0 "v"
+               OpName %ubos "ubos"
+               OpName %SSBO "SSBO"
+               OpMemberName %SSBO 0 "v"
+               OpName %ssbos "ssbos"
+               OpDecorate %vIndex Flat
+               OpDecorate %vIndex Location 0
+               OpDecorate %FragColor Location 0
+               OpDecorate %uSamplers DescriptorSet 0
+               OpDecorate %uSamplers Binding 0
+
+               OpDecorate %sampled_image NonUniformEXT
+               OpDecorate %combined_sampler NonUniformEXT
+               OpDecorate %ubo_ptr_copy NonUniformEXT
+               OpDecorate %ssbo_ptr_copy NonUniformEXT
+
+               OpDecorate %uSamps DescriptorSet 0
+               OpDecorate %uSamps Binding 1
+               OpDecorate %vUV Location 1
+               OpDecorate %uCombinedSamplers DescriptorSet 0
+               OpDecorate %uCombinedSamplers Binding 4
+               OpDecorate %_arr_v4float_uint_64 ArrayStride 16
+               OpMemberDecorate %UBO 0 Offset 0
+               OpDecorate %UBO Block
+               OpDecorate %ubos DescriptorSet 0
+               OpDecorate %ubos Binding 2
+               OpDecorate %_runtimearr_v4float ArrayStride 16
+               OpMemberDecorate %SSBO 0 NonWritable
+               OpMemberDecorate %SSBO 0 Offset 0
+               OpDecorate %SSBO BufferBlock
+               OpDecorate %ssbos DescriptorSet 0
+               OpDecorate %ssbos Binding 3
+       %void = OpTypeVoid
+          %3 = OpTypeFunction %void
+        %int = OpTypeInt 32 1
+%_ptr_Function_int = OpTypePointer Function %int
+%_ptr_Input_int = OpTypePointer Input %int
+     %vIndex = OpVariable %_ptr_Input_int Input
+      %float = OpTypeFloat 32
+    %v4float = OpTypeVector %float 4
+%_ptr_Output_v4float = OpTypePointer Output %v4float
+  %FragColor = OpVariable %_ptr_Output_v4float Output
+         %16 = OpTypeImage %float 2D 0 0 0 1 Unknown
+%_runtimearr_16 = OpTypeRuntimeArray %16
+%_ptr_UniformConstant__runtimearr_16 = OpTypePointer UniformConstant %_runtimearr_16
+  %uSamplers = OpVariable %_ptr_UniformConstant__runtimearr_16 UniformConstant
+     %int_10 = OpConstant %int 10
+%_ptr_UniformConstant_16 = OpTypePointer UniformConstant %16
+         %27 = OpTypeSampler
+%_runtimearr_27 = OpTypeRuntimeArray %27
+%_ptr_UniformConstant__runtimearr_27 = OpTypePointer UniformConstant %_runtimearr_27
+     %uSamps = OpVariable %_ptr_UniformConstant__runtimearr_27 UniformConstant
+     %int_40 = OpConstant %int 40
+%_ptr_UniformConstant_27 = OpTypePointer UniformConstant %27
+         %38 = OpTypeSampledImage %16
+    %v2float = OpTypeVector %float 2
+%_ptr_Input_v2float = OpTypePointer Input %v2float
+        %vUV = OpVariable %_ptr_Input_v2float Input
+%_runtimearr_38 = OpTypeRuntimeArray %38
+%_ptr_UniformConstant__runtimearr_38 = OpTypePointer UniformConstant %_runtimearr_38
+%uCombinedSamplers = OpVariable %_ptr_UniformConstant__runtimearr_38 UniformConstant
+%_ptr_UniformConstant_38 = OpTypePointer UniformConstant %38
+       %uint = OpTypeInt 32 0
+    %uint_64 = OpConstant %uint 64
+%_arr_v4float_uint_64 = OpTypeArray %v4float %uint_64
+        %UBO = OpTypeStruct %_arr_v4float_uint_64
+%_runtimearr_UBO = OpTypeRuntimeArray %UBO
+%_ptr_Uniform__runtimearr_UBO = OpTypePointer Uniform %_runtimearr_UBO
+       %ubos = OpVariable %_ptr_Uniform__runtimearr_UBO Uniform
+     %int_20 = OpConstant %int 20
+      %int_0 = OpConstant %int 0
+%_ptr_Uniform_v4float = OpTypePointer Uniform %v4float
+%_runtimearr_v4float = OpTypeRuntimeArray %v4float
+       %SSBO = OpTypeStruct %_runtimearr_v4float
+%_runtimearr_SSBO = OpTypeRuntimeArray %SSBO
+%_ptr_Uniform__runtimearr_SSBO = OpTypePointer Uniform %_runtimearr_SSBO
+      %ssbos = OpVariable %_ptr_Uniform__runtimearr_SSBO Uniform
+     %int_50 = OpConstant %int 50
+     %int_60 = OpConstant %int 60
+       %main = OpFunction %void None %3
+          %5 = OpLabel
+          %i = OpVariable %_ptr_Function_int Function
+         %11 = OpLoad %int %vIndex
+               OpStore %i %11
+         %20 = OpLoad %int %i
+         %22 = OpIAdd %int %20 %int_10
+         %23 = OpCopyObject %int %22
+         %25 = OpAccessChain %_ptr_UniformConstant_16 %uSamplers %23
+         %26 = OpLoad %16 %25
+         %31 = OpLoad %int %i
+         %33 = OpIAdd %int %31 %int_40
+         %34 = OpCopyObject %int %33
+         %36 = OpAccessChain %_ptr_UniformConstant_27 %uSamps %34
+         %37 = OpLoad %27 %36
+         %sampled_image = OpSampledImage %38 %26 %37
+         %43 = OpLoad %v2float %vUV
+         %44 = OpImageSampleImplicitLod %v4float %sampled_image %43
+               OpStore %FragColor %44
+         %48 = OpLoad %int %i
+         %49 = OpIAdd %int %48 %int_10
+         %50 = OpCopyObject %int %49
+         %52 = OpAccessChain %_ptr_UniformConstant_38 %uCombinedSamplers %50
+         %combined_sampler = OpLoad %38 %52
+         %54 = OpLoad %v2float %vUV
+         %55 = OpImageSampleImplicitLod %v4float %combined_sampler %54
+               OpStore %FragColor %55
+         %63 = OpLoad %int %i
+         %65 = OpIAdd %int %63 %int_20
+         %66 = OpCopyObject %int %65
+         %68 = OpLoad %int %i
+         %69 = OpIAdd %int %68 %int_40
+         %70 = OpCopyObject %int %69
+         %ubo_ptr = OpAccessChain %_ptr_Uniform_v4float %ubos %66 %int_0 %70
+         %ubo_ptr_copy = OpCopyObject %_ptr_Uniform_v4float %ubo_ptr
+         %73 = OpLoad %v4float %ubo_ptr_copy
+         %74 = OpLoad %v4float %FragColor
+         %75 = OpFAdd %v4float %74 %73
+               OpStore %FragColor %75
+         %81 = OpLoad %int %i
+         %83 = OpIAdd %int %81 %int_50
+         %84 = OpCopyObject %int %83
+         %85 = OpLoad %int %i
+         %87 = OpIAdd %int %85 %int_60
+         %88 = OpCopyObject %int %87
+         %ssbo_ptr = OpAccessChain %_ptr_Uniform_v4float %ssbos %84 %int_0 %88
+         %ssbo_ptr_copy = OpCopyObject %_ptr_Uniform_v4float %ssbo_ptr
+         %90 = OpLoad %v4float %ssbo_ptr_copy
+         %91 = OpLoad %v4float %FragColor
+         %92 = OpFAdd %v4float %91 %90
+               OpStore %FragColor %92
+               OpReturn
+               OpFunctionEnd
diff --git a/shaders/asm/comp/phi-temporary-copy-loop-variable.asm.comp b/shaders/asm/comp/phi-temporary-copy-loop-variable.asm.comp
new file mode 100644
index 0000000000..611591246c
--- /dev/null
+++ b/shaders/asm/comp/phi-temporary-copy-loop-variable.asm.comp
@@ -0,0 +1,68 @@
+; SPIR-V
+; Version: 1.0
+; Generator: Google spiregg; 0
+; Bound: 42
+; Schema: 0
+               OpCapability Shader
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %cs_test "main" %gl_GlobalInvocationID %gl_LocalInvocationIndex
+               OpExecutionMode %cs_test LocalSize 8 8 1
+               OpSource HLSL 600
+               OpName %type_2d_image "type.2d.image"
+               OpName %outImageTexture "outImageTexture"
+               OpName %cs_test "cs_test"
+               OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
+               OpDecorate %gl_LocalInvocationIndex BuiltIn LocalInvocationIndex
+               OpDecorate %outImageTexture DescriptorSet 0
+               OpDecorate %outImageTexture Binding 1
+      %float = OpTypeFloat 32
+    %float_5 = OpConstant %float 5
+    %float_1 = OpConstant %float 1
+        %int = OpTypeInt 32 1
+      %int_7 = OpConstant %int 7
+      %int_0 = OpConstant %int 0
+      %int_1 = OpConstant %int 1
+%type_2d_image = OpTypeImage %float 2D 2 0 0 2 Rgba32f
+%_ptr_UniformConstant_type_2d_image = OpTypePointer UniformConstant %type_2d_image
+       %uint = OpTypeInt 32 0
+     %v3uint = OpTypeVector %uint 3
+%_ptr_Input_v3uint = OpTypePointer Input %v3uint
+%_ptr_Input_uint = OpTypePointer Input %uint
+       %void = OpTypeVoid
+         %19 = OpTypeFunction %void
+     %v2uint = OpTypeVector %uint 2
+    %v4float = OpTypeVector %float 4
+       %bool = OpTypeBool
+%outImageTexture = OpVariable %_ptr_UniformConstant_type_2d_image UniformConstant
+%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input
+%gl_LocalInvocationIndex = OpVariable %_ptr_Input_uint Input
+    %cs_test = OpFunction %void None %19
+         %23 = OpLabel
+         %24 = OpLoad %v3uint %gl_GlobalInvocationID
+         %25 = OpVectorShuffle %v2uint %24 %24 0 1
+               OpBranch %26
+         %26 = OpLabel
+         %27 = OpPhi %int %int_7 %23 %28 %29
+         %30 = OpPhi %int %int_7 %23 %27 %29
+         %31 = OpSGreaterThanEqual %bool %27 %int_0
+               OpLoopMerge %32 %29 None
+               OpBranchConditional %31 %33 %32
+         %33 = OpLabel
+         %34 = OpConvertSToF %float %27
+         %35 = OpFOrdGreaterThan %bool %float_5 %34
+               OpSelectionMerge %29 None
+               OpBranchConditional %35 %36 %29
+         %36 = OpLabel
+               OpBranch %32
+         %29 = OpLabel
+         %28 = OpISub %int %27 %int_1
+               OpBranch %26
+         %32 = OpLabel
+         %37 = OpISub %int %30 %int_1
+         %38 = OpConvertSToF %float %37
+         %39 = OpConvertSToF %float %30
+         %40 = OpCompositeConstruct %v4float %38 %39 %float_1 %float_1
+         %41 = OpLoad %type_2d_image %outImageTexture
+               OpImageWrite %41 %25 %40 None
+               OpReturn
+               OpFunctionEnd
diff --git a/shaders/asm/extended-debug-extinst.invalid.asm.comp b/shaders/asm/extended-debug-extinst.invalid.asm.comp
new file mode 100644
index 0000000000..5b6a189a71
--- /dev/null
+++ b/shaders/asm/extended-debug-extinst.invalid.asm.comp
@@ -0,0 +1,67 @@
+; SPIR-V
+; Version: 1.3
+; Generator: Khronos SPIR-V Tools Assembler; 0
+; Bound: 37
+; Schema: 0
+               OpCapability Shader
+          %1 = OpExtInstImport "DebugInfo"
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %2 "main" %3
+               OpExecutionMode %2 LocalSize 1 1 1
+          %4 = OpString "negateInputs.comp"
+          %5 = OpString "negateInputs"
+          %6 = OpString "main"
+          %7 = OpString ""
+               OpSource GLSL 430
+               OpName %2 "main"
+               OpName %3 "gl_GlobalInvocationID"
+               OpDecorate %3 BuiltIn GlobalInvocationId
+               OpDecorate %8 BufferBlock
+               OpDecorate %9 DescriptorSet 0
+               OpDecorate %9 Binding 0
+               OpDecorate %10 DescriptorSet 0
+               OpDecorate %10 Binding 1
+               OpDecorate %11 ArrayStride 4
+               OpMemberDecorate %8 0 Offset 0
+               OpLine %4 0 0
+         %12 = OpTypeBool
+         %13 = OpTypeVoid
+         %14 = OpTypeFunction %13
+         %15 = OpTypeInt 32 0
+         %16 = OpTypeInt 32 1
+         %17 = OpTypeFloat 32
+         %18 = OpTypeVector %15 3
+         %19 = OpTypeVector %17 3
+         %20 = OpTypePointer Input %18
+         %21 = OpTypePointer Uniform %16
+         %22 = OpTypePointer Uniform %17
+         %23 = OpTypeRuntimeArray %16
+         %11 = OpTypeRuntimeArray %17
+          %8 = OpTypeStruct %11
+         %24 = OpTypePointer Uniform %8
+          %9 = OpVariable %24 Uniform
+         %10 = OpVariable %24 Uniform
+               OpLine %4 0 1
+               OpLine %5 1 0
+               OpLine %4 1000 100000
+          %3 = OpVariable %20 Input
+         %25 = OpConstant %16 0
+		 	OpNoLine
+               OpLine %4 1 1
+         %26 = OpExtInst %13 %1 DebugInfoNone
+         %27 = OpExtInst %13 %1 DebugTypeFunction %13
+         %28 = OpExtInst %13 %1 DebugFunction %6 %27 %4 1 1 %4 %7 FlagIsDefinition|FlagPrototyped|FlagIsOptimized 1 %26 %26
+          %2 = OpFunction %13 None %14
+         %29 = OpLabel
+         %30 = OpExtInst %13 %1 DebugScope %28
+               OpLine %4 1 1
+         %31 = OpLoad %18 %3
+         %32 = OpCompositeExtract %15 %31 0
+         %33 = OpAccessChain %22 %9 %25 %32
+         %34 = OpLoad %17 %33
+         %35 = OpFNegate %17 %34
+         %36 = OpAccessChain %22 %10 %25 %32
+               OpStore %36 %35
+			   OpNoLine
+               OpReturn
+               OpFunctionEnd
diff --git a/shaders/asm/frag/texture-sampling-fp16.asm.vk.frag b/shaders/asm/frag/texture-sampling-fp16.asm.vk.frag
new file mode 100644
index 0000000000..ae7a972d7b
--- /dev/null
+++ b/shaders/asm/frag/texture-sampling-fp16.asm.vk.frag
@@ -0,0 +1,46 @@
+; SPIR-V
+; Version: 1.0
+; Generator: Khronos Glslang Reference Front End; 7
+; Bound: 25
+; Schema: 0
+               OpCapability Shader
+               OpCapability StorageInputOutput16
+               OpExtension "SPV_KHR_16bit_storage"
+          %1 = OpExtInstImport "GLSL.std.450"
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint Fragment %main "main" %FragColor %UV
+               OpExecutionMode %main OriginUpperLeft
+               OpSource GLSL 450
+               OpSourceExtension "GL_EXT_shader_explicit_arithmetic_types_float16"
+               OpName %main "main"
+               OpName %FragColor "FragColor"
+               OpName %uTexture "uTexture"
+               OpName %UV "UV"
+               OpDecorate %FragColor Location 0
+               OpDecorate %uTexture DescriptorSet 0
+               OpDecorate %uTexture Binding 0
+               OpDecorate %UV Location 0
+       %void = OpTypeVoid
+          %3 = OpTypeFunction %void
+       %half = OpTypeFloat 16
+       %float = OpTypeFloat 32
+     %v4half = OpTypeVector %half 4
+     %v4float = OpTypeVector %float 4
+%_ptr_Output_v4half = OpTypePointer Output %v4half
+  %FragColor = OpVariable %_ptr_Output_v4half Output
+         %11 = OpTypeImage %float 2D 0 0 0 1 Unknown
+         %12 = OpTypeSampledImage %11
+%_ptr_UniformConstant_12 = OpTypePointer UniformConstant %12
+   %uTexture = OpVariable %_ptr_UniformConstant_12 UniformConstant
+     %v2half = OpTypeVector %half 2
+%_ptr_Input_v2half = OpTypePointer Input %v2half
+         %UV = OpVariable %_ptr_Input_v2half Input
+       %main = OpFunction %void None %3
+          %5 = OpLabel
+         %15 = OpLoad %12 %uTexture
+         %19 = OpLoad %v2half %UV
+         %23 = OpImageSampleImplicitLod %v4float %15 %19
+		 %24 = OpFConvert %v4half %23
+               OpStore %FragColor %24
+               OpReturn
+               OpFunctionEnd
diff --git a/shaders/comp/outer-product.comp b/shaders/comp/outer-product.comp
new file mode 100644
index 0000000000..9aba2a54b7
--- /dev/null
+++ b/shaders/comp/outer-product.comp
@@ -0,0 +1,37 @@
+#version 450
+layout(local_size_x = 1) in;
+
+layout(set = 0, binding = 0, std430) writeonly buffer SSBO
+{
+	mat2 m22;
+	mat2x3 m23;
+	mat2x4 m24;
+	mat3x2 m32;
+	mat3 m33;
+	mat3x4 m34;
+	mat4x2 m42;
+	mat4x3 m43;
+	mat4 m44;
+};
+
+layout(set = 0, binding = 1, std430) readonly buffer ReadSSBO
+{
+	vec2 v2;
+	vec3 v3;
+	vec4 v4;
+};
+
+void main()
+{
+	m22 = outerProduct(v2, v2);
+	m23 = outerProduct(v3, v2);
+	m24 = outerProduct(v4, v2);
+
+	m32 = outerProduct(v2, v3);
+	m33 = outerProduct(v3, v3);
+	m34 = outerProduct(v4, v3);
+
+	m42 = outerProduct(v2, v4);
+	m43 = outerProduct(v3, v4);
+	m44 = outerProduct(v4, v4);
+}
diff --git a/shaders/comp/scalar-std450-distance-length-normalize.comp b/shaders/comp/scalar-std450-distance-length-normalize.comp
new file mode 100644
index 0000000000..37414737fd
--- /dev/null
+++ b/shaders/comp/scalar-std450-distance-length-normalize.comp
@@ -0,0 +1,18 @@
+#version 450
+layout(local_size_x = 1) in;
+
+layout(std430, set = 0, binding = 0) buffer SSBO
+{
+	float a;
+	float b;
+	float c;
+	float d;
+	float e;
+};
+
+void main()
+{
+	c = distance(a, b);
+	d = length(a);
+	e = normalize(a);
+}
diff --git a/shaders/frag/for-loop-continue-control-flow.frag b/shaders/frag/for-loop-continue-control-flow.frag
new file mode 100644
index 0000000000..1f91cca2a4
--- /dev/null
+++ b/shaders/frag/for-loop-continue-control-flow.frag
@@ -0,0 +1,11 @@
+#version 450
+layout(location = 0) out vec4 FragColor;
+void main()
+{
+	FragColor = vec4(0.0);
+	for (int i = 0; i < 3; (0 > 1) ? 1 : i ++)
+	{
+		int a = i;
+		FragColor[a] += float(i);
+	}
+}
diff --git a/shaders/frag/scalar-refract-reflect.frag b/shaders/frag/scalar-refract-reflect.frag
new file mode 100644
index 0000000000..486ed90bd5
--- /dev/null
+++ b/shaders/frag/scalar-refract-reflect.frag
@@ -0,0 +1,11 @@
+#version 450
+layout(location = 0) out float FragColor;
+layout(location = 0) in vec3 vRefract; 
+
+void main()
+{
+	FragColor = refract(vRefract.x, vRefract.y, vRefract.z);
+	FragColor += reflect(vRefract.x, vRefract.y);
+	FragColor += refract(vRefract.xy, vRefract.yz, vRefract.z).y;
+	FragColor += reflect(vRefract.xy, vRefract.zy).y;
+}
diff --git a/shaders/frag/selection-block-dominator.frag b/shaders/frag/selection-block-dominator.frag
new file mode 100644
index 0000000000..257f4e6934
--- /dev/null
+++ b/shaders/frag/selection-block-dominator.frag
@@ -0,0 +1,18 @@
+#version 450
+layout(location = 0) out vec4 FragColor;
+layout(location = 0) flat in int vIndex; 
+
+void main()
+{
+	int v;
+	if (vIndex != 1)
+	{
+		FragColor = vec4(1.0);
+		return;
+	}
+	else
+	{
+		v = 10;
+	}
+	FragColor = vec4(v);
+}
diff --git a/tests-other/msl_resource_binding.spv b/tests-other/msl_resource_binding.spv
new file mode 100644
index 0000000000..1798902467
Binary files /dev/null and b/tests-other/msl_resource_binding.spv differ
diff --git a/tests-other/msl_resource_bindings.cpp b/tests-other/msl_resource_bindings.cpp
new file mode 100644
index 0000000000..fcb3213e1d
--- /dev/null
+++ b/tests-other/msl_resource_bindings.cpp
@@ -0,0 +1,86 @@
+// Testbench for MSL resource binding APIs.
+// It does not validate output at the moment, but it's useful for ad-hoc testing.
+
+#include <spirv_cross_c.h>
+#include <vector>
+#include <stdio.h>
+#include <stdlib.h>
+
+#define SPVC_CHECKED_CALL(x) do { \
+	if ((x) != SPVC_SUCCESS) { \
+		fprintf(stderr, "Failed at line %d.\n", __LINE__); \
+		exit(1); \
+	} \
+} while(0)
+
+static std::vector<SpvId> read_file(const char *path)
+{
+	long len;
+	FILE *file = fopen(path, "rb");
+
+	if (!file)
+		return {};
+
+	fseek(file, 0, SEEK_END);
+	len = ftell(file);
+	rewind(file);
+
+	std::vector<SpvId> buffer(len / sizeof(SpvId));
+	if (fread(buffer.data(), 1, len, file) != (size_t)len)
+	{
+		fclose(file);
+		return {};
+	}
+
+	fclose(file);
+	return buffer;
+}
+
+int main(int argc, char **argv)
+{
+	if (argc != 2)
+		return EXIT_FAILURE;
+
+	auto buffer = read_file(argv[1]);
+	if (buffer.empty())
+		return EXIT_FAILURE;
+
+	spvc_context ctx;
+	spvc_parsed_ir parsed_ir;
+	spvc_compiler compiler;
+
+	SPVC_CHECKED_CALL(spvc_context_create(&ctx));
+	SPVC_CHECKED_CALL(spvc_context_parse_spirv(ctx, buffer.data(), buffer.size(), &parsed_ir));
+	SPVC_CHECKED_CALL(spvc_context_create_compiler(ctx, SPVC_BACKEND_MSL, parsed_ir, SPVC_CAPTURE_MODE_TAKE_OWNERSHIP, &compiler));
+	SPVC_CHECKED_CALL(spvc_compiler_msl_add_discrete_descriptor_set(compiler, 3));
+
+	spvc_compiler_options opts;
+	SPVC_CHECKED_CALL(spvc_compiler_create_compiler_options(compiler, &opts));
+	SPVC_CHECKED_CALL(spvc_compiler_options_set_bool(opts, SPVC_COMPILER_OPTION_MSL_ARGUMENT_BUFFERS, SPVC_TRUE));
+	SPVC_CHECKED_CALL(spvc_compiler_options_set_uint(opts, SPVC_COMPILER_OPTION_MSL_VERSION, 20000));
+	SPVC_CHECKED_CALL(spvc_compiler_install_compiler_options(compiler, opts));
+
+	spvc_msl_resource_binding binding;
+	spvc_msl_resource_binding_init(&binding);
+	binding.binding = SPVC_MSL_ARGUMENT_BUFFER_BINDING;
+	binding.stage = SpvExecutionModelFragment;
+	binding.desc_set = 0;
+	binding.msl_buffer = 2;
+	SPVC_CHECKED_CALL(spvc_compiler_msl_add_resource_binding(compiler, &binding));
+
+	binding.desc_set = 1;
+	binding.msl_buffer = 3;
+	SPVC_CHECKED_CALL(spvc_compiler_msl_add_resource_binding(compiler, &binding));
+
+	const char *str;
+	SPVC_CHECKED_CALL(spvc_compiler_compile(compiler, &str));
+
+	fprintf(stderr, "Output:\n%s\n", str);
+
+	if (!spvc_compiler_msl_is_resource_used(compiler, SpvExecutionModelFragment, 0, SPVC_MSL_ARGUMENT_BUFFER_BINDING))
+		return EXIT_FAILURE;
+
+	if (!spvc_compiler_msl_is_resource_used(compiler, SpvExecutionModelFragment, 1, SPVC_MSL_ARGUMENT_BUFFER_BINDING))
+		return EXIT_FAILURE;
+}
+