#version 450 core
#extension GL_KHR_memory_scope_semantics : enable
#extension GL_NV_cooperative_vector : enable
#extension GL_EXT_shader_explicit_arithmetic_types : enable
#extension GL_EXT_buffer_reference : enable

// Compute shader that issues GL_NV_cooperative_vector accumulate built-ins
// (outer-product accumulate and reduce-sum accumulate) against a float16
// storage buffer.
//
// Each preprocessor directive above must sit on its own line: a directive
// extends to the end of the line, so placing further code after `#version`
// on the same line makes the shader unparseable.

// Work-group size: 64 x 1 x 1 invocations.
layout (local_size_x = 64, local_size_y = 1, local_size_z = 1) in;

// Storage buffer (set 0, binding 0) used as the destination of the
// accumulate operations below; declared as an unsized array of float16_t.
layout(set = 0, binding = 0) buffer MatrixBuf {
    float16_t matrixData[];
} matrixBuf;

void main()
{
    // NOTE(review): the declarations of v0/v1 originally had no type
    // parameters (`coopvecNV v0;`), which is not a valid type. The component
    // type float16_t is chosen to match the buffer element type and the
    // gl_ComponentTypeFloat16NV interpretation passed below; the component
    // count (16) is a placeholder -- TODO confirm the intended sizes.
    coopvecNV<float16_t, 16> v0;
    coopvecNV<float16_t, 16> v1;

    // Plain (non-cooperative) vector, passed to the cooperative-vector
    // built-ins in the second and third calls below.
    vec4 v2;

    // Arguments after the two vectors: destination buffer, offset (100),
    // stride (128), matrix layout, matrix component interpretation.
    // NOTE(review): verify that the extension permits the InferencingOptimal
    // layout for outer-product accumulation -- TODO confirm against the spec.
    coopVecOuterProductAccumulateNV(v0, v1, matrixBuf.matrixData, 100, 128,
                                    gl_CooperativeVectorMatrixLayoutInferencingOptimalNV,
                                    gl_ComponentTypeFloat16NV);

    // NOTE(review): v2 is a vec4, not a coopvecNV. Presumably this call and
    // the next one are intentionally ill-typed (compiler diagnostic test);
    // if this is meant to be a working shader, v2 must become a coopvecNV --
    // TODO confirm intent.
    coopVecOuterProductAccumulateNV(v0, v2, matrixBuf.matrixData, 100, 128,
                                    gl_CooperativeVectorMatrixLayoutInferencingOptimalNV,
                                    gl_ComponentTypeFloat16NV);

    // Reduce-sum accumulate of v2 into the buffer at offset 100.
    coopVecReduceSumAccumulateNV(v2, matrixBuf.matrixData, 100);
}