#version 450 core
// Each preprocessor directive must sit on its own line: a directive extends to
// the end of the line, so nothing else may share a line with #version/#extension.
#extension GL_KHR_memory_scope_semantics : enable
#extension GL_NV_cooperative_vector : enable
#extension GL_EXT_shader_explicit_arithmetic_types : enable
#extension GL_EXT_buffer_reference : enable
#extension GL_EXT_nonuniform_qualifier : enable

// Compute shader that loads and stores one cooperative vector through several
// differently-typed and differently-qualified backing arrays: three storage
// blocks, one buffer_reference block, and one shared-memory array.

layout (local_size_x = 64, local_size_y = 1, local_size_z = 1) in;

// All descriptor-backed blocks below are declared with the same set/binding
// (set = 0, binding = 0); they differ in element type and memory qualifiers.

// 64-bit unsigned 3-component elements, no memory qualifiers.
layout(set = 0, binding = 0) buffer MatrixBuf {
    u64vec3 matrixData[];
} matrixBuf;

// 16-bit float elements; `coherent` is placed on the member.
layout(set = 0, binding = 0) buffer MatrixBuf2 {
    coherent float16_t matrixData[];
} matrixBuf2;

// 32-bit float elements; `coherent` on the block, `volatile` on the member.
layout(set = 0, binding = 0) coherent buffer MatrixBuf3 {
    volatile float32_t matrixData[];
} matrixBuf3;

// Buffer-reference block type (accessed through a reference value, declared in main).
layout(set = 0, binding = 0, buffer_reference) buffer MatrixBuf4 {
    coherent float16_t matrixData[];
};

// Workgroup-shared backing array of 100 32-bit unsigned integers.
shared uint32_t matrixShmem[100];

void main()
{
    // coopvecNV is a parameterized type and needs <component type, component count>.
    // NOTE(review): the parameter list was missing from the original text (likely
    // stripped as an HTML-like tag); <float16_t, 5> is an assumed restoration -
    // confirm against the intended shader.
    coopvecNV<float16_t, 5> v;

    // For every backing array: load with offset 128, then store with offset 144.
    // NOTE(review): offsets are presumably byte offsets per
    // GL_NV_cooperative_vector - confirm.
    coopVecLoadNV(v, matrixBuf.matrixData, 128);
    coopVecStoreNV(v, matrixBuf.matrixData, 144);

    coopVecLoadNV(v, matrixBuf2.matrixData, 128);
    coopVecStoreNV(v, matrixBuf2.matrixData, 144);

    coopVecLoadNV(v, matrixBuf3.matrixData, 128);
    coopVecStoreNV(v, matrixBuf3.matrixData, 144);

    // NOTE(review): matrixBuf4 is never assigned an address in this shader, so
    // this looks like a compile-only case rather than runnable code - confirm.
    MatrixBuf4 matrixBuf4;
    coopVecLoadNV(v, matrixBuf4.matrixData, 128);
    coopVecStoreNV(v, matrixBuf4.matrixData, 144);

    coopVecLoadNV(v, matrixShmem, 128);
    coopVecStoreNV(v, matrixShmem, 144);
}