#version 450 core

#extension GL_EXT_bfloat16 : require
#extension GL_EXT_float_e4m3 : require
#extension GL_KHR_cooperative_matrix : enable
#extension GL_NV_cooperative_matrix2 : enable
#extension GL_KHR_memory_scope_semantics : enable
#extension GL_EXT_shader_explicit_arithmetic_types : enable
#extension GL_EXT_scalar_block_layout : enable

// Compute workgroup size: one invocation in each dimension.
layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;

// Global constants of the floate4m3 type in three forms: a scalar constant,
// a two-component vector constant built from a single scalar argument, and a
// specialization constant (constant_id 1) whose default is 10.
const floate4m3_t bc10 = floate4m3_t(10);
const fe4m3vec2 b2c20 = fe4m3vec2(20);
layout(constant_id = 1) const floate4m3_t bsc10 = floate4m3_t(10);

// Struct with a single floate4m3_t member, and a constant instance of it
// constructed from bc10.
struct S { floate4m3_t b; };
const S sc = S(bc10);

// Constant two-element array initialized with a brace initializer list.
const floate4m3_t bca[2] = {bc10, bc10};

// Workgroup-shared array of floate4m3_t. It is declared only; none of the
// functions visible in this file read or write it.
shared floate4m3_t bfs[10];

// Buffer block declared with the `scalar` layout qualifier, holding a
// three-component vector, a two-component vector and a scalar of the
// floate4m3 type. main() only reads from it (buf.b1, buf.b2, buf.b3).
// No set/binding is specified in the declaration.
layout(scalar) buffer B {
    fe4m3vec3 b3;
    fe4m3vec2 b2;
    floate4m3_t b1;
} buf;

// Identity function on floate4m3_t: returns its argument unchanged.
// Provides a function with a floate4m3_t parameter and return type;
// main() calls it with a floate4m3_t argument and discards the result.
floate4m3_t funcfe4m3(floate4m3_t x)
{
    return x;
}

// Converts a float32_t to floate4m3_t using the explicit constructor.
// main() calls this with a floate4m3_t argument, so that call site depends on
// the argument being implicitly converted to the float32_t parameter type.
floate4m3_t funcf32(float32_t x)
{
    return floate4m3_t(x);
}

// Converts a float64_t to floate4m3_t using the explicit constructor.
// main() calls this with a floate4m3_t argument, so that call site depends on
// the argument being implicitly converted to the float64_t parameter type.
floate4m3_t funcf64(float64_t x)
{
    return floate4m3_t(x);
}

// Shader entry point. Walks through floate4m3_t usage one construct at a
// time: construction, explicit and implicit conversions against the
// explicitly sized integer and float types, buffer loads, cooperative-matrix
// construction and conversion, function calls, and the bit-cast built-ins.
// Nothing computed here is stored to the buffer or to shared memory.
void main()
{
    // Construction from a literal, from a float variable, and as a vector.
    float f = 2.0;
    floate4m3_t b = floate4m3_t(1.0);
    // Constructor expression used as a statement; its result is discarded.
    floate4m3_t(f);
    fe4m3vec2 b2 = fe4m3vec2(f);

    // One variable of each explicitly sized integer and float type, used as
    // the source and destination of the conversions below.
    uint8_t u8 = uint8_t(5);
    uint16_t u16 = uint16_t(5);
    uint32_t u32 = 5;
    uint64_t u64 = 5;
    int8_t i8 = int8_t(6);
    int16_t i16 = int16_t(6);
    int32_t i32 = 6;
    int64_t i64 = 6;
    floate4m3_t fe4m3 = floate4m3_t(7);
    float16_t f16 = float16_t(7);
    float32_t f32 = 7;
    float64_t f64 = 7;
    // Explicit constructor conversions from every type above to floate4m3_t
    // (including floate4m3_t to itself).
    b = floate4m3_t(u8);
    b = floate4m3_t(u16);
    b = floate4m3_t(u32);
    b = floate4m3_t(u64);
    b = floate4m3_t(i8);
    b = floate4m3_t(i16);
    b = floate4m3_t(i32);
    b = floate4m3_t(i64);
    b = floate4m3_t(fe4m3);
    b = floate4m3_t(f16);
    b = floate4m3_t(f32);
    b = floate4m3_t(f64);
    // Explicit constructor conversions in the opposite direction: from
    // floate4m3_t to every type above.
    u8 = uint8_t(b);
    u16 = uint16_t(b);
    u32 = uint32_t(b);
    u64 = uint64_t(b);
    i8 = int8_t(b);
    i16 = int16_t(b);
    i32 = int32_t(b);
    i64 = int64_t(b);
    fe4m3 = floate4m3_t(b);
    f16 = float16_t(b);
    f32 = float32_t(b);
    f64 = float64_t(b);

    // Assignments without a constructor: floate4m3_t implicitly converted to
    // each of the 16-, 32- and 64-bit float types.
    f16 = b;
    f32 = b;
    f64 = b;

    // Loads of the scalar, vec2 and vec3 members from the buffer block.
    b = buf.b1;
    b2 = buf.b2;
    fe4m3vec3 b3 = buf.b3;

    // Cooperative matrix with floate4m3_t components (subgroup scope, 16x16,
    // use A) constructed from the scalar 3.0.
    coopmat<floate4m3_t, gl_ScopeSubgroup, 16, 16, gl_MatrixUseA> cmA = coopmat<floate4m3_t, gl_ScopeSubgroup, 16, 16, gl_MatrixUseA>(3.0);

    // Component-type conversion of that matrix to float; scope, dimensions
    // and use are the same on both sides.
    coopmat<float, gl_ScopeSubgroup, 16, 16, gl_MatrixUseA> cmAf = coopmat<float, gl_ScopeSubgroup, 16, 16, gl_MatrixUseA>(cmA);

    // Function calls with a floate4m3_t argument; return values are
    // discarded. funcf32/funcf64 take float32_t/float64_t parameters.
    funcfe4m3(b);
    funcf32(b);
    funcf64(b);

    // floate4m3BitsToIntEXT on a scalar and on 2-, 3- and 4-component
    // vectors; results are stored in int8_t-based types of matching size.
    // The vec3/vec4 arguments are assembled from b2 and b.
    int8_t i8_1 = floate4m3BitsToIntEXT(b);
    i8vec2 i8_2 = floate4m3BitsToIntEXT(b2);
    i8vec3 i8_3 = floate4m3BitsToIntEXT(fe4m3vec3(b2, b));
    i8vec4 i8_4 = floate4m3BitsToIntEXT(fe4m3vec4(b2, b2));

    // Same four cases with floate4m3BitsToUintEXT and uint8_t-based results.
    uint8_t u8_1 = floate4m3BitsToUintEXT(b);
    u8vec2  u8_2 = floate4m3BitsToUintEXT(b2);
    u8vec3  u8_3 = floate4m3BitsToUintEXT(fe4m3vec3(b2, b));
    u8vec4  u8_4 = floate4m3BitsToUintEXT(fe4m3vec4(b2, b2));

    // Reverse direction: intBitsToFloate4m3EXT applied to the signed results
    // above, for scalar through 4-component vector.
    floate4m3_t f8_1 = intBitsToFloate4m3EXT(i8_1);
    fe4m3vec2   f8_2 = intBitsToFloate4m3EXT(i8_2);
    fe4m3vec3   f8_3 = intBitsToFloate4m3EXT(i8_3);
    fe4m3vec4   f8_4 = intBitsToFloate4m3EXT(i8_4);

    // Reverse direction from the unsigned results, overwriting the same
    // destinations.
    f8_1 = uintBitsToFloate4m3EXT(u8_1);
    f8_2 = uintBitsToFloate4m3EXT(u8_2);
    f8_3 = uintBitsToFloate4m3EXT(u8_3);
    f8_4 = uintBitsToFloate4m3EXT(u8_4);
}

// Exercises indexing, swizzling, struct member stores, the ternary operator
// and if/else branches on floate4m3 values. Not called by any function
// visible in this file.
//
// v: array of two fe4m3vec4 values. It carries no out/inout qualifier, so the
//    store to v[i].z below modifies only this function's copy.
// i: used both as an array index into v and as a vector component index.
//
// Returns v[0].x when i == 2, otherwise v[1].y; every earlier assignment to
// b is overwritten by the final if/else.
//
// NOTE(review): v has two elements, so i == 2 would index past the array in
// v[i] (and past the two-component vector in the first statement).
// Presumably this is irrelevant because the function is only meant to be
// compiled, not executed -- confirm.
floate4m3_t func2(fe4m3vec4 v[2], int i)
{
    floate4m3_t b;
    // vec4 -> vec2 constructor followed by a dynamic component index.
    b = (fe4m3vec2(v[0]))[i];
    // vec4 -> vec2 constructor followed by a swizzle.
    b = (fe4m3vec2(v[1])).y;

    // Store to a swizzled component of a dynamically indexed array element,
    // reading from a dynamically indexed component of the same element.
    v[i].z = v[i][i];

    // Store into a struct member; s is not read afterwards.
    S s;
    s.b = b;

    // Ternary selection between a variable and a constructed constant.
    b = (i != 0) ? b : floate4m3_t(2);

    // Branch-dependent assignment; both arms overwrite b.
    if (i == 2) {
        b = v[0].x;
    } else {
        b = v[1].y;
    }

    return b;
}

// Exercises saturatedConvertEXT and cooperative-matrix conversions that
// produce floate4m3 values. Not called by any function visible in this file,
// and nothing computed here is stored outside the function.
void saturatedconverts()
{
    float f = 1.0;
    vec2 f2 = vec2(f);
    floate4m3_t b;
    fe4m3vec2 b2;
    // u and i are declared without initializers and are passed below without
    // ever being assigned.
    uint32_t u;
    int32_t i;
    // saturatedConvertEXT with a floate4m3 first argument and, in turn, a
    // float scalar, a float vec2, a uint32_t and an int32_t second argument.
    // The first argument appears to be the conversion destination -- confirm
    // against the extension specification.
    saturatedConvertEXT(b, f);
    saturatedConvertEXT(b2, f2);
    saturatedConvertEXT(b, u);
    saturatedConvertEXT(b, i);

    // Cooperative matrices (subgroup scope, 16x16), all declared without
    // initializers: float16_t with accumulator use, float16_t with use A,
    // and floate4m3_t with use A.
    coopmat<float16_t, gl_ScopeSubgroup, 16, 16, gl_MatrixUseAccumulator> matf16C;
    coopmat<float16_t, gl_ScopeSubgroup, 16, 16, gl_MatrixUseA> matf16A;
    coopmat<floate4m3_t, gl_ScopeSubgroup, 16, 16, gl_MatrixUseA> mate4m3A;

    // Constructor conversions float16_t -> floate4m3_t. The first keeps use A;
    // the second also changes the matrix use from accumulator to A
    // (presumably relying on GL_NV_cooperative_matrix2 -- confirm).
    mate4m3A = coopmat<floate4m3_t, gl_ScopeSubgroup, 16, 16, gl_MatrixUseA>(matf16A);
    mate4m3A = coopmat<floate4m3_t, gl_ScopeSubgroup, 16, 16, gl_MatrixUseA>(matf16C);
    // The same two source matrices passed through saturatedConvertEXT.
    saturatedConvertEXT(mate4m3A, matf16A);
    saturatedConvertEXT(mate4m3A, matf16C);
}