xemu/subprojects/glslang/Test/spv.bfloat16.comp

146 lines
3.5 KiB
Plaintext

#version 450 core
#extension GL_EXT_bfloat16 : require
#extension GL_KHR_cooperative_matrix : enable
#extension GL_KHR_memory_scope_semantics : enable
#extension GL_EXT_shader_explicit_arithmetic_types : enable
#extension GL_EXT_scalar_block_layout : enable
// Compute workgroup size: 1 x 1 x 1.
layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
// Global bfloat16 constants: a scalar built from the integer literal 10 and a
// two-component vector built from the single scalar 20.
const bfloat16_t bc10 = bfloat16_t(10);
const bf16vec2 b2c20 = bf16vec2(20);
// Specialization constant (constant_id = 1) of type bfloat16_t, default bfloat16_t(10).
layout(constant_id = 1) const bfloat16_t bsc10 = bfloat16_t(10);
// Struct with a single bfloat16_t member, plus a constant instance built from bc10.
struct S { bfloat16_t b; };
const S sc = S(bc10);
// Constant two-element bfloat16_t array written with a brace initializer list.
const bfloat16_t bca[2] = {bc10, bc10};
// Shared array of ten bfloat16_t elements; not referenced elsewhere in the visible source.
shared bfloat16_t bfs[10];
// Buffer block B (instance name buf), declared with the scalar block layout.
// Holds a 3-component, a 2-component and a scalar bfloat16 member, in that order;
// main() reads all three.
layout(scalar) buffer B {
bf16vec3 b3;
bf16vec2 b2;
bfloat16_t b1;
} buf;
// Takes a bfloat16_t parameter and returns it unchanged.
bfloat16_t funcbf16(bfloat16_t x)
{
return x;
}
// Takes a float32_t parameter and returns it converted with the bfloat16_t constructor.
bfloat16_t funcf32(float32_t x)
{
return bfloat16_t(x);
}
// Takes a float64_t parameter and returns it converted with the bfloat16_t constructor.
bfloat16_t funcf64(float64_t x)
{
return bfloat16_t(x);
}
// Entry point. A straight-line sequence of bfloat16 operations: constructors to and
// from the other scalar types, buffer reads, dot(), cooperative-matrix construction,
// function calls, and the bfloat16 <-> 16-bit integer bit-conversion built-ins.
// Several statements deliberately discard their result.
void main()
{
float f = 2.0;
bfloat16_t b = bfloat16_t(1.0);
// Constructor used as a bare expression statement; the result is discarded.
bfloat16_t(f);
bf16vec2 b2 = bf16vec2(f);
// One local of each sized integer and floating-point scalar type.
uint8_t u8 = uint8_t(5);
uint16_t u16 = uint16_t(5);
uint32_t u32 = 5;
uint64_t u64 = 5;
int8_t i8 = int8_t(6);
int16_t i16 = int16_t(6);
int32_t i32 = 6;
int64_t i64 = 6;
bfloat16_t bf16 = bfloat16_t(7);
float16_t f16 = float16_t(7);
float32_t f32 = 7;
float64_t f64 = 7;
// Explicit bfloat16_t constructor applied to each of the scalar types above.
b = bfloat16_t(u8);
b = bfloat16_t(u16);
b = bfloat16_t(u32);
b = bfloat16_t(u64);
b = bfloat16_t(i8);
b = bfloat16_t(i16);
b = bfloat16_t(i32);
b = bfloat16_t(i64);
b = bfloat16_t(bf16);
b = bfloat16_t(f16);
b = bfloat16_t(f32);
b = bfloat16_t(f64);
// The reverse direction: each scalar type's constructor applied to the bfloat16_t value.
u8 = uint8_t(b);
u16 = uint16_t(b);
u32 = uint32_t(b);
u64 = uint64_t(b);
i8 = int8_t(b);
i16 = int16_t(b);
i32 = int32_t(b);
i64 = int64_t(b);
bf16 = bfloat16_t(b);
f16 = float16_t(b);
f32 = float32_t(b);
f64 = float64_t(b);
// Direct assignment of a bfloat16_t to float32_t / float64_t with no explicit constructor.
f32 = b;
f64 = b;
// Reads of the scalar, 2-component and 3-component members of the buffer block.
b = buf.b1;
b2 = buf.b2;
bf16vec3 b3 = buf.b3;
// dot() on bfloat16 vectors; the result is discarded.
dot(b2, b2);
// 16x16 subgroup-scope "use A" cooperative matrix of bfloat16_t constructed from 3.0,
// then a float cooperative matrix of the same shape constructed from it.
coopmat<bfloat16_t, gl_ScopeSubgroup, 16, 16, gl_MatrixUseA> cmA = coopmat<bfloat16_t, gl_ScopeSubgroup, 16, 16, gl_MatrixUseA>(3.0);
coopmat<float, gl_ScopeSubgroup, 16, 16, gl_MatrixUseA> cmAf = coopmat<float, gl_ScopeSubgroup, 16, 16, gl_MatrixUseA>(cmA);
// Calls with a bfloat16_t argument; funcf32 / funcf64 declare float32_t / float64_t
// parameters, so b is passed without an explicit constructor. Return values are discarded.
funcbf16(b);
funcf32(b);
funcf64(b);
// bfloat16BitsToIntEXT on 1-, 2-, 3- and 4-component operands, stored as int16_t / i16vecN.
int16_t i16_1 = bfloat16BitsToIntEXT(b);
i16vec2 i16_2 = bfloat16BitsToIntEXT(b2);
i16vec3 i16_3 = bfloat16BitsToIntEXT(bf16vec3(b2, b));
i16vec4 i16_4 = bfloat16BitsToIntEXT(bf16vec4(b2, b2));
// bfloat16BitsToUintEXT on the same operand shapes, stored as uint16_t / u16vecN.
uint16_t u16_1 = bfloat16BitsToUintEXT(b);
u16vec2 u16_2 = bfloat16BitsToUintEXT(b2);
u16vec3 u16_3 = bfloat16BitsToUintEXT(bf16vec3(b2, b));
u16vec4 u16_4 = bfloat16BitsToUintEXT(bf16vec4(b2, b2));
// intBitsToBFloat16EXT applied to the signed results above, for each component count.
bfloat16_t b16_1 = intBitsToBFloat16EXT(i16_1);
bf16vec2 b16_2 = intBitsToBFloat16EXT(i16_2);
bf16vec3 b16_3 = intBitsToBFloat16EXT(i16_3);
bf16vec4 b16_4 = intBitsToBFloat16EXT(i16_4);
// uintBitsToBFloat16EXT applied to the unsigned results, overwriting the same variables.
b16_1 = uintBitsToBFloat16EXT(u16_1);
b16_2 = uintBitsToBFloat16EXT(u16_2);
b16_3 = uintBitsToBFloat16EXT(u16_3);
b16_4 = uintBitsToBFloat16EXT(u16_4);
}
// Takes a two-element array of bf16vec4 and an int index, and runs bfloat16 values
// through indexing, swizzling, a struct member store, a ternary and an if/else.
// Returns v[0].x when i == 2 and v[1].y otherwise; the earlier assignments to b are
// all overwritten by that final if/else.
// Not called from anywhere in the visible source.
bfloat16_t func2(bf16vec4 v[2], int i)
{
bfloat16_t b;
// Index (with the runtime value i) into a bf16vec2 constructed from a bf16vec4.
b = (bf16vec2(v[0]))[i];
// Swizzle applied to a constructed bf16vec2.
b = (bf16vec2(v[1])).y;
// Write one component of an array element selected by i, from a component also selected by i.
v[i].z = v[i][i];
// Store a bfloat16_t into a struct member.
S s;
s.b = b;
// Ternary selecting between a bfloat16_t variable and a constructed bfloat16_t.
b = (i != 0) ? b : bfloat16_t(2);
// NOTE(review): i == 2 would already be out of range for the 2-element array v at the
// v[i] indexing above; presumably irrelevant because this file looks like a
// compile-only test input — confirm.
if (i == 2) {
b = v[0].x;
} else {
b = v[1].y;
}
return b;
}