mirror of https://github.com/xemu-project/xemu.git
170 lines
4.4 KiB
Plaintext
#version 450 core
|
|
|
|
// Extension directives. `require` makes compilation fail when the extension is
// unsupported; `enable` only turns the extension on when it is available.
// NOTE(review): no bfloat16 type or built-in appears in the visible source, so this
// `require` looks unnecessary for this shader - confirm before removing.
#extension GL_EXT_bfloat16 : require
|
|
// Presumably the source of floate5m2_t, the fe5m2vecN types and the *e5m2* built-ins
// used throughout this file - confirm against the extension specification.
#extension GL_EXT_float_e5m2 : require
|
|
// Presumably provides the coopmat<...> type and gl_MatrixUse* constants used below.
#extension GL_KHR_cooperative_matrix : enable
|
|
#extension GL_NV_cooperative_matrix2 : enable
|
|
// Presumably provides gl_ScopeSubgroup, used as the coopmat scope argument.
#extension GL_KHR_memory_scope_semantics : enable
|
|
// Presumably provides the explicitly sized types (uint8_t, int64_t, float16_t, ...).
#extension GL_EXT_shader_explicit_arithmetic_types : enable
|
|
// Needed for the layout(scalar) qualifier on buffer block B.
#extension GL_EXT_scalar_block_layout : enable
|
|
|
|
// Compute workgroup size of 1 x 1 x 1.
layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
|
|
|
|
// Global-scope declarations that use floate5m2_t in each kind of declaration:
// plain constant, vector constant, specialization constant, struct member,
// constant struct, constant array and shared array.

// Scalar constant constructed from the integer literal 10.
const floate5m2_t bc10 = floate5m2_t(10);
|
|
// Two-component vector constant constructed from a single scalar argument.
const fe5m2vec2 b2c20 = fe5m2vec2(20);
|
|
// Specialization constant with constant_id 1; 10 is its default value.
layout(constant_id = 1) const floate5m2_t bsc10 = floate5m2_t(10);
|
|
|
|
// Struct whose only member is a floate5m2_t.
struct S { floate5m2_t b; };
|
|
// Constant instance of S initialized from bc10.
const S sc = S(bc10);
|
|
|
|
// Constant two-element array written with brace-initializer syntax.
const floate5m2_t bca[2] = {bc10, bc10};
|
|
|
|
// Shared array of ten elements; not referenced anywhere else in the visible source.
shared floate5m2_t bfs[10];
|
|
|
|
// Buffer block with scalar layout holding a 3-component vector, a 2-component
// vector and a scalar of the e5m2 type. main() reads all three members through
// the instance name `buf`; nothing in the visible source writes to them.
layout(scalar) buffer B {
|
|
fe5m2vec3 b3;
|
|
fe5m2vec2 b2;
|
|
floate5m2_t b1;
|
|
} buf;
|
|
|
|
// Returns its floate5m2_t argument unchanged. Used by main() to pass a
// floate5m2_t value through a function parameter and return value.
floate5m2_t funcfe5m2(floate5m2_t x)
|
|
{
|
|
return x;
|
|
}
|
|
|
|
// Takes a float32_t and returns it converted to floate5m2_t through an explicit
// constructor. main() calls this with a floate5m2_t argument, so that call site
// relies on an implicit conversion to the float32_t parameter type.
floate5m2_t funcf32(float32_t x)
|
|
{
|
|
return floate5m2_t(x);
|
|
}
|
|
|
|
// Takes a float64_t and returns it converted to floate5m2_t through an explicit
// constructor. main() calls this with a floate5m2_t argument, so that call site
// relies on an implicit conversion to the float64_t parameter type.
floate5m2_t funcf64(float64_t x)
|
|
{
|
|
return floate5m2_t(x);
|
|
}
|
|
|
|
// Entry point. A straight-line sequence of statements, each using floate5m2_t
// (or its vector / cooperative-matrix forms) in a different construct:
// constructors, explicit and implicit conversions, buffer loads, function calls
// and bit-reinterpretation built-ins. All results land in local variables; the
// visible code performs no buffer or shared-memory writes.
// NOTE(review): this appears to be a compile-coverage shader rather than one
// meant to compute a result - confirm against the owning test suite.
void main()
|
|
{
|
|
float f = 2.0;
|
|
// Construction from a floating-point literal.
floate5m2_t b = floate5m2_t(1.0);
|
|
// Constructor used as an expression statement; the converted value is discarded.
floate5m2_t(f);
|
|
// Vector construction from a single float scalar.
fe5m2vec2 b2 = fe5m2vec2(f);
|
|
|
|
// One local of every explicitly sized integer and float type, used as the
// sources and destinations of the conversions that follow.
uint8_t u8 = uint8_t(5);
|
|
uint16_t u16 = uint16_t(5);
|
|
uint32_t u32 = 5;
|
|
uint64_t u64 = 5;
|
|
int8_t i8 = int8_t(6);
|
|
int16_t i16 = int16_t(6);
|
|
int32_t i32 = 6;
|
|
int64_t i64 = 6;
|
|
floate5m2_t fe5m2 = floate5m2_t(7);
|
|
float16_t f16 = float16_t(7);
|
|
float32_t f32 = 7;
|
|
float64_t f64 = 7;
|
|
// Explicit conversions TO floate5m2_t from each scalar type declared above
// (including the same-type case, floate5m2_t(fe5m2)).
b = floate5m2_t(u8);
|
|
b = floate5m2_t(u16);
|
|
b = floate5m2_t(u32);
|
|
b = floate5m2_t(u64);
|
|
b = floate5m2_t(i8);
|
|
b = floate5m2_t(i16);
|
|
b = floate5m2_t(i32);
|
|
b = floate5m2_t(i64);
|
|
b = floate5m2_t(fe5m2);
|
|
b = floate5m2_t(f16);
|
|
b = floate5m2_t(f32);
|
|
b = floate5m2_t(f64);
|
|
// Explicit conversions FROM floate5m2_t back to each scalar type.
u8 = uint8_t(b);
|
|
u16 = uint16_t(b);
|
|
u32 = uint32_t(b);
|
|
u64 = uint64_t(b);
|
|
i8 = int8_t(b);
|
|
i16 = int16_t(b);
|
|
i32 = int32_t(b);
|
|
i64 = int64_t(b);
|
|
fe5m2 = floate5m2_t(b);
|
|
f16 = float16_t(b);
|
|
f32 = float32_t(b);
|
|
f64 = float64_t(b);
|
|
|
|
// Assignments with no constructor: these rely on implicit conversion from
// floate5m2_t to the wider float types.
f16 = b;
|
|
f32 = b;
|
|
f64 = b;
|
|
|
|
// Loads of the scalar, 2-component and 3-component members of the buffer block.
b = buf.b1;
|
|
b2 = buf.b2;
|
|
fe5m2vec3 b3 = buf.b3;
|
|
|
|
// 16x16 subgroup-scope cooperative matrix (use A) with floate5m2_t components,
// constructed from the scalar 3.0.
coopmat<floate5m2_t, gl_ScopeSubgroup, 16, 16, gl_MatrixUseA> cmA = coopmat<floate5m2_t, gl_ScopeSubgroup, 16, 16, gl_MatrixUseA>(3.0);
|
|
|
|
// Same scope, dimensions and use with float components, constructed from cmA,
// i.e. a component-type conversion of the whole matrix.
coopmat<float, gl_ScopeSubgroup, 16, 16, gl_MatrixUseA> cmAf = coopmat<float, gl_ScopeSubgroup, 16, 16, gl_MatrixUseA>(cmA);
|
|
|
|
// Calls passing a floate5m2_t argument; return values are discarded. The second
// and third calls target float32_t / float64_t parameters, so the argument is
// implicitly converted.
funcfe5m2(b);
|
|
funcf32(b);
|
|
funcf64(b);
|
|
|
|
// floate5m2 -> signed 8-bit integer built-in for scalar and 2/3/4-component
// vectors. The 3- and 4-component arguments are assembled from b2 and b.
// Presumably a bit reinterpretation rather than a numeric conversion - confirm
// against the extension specification.
int8_t i8_1 = floate5m2BitsToIntEXT(b);
|
|
i8vec2 i8_2 = floate5m2BitsToIntEXT(b2);
|
|
i8vec3 i8_3 = floate5m2BitsToIntEXT(fe5m2vec3(b2, b));
|
|
i8vec4 i8_4 = floate5m2BitsToIntEXT(fe5m2vec4(b2, b2));
|
|
|
|
// Same as above, with unsigned 8-bit results.
uint8_t u8_1 = floate5m2BitsToUintEXT(b);
|
|
u8vec2 u8_2 = floate5m2BitsToUintEXT(b2);
|
|
u8vec3 u8_3 = floate5m2BitsToUintEXT(fe5m2vec3(b2, b));
|
|
u8vec4 u8_4 = floate5m2BitsToUintEXT(fe5m2vec4(b2, b2));
|
|
|
|
// Reverse direction: signed 8-bit integers back to floate5m2 scalar/vectors.
floate5m2_t f8_1 = intBitsToFloate5m2EXT(i8_1);
|
|
fe5m2vec2 f8_2 = intBitsToFloate5m2EXT(i8_2);
|
|
fe5m2vec3 f8_3 = intBitsToFloate5m2EXT(i8_3);
|
|
fe5m2vec4 f8_4 = intBitsToFloate5m2EXT(i8_4);
|
|
|
|
// Reverse direction from the unsigned values, overwriting the results above.
f8_1 = uintBitsToFloate5m2EXT(u8_1);
|
|
f8_2 = uintBitsToFloate5m2EXT(u8_2);
|
|
f8_3 = uintBitsToFloate5m2EXT(u8_3);
|
|
f8_4 = uintBitsToFloate5m2EXT(u8_4);
|
|
}
|
|
|
|
// Uses floate5m2 vectors with indexing, swizzles, struct member stores, the
// ternary operator and if/else, then returns the last value assigned to b.
//   v - array of two 4-component e5m2 vectors; it has no parameter qualifier, so
//       the store to v[i].z below modifies the function's own copy only.
//   i - index used both as an array index and as a vector component index.
// NOTE(review): func2 is not called anywhere in the visible source.
floate5m2_t func2(fe5m2vec4 v[2], int i)
|
|
{
|
|
floate5m2_t b;
|
|
// Narrow the 4-component vector to 2 components, then index it dynamically.
b = (fe5m2vec2(v[0]))[i];
|
|
// Same narrowing, followed by a swizzle selecting component y.
b = (fe5m2vec2(v[1])).y;
|
|
|
|
// Dynamic array index combined with a swizzle store and a dynamic component load.
v[i].z = v[i][i];
|
|
|
|
// Store into a struct member of type floate5m2_t; s is not read afterwards.
S s;
|
|
s.b = b;
|
|
|
|
// Ternary selection between the current value and a freshly constructed constant.
b = (i != 0) ? b : floate5m2_t(2);
|
|
|
|
// NOTE(review): i == 2 would be past the end of the two-element array v at
// the v[i] accesses above; presumably irrelevant because this function is
// never called - confirm.
if (i == 2) {
|
|
b = v[0].x;
|
|
} else {
|
|
b = v[1].y;
|
|
}
|
|
|
|
return b;
|
|
}
|
|
|
|
// Calls saturatedConvertEXT with scalar, vector, integer and cooperative-matrix
// arguments, and also performs plain coopmat constructor conversions from
// float16_t matrices to floate5m2_t matrices.
// Presumably saturatedConvertEXT(dst, src) stores a saturating conversion of
// src into dst - confirm against the extension specification.
// NOTE(review): u, i, matf16A and matf16C are read without being initialized,
// and this function is not called anywhere in the visible source.
void saturatedconverts()
|
|
{
|
|
float f = 1.0;
|
|
vec2 f2 = vec2(f);
|
|
floate5m2_t b;
|
|
fe5m2vec2 b2;
|
|
uint32_t u;
|
|
int32_t i;
|
|
// Scalar float, 2-component float vector, unsigned integer and signed integer sources.
saturatedConvertEXT(b, f);
|
|
saturatedConvertEXT(b2, f2);
|
|
saturatedConvertEXT(b, u);
|
|
saturatedConvertEXT(b, i);
|
|
|
|
// 16x16 subgroup-scope matrices: float16_t with accumulator use, float16_t with
// use A, and floate5m2_t with use A as the conversion destination.
coopmat<float16_t, gl_ScopeSubgroup, 16, 16, gl_MatrixUseAccumulator> matf16C;
|
|
coopmat<float16_t, gl_ScopeSubgroup, 16, 16, gl_MatrixUseA> matf16A;
|
|
coopmat<floate5m2_t, gl_ScopeSubgroup, 16, 16, gl_MatrixUseA> mate5m2A;
|
|
|
|
// Constructor conversions to the e5m2 use-A matrix: first from a matrix with
// the same use, then from one with accumulator use (the use changes as well as
// the component type).
mate5m2A = coopmat<floate5m2_t, gl_ScopeSubgroup, 16, 16, gl_MatrixUseA>(matf16A);
|
|
mate5m2A = coopmat<floate5m2_t, gl_ScopeSubgroup, 16, 16, gl_MatrixUseA>(matf16C);
|
|
// The same two source matrices, converted through saturatedConvertEXT instead.
saturatedConvertEXT(mate5m2A, matf16A);
|
|
saturatedConvertEXT(mate5m2A, matf16C);
|
|
}
|