forked from ShuriZma/suyu
1
0
Fork 0

GetUnquantizedWeightVector

This commit is contained in:
Ameer J 2023-08-09 17:21:33 -04:00
parent 9058486b9b
commit 70f8ffb787
1 changed file with 62 additions and 68 deletions

View File

@ -804,11 +804,7 @@ uint UnquantizeTexelWeight(EncodingData val) {
return result; return result;
} }
uvec4 unquantized_texel_weights[VECTOR_ARRAY_SIZE];
void UnquantizeTexelWeights(uvec2 size, bool is_dual_plane) { void UnquantizeTexelWeights(uvec2 size, bool is_dual_plane) {
const uint Ds = uint((block_dims.x * 0.5f + 1024) / (block_dims.x - 1));
const uint Dt = uint((block_dims.y * 0.5f + 1024) / (block_dims.y - 1));
const uint num_planes = is_dual_plane ? 2 : 1; const uint num_planes = is_dual_plane ? 2 : 1;
const uint area = size.x * size.y; const uint area = size.x * size.y;
const uint loop_count = min(result_index, area * num_planes); const uint loop_count = min(result_index, area * num_planes);
@ -818,9 +814,20 @@ void UnquantizeTexelWeights(uvec2 size, bool is_dual_plane) {
result_vector[array_index][vector_index] = result_vector[array_index][vector_index] =
UnquantizeTexelWeight(GetEncodingFromVector(itr)); UnquantizeTexelWeight(GetEncodingFromVector(itr));
} }
for (uint plane = 0; plane < num_planes; ++plane) { }
for (uint t = 0; t < block_dims.y; t++) {
for (uint s = 0; s < block_dims.x; s++) { uint GetUnquantizedTexelWieght(uint offset_base, uint plane, bool is_dual_plane) {
const uint offset = is_dual_plane ? 2 * offset_base + plane : offset_base;
const uint array_index = offset / 4;
const uint vector_index = offset % 4;
return result_vector[array_index][vector_index];
}
uvec4 GetUnquantizedWeightVector(uint t, uint s, uvec2 size, uint plane_index, bool is_dual_plane) {
const uint Ds = uint((block_dims.x * 0.5f + 1024) / (block_dims.x - 1));
const uint Dt = uint((block_dims.y * 0.5f + 1024) / (block_dims.y - 1));
const uint area = size.x * size.y;
const uint cs = Ds * s; const uint cs = Ds * s;
const uint ct = Dt * t; const uint ct = Dt * t;
const uint gs = (cs * (size.x - 1) + 32) >> 6; const uint gs = (cs * (size.x - 1) + 32) >> 6;
@ -836,40 +843,42 @@ void UnquantizeTexelWeights(uvec2 size, bool is_dual_plane) {
const uvec4 w = uvec4(w00, w01, w10, w11); const uvec4 w = uvec4(w00, w01, w10, w11);
const uint v0 = jt * size.x + js; const uint v0 = jt * size.x + js;
uvec4 p = uvec4(0); uvec4 p0 = uvec4(0);
uvec4 p1 = uvec4(0);
#define VectorIndicesFromBase(offset_base) \
const uint offset = is_dual_plane ? 2 * offset_base + plane : offset_base; \
const uint array_index = offset / 4; \
const uint vector_index = offset % 4;
if (v0 < area) { if (v0 < area) {
const uint offset_base = v0; const uint offset_base = v0;
VectorIndicesFromBase(offset_base); p0.x = GetUnquantizedTexelWieght(offset_base, 0, is_dual_plane);
p.x = result_vector[array_index][vector_index]; p1.x = GetUnquantizedTexelWieght(offset_base, 1, is_dual_plane);
} }
if ((v0 + 1) < (area)) { if ((v0 + 1) < (area)) {
const uint offset_base = v0 + 1; const uint offset_base = v0 + 1;
VectorIndicesFromBase(offset_base); p0.y = GetUnquantizedTexelWieght(offset_base, 0, is_dual_plane);
p.y = result_vector[array_index][vector_index]; p1.y = GetUnquantizedTexelWieght(offset_base, 1, is_dual_plane);
} }
if ((v0 + size.x) < (area)) { if ((v0 + size.x) < (area)) {
const uint offset_base = v0 + size.x; const uint offset_base = v0 + size.x;
VectorIndicesFromBase(offset_base); p0.z = GetUnquantizedTexelWieght(offset_base, 0, is_dual_plane);
p.z = result_vector[array_index][vector_index]; p1.z = GetUnquantizedTexelWieght(offset_base, 1, is_dual_plane);
} }
if ((v0 + size.x + 1) < (area)) { if ((v0 + size.x + 1) < (area)) {
const uint offset_base = v0 + size.x + 1; const uint offset_base = v0 + size.x + 1;
VectorIndicesFromBase(offset_base); p0.w = GetUnquantizedTexelWieght(offset_base, 0, is_dual_plane);
p.w = result_vector[array_index][vector_index]; p1.w = GetUnquantizedTexelWieght(offset_base, 1, is_dual_plane);
}
const uint offset = (t * block_dims.x + s) + ARRAY_NUM_ELEMENTS * plane;
const uint array_index = offset / 4;
const uint vector_index = offset % 4;
unquantized_texel_weights[array_index][vector_index] = (uint(dot(p, w)) + 8) >> 4;
} }
const uint primary_weight = (uint(dot(p0, w)) + 8) >> 4;
uvec4 weight_vec = uvec4(primary_weight);
if (is_dual_plane) {
const uint secondary_weight = (uint(dot(p1, w)) + 8) >> 4;
for (uint c = 0; c < 4; c++) {
const bool is_secondary = ((plane_index + 1u) & 3u) == c;
weight_vec[c] = is_secondary ? secondary_weight : primary_weight;
} }
} }
return weight_vec;
} }
int FindLayout(uint mode) { int FindLayout(uint mode) {
@ -1155,25 +1164,10 @@ void DecompressBlock(ivec3 coord) {
} }
const uvec4 C0 = ReplicateByteTo16(endpoints0[local_partition]); const uvec4 C0 = ReplicateByteTo16(endpoints0[local_partition]);
const uvec4 C1 = ReplicateByteTo16(endpoints1[local_partition]); const uvec4 C1 = ReplicateByteTo16(endpoints1[local_partition]);
const uint weight_offset = (j * block_dims.x + i); const uvec4 weight_vec = GetUnquantizedWeightVector(j, i, size_params, plane_index, dual_plane);
const uint array_index = weight_offset / 4;
const uint vector_index = weight_offset % 4;
const uint primary_weight = unquantized_texel_weights[array_index][vector_index];
uvec4 weight_vec = uvec4(primary_weight);
if (dual_plane) {
const uint secondary_weight_offset = (j * block_dims.x + i) + ARRAY_NUM_ELEMENTS;
const uint secondary_array_index = secondary_weight_offset / 4;
const uint secondary_vector_index = secondary_weight_offset % 4;
const uint secondary_weight =
unquantized_texel_weights[secondary_array_index][secondary_vector_index];
for (uint c = 0; c < 4; c++) {
const bool is_secondary = ((plane_index + 1u) & 3u) == c;
weight_vec[c] = is_secondary ? secondary_weight : primary_weight;
}
}
const vec4 Cf = const vec4 Cf =
vec4((C0 * (uvec4(64) - weight_vec) + C1 * weight_vec + uvec4(32)) / 64); vec4((C0 * (uvec4(64) - weight_vec) + C1 * weight_vec + uvec4(32)) / 64);
const vec4 p = (Cf / 65535.0); const vec4 p = (Cf / 65535.0f);
imageStore(dest_image, coord + ivec3(i, j, 0), p.gbar); imageStore(dest_image, coord + ivec3(i, j, 0), p.gbar);
} }
} }