forked from ShuriZma/suyu
1
0
Fork 0

Revert "global endpoints"

This reverts commit d8f5bfd1df2b7469ef6abcee182aa110602d1751.
This commit is contained in:
Ameer J 2023-08-01 20:53:25 -04:00
parent c077e467c4
commit 553dd3e120
1 changed file with 40 additions and 36 deletions

View File

@ -94,8 +94,6 @@ uint result_index = 0;
uint result_vector_max_index; uint result_vector_max_index;
bool result_limit_reached = false; bool result_limit_reached = false;
uvec4 endpoints[2][4];
// EncodingData helpers // EncodingData helpers
uint Encoding(EncodingData val) { uint Encoding(EncodingData val) {
return bitfieldExtract(val.data, 0, 8); return bitfieldExtract(val.data, 0, 8);
@ -675,7 +673,7 @@ ivec4 BlueContract(int a, int r, int g, int b) {
return ivec4(a, (r + b) >> 1, (g + b) >> 1, b); return ivec4(a, (r + b) >> 1, (g + b) >> 1, b);
} }
void ComputeEndpoints(uint ep_index, uint color_endpoint_mode, void ComputeEndpoints(out uvec4 ep1, out uvec4 ep2, uint color_endpoint_mode,
inout uint colvals_index) { inout uint colvals_index) {
#define READ_UINT_VALUES(N) \ #define READ_UINT_VALUES(N) \
uint v[N]; \ uint v[N]; \
@ -694,22 +692,22 @@ void ComputeEndpoints(uint ep_index, uint color_endpoint_mode,
switch (color_endpoint_mode) { switch (color_endpoint_mode) {
case 0: { case 0: {
READ_UINT_VALUES(2) READ_UINT_VALUES(2)
endpoints[0][ep_index] = uvec4(0xFF, v[0], v[0], v[0]); ep1 = uvec4(0xFF, v[0], v[0], v[0]);
endpoints[1][ep_index] = uvec4(0xFF, v[1], v[1], v[1]); ep2 = uvec4(0xFF, v[1], v[1], v[1]);
break; break;
} }
case 1: { case 1: {
READ_UINT_VALUES(2) READ_UINT_VALUES(2)
const uint L0 = (v[0] >> 2) | (v[1] & 0xC0); const uint L0 = (v[0] >> 2) | (v[1] & 0xC0);
const uint L1 = min(L0 + (v[1] & 0x3F), 0xFFU); const uint L1 = min(L0 + (v[1] & 0x3F), 0xFFU);
endpoints[0][ep_index] = uvec4(0xFF, L0, L0, L0); ep1 = uvec4(0xFF, L0, L0, L0);
endpoints[1][ep_index] = uvec4(0xFF, L1, L1, L1); ep2 = uvec4(0xFF, L1, L1, L1);
break; break;
} }
case 4: { case 4: {
READ_UINT_VALUES(4) READ_UINT_VALUES(4)
endpoints[0][ep_index] = uvec4(v[2], v[0], v[0], v[0]); ep1 = uvec4(v[2], v[0], v[0], v[0]);
endpoints[1][ep_index] = uvec4(v[3], v[1], v[1], v[1]); ep2 = uvec4(v[3], v[1], v[1], v[1]);
break; break;
} }
case 5: { case 5: {
@ -720,24 +718,24 @@ void ComputeEndpoints(uint ep_index, uint color_endpoint_mode,
transferred = BitTransferSigned(v[3], v[2]); transferred = BitTransferSigned(v[3], v[2]);
v[3] = transferred.x; v[3] = transferred.x;
v[2] = transferred.y; v[2] = transferred.y;
endpoints[0][ep_index] = ClampByte(ivec4(v[2], v[0], v[0], v[0])); ep1 = ClampByte(ivec4(v[2], v[0], v[0], v[0]));
endpoints[1][ep_index] = ClampByte(ivec4(v[2] + v[3], v[0] + v[1], v[0] + v[1], v[0] + v[1])); ep2 = ClampByte(ivec4(v[2] + v[3], v[0] + v[1], v[0] + v[1], v[0] + v[1]));
break; break;
} }
case 6: { case 6: {
READ_UINT_VALUES(4) READ_UINT_VALUES(4)
endpoints[0][ep_index] = uvec4(0xFF, (v[0] * v[3]) >> 8, (v[1] * v[3]) >> 8, (v[2] * v[3]) >> 8); ep1 = uvec4(0xFF, (v[0] * v[3]) >> 8, (v[1] * v[3]) >> 8, (v[2] * v[3]) >> 8);
endpoints[1][ep_index] = uvec4(0xFF, v[0], v[1], v[2]); ep2 = uvec4(0xFF, v[0], v[1], v[2]);
break; break;
} }
case 8: { case 8: {
READ_UINT_VALUES(6) READ_UINT_VALUES(6)
if ((v[1] + v[3] + v[5]) >= (v[0] + v[2] + v[4])) { if ((v[1] + v[3] + v[5]) >= (v[0] + v[2] + v[4])) {
endpoints[0][ep_index] = uvec4(0xFF, v[0], v[2], v[4]); ep1 = uvec4(0xFF, v[0], v[2], v[4]);
endpoints[1][ep_index] = uvec4(0xFF, v[1], v[3], v[5]); ep2 = uvec4(0xFF, v[1], v[3], v[5]);
} else { } else {
endpoints[0][ep_index] = uvec4(BlueContract(0xFF, int(v[1]), int(v[3]), int(v[5]))); ep1 = uvec4(BlueContract(0xFF, int(v[1]), int(v[3]), int(v[5])));
endpoints[1][ep_index] = uvec4(BlueContract(0xFF, int(v[0]), int(v[2]), int(v[4]))); ep2 = uvec4(BlueContract(0xFF, int(v[0]), int(v[2]), int(v[4])));
} }
break; break;
} }
@ -753,28 +751,28 @@ void ComputeEndpoints(uint ep_index, uint color_endpoint_mode,
v[5] = transferred.x; v[5] = transferred.x;
v[4] = transferred.y; v[4] = transferred.y;
if ((v[1] + v[3] + v[5]) >= 0) { if ((v[1] + v[3] + v[5]) >= 0) {
endpoints[0][ep_index] = ClampByte(ivec4(0xFF, v[0], v[2], v[4])); ep1 = ClampByte(ivec4(0xFF, v[0], v[2], v[4]));
endpoints[1][ep_index] = ClampByte(ivec4(0xFF, v[0] + v[1], v[2] + v[3], v[4] + v[5])); ep2 = ClampByte(ivec4(0xFF, v[0] + v[1], v[2] + v[3], v[4] + v[5]));
} else { } else {
endpoints[0][ep_index] = ClampByte(BlueContract(0xFF, v[0] + v[1], v[2] + v[3], v[4] + v[5])); ep1 = ClampByte(BlueContract(0xFF, v[0] + v[1], v[2] + v[3], v[4] + v[5]));
endpoints[1][ep_index] = ClampByte(BlueContract(0xFF, v[0], v[2], v[4])); ep2 = ClampByte(BlueContract(0xFF, v[0], v[2], v[4]));
} }
break; break;
} }
case 10: { case 10: {
READ_UINT_VALUES(6) READ_UINT_VALUES(6)
endpoints[0][ep_index] = uvec4(v[4], (v[0] * v[3]) >> 8, (v[1] * v[3]) >> 8, (v[2] * v[3]) >> 8); ep1 = uvec4(v[4], (v[0] * v[3]) >> 8, (v[1] * v[3]) >> 8, (v[2] * v[3]) >> 8);
endpoints[1][ep_index] = uvec4(v[5], v[0], v[1], v[2]); ep2 = uvec4(v[5], v[0], v[1], v[2]);
break; break;
} }
case 12: { case 12: {
READ_UINT_VALUES(8) READ_UINT_VALUES(8)
if ((v[1] + v[3] + v[5]) >= (v[0] + v[2] + v[4])) { if ((v[1] + v[3] + v[5]) >= (v[0] + v[2] + v[4])) {
endpoints[0][ep_index] = uvec4(v[6], v[0], v[2], v[4]); ep1 = uvec4(v[6], v[0], v[2], v[4]);
endpoints[1][ep_index] = uvec4(v[7], v[1], v[3], v[5]); ep2 = uvec4(v[7], v[1], v[3], v[5]);
} else { } else {
endpoints[0][ep_index] = uvec4(BlueContract(int(v[7]), int(v[1]), int(v[3]), int(v[5]))); ep1 = uvec4(BlueContract(int(v[7]), int(v[1]), int(v[3]), int(v[5])));
endpoints[1][ep_index] = uvec4(BlueContract(int(v[6]), int(v[0]), int(v[2]), int(v[4]))); ep2 = uvec4(BlueContract(int(v[6]), int(v[0]), int(v[2]), int(v[4])));
} }
break; break;
} }
@ -796,18 +794,18 @@ void ComputeEndpoints(uint ep_index, uint color_endpoint_mode,
v[6] = transferred.y; v[6] = transferred.y;
if ((v[1] + v[3] + v[5]) >= 0) { if ((v[1] + v[3] + v[5]) >= 0) {
endpoints[0][ep_index] = ClampByte(ivec4(v[6], v[0], v[2], v[4])); ep1 = ClampByte(ivec4(v[6], v[0], v[2], v[4]));
endpoints[1][ep_index] = ClampByte(ivec4(v[7] + v[6], v[0] + v[1], v[2] + v[3], v[4] + v[5])); ep2 = ClampByte(ivec4(v[7] + v[6], v[0] + v[1], v[2] + v[3], v[4] + v[5]));
} else { } else {
endpoints[0][ep_index] = ClampByte(BlueContract(v[6] + v[7], v[0] + v[1], v[2] + v[3], v[4] + v[5])); ep1 = ClampByte(BlueContract(v[6] + v[7], v[0] + v[1], v[2] + v[3], v[4] + v[5]));
endpoints[1][ep_index] = ClampByte(BlueContract(v[6], v[0], v[2], v[4])); ep2 = ClampByte(BlueContract(v[6], v[0], v[2], v[4]));
} }
break; break;
} }
default: { default: {
// HDR mode, or more likely a bug computing the color_endpoint_mode // HDR mode, or more likely a bug computing the color_endpoint_mode
endpoints[0][ep_index] = uvec4(0xFF, 0xFF, 0, 0); ep1 = uvec4(0xFF, 0xFF, 0, 0);
endpoints[1][ep_index] = uvec4(0xFF, 0xFF, 0, 0); ep2 = uvec4(0xFF, 0xFF, 0, 0);
break; break;
} }
} }
@ -1200,6 +1198,10 @@ void DecompressBlock(ivec3 coord) {
color_endpoint_mode[i] = cem; color_endpoint_mode[i] = cem;
} }
} }
uvec4 endpoints0[4];
uvec4 endpoints1[4];
{
// This decode phase should at most push 32 elements into the vector // This decode phase should at most push 32 elements into the vector
result_vector_max_index = 32; result_vector_max_index = 32;
@ -1207,8 +1209,10 @@ void DecompressBlock(ivec3 coord) {
uint colvals_index = 0; uint colvals_index = 0;
DecodeColorValues(color_endpoint_mode, num_partitions, color_data_bits); DecodeColorValues(color_endpoint_mode, num_partitions, color_data_bits);
for (uint i = 0; i < num_partitions; i++) { for (uint i = 0; i < num_partitions; i++) {
ComputeEndpoints(i, color_endpoint_mode[i], colvals_index); ComputeEndpoints(endpoints0[i], endpoints1[i], color_endpoint_mode[i],
colvals_index);
} }
}
color_endpoint_data = local_buff; color_endpoint_data = local_buff;
color_endpoint_data = bitfieldReverse(color_endpoint_data).wzyx; color_endpoint_data = bitfieldReverse(color_endpoint_data).wzyx;
const uint clear_byte_start = (weight_bits >> 3) + 1; const uint clear_byte_start = (weight_bits >> 3) + 1;
@ -1243,8 +1247,8 @@ void DecompressBlock(ivec3 coord) {
local_partition = Select2DPartition(partition_index, i, j, num_partitions, local_partition = Select2DPartition(partition_index, i, j, num_partitions,
(block_dims.y * block_dims.x) < 32); (block_dims.y * block_dims.x) < 32);
} }
const uvec4 C0 = ReplicateByteTo16(endpoints[0][local_partition]); const uvec4 C0 = ReplicateByteTo16(endpoints0[local_partition]);
const uvec4 C1 = ReplicateByteTo16(endpoints[1][local_partition]); const uvec4 C1 = ReplicateByteTo16(endpoints1[local_partition]);
const uint weight_offset = (j * block_dims.x + i); const uint weight_offset = (j * block_dims.x + i);
const uint array_index = weight_offset / 4; const uint array_index = weight_offset / 4;
const uint vector_index = bfe(weight_offset, 0, 2); const uint vector_index = bfe(weight_offset, 0, 2);