mirror of https://git.suyu.dev/suyu/suyu
Merge pull request #6496 from ameerj/astc-fixes
astc: Various robustness enhancements for the gpu decoder
This commit is contained in:
commit
c805c0b395
|
@ -11,12 +11,8 @@
|
||||||
#define UNIFORM(n)
|
#define UNIFORM(n)
|
||||||
#define BINDING_INPUT_BUFFER 0
|
#define BINDING_INPUT_BUFFER 0
|
||||||
#define BINDING_ENC_BUFFER 1
|
#define BINDING_ENC_BUFFER 1
|
||||||
#define BINDING_6_TO_8_BUFFER 2
|
#define BINDING_SWIZZLE_BUFFER 2
|
||||||
#define BINDING_7_TO_8_BUFFER 3
|
#define BINDING_OUTPUT_IMAGE 3
|
||||||
#define BINDING_8_TO_8_BUFFER 4
|
|
||||||
#define BINDING_BYTE_TO_16_BUFFER 5
|
|
||||||
#define BINDING_SWIZZLE_BUFFER 6
|
|
||||||
#define BINDING_OUTPUT_IMAGE 7
|
|
||||||
|
|
||||||
#else // ^^^ Vulkan ^^^ // vvv OpenGL vvv
|
#else // ^^^ Vulkan ^^^ // vvv OpenGL vvv
|
||||||
|
|
||||||
|
@ -26,10 +22,6 @@
|
||||||
#define BINDING_SWIZZLE_BUFFER 0
|
#define BINDING_SWIZZLE_BUFFER 0
|
||||||
#define BINDING_INPUT_BUFFER 1
|
#define BINDING_INPUT_BUFFER 1
|
||||||
#define BINDING_ENC_BUFFER 2
|
#define BINDING_ENC_BUFFER 2
|
||||||
#define BINDING_6_TO_8_BUFFER 3
|
|
||||||
#define BINDING_7_TO_8_BUFFER 4
|
|
||||||
#define BINDING_8_TO_8_BUFFER 5
|
|
||||||
#define BINDING_BYTE_TO_16_BUFFER 6
|
|
||||||
#define BINDING_OUTPUT_IMAGE 0
|
#define BINDING_OUTPUT_IMAGE 0
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
@ -76,19 +68,6 @@ layout(binding = BINDING_INPUT_BUFFER, std430) readonly buffer InputBufferU32 {
|
||||||
layout(binding = BINDING_ENC_BUFFER, std430) readonly buffer EncodingsValues {
|
layout(binding = BINDING_ENC_BUFFER, std430) readonly buffer EncodingsValues {
|
||||||
EncodingData encoding_values[];
|
EncodingData encoding_values[];
|
||||||
};
|
};
|
||||||
// ASTC Precompiled tables
|
|
||||||
layout(binding = BINDING_6_TO_8_BUFFER, std430) readonly buffer REPLICATE_6_BIT_TO_8 {
|
|
||||||
uint REPLICATE_6_BIT_TO_8_TABLE[];
|
|
||||||
};
|
|
||||||
layout(binding = BINDING_7_TO_8_BUFFER, std430) readonly buffer REPLICATE_7_BIT_TO_8 {
|
|
||||||
uint REPLICATE_7_BIT_TO_8_TABLE[];
|
|
||||||
};
|
|
||||||
layout(binding = BINDING_8_TO_8_BUFFER, std430) readonly buffer REPLICATE_8_BIT_TO_8 {
|
|
||||||
uint REPLICATE_8_BIT_TO_8_TABLE[];
|
|
||||||
};
|
|
||||||
layout(binding = BINDING_BYTE_TO_16_BUFFER, std430) readonly buffer REPLICATE_BYTE_TO_16 {
|
|
||||||
uint REPLICATE_BYTE_TO_16_TABLE[];
|
|
||||||
};
|
|
||||||
|
|
||||||
layout(binding = BINDING_OUTPUT_IMAGE, rgba8) uniform writeonly image2DArray dest_image;
|
layout(binding = BINDING_OUTPUT_IMAGE, rgba8) uniform writeonly image2DArray dest_image;
|
||||||
|
|
||||||
|
@ -139,6 +118,19 @@ const uint REPLICATE_4_BIT_TO_6_TABLE[16] =
|
||||||
const uint REPLICATE_5_BIT_TO_6_TABLE[32] =
|
const uint REPLICATE_5_BIT_TO_6_TABLE[32] =
|
||||||
uint[](0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 33, 35, 37, 39, 41, 43, 45,
|
uint[](0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 33, 35, 37, 39, 41, 43, 45,
|
||||||
47, 49, 51, 53, 55, 57, 59, 61, 63);
|
47, 49, 51, 53, 55, 57, 59, 61, 63);
|
||||||
|
const uint REPLICATE_6_BIT_TO_8_TABLE[64] =
|
||||||
|
uint[](0, 4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60, 65, 69, 73, 77, 81, 85, 89,
|
||||||
|
93, 97, 101, 105, 109, 113, 117, 121, 125, 130, 134, 138, 142, 146, 150, 154, 158, 162,
|
||||||
|
166, 170, 174, 178, 182, 186, 190, 195, 199, 203, 207, 211, 215, 219, 223, 227, 231, 235,
|
||||||
|
239, 243, 247, 251, 255);
|
||||||
|
const uint REPLICATE_7_BIT_TO_8_TABLE[128] =
|
||||||
|
uint[](0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 32, 34, 36, 38, 40, 42, 44,
|
||||||
|
46, 48, 50, 52, 54, 56, 58, 60, 62, 64, 66, 68, 70, 72, 74, 76, 78, 80, 82, 84, 86, 88,
|
||||||
|
90, 92, 94, 96, 98, 100, 102, 104, 106, 108, 110, 112, 114, 116, 118, 120, 122, 124, 126,
|
||||||
|
129, 131, 133, 135, 137, 139, 141, 143, 145, 147, 149, 151, 153, 155, 157, 159, 161, 163,
|
||||||
|
165, 167, 169, 171, 173, 175, 177, 179, 181, 183, 185, 187, 189, 191, 193, 195, 197, 199,
|
||||||
|
201, 203, 205, 207, 209, 211, 213, 215, 217, 219, 221, 223, 225, 227, 229, 231, 233, 235,
|
||||||
|
237, 239, 241, 243, 245, 247, 249, 251, 253, 255);
|
||||||
|
|
||||||
// Input ASTC texture globals
|
// Input ASTC texture globals
|
||||||
uint current_index = 0;
|
uint current_index = 0;
|
||||||
|
@ -207,8 +199,7 @@ uint Replicate(uint val, uint num_bits, uint to_bit) {
|
||||||
}
|
}
|
||||||
|
|
||||||
uvec4 ReplicateByteTo16(uvec4 value) {
|
uvec4 ReplicateByteTo16(uvec4 value) {
|
||||||
return uvec4(REPLICATE_BYTE_TO_16_TABLE[value.x], REPLICATE_BYTE_TO_16_TABLE[value.y],
|
return value * 0x101;
|
||||||
REPLICATE_BYTE_TO_16_TABLE[value.z], REPLICATE_BYTE_TO_16_TABLE[value.w]);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
uint ReplicateBitTo7(uint value) {
|
uint ReplicateBitTo7(uint value) {
|
||||||
|
@ -236,7 +227,7 @@ uint FastReplicateTo8(uint value, uint num_bits) {
|
||||||
case 7:
|
case 7:
|
||||||
return REPLICATE_7_BIT_TO_8_TABLE[value];
|
return REPLICATE_7_BIT_TO_8_TABLE[value];
|
||||||
case 8:
|
case 8:
|
||||||
return REPLICATE_8_BIT_TO_8_TABLE[value];
|
return value;
|
||||||
}
|
}
|
||||||
return Replicate(value, num_bits, 8);
|
return Replicate(value, num_bits, 8);
|
||||||
}
|
}
|
||||||
|
@ -1327,6 +1318,9 @@ void main() {
|
||||||
offset += swizzle;
|
offset += swizzle;
|
||||||
|
|
||||||
const ivec3 coord = ivec3(gl_GlobalInvocationID * uvec3(block_dims, 1));
|
const ivec3 coord = ivec3(gl_GlobalInvocationID * uvec3(block_dims, 1));
|
||||||
|
if (any(greaterThanEqual(coord, imageSize(dest_image)))) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
uint block_index =
|
uint block_index =
|
||||||
pos.z * gl_WorkGroupSize.x * gl_WorkGroupSize.y + pos.y * gl_WorkGroupSize.x + pos.x;
|
pos.z * gl_WorkGroupSize.x * gl_WorkGroupSize.y + pos.y * gl_WorkGroupSize.x + pos.x;
|
||||||
|
|
||||||
|
|
|
@ -69,7 +69,8 @@ UtilShaders::UtilShaders(ProgramManager& program_manager_)
|
||||||
swizzle_table_buffer.Create();
|
swizzle_table_buffer.Create();
|
||||||
astc_buffer.Create();
|
astc_buffer.Create();
|
||||||
glNamedBufferStorage(swizzle_table_buffer.handle, sizeof(swizzle_table), &swizzle_table, 0);
|
glNamedBufferStorage(swizzle_table_buffer.handle, sizeof(swizzle_table), &swizzle_table, 0);
|
||||||
glNamedBufferStorage(astc_buffer.handle, sizeof(ASTC_BUFFER_DATA), &ASTC_BUFFER_DATA, 0);
|
glNamedBufferStorage(astc_buffer.handle, sizeof(ASTC_ENCODINGS_VALUES), &ASTC_ENCODINGS_VALUES,
|
||||||
|
0);
|
||||||
}
|
}
|
||||||
|
|
||||||
UtilShaders::~UtilShaders() = default;
|
UtilShaders::~UtilShaders() = default;
|
||||||
|
@ -79,12 +80,6 @@ void UtilShaders::ASTCDecode(Image& image, const ImageBufferMap& map,
|
||||||
static constexpr GLuint BINDING_SWIZZLE_BUFFER = 0;
|
static constexpr GLuint BINDING_SWIZZLE_BUFFER = 0;
|
||||||
static constexpr GLuint BINDING_INPUT_BUFFER = 1;
|
static constexpr GLuint BINDING_INPUT_BUFFER = 1;
|
||||||
static constexpr GLuint BINDING_ENC_BUFFER = 2;
|
static constexpr GLuint BINDING_ENC_BUFFER = 2;
|
||||||
|
|
||||||
static constexpr GLuint BINDING_6_TO_8_BUFFER = 3;
|
|
||||||
static constexpr GLuint BINDING_7_TO_8_BUFFER = 4;
|
|
||||||
static constexpr GLuint BINDING_8_TO_8_BUFFER = 5;
|
|
||||||
static constexpr GLuint BINDING_BYTE_TO_16_BUFFER = 6;
|
|
||||||
|
|
||||||
static constexpr GLuint BINDING_OUTPUT_IMAGE = 0;
|
static constexpr GLuint BINDING_OUTPUT_IMAGE = 0;
|
||||||
|
|
||||||
const Extent2D tile_size{
|
const Extent2D tile_size{
|
||||||
|
@ -93,21 +88,7 @@ void UtilShaders::ASTCDecode(Image& image, const ImageBufferMap& map,
|
||||||
};
|
};
|
||||||
program_manager.BindHostCompute(astc_decoder_program.handle);
|
program_manager.BindHostCompute(astc_decoder_program.handle);
|
||||||
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, BINDING_SWIZZLE_BUFFER, swizzle_table_buffer.handle);
|
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, BINDING_SWIZZLE_BUFFER, swizzle_table_buffer.handle);
|
||||||
glBindBufferRange(GL_SHADER_STORAGE_BUFFER, BINDING_ENC_BUFFER, astc_buffer.handle,
|
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, BINDING_ENC_BUFFER, astc_buffer.handle);
|
||||||
offsetof(AstcBufferData, encoding_values),
|
|
||||||
sizeof(AstcBufferData::encoding_values));
|
|
||||||
glBindBufferRange(GL_SHADER_STORAGE_BUFFER, BINDING_6_TO_8_BUFFER, astc_buffer.handle,
|
|
||||||
offsetof(AstcBufferData, replicate_6_to_8),
|
|
||||||
sizeof(AstcBufferData::replicate_6_to_8));
|
|
||||||
glBindBufferRange(GL_SHADER_STORAGE_BUFFER, BINDING_7_TO_8_BUFFER, astc_buffer.handle,
|
|
||||||
offsetof(AstcBufferData, replicate_7_to_8),
|
|
||||||
sizeof(AstcBufferData::replicate_7_to_8));
|
|
||||||
glBindBufferRange(GL_SHADER_STORAGE_BUFFER, BINDING_8_TO_8_BUFFER, astc_buffer.handle,
|
|
||||||
offsetof(AstcBufferData, replicate_8_to_8),
|
|
||||||
sizeof(AstcBufferData::replicate_8_to_8));
|
|
||||||
glBindBufferRange(GL_SHADER_STORAGE_BUFFER, BINDING_BYTE_TO_16_BUFFER, astc_buffer.handle,
|
|
||||||
offsetof(AstcBufferData, replicate_byte_to_16),
|
|
||||||
sizeof(AstcBufferData::replicate_byte_to_16));
|
|
||||||
|
|
||||||
glFlushMappedNamedBufferRange(map.buffer, map.offset, image.guest_size_bytes);
|
glFlushMappedNamedBufferRange(map.buffer, map.offset, image.guest_size_bytes);
|
||||||
glUniform2ui(1, tile_size.width, tile_size.height);
|
glUniform2ui(1, tile_size.width, tile_size.height);
|
||||||
|
@ -137,6 +118,12 @@ void UtilShaders::ASTCDecode(Image& image, const ImageBufferMap& map,
|
||||||
|
|
||||||
glDispatchCompute(num_dispatches_x, num_dispatches_y, image.info.resources.layers);
|
glDispatchCompute(num_dispatches_x, num_dispatches_y, image.info.resources.layers);
|
||||||
}
|
}
|
||||||
|
// Precautionary barrier to ensure the compute shader is done decoding prior to texture access.
|
||||||
|
// GL_TEXTURE_FETCH_BARRIER_BIT and GL_SHADER_IMAGE_ACCESS_BARRIER_BIT are used in a separate
|
||||||
|
// glMemoryBarrier call by the texture cache runtime
|
||||||
|
glMemoryBarrier(GL_UNIFORM_BARRIER_BIT | GL_COMMAND_BARRIER_BIT | GL_PIXEL_BUFFER_BARRIER_BIT |
|
||||||
|
GL_TEXTURE_UPDATE_BARRIER_BIT | GL_BUFFER_UPDATE_BARRIER_BIT |
|
||||||
|
GL_SHADER_STORAGE_BARRIER_BIT | GL_CLIENT_MAPPED_BUFFER_BARRIER_BIT);
|
||||||
program_manager.RestoreGuestCompute();
|
program_manager.RestoreGuestCompute();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -30,19 +30,16 @@
|
||||||
namespace Vulkan {
|
namespace Vulkan {
|
||||||
|
|
||||||
using Tegra::Texture::SWIZZLE_TABLE;
|
using Tegra::Texture::SWIZZLE_TABLE;
|
||||||
using Tegra::Texture::ASTC::EncodingsValues;
|
using Tegra::Texture::ASTC::ASTC_ENCODINGS_VALUES;
|
||||||
using namespace Tegra::Texture::ASTC;
|
using namespace Tegra::Texture::ASTC;
|
||||||
|
|
||||||
namespace {
|
namespace {
|
||||||
|
|
||||||
constexpr u32 ASTC_BINDING_INPUT_BUFFER = 0;
|
constexpr u32 ASTC_BINDING_INPUT_BUFFER = 0;
|
||||||
constexpr u32 ASTC_BINDING_ENC_BUFFER = 1;
|
constexpr u32 ASTC_BINDING_ENC_BUFFER = 1;
|
||||||
constexpr u32 ASTC_BINDING_6_TO_8_BUFFER = 2;
|
constexpr u32 ASTC_BINDING_SWIZZLE_BUFFER = 2;
|
||||||
constexpr u32 ASTC_BINDING_7_TO_8_BUFFER = 3;
|
constexpr u32 ASTC_BINDING_OUTPUT_IMAGE = 3;
|
||||||
constexpr u32 ASTC_BINDING_8_TO_8_BUFFER = 4;
|
constexpr size_t ASTC_NUM_BINDINGS = 4;
|
||||||
constexpr u32 ASTC_BINDING_BYTE_TO_16_BUFFER = 5;
|
|
||||||
constexpr u32 ASTC_BINDING_SWIZZLE_BUFFER = 6;
|
|
||||||
constexpr u32 ASTC_BINDING_OUTPUT_IMAGE = 7;
|
|
||||||
|
|
||||||
VkPushConstantRange BuildComputePushConstantRange(std::size_t size) {
|
VkPushConstantRange BuildComputePushConstantRange(std::size_t size) {
|
||||||
return {
|
return {
|
||||||
|
@ -71,7 +68,7 @@ std::array<VkDescriptorSetLayoutBinding, 2> BuildInputOutputDescriptorSetBinding
|
||||||
}};
|
}};
|
||||||
}
|
}
|
||||||
|
|
||||||
std::array<VkDescriptorSetLayoutBinding, 8> BuildASTCDescriptorSetBindings() {
|
std::array<VkDescriptorSetLayoutBinding, ASTC_NUM_BINDINGS> BuildASTCDescriptorSetBindings() {
|
||||||
return {{
|
return {{
|
||||||
{
|
{
|
||||||
.binding = ASTC_BINDING_INPUT_BUFFER,
|
.binding = ASTC_BINDING_INPUT_BUFFER,
|
||||||
|
@ -87,34 +84,6 @@ std::array<VkDescriptorSetLayoutBinding, 8> BuildASTCDescriptorSetBindings() {
|
||||||
.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
|
.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
|
||||||
.pImmutableSamplers = nullptr,
|
.pImmutableSamplers = nullptr,
|
||||||
},
|
},
|
||||||
{
|
|
||||||
.binding = ASTC_BINDING_6_TO_8_BUFFER,
|
|
||||||
.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
|
|
||||||
.descriptorCount = 1,
|
|
||||||
.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
|
|
||||||
.pImmutableSamplers = nullptr,
|
|
||||||
},
|
|
||||||
{
|
|
||||||
.binding = ASTC_BINDING_7_TO_8_BUFFER,
|
|
||||||
.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
|
|
||||||
.descriptorCount = 1,
|
|
||||||
.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
|
|
||||||
.pImmutableSamplers = nullptr,
|
|
||||||
},
|
|
||||||
{
|
|
||||||
.binding = ASTC_BINDING_8_TO_8_BUFFER,
|
|
||||||
.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
|
|
||||||
.descriptorCount = 1,
|
|
||||||
.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
|
|
||||||
.pImmutableSamplers = nullptr,
|
|
||||||
},
|
|
||||||
{
|
|
||||||
.binding = ASTC_BINDING_BYTE_TO_16_BUFFER,
|
|
||||||
.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
|
|
||||||
.descriptorCount = 1,
|
|
||||||
.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
|
|
||||||
.pImmutableSamplers = nullptr,
|
|
||||||
},
|
|
||||||
{
|
{
|
||||||
.binding = ASTC_BINDING_SWIZZLE_BUFFER,
|
.binding = ASTC_BINDING_SWIZZLE_BUFFER,
|
||||||
.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
|
.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
|
||||||
|
@ -143,7 +112,8 @@ VkDescriptorUpdateTemplateEntryKHR BuildInputOutputDescriptorUpdateTemplate() {
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
std::array<VkDescriptorUpdateTemplateEntryKHR, 8> BuildASTCPassDescriptorUpdateTemplateEntry() {
|
std::array<VkDescriptorUpdateTemplateEntryKHR, ASTC_NUM_BINDINGS>
|
||||||
|
BuildASTCPassDescriptorUpdateTemplateEntry() {
|
||||||
return {{
|
return {{
|
||||||
{
|
{
|
||||||
.dstBinding = ASTC_BINDING_INPUT_BUFFER,
|
.dstBinding = ASTC_BINDING_INPUT_BUFFER,
|
||||||
|
@ -161,38 +131,6 @@ std::array<VkDescriptorUpdateTemplateEntryKHR, 8> BuildASTCPassDescriptorUpdateT
|
||||||
.offset = ASTC_BINDING_ENC_BUFFER * sizeof(DescriptorUpdateEntry),
|
.offset = ASTC_BINDING_ENC_BUFFER * sizeof(DescriptorUpdateEntry),
|
||||||
.stride = sizeof(DescriptorUpdateEntry),
|
.stride = sizeof(DescriptorUpdateEntry),
|
||||||
},
|
},
|
||||||
{
|
|
||||||
.dstBinding = ASTC_BINDING_6_TO_8_BUFFER,
|
|
||||||
.dstArrayElement = 0,
|
|
||||||
.descriptorCount = 1,
|
|
||||||
.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
|
|
||||||
.offset = ASTC_BINDING_6_TO_8_BUFFER * sizeof(DescriptorUpdateEntry),
|
|
||||||
.stride = sizeof(DescriptorUpdateEntry),
|
|
||||||
},
|
|
||||||
{
|
|
||||||
.dstBinding = ASTC_BINDING_7_TO_8_BUFFER,
|
|
||||||
.dstArrayElement = 0,
|
|
||||||
.descriptorCount = 1,
|
|
||||||
.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
|
|
||||||
.offset = ASTC_BINDING_7_TO_8_BUFFER * sizeof(DescriptorUpdateEntry),
|
|
||||||
.stride = sizeof(DescriptorUpdateEntry),
|
|
||||||
},
|
|
||||||
{
|
|
||||||
.dstBinding = ASTC_BINDING_8_TO_8_BUFFER,
|
|
||||||
.dstArrayElement = 0,
|
|
||||||
.descriptorCount = 1,
|
|
||||||
.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
|
|
||||||
.offset = ASTC_BINDING_8_TO_8_BUFFER * sizeof(DescriptorUpdateEntry),
|
|
||||||
.stride = sizeof(DescriptorUpdateEntry),
|
|
||||||
},
|
|
||||||
{
|
|
||||||
.dstBinding = ASTC_BINDING_BYTE_TO_16_BUFFER,
|
|
||||||
.dstArrayElement = 0,
|
|
||||||
.descriptorCount = 1,
|
|
||||||
.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
|
|
||||||
.offset = ASTC_BINDING_BYTE_TO_16_BUFFER * sizeof(DescriptorUpdateEntry),
|
|
||||||
.stride = sizeof(DescriptorUpdateEntry),
|
|
||||||
},
|
|
||||||
{
|
{
|
||||||
.dstBinding = ASTC_BINDING_SWIZZLE_BUFFER,
|
.dstBinding = ASTC_BINDING_SWIZZLE_BUFFER,
|
||||||
.dstArrayElement = 0,
|
.dstArrayElement = 0,
|
||||||
|
@ -222,15 +160,6 @@ struct AstcPushConstants {
|
||||||
u32 block_height_mask;
|
u32 block_height_mask;
|
||||||
};
|
};
|
||||||
|
|
||||||
struct AstcBufferData {
|
|
||||||
decltype(SWIZZLE_TABLE) swizzle_table_buffer = SWIZZLE_TABLE;
|
|
||||||
decltype(EncodingsValues) encoding_values = EncodingsValues;
|
|
||||||
decltype(REPLICATE_6_BIT_TO_8_TABLE) replicate_6_to_8 = REPLICATE_6_BIT_TO_8_TABLE;
|
|
||||||
decltype(REPLICATE_7_BIT_TO_8_TABLE) replicate_7_to_8 = REPLICATE_7_BIT_TO_8_TABLE;
|
|
||||||
decltype(REPLICATE_8_BIT_TO_8_TABLE) replicate_8_to_8 = REPLICATE_8_BIT_TO_8_TABLE;
|
|
||||||
decltype(REPLICATE_BYTE_TO_16_TABLE) replicate_byte_to_16 = REPLICATE_BYTE_TO_16_TABLE;
|
|
||||||
} constexpr ASTC_BUFFER_DATA;
|
|
||||||
|
|
||||||
} // Anonymous namespace
|
} // Anonymous namespace
|
||||||
|
|
||||||
VKComputePass::VKComputePass(const Device& device, VKDescriptorPool& descriptor_pool,
|
VKComputePass::VKComputePass(const Device& device, VKDescriptorPool& descriptor_pool,
|
||||||
|
@ -423,7 +352,7 @@ ASTCDecoderPass::ASTCDecoderPass(const Device& device_, VKScheduler& scheduler_,
|
||||||
ASTCDecoderPass::~ASTCDecoderPass() = default;
|
ASTCDecoderPass::~ASTCDecoderPass() = default;
|
||||||
|
|
||||||
void ASTCDecoderPass::MakeDataBuffer() {
|
void ASTCDecoderPass::MakeDataBuffer() {
|
||||||
constexpr size_t TOTAL_BUFFER_SIZE = sizeof(ASTC_BUFFER_DATA) + sizeof(SWIZZLE_TABLE);
|
constexpr size_t TOTAL_BUFFER_SIZE = sizeof(ASTC_ENCODINGS_VALUES) + sizeof(SWIZZLE_TABLE);
|
||||||
data_buffer = device.GetLogical().CreateBuffer(VkBufferCreateInfo{
|
data_buffer = device.GetLogical().CreateBuffer(VkBufferCreateInfo{
|
||||||
.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
|
.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
|
||||||
.pNext = nullptr,
|
.pNext = nullptr,
|
||||||
|
@ -437,9 +366,10 @@ void ASTCDecoderPass::MakeDataBuffer() {
|
||||||
data_buffer_commit = memory_allocator.Commit(data_buffer, MemoryUsage::Upload);
|
data_buffer_commit = memory_allocator.Commit(data_buffer, MemoryUsage::Upload);
|
||||||
|
|
||||||
const auto staging_ref = staging_buffer_pool.Request(TOTAL_BUFFER_SIZE, MemoryUsage::Upload);
|
const auto staging_ref = staging_buffer_pool.Request(TOTAL_BUFFER_SIZE, MemoryUsage::Upload);
|
||||||
std::memcpy(staging_ref.mapped_span.data(), &ASTC_BUFFER_DATA, sizeof(ASTC_BUFFER_DATA));
|
std::memcpy(staging_ref.mapped_span.data(), &ASTC_ENCODINGS_VALUES,
|
||||||
|
sizeof(ASTC_ENCODINGS_VALUES));
|
||||||
// Tack on the swizzle table at the end of the buffer
|
// Tack on the swizzle table at the end of the buffer
|
||||||
std::memcpy(staging_ref.mapped_span.data() + sizeof(ASTC_BUFFER_DATA), &SWIZZLE_TABLE,
|
std::memcpy(staging_ref.mapped_span.data() + sizeof(ASTC_ENCODINGS_VALUES), &SWIZZLE_TABLE,
|
||||||
sizeof(SWIZZLE_TABLE));
|
sizeof(SWIZZLE_TABLE));
|
||||||
|
|
||||||
scheduler.Record([src = staging_ref.buffer, offset = staging_ref.offset, dst = *data_buffer,
|
scheduler.Record([src = staging_ref.buffer, offset = staging_ref.offset, dst = *data_buffer,
|
||||||
|
@ -509,18 +439,8 @@ void ASTCDecoderPass::Assemble(Image& image, const StagingBufferRef& map,
|
||||||
update_descriptor_queue.Acquire();
|
update_descriptor_queue.Acquire();
|
||||||
update_descriptor_queue.AddBuffer(map.buffer, input_offset,
|
update_descriptor_queue.AddBuffer(map.buffer, input_offset,
|
||||||
image.guest_size_bytes - swizzle.buffer_offset);
|
image.guest_size_bytes - swizzle.buffer_offset);
|
||||||
update_descriptor_queue.AddBuffer(*data_buffer, offsetof(AstcBufferData, encoding_values),
|
update_descriptor_queue.AddBuffer(*data_buffer, 0, sizeof(ASTC_ENCODINGS_VALUES));
|
||||||
sizeof(AstcBufferData::encoding_values));
|
update_descriptor_queue.AddBuffer(*data_buffer, sizeof(ASTC_ENCODINGS_VALUES),
|
||||||
update_descriptor_queue.AddBuffer(*data_buffer, offsetof(AstcBufferData, replicate_6_to_8),
|
|
||||||
sizeof(AstcBufferData::replicate_6_to_8));
|
|
||||||
update_descriptor_queue.AddBuffer(*data_buffer, offsetof(AstcBufferData, replicate_7_to_8),
|
|
||||||
sizeof(AstcBufferData::replicate_7_to_8));
|
|
||||||
update_descriptor_queue.AddBuffer(*data_buffer, offsetof(AstcBufferData, replicate_8_to_8),
|
|
||||||
sizeof(AstcBufferData::replicate_8_to_8));
|
|
||||||
update_descriptor_queue.AddBuffer(*data_buffer,
|
|
||||||
offsetof(AstcBufferData, replicate_byte_to_16),
|
|
||||||
sizeof(AstcBufferData::replicate_byte_to_16));
|
|
||||||
update_descriptor_queue.AddBuffer(*data_buffer, sizeof(AstcBufferData),
|
|
||||||
sizeof(SWIZZLE_TABLE));
|
sizeof(SWIZZLE_TABLE));
|
||||||
update_descriptor_queue.AddImage(image.StorageImageView(swizzle.level));
|
update_descriptor_queue.AddImage(image.StorageImageView(swizzle.level));
|
||||||
|
|
||||||
|
@ -569,6 +489,7 @@ void ASTCDecoderPass::Assemble(Image& image, const StagingBufferRef& map,
|
||||||
cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
|
cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
|
||||||
VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, 0, image_barrier);
|
VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, 0, image_barrier);
|
||||||
});
|
});
|
||||||
|
scheduler.Finish();
|
||||||
}
|
}
|
||||||
|
|
||||||
} // namespace Vulkan
|
} // namespace Vulkan
|
||||||
|
|
|
@ -269,7 +269,7 @@ static void DecodeQuintBlock(InputBitStream& bits, IntegerEncodedVector& result,
|
||||||
static void DecodeIntegerSequence(IntegerEncodedVector& result, InputBitStream& bits, u32 maxRange,
|
static void DecodeIntegerSequence(IntegerEncodedVector& result, InputBitStream& bits, u32 maxRange,
|
||||||
u32 nValues) {
|
u32 nValues) {
|
||||||
// Determine encoding parameters
|
// Determine encoding parameters
|
||||||
IntegerEncodedValue val = EncodingsValues[maxRange];
|
IntegerEncodedValue val = ASTC_ENCODINGS_VALUES[maxRange];
|
||||||
|
|
||||||
// Start decoding
|
// Start decoding
|
||||||
u32 nValsDecoded = 0;
|
u32 nValsDecoded = 0;
|
||||||
|
@ -310,7 +310,7 @@ struct TexelWeightParams {
|
||||||
nIdxs *= 2;
|
nIdxs *= 2;
|
||||||
}
|
}
|
||||||
|
|
||||||
return EncodingsValues[m_MaxWeight].GetBitLength(nIdxs);
|
return ASTC_ENCODINGS_VALUES[m_MaxWeight].GetBitLength(nIdxs);
|
||||||
}
|
}
|
||||||
|
|
||||||
u32 GetNumWeightValues() const {
|
u32 GetNumWeightValues() const {
|
||||||
|
@ -551,6 +551,8 @@ static void FillError(std::span<u32> outBuf, u32 blockWidth, u32 blockHeight) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static constexpr auto REPLICATE_BYTE_TO_16_TABLE = MakeReplicateTable<u32, 8, 16>();
|
||||||
static constexpr u32 ReplicateByteTo16(std::size_t value) {
|
static constexpr u32 ReplicateByteTo16(std::size_t value) {
|
||||||
return REPLICATE_BYTE_TO_16_TABLE[value];
|
return REPLICATE_BYTE_TO_16_TABLE[value];
|
||||||
}
|
}
|
||||||
|
@ -753,12 +755,12 @@ static void DecodeColorValues(u32* out, std::span<u8> data, const u32* modes, co
|
||||||
// figure out the max value for each of them...
|
// figure out the max value for each of them...
|
||||||
u32 range = 256;
|
u32 range = 256;
|
||||||
while (--range > 0) {
|
while (--range > 0) {
|
||||||
IntegerEncodedValue val = EncodingsValues[range];
|
IntegerEncodedValue val = ASTC_ENCODINGS_VALUES[range];
|
||||||
u32 bitLength = val.GetBitLength(nValues);
|
u32 bitLength = val.GetBitLength(nValues);
|
||||||
if (bitLength <= nBitsForColorData) {
|
if (bitLength <= nBitsForColorData) {
|
||||||
// Find the smallest possible range that matches the given encoding
|
// Find the smallest possible range that matches the given encoding
|
||||||
while (--range > 0) {
|
while (--range > 0) {
|
||||||
IntegerEncodedValue newval = EncodingsValues[range];
|
IntegerEncodedValue newval = ASTC_ENCODINGS_VALUES[range];
|
||||||
if (!newval.MatchesEncoding(val)) {
|
if (!newval.MatchesEncoding(val)) {
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
|
@ -77,7 +77,7 @@ constexpr std::array<IntegerEncodedValue, 256> MakeEncodedValues() {
|
||||||
return encodings;
|
return encodings;
|
||||||
}
|
}
|
||||||
|
|
||||||
constexpr std::array<IntegerEncodedValue, 256> EncodingsValues = MakeEncodedValues();
|
constexpr std::array<IntegerEncodedValue, 256> ASTC_ENCODINGS_VALUES = MakeEncodedValues();
|
||||||
|
|
||||||
// Replicates low num_bits such that [(to_bit - 1):(to_bit - 1 - from_bit)]
|
// Replicates low num_bits such that [(to_bit - 1):(to_bit - 1 - from_bit)]
|
||||||
// is the same as [(num_bits - 1):0] and repeats all the way down.
|
// is the same as [(num_bits - 1):0] and repeats all the way down.
|
||||||
|
@ -116,19 +116,10 @@ constexpr auto MakeReplicateTable() {
|
||||||
return table;
|
return table;
|
||||||
}
|
}
|
||||||
|
|
||||||
constexpr auto REPLICATE_BYTE_TO_16_TABLE = MakeReplicateTable<u32, 8, 16>();
|
|
||||||
constexpr auto REPLICATE_6_BIT_TO_8_TABLE = MakeReplicateTable<u32, 6, 8>();
|
constexpr auto REPLICATE_6_BIT_TO_8_TABLE = MakeReplicateTable<u32, 6, 8>();
|
||||||
constexpr auto REPLICATE_7_BIT_TO_8_TABLE = MakeReplicateTable<u32, 7, 8>();
|
constexpr auto REPLICATE_7_BIT_TO_8_TABLE = MakeReplicateTable<u32, 7, 8>();
|
||||||
constexpr auto REPLICATE_8_BIT_TO_8_TABLE = MakeReplicateTable<u32, 8, 8>();
|
constexpr auto REPLICATE_8_BIT_TO_8_TABLE = MakeReplicateTable<u32, 8, 8>();
|
||||||
|
|
||||||
struct AstcBufferData {
|
|
||||||
decltype(EncodingsValues) encoding_values = EncodingsValues;
|
|
||||||
decltype(REPLICATE_6_BIT_TO_8_TABLE) replicate_6_to_8 = REPLICATE_6_BIT_TO_8_TABLE;
|
|
||||||
decltype(REPLICATE_7_BIT_TO_8_TABLE) replicate_7_to_8 = REPLICATE_7_BIT_TO_8_TABLE;
|
|
||||||
decltype(REPLICATE_8_BIT_TO_8_TABLE) replicate_8_to_8 = REPLICATE_8_BIT_TO_8_TABLE;
|
|
||||||
decltype(REPLICATE_BYTE_TO_16_TABLE) replicate_byte_to_16 = REPLICATE_BYTE_TO_16_TABLE;
|
|
||||||
} constexpr ASTC_BUFFER_DATA;
|
|
||||||
|
|
||||||
void Decompress(std::span<const uint8_t> data, uint32_t width, uint32_t height, uint32_t depth,
|
void Decompress(std::span<const uint8_t> data, uint32_t width, uint32_t height, uint32_t depth,
|
||||||
uint32_t block_width, uint32_t block_height, std::span<uint8_t> output);
|
uint32_t block_width, uint32_t block_height, std::span<uint8_t> output);
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue