mirror of https://git.suyu.dev/suyu/suyu
astc_decoder: Optimize the use of EncodingData
This buffer was a list of EncodingData structures sorted by their bit length, with some duplication carried over from the CPU decoder implementation. We can take advantage of its sorted property to optimize its usage in the shader. Thanks to wwylele for the optimization idea.
This commit is contained in:
parent
15c0c213b1
commit
5665d05547
|
@ -10,18 +10,16 @@
|
||||||
#define END_PUSH_CONSTANTS };
|
#define END_PUSH_CONSTANTS };
|
||||||
#define UNIFORM(n)
|
#define UNIFORM(n)
|
||||||
#define BINDING_INPUT_BUFFER 0
|
#define BINDING_INPUT_BUFFER 0
|
||||||
#define BINDING_ENC_BUFFER 1
|
#define BINDING_SWIZZLE_BUFFER 1
|
||||||
#define BINDING_SWIZZLE_BUFFER 2
|
#define BINDING_OUTPUT_IMAGE 2
|
||||||
#define BINDING_OUTPUT_IMAGE 3
|
|
||||||
|
|
||||||
#else // ^^^ Vulkan ^^^ // vvv OpenGL vvv
|
#else // ^^^ Vulkan ^^^ // vvv OpenGL vvv
|
||||||
|
|
||||||
#define BEGIN_PUSH_CONSTANTS
|
#define BEGIN_PUSH_CONSTANTS
|
||||||
#define END_PUSH_CONSTANTS
|
#define END_PUSH_CONSTANTS
|
||||||
#define UNIFORM(n) layout(location = n) uniform
|
#define UNIFORM(n) layout(location = n) uniform
|
||||||
#define BINDING_SWIZZLE_BUFFER 0
|
#define BINDING_INPUT_BUFFER 0
|
||||||
#define BINDING_INPUT_BUFFER 1
|
#define BINDING_SWIZZLE_BUFFER 1
|
||||||
#define BINDING_ENC_BUFFER 2
|
|
||||||
#define BINDING_OUTPUT_IMAGE 0
|
#define BINDING_OUTPUT_IMAGE 0
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
@ -64,11 +62,6 @@ layout(binding = BINDING_INPUT_BUFFER, std430) readonly buffer InputBufferU32 {
|
||||||
uint astc_data[];
|
uint astc_data[];
|
||||||
};
|
};
|
||||||
|
|
||||||
// ASTC Encodings data
|
|
||||||
layout(binding = BINDING_ENC_BUFFER, std430) readonly buffer EncodingsValues {
|
|
||||||
EncodingData encoding_values[];
|
|
||||||
};
|
|
||||||
|
|
||||||
layout(binding = BINDING_OUTPUT_IMAGE, rgba8) uniform writeonly image2DArray dest_image;
|
layout(binding = BINDING_OUTPUT_IMAGE, rgba8) uniform writeonly image2DArray dest_image;
|
||||||
|
|
||||||
const uint GOB_SIZE_X = 64;
|
const uint GOB_SIZE_X = 64;
|
||||||
|
@ -94,6 +87,19 @@ const int JUST_BITS = 0;
|
||||||
const int QUINT = 1;
|
const int QUINT = 1;
|
||||||
const int TRIT = 2;
|
const int TRIT = 2;
|
||||||
|
|
||||||
|
// ASTC Encodings data, sorted in ascending order based on their BitLength value
|
||||||
|
// (see GetBitLength() function)
|
||||||
|
EncodingData encoding_values[22] = EncodingData[](
|
||||||
|
EncodingData(JUST_BITS, 0, 0, 0), EncodingData(JUST_BITS, 1, 0, 0), EncodingData(TRIT, 0, 0, 0),
|
||||||
|
EncodingData(JUST_BITS, 2, 0, 0), EncodingData(QUINT, 0, 0, 0), EncodingData(TRIT, 1, 0, 0),
|
||||||
|
EncodingData(JUST_BITS, 3, 0, 0), EncodingData(QUINT, 1, 0, 0), EncodingData(TRIT, 2, 0, 0),
|
||||||
|
EncodingData(JUST_BITS, 4, 0, 0), EncodingData(QUINT, 2, 0, 0), EncodingData(TRIT, 3, 0, 0),
|
||||||
|
EncodingData(JUST_BITS, 5, 0, 0), EncodingData(QUINT, 3, 0, 0), EncodingData(TRIT, 4, 0, 0),
|
||||||
|
EncodingData(JUST_BITS, 6, 0, 0), EncodingData(QUINT, 4, 0, 0), EncodingData(TRIT, 5, 0, 0),
|
||||||
|
EncodingData(JUST_BITS, 7, 0, 0), EncodingData(QUINT, 5, 0, 0), EncodingData(TRIT, 6, 0, 0),
|
||||||
|
EncodingData(JUST_BITS, 8, 0, 0)
|
||||||
|
);
|
||||||
|
|
||||||
// The following constants are expanded variants of the Replicate()
|
// The following constants are expanded variants of the Replicate()
|
||||||
// function calls corresponding to the following arguments:
|
// function calls corresponding to the following arguments:
|
||||||
// value: index into the generated table
|
// value: index into the generated table
|
||||||
|
@ -596,22 +602,16 @@ void DecodeColorValues(uvec4 modes, uint num_partitions, uint color_data_bits) {
|
||||||
for (uint i = 0; i < num_partitions; i++) {
|
for (uint i = 0; i < num_partitions; i++) {
|
||||||
num_values += ((modes[i] >> 2) + 1) << 1;
|
num_values += ((modes[i] >> 2) + 1) << 1;
|
||||||
}
|
}
|
||||||
int range = 256;
|
// Find the largest encoding that's within color_data_bits
|
||||||
while (--range > 0) {
|
// TODO(ameerj): profile with binary search
|
||||||
EncodingData val = encoding_values[range];
|
int range = 0;
|
||||||
|
while (++range < encoding_values.length()) {
|
||||||
uint bit_length = GetBitLength(num_values, range);
|
uint bit_length = GetBitLength(num_values, range);
|
||||||
if (bit_length <= color_data_bits) {
|
if (bit_length > color_data_bits) {
|
||||||
while (--range > 0) {
|
|
||||||
EncodingData newval = encoding_values[range];
|
|
||||||
if (newval.encoding != val.encoding && newval.num_bits != val.num_bits) {
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
++range;
|
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
DecodeIntegerSequence(range, num_values);
|
DecodeIntegerSequence(range - 1, num_values);
|
||||||
uint out_index = 0;
|
uint out_index = 0;
|
||||||
for (int itr = 0; itr < result_index; ++itr) {
|
for (int itr = 0; itr < result_index; ++itr) {
|
||||||
if (out_index >= num_values) {
|
if (out_index >= num_values) {
|
||||||
|
@ -1110,10 +1110,10 @@ TexelWeightParams DecodeBlockInfo(uint block_index) {
|
||||||
}
|
}
|
||||||
weight_index -= 2;
|
weight_index -= 2;
|
||||||
if ((mode_layout != 9) && ((mode & 0x200) != 0)) {
|
if ((mode_layout != 9) && ((mode & 0x200) != 0)) {
|
||||||
const int max_weights[6] = int[6](9, 11, 15, 19, 23, 31);
|
const int max_weights[6] = int[6](7, 8, 9, 10, 11, 12);
|
||||||
params.max_weight = max_weights[weight_index];
|
params.max_weight = max_weights[weight_index];
|
||||||
} else {
|
} else {
|
||||||
const int max_weights[6] = int[6](1, 2, 3, 4, 5, 7);
|
const int max_weights[6] = int[6](1, 2, 3, 4, 5, 6);
|
||||||
params.max_weight = max_weights[weight_index];
|
params.max_weight = max_weights[weight_index];
|
||||||
}
|
}
|
||||||
return params;
|
return params;
|
||||||
|
|
|
@ -60,19 +60,15 @@ UtilShaders::UtilShaders(ProgramManager& program_manager_)
|
||||||
copy_bc4_program(MakeProgram(OPENGL_COPY_BC4_COMP)) {
|
copy_bc4_program(MakeProgram(OPENGL_COPY_BC4_COMP)) {
|
||||||
const auto swizzle_table = Tegra::Texture::MakeSwizzleTable();
|
const auto swizzle_table = Tegra::Texture::MakeSwizzleTable();
|
||||||
swizzle_table_buffer.Create();
|
swizzle_table_buffer.Create();
|
||||||
astc_buffer.Create();
|
|
||||||
glNamedBufferStorage(swizzle_table_buffer.handle, sizeof(swizzle_table), &swizzle_table, 0);
|
glNamedBufferStorage(swizzle_table_buffer.handle, sizeof(swizzle_table), &swizzle_table, 0);
|
||||||
glNamedBufferStorage(astc_buffer.handle, sizeof(ASTC_ENCODINGS_VALUES), &ASTC_ENCODINGS_VALUES,
|
|
||||||
0);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
UtilShaders::~UtilShaders() = default;
|
UtilShaders::~UtilShaders() = default;
|
||||||
|
|
||||||
void UtilShaders::ASTCDecode(Image& image, const ImageBufferMap& map,
|
void UtilShaders::ASTCDecode(Image& image, const ImageBufferMap& map,
|
||||||
std::span<const VideoCommon::SwizzleParameters> swizzles) {
|
std::span<const VideoCommon::SwizzleParameters> swizzles) {
|
||||||
static constexpr GLuint BINDING_SWIZZLE_BUFFER = 0;
|
static constexpr GLuint BINDING_INPUT_BUFFER = 0;
|
||||||
static constexpr GLuint BINDING_INPUT_BUFFER = 1;
|
static constexpr GLuint BINDING_SWIZZLE_BUFFER = 1;
|
||||||
static constexpr GLuint BINDING_ENC_BUFFER = 2;
|
|
||||||
static constexpr GLuint BINDING_OUTPUT_IMAGE = 0;
|
static constexpr GLuint BINDING_OUTPUT_IMAGE = 0;
|
||||||
|
|
||||||
const Extent2D tile_size{
|
const Extent2D tile_size{
|
||||||
|
@ -81,7 +77,6 @@ void UtilShaders::ASTCDecode(Image& image, const ImageBufferMap& map,
|
||||||
};
|
};
|
||||||
program_manager.BindComputeProgram(astc_decoder_program.handle);
|
program_manager.BindComputeProgram(astc_decoder_program.handle);
|
||||||
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, BINDING_SWIZZLE_BUFFER, swizzle_table_buffer.handle);
|
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, BINDING_SWIZZLE_BUFFER, swizzle_table_buffer.handle);
|
||||||
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, BINDING_ENC_BUFFER, astc_buffer.handle);
|
|
||||||
|
|
||||||
glFlushMappedNamedBufferRange(map.buffer, map.offset, image.guest_size_bytes);
|
glFlushMappedNamedBufferRange(map.buffer, map.offset, image.guest_size_bytes);
|
||||||
glUniform2ui(1, tile_size.width, tile_size.height);
|
glUniform2ui(1, tile_size.width, tile_size.height);
|
||||||
|
@ -103,11 +98,11 @@ void UtilShaders::ASTCDecode(Image& image, const ImageBufferMap& map,
|
||||||
glUniform1ui(6, params.block_height);
|
glUniform1ui(6, params.block_height);
|
||||||
glUniform1ui(7, params.block_height_mask);
|
glUniform1ui(7, params.block_height_mask);
|
||||||
|
|
||||||
glBindImageTexture(BINDING_OUTPUT_IMAGE, image.StorageHandle(), swizzle.level, GL_TRUE, 0,
|
|
||||||
GL_WRITE_ONLY, GL_RGBA8);
|
|
||||||
// ASTC texture data
|
// ASTC texture data
|
||||||
glBindBufferRange(GL_SHADER_STORAGE_BUFFER, BINDING_INPUT_BUFFER, map.buffer, input_offset,
|
glBindBufferRange(GL_SHADER_STORAGE_BUFFER, BINDING_INPUT_BUFFER, map.buffer, input_offset,
|
||||||
image.guest_size_bytes - swizzle.buffer_offset);
|
image.guest_size_bytes - swizzle.buffer_offset);
|
||||||
|
glBindImageTexture(BINDING_OUTPUT_IMAGE, image.StorageHandle(), swizzle.level, GL_TRUE, 0,
|
||||||
|
GL_WRITE_ONLY, GL_RGBA8);
|
||||||
|
|
||||||
glDispatchCompute(num_dispatches_x, num_dispatches_y, image.info.resources.layers);
|
glDispatchCompute(num_dispatches_x, num_dispatches_y, image.info.resources.layers);
|
||||||
}
|
}
|
||||||
|
|
|
@ -62,7 +62,6 @@ private:
|
||||||
ProgramManager& program_manager;
|
ProgramManager& program_manager;
|
||||||
|
|
||||||
OGLBuffer swizzle_table_buffer;
|
OGLBuffer swizzle_table_buffer;
|
||||||
OGLBuffer astc_buffer;
|
|
||||||
|
|
||||||
OGLProgram astc_decoder_program;
|
OGLProgram astc_decoder_program;
|
||||||
OGLProgram block_linear_unswizzle_2d_program;
|
OGLProgram block_linear_unswizzle_2d_program;
|
||||||
|
|
|
@ -30,16 +30,13 @@
|
||||||
namespace Vulkan {
|
namespace Vulkan {
|
||||||
|
|
||||||
using Tegra::Texture::SWIZZLE_TABLE;
|
using Tegra::Texture::SWIZZLE_TABLE;
|
||||||
using Tegra::Texture::ASTC::ASTC_ENCODINGS_VALUES;
|
|
||||||
using namespace Tegra::Texture::ASTC;
|
|
||||||
|
|
||||||
namespace {
|
namespace {
|
||||||
|
|
||||||
constexpr u32 ASTC_BINDING_INPUT_BUFFER = 0;
|
constexpr u32 ASTC_BINDING_INPUT_BUFFER = 0;
|
||||||
constexpr u32 ASTC_BINDING_ENC_BUFFER = 1;
|
constexpr u32 ASTC_BINDING_SWIZZLE_BUFFER = 1;
|
||||||
constexpr u32 ASTC_BINDING_SWIZZLE_BUFFER = 2;
|
constexpr u32 ASTC_BINDING_OUTPUT_IMAGE = 2;
|
||||||
constexpr u32 ASTC_BINDING_OUTPUT_IMAGE = 3;
|
constexpr size_t ASTC_NUM_BINDINGS = 3;
|
||||||
constexpr size_t ASTC_NUM_BINDINGS = 4;
|
|
||||||
|
|
||||||
template <size_t size>
|
template <size_t size>
|
||||||
inline constexpr VkPushConstantRange COMPUTE_PUSH_CONSTANT_RANGE{
|
inline constexpr VkPushConstantRange COMPUTE_PUSH_CONSTANT_RANGE{
|
||||||
|
@ -75,7 +72,7 @@ constexpr DescriptorBankInfo INPUT_OUTPUT_BANK_INFO{
|
||||||
.score = 2,
|
.score = 2,
|
||||||
};
|
};
|
||||||
|
|
||||||
constexpr std::array<VkDescriptorSetLayoutBinding, 4> ASTC_DESCRIPTOR_SET_BINDINGS{{
|
constexpr std::array<VkDescriptorSetLayoutBinding, ASTC_NUM_BINDINGS> ASTC_DESCRIPTOR_SET_BINDINGS{{
|
||||||
{
|
{
|
||||||
.binding = ASTC_BINDING_INPUT_BUFFER,
|
.binding = ASTC_BINDING_INPUT_BUFFER,
|
||||||
.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
|
.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
|
||||||
|
@ -83,13 +80,6 @@ constexpr std::array<VkDescriptorSetLayoutBinding, 4> ASTC_DESCRIPTOR_SET_BINDIN
|
||||||
.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
|
.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
|
||||||
.pImmutableSamplers = nullptr,
|
.pImmutableSamplers = nullptr,
|
||||||
},
|
},
|
||||||
{
|
|
||||||
.binding = ASTC_BINDING_ENC_BUFFER,
|
|
||||||
.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
|
|
||||||
.descriptorCount = 1,
|
|
||||||
.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
|
|
||||||
.pImmutableSamplers = nullptr,
|
|
||||||
},
|
|
||||||
{
|
{
|
||||||
.binding = ASTC_BINDING_SWIZZLE_BUFFER,
|
.binding = ASTC_BINDING_SWIZZLE_BUFFER,
|
||||||
.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
|
.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
|
||||||
|
@ -108,12 +98,12 @@ constexpr std::array<VkDescriptorSetLayoutBinding, 4> ASTC_DESCRIPTOR_SET_BINDIN
|
||||||
|
|
||||||
constexpr DescriptorBankInfo ASTC_BANK_INFO{
|
constexpr DescriptorBankInfo ASTC_BANK_INFO{
|
||||||
.uniform_buffers = 0,
|
.uniform_buffers = 0,
|
||||||
.storage_buffers = 3,
|
.storage_buffers = 2,
|
||||||
.texture_buffers = 0,
|
.texture_buffers = 0,
|
||||||
.image_buffers = 0,
|
.image_buffers = 0,
|
||||||
.textures = 0,
|
.textures = 0,
|
||||||
.images = 1,
|
.images = 1,
|
||||||
.score = 4,
|
.score = 3,
|
||||||
};
|
};
|
||||||
|
|
||||||
constexpr VkDescriptorUpdateTemplateEntryKHR INPUT_OUTPUT_DESCRIPTOR_UPDATE_TEMPLATE{
|
constexpr VkDescriptorUpdateTemplateEntryKHR INPUT_OUTPUT_DESCRIPTOR_UPDATE_TEMPLATE{
|
||||||
|
@ -135,14 +125,6 @@ constexpr std::array<VkDescriptorUpdateTemplateEntryKHR, ASTC_NUM_BINDINGS>
|
||||||
.offset = ASTC_BINDING_INPUT_BUFFER * sizeof(DescriptorUpdateEntry),
|
.offset = ASTC_BINDING_INPUT_BUFFER * sizeof(DescriptorUpdateEntry),
|
||||||
.stride = sizeof(DescriptorUpdateEntry),
|
.stride = sizeof(DescriptorUpdateEntry),
|
||||||
},
|
},
|
||||||
{
|
|
||||||
.dstBinding = ASTC_BINDING_ENC_BUFFER,
|
|
||||||
.dstArrayElement = 0,
|
|
||||||
.descriptorCount = 1,
|
|
||||||
.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
|
|
||||||
.offset = ASTC_BINDING_ENC_BUFFER * sizeof(DescriptorUpdateEntry),
|
|
||||||
.stride = sizeof(DescriptorUpdateEntry),
|
|
||||||
},
|
|
||||||
{
|
{
|
||||||
.dstBinding = ASTC_BINDING_SWIZZLE_BUFFER,
|
.dstBinding = ASTC_BINDING_SWIZZLE_BUFFER,
|
||||||
.dstArrayElement = 0,
|
.dstArrayElement = 0,
|
||||||
|
@ -355,7 +337,7 @@ ASTCDecoderPass::ASTCDecoderPass(const Device& device_, VKScheduler& scheduler_,
|
||||||
ASTCDecoderPass::~ASTCDecoderPass() = default;
|
ASTCDecoderPass::~ASTCDecoderPass() = default;
|
||||||
|
|
||||||
void ASTCDecoderPass::MakeDataBuffer() {
|
void ASTCDecoderPass::MakeDataBuffer() {
|
||||||
constexpr size_t TOTAL_BUFFER_SIZE = sizeof(ASTC_ENCODINGS_VALUES) + sizeof(SWIZZLE_TABLE);
|
constexpr size_t TOTAL_BUFFER_SIZE = sizeof(SWIZZLE_TABLE);
|
||||||
data_buffer = device.GetLogical().CreateBuffer(VkBufferCreateInfo{
|
data_buffer = device.GetLogical().CreateBuffer(VkBufferCreateInfo{
|
||||||
.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
|
.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
|
||||||
.pNext = nullptr,
|
.pNext = nullptr,
|
||||||
|
@ -369,11 +351,7 @@ void ASTCDecoderPass::MakeDataBuffer() {
|
||||||
data_buffer_commit = memory_allocator.Commit(data_buffer, MemoryUsage::Upload);
|
data_buffer_commit = memory_allocator.Commit(data_buffer, MemoryUsage::Upload);
|
||||||
|
|
||||||
const auto staging_ref = staging_buffer_pool.Request(TOTAL_BUFFER_SIZE, MemoryUsage::Upload);
|
const auto staging_ref = staging_buffer_pool.Request(TOTAL_BUFFER_SIZE, MemoryUsage::Upload);
|
||||||
std::memcpy(staging_ref.mapped_span.data(), &ASTC_ENCODINGS_VALUES,
|
std::memcpy(staging_ref.mapped_span.data(), &SWIZZLE_TABLE, sizeof(SWIZZLE_TABLE));
|
||||||
sizeof(ASTC_ENCODINGS_VALUES));
|
|
||||||
// Tack on the swizzle table at the end of the buffer
|
|
||||||
std::memcpy(staging_ref.mapped_span.data() + sizeof(ASTC_ENCODINGS_VALUES), &SWIZZLE_TABLE,
|
|
||||||
sizeof(SWIZZLE_TABLE));
|
|
||||||
|
|
||||||
scheduler.Record([src = staging_ref.buffer, offset = staging_ref.offset, dst = *data_buffer,
|
scheduler.Record([src = staging_ref.buffer, offset = staging_ref.offset, dst = *data_buffer,
|
||||||
TOTAL_BUFFER_SIZE](vk::CommandBuffer cmdbuf) {
|
TOTAL_BUFFER_SIZE](vk::CommandBuffer cmdbuf) {
|
||||||
|
@ -443,9 +421,7 @@ void ASTCDecoderPass::Assemble(Image& image, const StagingBufferRef& map,
|
||||||
update_descriptor_queue.Acquire();
|
update_descriptor_queue.Acquire();
|
||||||
update_descriptor_queue.AddBuffer(map.buffer, input_offset,
|
update_descriptor_queue.AddBuffer(map.buffer, input_offset,
|
||||||
image.guest_size_bytes - swizzle.buffer_offset);
|
image.guest_size_bytes - swizzle.buffer_offset);
|
||||||
update_descriptor_queue.AddBuffer(*data_buffer, 0, sizeof(ASTC_ENCODINGS_VALUES));
|
update_descriptor_queue.AddBuffer(*data_buffer, 0, sizeof(SWIZZLE_TABLE));
|
||||||
update_descriptor_queue.AddBuffer(*data_buffer, sizeof(ASTC_ENCODINGS_VALUES),
|
|
||||||
sizeof(SWIZZLE_TABLE));
|
|
||||||
update_descriptor_queue.AddImage(image.StorageImageView(swizzle.level));
|
update_descriptor_queue.AddImage(image.StorageImageView(swizzle.level));
|
||||||
const void* const descriptor_data{update_descriptor_queue.UpdateData()};
|
const void* const descriptor_data{update_descriptor_queue.UpdateData()};
|
||||||
|
|
||||||
|
|
|
@ -151,6 +151,76 @@ private:
|
||||||
const IntType& m_Bits;
|
const IntType& m_Bits;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
enum class IntegerEncoding { JustBits, Quint, Trit };
|
||||||
|
|
||||||
|
struct IntegerEncodedValue {
|
||||||
|
constexpr IntegerEncodedValue() = default;
|
||||||
|
|
||||||
|
constexpr IntegerEncodedValue(IntegerEncoding encoding_, u32 num_bits_)
|
||||||
|
: encoding{encoding_}, num_bits{num_bits_} {}
|
||||||
|
|
||||||
|
constexpr bool MatchesEncoding(const IntegerEncodedValue& other) const {
|
||||||
|
return encoding == other.encoding && num_bits == other.num_bits;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Returns the number of bits required to encode num_vals values.
|
||||||
|
u32 GetBitLength(u32 num_vals) const {
|
||||||
|
u32 total_bits = num_bits * num_vals;
|
||||||
|
if (encoding == IntegerEncoding::Trit) {
|
||||||
|
total_bits += (num_vals * 8 + 4) / 5;
|
||||||
|
} else if (encoding == IntegerEncoding::Quint) {
|
||||||
|
total_bits += (num_vals * 7 + 2) / 3;
|
||||||
|
}
|
||||||
|
return total_bits;
|
||||||
|
}
|
||||||
|
|
||||||
|
IntegerEncoding encoding{};
|
||||||
|
u32 num_bits = 0;
|
||||||
|
u32 bit_value = 0;
|
||||||
|
union {
|
||||||
|
u32 quint_value = 0;
|
||||||
|
u32 trit_value;
|
||||||
|
};
|
||||||
|
};
|
||||||
|
|
||||||
|
// Returns a new instance of this struct that corresponds to the
|
||||||
|
// can take no more than mav_value values
|
||||||
|
static constexpr IntegerEncodedValue CreateEncoding(u32 mav_value) {
|
||||||
|
while (mav_value > 0) {
|
||||||
|
u32 check = mav_value + 1;
|
||||||
|
|
||||||
|
// Is mav_value a power of two?
|
||||||
|
if (!(check & (check - 1))) {
|
||||||
|
return IntegerEncodedValue(IntegerEncoding::JustBits, std::popcount(mav_value));
|
||||||
|
}
|
||||||
|
|
||||||
|
// Is mav_value of the type 3*2^n - 1?
|
||||||
|
if ((check % 3 == 0) && !((check / 3) & ((check / 3) - 1))) {
|
||||||
|
return IntegerEncodedValue(IntegerEncoding::Trit, std::popcount(check / 3 - 1));
|
||||||
|
}
|
||||||
|
|
||||||
|
// Is mav_value of the type 5*2^n - 1?
|
||||||
|
if ((check % 5 == 0) && !((check / 5) & ((check / 5) - 1))) {
|
||||||
|
return IntegerEncodedValue(IntegerEncoding::Quint, std::popcount(check / 5 - 1));
|
||||||
|
}
|
||||||
|
|
||||||
|
// Apparently it can't be represented with a bounded integer sequence...
|
||||||
|
// just iterate.
|
||||||
|
mav_value--;
|
||||||
|
}
|
||||||
|
return IntegerEncodedValue(IntegerEncoding::JustBits, 0);
|
||||||
|
}
|
||||||
|
|
||||||
|
static constexpr std::array<IntegerEncodedValue, 256> MakeEncodedValues() {
|
||||||
|
std::array<IntegerEncodedValue, 256> encodings{};
|
||||||
|
for (std::size_t i = 0; i < encodings.size(); ++i) {
|
||||||
|
encodings[i] = CreateEncoding(static_cast<u32>(i));
|
||||||
|
}
|
||||||
|
return encodings;
|
||||||
|
}
|
||||||
|
|
||||||
|
static constexpr std::array<IntegerEncodedValue, 256> ASTC_ENCODINGS_VALUES = MakeEncodedValues();
|
||||||
|
|
||||||
namespace Tegra::Texture::ASTC {
|
namespace Tegra::Texture::ASTC {
|
||||||
using IntegerEncodedVector = boost::container::static_vector<
|
using IntegerEncodedVector = boost::container::static_vector<
|
||||||
IntegerEncodedValue, 256,
|
IntegerEncodedValue, 256,
|
||||||
|
|
|
@ -9,76 +9,6 @@
|
||||||
|
|
||||||
namespace Tegra::Texture::ASTC {
|
namespace Tegra::Texture::ASTC {
|
||||||
|
|
||||||
enum class IntegerEncoding { JustBits, Quint, Trit };
|
|
||||||
|
|
||||||
struct IntegerEncodedValue {
|
|
||||||
constexpr IntegerEncodedValue() = default;
|
|
||||||
|
|
||||||
constexpr IntegerEncodedValue(IntegerEncoding encoding_, u32 num_bits_)
|
|
||||||
: encoding{encoding_}, num_bits{num_bits_} {}
|
|
||||||
|
|
||||||
constexpr bool MatchesEncoding(const IntegerEncodedValue& other) const {
|
|
||||||
return encoding == other.encoding && num_bits == other.num_bits;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Returns the number of bits required to encode num_vals values.
|
|
||||||
u32 GetBitLength(u32 num_vals) const {
|
|
||||||
u32 total_bits = num_bits * num_vals;
|
|
||||||
if (encoding == IntegerEncoding::Trit) {
|
|
||||||
total_bits += (num_vals * 8 + 4) / 5;
|
|
||||||
} else if (encoding == IntegerEncoding::Quint) {
|
|
||||||
total_bits += (num_vals * 7 + 2) / 3;
|
|
||||||
}
|
|
||||||
return total_bits;
|
|
||||||
}
|
|
||||||
|
|
||||||
IntegerEncoding encoding{};
|
|
||||||
u32 num_bits = 0;
|
|
||||||
u32 bit_value = 0;
|
|
||||||
union {
|
|
||||||
u32 quint_value = 0;
|
|
||||||
u32 trit_value;
|
|
||||||
};
|
|
||||||
};
|
|
||||||
|
|
||||||
// Returns a new instance of this struct that corresponds to the
|
|
||||||
// can take no more than mav_value values
|
|
||||||
constexpr IntegerEncodedValue CreateEncoding(u32 mav_value) {
|
|
||||||
while (mav_value > 0) {
|
|
||||||
u32 check = mav_value + 1;
|
|
||||||
|
|
||||||
// Is mav_value a power of two?
|
|
||||||
if (!(check & (check - 1))) {
|
|
||||||
return IntegerEncodedValue(IntegerEncoding::JustBits, std::popcount(mav_value));
|
|
||||||
}
|
|
||||||
|
|
||||||
// Is mav_value of the type 3*2^n - 1?
|
|
||||||
if ((check % 3 == 0) && !((check / 3) & ((check / 3) - 1))) {
|
|
||||||
return IntegerEncodedValue(IntegerEncoding::Trit, std::popcount(check / 3 - 1));
|
|
||||||
}
|
|
||||||
|
|
||||||
// Is mav_value of the type 5*2^n - 1?
|
|
||||||
if ((check % 5 == 0) && !((check / 5) & ((check / 5) - 1))) {
|
|
||||||
return IntegerEncodedValue(IntegerEncoding::Quint, std::popcount(check / 5 - 1));
|
|
||||||
}
|
|
||||||
|
|
||||||
// Apparently it can't be represented with a bounded integer sequence...
|
|
||||||
// just iterate.
|
|
||||||
mav_value--;
|
|
||||||
}
|
|
||||||
return IntegerEncodedValue(IntegerEncoding::JustBits, 0);
|
|
||||||
}
|
|
||||||
|
|
||||||
constexpr std::array<IntegerEncodedValue, 256> MakeEncodedValues() {
|
|
||||||
std::array<IntegerEncodedValue, 256> encodings{};
|
|
||||||
for (std::size_t i = 0; i < encodings.size(); ++i) {
|
|
||||||
encodings[i] = CreateEncoding(static_cast<u32>(i));
|
|
||||||
}
|
|
||||||
return encodings;
|
|
||||||
}
|
|
||||||
|
|
||||||
constexpr std::array<IntegerEncodedValue, 256> ASTC_ENCODINGS_VALUES = MakeEncodedValues();
|
|
||||||
|
|
||||||
void Decompress(std::span<const uint8_t> data, uint32_t width, uint32_t height, uint32_t depth,
|
void Decompress(std::span<const uint8_t> data, uint32_t width, uint32_t height, uint32_t depth,
|
||||||
uint32_t block_width, uint32_t block_height, std::span<uint8_t> output);
|
uint32_t block_width, uint32_t block_height, std::span<uint8_t> output);
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue