forked from ShuriZma/suyu
astc: Various robustness enhancements for the gpu decoder
These changes should help in reducing crashes/drivers panics that may occur due to synchronization issues between the shader completion and later access of the decoded texture.
This commit is contained in:
parent
3870ba670f
commit
31b125ef57
|
@ -14,9 +14,8 @@
|
||||||
#define BINDING_6_TO_8_BUFFER 2
|
#define BINDING_6_TO_8_BUFFER 2
|
||||||
#define BINDING_7_TO_8_BUFFER 3
|
#define BINDING_7_TO_8_BUFFER 3
|
||||||
#define BINDING_8_TO_8_BUFFER 4
|
#define BINDING_8_TO_8_BUFFER 4
|
||||||
#define BINDING_BYTE_TO_16_BUFFER 5
|
#define BINDING_SWIZZLE_BUFFER 5
|
||||||
#define BINDING_SWIZZLE_BUFFER 6
|
#define BINDING_OUTPUT_IMAGE 6
|
||||||
#define BINDING_OUTPUT_IMAGE 7
|
|
||||||
|
|
||||||
#else // ^^^ Vulkan ^^^ // vvv OpenGL vvv
|
#else // ^^^ Vulkan ^^^ // vvv OpenGL vvv
|
||||||
|
|
||||||
|
@ -29,7 +28,6 @@
|
||||||
#define BINDING_6_TO_8_BUFFER 3
|
#define BINDING_6_TO_8_BUFFER 3
|
||||||
#define BINDING_7_TO_8_BUFFER 4
|
#define BINDING_7_TO_8_BUFFER 4
|
||||||
#define BINDING_8_TO_8_BUFFER 5
|
#define BINDING_8_TO_8_BUFFER 5
|
||||||
#define BINDING_BYTE_TO_16_BUFFER 6
|
|
||||||
#define BINDING_OUTPUT_IMAGE 0
|
#define BINDING_OUTPUT_IMAGE 0
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
@ -86,9 +84,6 @@ layout(binding = BINDING_7_TO_8_BUFFER, std430) readonly buffer REPLICATE_7_BIT_
|
||||||
layout(binding = BINDING_8_TO_8_BUFFER, std430) readonly buffer REPLICATE_8_BIT_TO_8 {
|
layout(binding = BINDING_8_TO_8_BUFFER, std430) readonly buffer REPLICATE_8_BIT_TO_8 {
|
||||||
uint REPLICATE_8_BIT_TO_8_TABLE[];
|
uint REPLICATE_8_BIT_TO_8_TABLE[];
|
||||||
};
|
};
|
||||||
layout(binding = BINDING_BYTE_TO_16_BUFFER, std430) readonly buffer REPLICATE_BYTE_TO_16 {
|
|
||||||
uint REPLICATE_BYTE_TO_16_TABLE[];
|
|
||||||
};
|
|
||||||
|
|
||||||
layout(binding = BINDING_OUTPUT_IMAGE, rgba8) uniform writeonly image2DArray dest_image;
|
layout(binding = BINDING_OUTPUT_IMAGE, rgba8) uniform writeonly image2DArray dest_image;
|
||||||
|
|
||||||
|
@ -207,8 +202,7 @@ uint Replicate(uint val, uint num_bits, uint to_bit) {
|
||||||
}
|
}
|
||||||
|
|
||||||
uvec4 ReplicateByteTo16(uvec4 value) {
|
uvec4 ReplicateByteTo16(uvec4 value) {
|
||||||
return uvec4(REPLICATE_BYTE_TO_16_TABLE[value.x], REPLICATE_BYTE_TO_16_TABLE[value.y],
|
return value * 0x101;
|
||||||
REPLICATE_BYTE_TO_16_TABLE[value.z], REPLICATE_BYTE_TO_16_TABLE[value.w]);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
uint ReplicateBitTo7(uint value) {
|
uint ReplicateBitTo7(uint value) {
|
||||||
|
@ -1327,6 +1321,9 @@ void main() {
|
||||||
offset += swizzle;
|
offset += swizzle;
|
||||||
|
|
||||||
const ivec3 coord = ivec3(gl_GlobalInvocationID * uvec3(block_dims, 1));
|
const ivec3 coord = ivec3(gl_GlobalInvocationID * uvec3(block_dims, 1));
|
||||||
|
if (any(greaterThanEqual(coord, imageSize(dest_image)))) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
uint block_index =
|
uint block_index =
|
||||||
pos.z * gl_WorkGroupSize.x * gl_WorkGroupSize.y + pos.y * gl_WorkGroupSize.x + pos.x;
|
pos.z * gl_WorkGroupSize.x * gl_WorkGroupSize.y + pos.y * gl_WorkGroupSize.x + pos.x;
|
||||||
|
|
||||||
|
|
|
@ -83,7 +83,6 @@ void UtilShaders::ASTCDecode(Image& image, const ImageBufferMap& map,
|
||||||
static constexpr GLuint BINDING_6_TO_8_BUFFER = 3;
|
static constexpr GLuint BINDING_6_TO_8_BUFFER = 3;
|
||||||
static constexpr GLuint BINDING_7_TO_8_BUFFER = 4;
|
static constexpr GLuint BINDING_7_TO_8_BUFFER = 4;
|
||||||
static constexpr GLuint BINDING_8_TO_8_BUFFER = 5;
|
static constexpr GLuint BINDING_8_TO_8_BUFFER = 5;
|
||||||
static constexpr GLuint BINDING_BYTE_TO_16_BUFFER = 6;
|
|
||||||
|
|
||||||
static constexpr GLuint BINDING_OUTPUT_IMAGE = 0;
|
static constexpr GLuint BINDING_OUTPUT_IMAGE = 0;
|
||||||
|
|
||||||
|
@ -105,9 +104,6 @@ void UtilShaders::ASTCDecode(Image& image, const ImageBufferMap& map,
|
||||||
glBindBufferRange(GL_SHADER_STORAGE_BUFFER, BINDING_8_TO_8_BUFFER, astc_buffer.handle,
|
glBindBufferRange(GL_SHADER_STORAGE_BUFFER, BINDING_8_TO_8_BUFFER, astc_buffer.handle,
|
||||||
offsetof(AstcBufferData, replicate_8_to_8),
|
offsetof(AstcBufferData, replicate_8_to_8),
|
||||||
sizeof(AstcBufferData::replicate_8_to_8));
|
sizeof(AstcBufferData::replicate_8_to_8));
|
||||||
glBindBufferRange(GL_SHADER_STORAGE_BUFFER, BINDING_BYTE_TO_16_BUFFER, astc_buffer.handle,
|
|
||||||
offsetof(AstcBufferData, replicate_byte_to_16),
|
|
||||||
sizeof(AstcBufferData::replicate_byte_to_16));
|
|
||||||
|
|
||||||
glFlushMappedNamedBufferRange(map.buffer, map.offset, image.guest_size_bytes);
|
glFlushMappedNamedBufferRange(map.buffer, map.offset, image.guest_size_bytes);
|
||||||
glUniform2ui(1, tile_size.width, tile_size.height);
|
glUniform2ui(1, tile_size.width, tile_size.height);
|
||||||
|
@ -137,6 +133,7 @@ void UtilShaders::ASTCDecode(Image& image, const ImageBufferMap& map,
|
||||||
|
|
||||||
glDispatchCompute(num_dispatches_x, num_dispatches_y, image.info.resources.layers);
|
glDispatchCompute(num_dispatches_x, num_dispatches_y, image.info.resources.layers);
|
||||||
}
|
}
|
||||||
|
glMemoryBarrier(GL_ALL_BARRIER_BITS);
|
||||||
program_manager.RestoreGuestCompute();
|
program_manager.RestoreGuestCompute();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -40,9 +40,9 @@ constexpr u32 ASTC_BINDING_ENC_BUFFER = 1;
|
||||||
constexpr u32 ASTC_BINDING_6_TO_8_BUFFER = 2;
|
constexpr u32 ASTC_BINDING_6_TO_8_BUFFER = 2;
|
||||||
constexpr u32 ASTC_BINDING_7_TO_8_BUFFER = 3;
|
constexpr u32 ASTC_BINDING_7_TO_8_BUFFER = 3;
|
||||||
constexpr u32 ASTC_BINDING_8_TO_8_BUFFER = 4;
|
constexpr u32 ASTC_BINDING_8_TO_8_BUFFER = 4;
|
||||||
constexpr u32 ASTC_BINDING_BYTE_TO_16_BUFFER = 5;
|
constexpr u32 ASTC_BINDING_SWIZZLE_BUFFER = 5;
|
||||||
constexpr u32 ASTC_BINDING_SWIZZLE_BUFFER = 6;
|
constexpr u32 ASTC_BINDING_OUTPUT_IMAGE = 6;
|
||||||
constexpr u32 ASTC_BINDING_OUTPUT_IMAGE = 7;
|
constexpr size_t ASTC_NUM_BINDINGS = 7;
|
||||||
|
|
||||||
VkPushConstantRange BuildComputePushConstantRange(std::size_t size) {
|
VkPushConstantRange BuildComputePushConstantRange(std::size_t size) {
|
||||||
return {
|
return {
|
||||||
|
@ -71,7 +71,7 @@ std::array<VkDescriptorSetLayoutBinding, 2> BuildInputOutputDescriptorSetBinding
|
||||||
}};
|
}};
|
||||||
}
|
}
|
||||||
|
|
||||||
std::array<VkDescriptorSetLayoutBinding, 8> BuildASTCDescriptorSetBindings() {
|
std::array<VkDescriptorSetLayoutBinding, ASTC_NUM_BINDINGS> BuildASTCDescriptorSetBindings() {
|
||||||
return {{
|
return {{
|
||||||
{
|
{
|
||||||
.binding = ASTC_BINDING_INPUT_BUFFER,
|
.binding = ASTC_BINDING_INPUT_BUFFER,
|
||||||
|
@ -108,13 +108,6 @@ std::array<VkDescriptorSetLayoutBinding, 8> BuildASTCDescriptorSetBindings() {
|
||||||
.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
|
.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
|
||||||
.pImmutableSamplers = nullptr,
|
.pImmutableSamplers = nullptr,
|
||||||
},
|
},
|
||||||
{
|
|
||||||
.binding = ASTC_BINDING_BYTE_TO_16_BUFFER,
|
|
||||||
.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
|
|
||||||
.descriptorCount = 1,
|
|
||||||
.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
|
|
||||||
.pImmutableSamplers = nullptr,
|
|
||||||
},
|
|
||||||
{
|
{
|
||||||
.binding = ASTC_BINDING_SWIZZLE_BUFFER,
|
.binding = ASTC_BINDING_SWIZZLE_BUFFER,
|
||||||
.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
|
.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
|
||||||
|
@ -143,7 +136,8 @@ VkDescriptorUpdateTemplateEntryKHR BuildInputOutputDescriptorUpdateTemplate() {
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
std::array<VkDescriptorUpdateTemplateEntryKHR, 8> BuildASTCPassDescriptorUpdateTemplateEntry() {
|
std::array<VkDescriptorUpdateTemplateEntryKHR, ASTC_NUM_BINDINGS>
|
||||||
|
BuildASTCPassDescriptorUpdateTemplateEntry() {
|
||||||
return {{
|
return {{
|
||||||
{
|
{
|
||||||
.dstBinding = ASTC_BINDING_INPUT_BUFFER,
|
.dstBinding = ASTC_BINDING_INPUT_BUFFER,
|
||||||
|
@ -185,14 +179,6 @@ std::array<VkDescriptorUpdateTemplateEntryKHR, 8> BuildASTCPassDescriptorUpdateT
|
||||||
.offset = ASTC_BINDING_8_TO_8_BUFFER * sizeof(DescriptorUpdateEntry),
|
.offset = ASTC_BINDING_8_TO_8_BUFFER * sizeof(DescriptorUpdateEntry),
|
||||||
.stride = sizeof(DescriptorUpdateEntry),
|
.stride = sizeof(DescriptorUpdateEntry),
|
||||||
},
|
},
|
||||||
{
|
|
||||||
.dstBinding = ASTC_BINDING_BYTE_TO_16_BUFFER,
|
|
||||||
.dstArrayElement = 0,
|
|
||||||
.descriptorCount = 1,
|
|
||||||
.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
|
|
||||||
.offset = ASTC_BINDING_BYTE_TO_16_BUFFER * sizeof(DescriptorUpdateEntry),
|
|
||||||
.stride = sizeof(DescriptorUpdateEntry),
|
|
||||||
},
|
|
||||||
{
|
{
|
||||||
.dstBinding = ASTC_BINDING_SWIZZLE_BUFFER,
|
.dstBinding = ASTC_BINDING_SWIZZLE_BUFFER,
|
||||||
.dstArrayElement = 0,
|
.dstArrayElement = 0,
|
||||||
|
@ -222,15 +208,6 @@ struct AstcPushConstants {
|
||||||
u32 block_height_mask;
|
u32 block_height_mask;
|
||||||
};
|
};
|
||||||
|
|
||||||
struct AstcBufferData {
|
|
||||||
decltype(SWIZZLE_TABLE) swizzle_table_buffer = SWIZZLE_TABLE;
|
|
||||||
decltype(EncodingsValues) encoding_values = EncodingsValues;
|
|
||||||
decltype(REPLICATE_6_BIT_TO_8_TABLE) replicate_6_to_8 = REPLICATE_6_BIT_TO_8_TABLE;
|
|
||||||
decltype(REPLICATE_7_BIT_TO_8_TABLE) replicate_7_to_8 = REPLICATE_7_BIT_TO_8_TABLE;
|
|
||||||
decltype(REPLICATE_8_BIT_TO_8_TABLE) replicate_8_to_8 = REPLICATE_8_BIT_TO_8_TABLE;
|
|
||||||
decltype(REPLICATE_BYTE_TO_16_TABLE) replicate_byte_to_16 = REPLICATE_BYTE_TO_16_TABLE;
|
|
||||||
} constexpr ASTC_BUFFER_DATA;
|
|
||||||
|
|
||||||
} // Anonymous namespace
|
} // Anonymous namespace
|
||||||
|
|
||||||
VKComputePass::VKComputePass(const Device& device, VKDescriptorPool& descriptor_pool,
|
VKComputePass::VKComputePass(const Device& device, VKDescriptorPool& descriptor_pool,
|
||||||
|
@ -517,9 +494,6 @@ void ASTCDecoderPass::Assemble(Image& image, const StagingBufferRef& map,
|
||||||
sizeof(AstcBufferData::replicate_7_to_8));
|
sizeof(AstcBufferData::replicate_7_to_8));
|
||||||
update_descriptor_queue.AddBuffer(*data_buffer, offsetof(AstcBufferData, replicate_8_to_8),
|
update_descriptor_queue.AddBuffer(*data_buffer, offsetof(AstcBufferData, replicate_8_to_8),
|
||||||
sizeof(AstcBufferData::replicate_8_to_8));
|
sizeof(AstcBufferData::replicate_8_to_8));
|
||||||
update_descriptor_queue.AddBuffer(*data_buffer,
|
|
||||||
offsetof(AstcBufferData, replicate_byte_to_16),
|
|
||||||
sizeof(AstcBufferData::replicate_byte_to_16));
|
|
||||||
update_descriptor_queue.AddBuffer(*data_buffer, sizeof(AstcBufferData),
|
update_descriptor_queue.AddBuffer(*data_buffer, sizeof(AstcBufferData),
|
||||||
sizeof(SWIZZLE_TABLE));
|
sizeof(SWIZZLE_TABLE));
|
||||||
update_descriptor_queue.AddImage(image.StorageImageView(swizzle.level));
|
update_descriptor_queue.AddImage(image.StorageImageView(swizzle.level));
|
||||||
|
@ -569,6 +543,7 @@ void ASTCDecoderPass::Assemble(Image& image, const StagingBufferRef& map,
|
||||||
cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
|
cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
|
||||||
VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, 0, image_barrier);
|
VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, 0, image_barrier);
|
||||||
});
|
});
|
||||||
|
scheduler.Finish();
|
||||||
}
|
}
|
||||||
|
|
||||||
} // namespace Vulkan
|
} // namespace Vulkan
|
||||||
|
|
|
@ -551,6 +551,8 @@ static void FillError(std::span<u32> outBuf, u32 blockWidth, u32 blockHeight) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static constexpr auto REPLICATE_BYTE_TO_16_TABLE = MakeReplicateTable<u32, 8, 16>();
|
||||||
static constexpr u32 ReplicateByteTo16(std::size_t value) {
|
static constexpr u32 ReplicateByteTo16(std::size_t value) {
|
||||||
return REPLICATE_BYTE_TO_16_TABLE[value];
|
return REPLICATE_BYTE_TO_16_TABLE[value];
|
||||||
}
|
}
|
||||||
|
|
|
@ -116,7 +116,6 @@ constexpr auto MakeReplicateTable() {
|
||||||
return table;
|
return table;
|
||||||
}
|
}
|
||||||
|
|
||||||
constexpr auto REPLICATE_BYTE_TO_16_TABLE = MakeReplicateTable<u32, 8, 16>();
|
|
||||||
constexpr auto REPLICATE_6_BIT_TO_8_TABLE = MakeReplicateTable<u32, 6, 8>();
|
constexpr auto REPLICATE_6_BIT_TO_8_TABLE = MakeReplicateTable<u32, 6, 8>();
|
||||||
constexpr auto REPLICATE_7_BIT_TO_8_TABLE = MakeReplicateTable<u32, 7, 8>();
|
constexpr auto REPLICATE_7_BIT_TO_8_TABLE = MakeReplicateTable<u32, 7, 8>();
|
||||||
constexpr auto REPLICATE_8_BIT_TO_8_TABLE = MakeReplicateTable<u32, 8, 8>();
|
constexpr auto REPLICATE_8_BIT_TO_8_TABLE = MakeReplicateTable<u32, 8, 8>();
|
||||||
|
@ -126,7 +125,6 @@ struct AstcBufferData {
|
||||||
decltype(REPLICATE_6_BIT_TO_8_TABLE) replicate_6_to_8 = REPLICATE_6_BIT_TO_8_TABLE;
|
decltype(REPLICATE_6_BIT_TO_8_TABLE) replicate_6_to_8 = REPLICATE_6_BIT_TO_8_TABLE;
|
||||||
decltype(REPLICATE_7_BIT_TO_8_TABLE) replicate_7_to_8 = REPLICATE_7_BIT_TO_8_TABLE;
|
decltype(REPLICATE_7_BIT_TO_8_TABLE) replicate_7_to_8 = REPLICATE_7_BIT_TO_8_TABLE;
|
||||||
decltype(REPLICATE_8_BIT_TO_8_TABLE) replicate_8_to_8 = REPLICATE_8_BIT_TO_8_TABLE;
|
decltype(REPLICATE_8_BIT_TO_8_TABLE) replicate_8_to_8 = REPLICATE_8_BIT_TO_8_TABLE;
|
||||||
decltype(REPLICATE_BYTE_TO_16_TABLE) replicate_byte_to_16 = REPLICATE_BYTE_TO_16_TABLE;
|
|
||||||
} constexpr ASTC_BUFFER_DATA;
|
} constexpr ASTC_BUFFER_DATA;
|
||||||
|
|
||||||
void Decompress(std::span<const uint8_t> data, uint32_t width, uint32_t height, uint32_t depth,
|
void Decompress(std::span<const uint8_t> data, uint32_t width, uint32_t height, uint32_t depth,
|
||||||
|
|
Loading…
Reference in New Issue