forked from ShuriZma/suyu
1
0
Fork 0

astc_decoder: Reduce workgroup size

This reduces the amount of over-dispatching when there are odd block dimensions (e.g. ASTC 8x5), which rarely divide evenly into 32x32 workgroups.
This commit is contained in:
ameerj 2021-07-31 23:55:20 -04:00
parent 5ab8053511
commit c439fc9be9
3 changed files with 5 additions and 5 deletions

View File

@ -22,7 +22,7 @@
#endif #endif
layout(local_size_x = 32, local_size_y = 32, local_size_z = 1) in; layout(local_size_x = 8, local_size_y = 8, local_size_z = 1) in;
BEGIN_PUSH_CONSTANTS BEGIN_PUSH_CONSTANTS
UNIFORM(1) uvec2 block_dims; UNIFORM(1) uvec2 block_dims;

View File

@ -82,8 +82,8 @@ void UtilShaders::ASTCDecode(Image& image, const ImageBufferMap& map,
glFlush(); glFlush();
for (const SwizzleParameters& swizzle : swizzles) { for (const SwizzleParameters& swizzle : swizzles) {
const size_t input_offset = swizzle.buffer_offset + map.offset; const size_t input_offset = swizzle.buffer_offset + map.offset;
const u32 num_dispatches_x = Common::DivCeil(swizzle.num_tiles.width, 32U); const u32 num_dispatches_x = Common::DivCeil(swizzle.num_tiles.width, 8U);
const u32 num_dispatches_y = Common::DivCeil(swizzle.num_tiles.height, 32U); const u32 num_dispatches_y = Common::DivCeil(swizzle.num_tiles.height, 8U);
const auto params = MakeBlockLinearSwizzle2DParams(swizzle, image.info); const auto params = MakeBlockLinearSwizzle2DParams(swizzle, image.info);
ASSERT(params.origin == (std::array<u32, 3>{0, 0, 0})); ASSERT(params.origin == (std::array<u32, 3>{0, 0, 0}));

View File

@ -358,8 +358,8 @@ void ASTCDecoderPass::Assemble(Image& image, const StagingBufferRef& map,
}); });
for (const VideoCommon::SwizzleParameters& swizzle : swizzles) { for (const VideoCommon::SwizzleParameters& swizzle : swizzles) {
const size_t input_offset = swizzle.buffer_offset + map.offset; const size_t input_offset = swizzle.buffer_offset + map.offset;
const u32 num_dispatches_x = Common::DivCeil(swizzle.num_tiles.width, 32U); const u32 num_dispatches_x = Common::DivCeil(swizzle.num_tiles.width, 8U);
const u32 num_dispatches_y = Common::DivCeil(swizzle.num_tiles.height, 32U); const u32 num_dispatches_y = Common::DivCeil(swizzle.num_tiles.height, 8U);
const u32 num_dispatches_z = image.info.resources.layers; const u32 num_dispatches_z = image.info.resources.layers;
update_descriptor_queue.Acquire(); update_descriptor_queue.Acquire();