forked from ShuriZma/suyu
astc_decoder: Reduce workgroup size
This reduces the amount of over dispatching when there are odd dimensions (i.e. ASTC 8x5), which rarely evenly divide into 32x32.
This commit is contained in:
parent
5ab8053511
commit
c439fc9be9
|
@ -22,7 +22,7 @@
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
layout(local_size_x = 32, local_size_y = 32, local_size_z = 1) in;
|
layout(local_size_x = 8, local_size_y = 8, local_size_z = 1) in;
|
||||||
|
|
||||||
BEGIN_PUSH_CONSTANTS
|
BEGIN_PUSH_CONSTANTS
|
||||||
UNIFORM(1) uvec2 block_dims;
|
UNIFORM(1) uvec2 block_dims;
|
||||||
|
|
|
@ -82,8 +82,8 @@ void UtilShaders::ASTCDecode(Image& image, const ImageBufferMap& map,
|
||||||
glFlush();
|
glFlush();
|
||||||
for (const SwizzleParameters& swizzle : swizzles) {
|
for (const SwizzleParameters& swizzle : swizzles) {
|
||||||
const size_t input_offset = swizzle.buffer_offset + map.offset;
|
const size_t input_offset = swizzle.buffer_offset + map.offset;
|
||||||
const u32 num_dispatches_x = Common::DivCeil(swizzle.num_tiles.width, 32U);
|
const u32 num_dispatches_x = Common::DivCeil(swizzle.num_tiles.width, 8U);
|
||||||
const u32 num_dispatches_y = Common::DivCeil(swizzle.num_tiles.height, 32U);
|
const u32 num_dispatches_y = Common::DivCeil(swizzle.num_tiles.height, 8U);
|
||||||
|
|
||||||
const auto params = MakeBlockLinearSwizzle2DParams(swizzle, image.info);
|
const auto params = MakeBlockLinearSwizzle2DParams(swizzle, image.info);
|
||||||
ASSERT(params.origin == (std::array<u32, 3>{0, 0, 0}));
|
ASSERT(params.origin == (std::array<u32, 3>{0, 0, 0}));
|
||||||
|
|
|
@ -358,8 +358,8 @@ void ASTCDecoderPass::Assemble(Image& image, const StagingBufferRef& map,
|
||||||
});
|
});
|
||||||
for (const VideoCommon::SwizzleParameters& swizzle : swizzles) {
|
for (const VideoCommon::SwizzleParameters& swizzle : swizzles) {
|
||||||
const size_t input_offset = swizzle.buffer_offset + map.offset;
|
const size_t input_offset = swizzle.buffer_offset + map.offset;
|
||||||
const u32 num_dispatches_x = Common::DivCeil(swizzle.num_tiles.width, 32U);
|
const u32 num_dispatches_x = Common::DivCeil(swizzle.num_tiles.width, 8U);
|
||||||
const u32 num_dispatches_y = Common::DivCeil(swizzle.num_tiles.height, 32U);
|
const u32 num_dispatches_y = Common::DivCeil(swizzle.num_tiles.height, 8U);
|
||||||
const u32 num_dispatches_z = image.info.resources.layers;
|
const u32 num_dispatches_z = image.info.resources.layers;
|
||||||
|
|
||||||
update_descriptor_queue.Acquire();
|
update_descriptor_queue.Acquire();
|
||||||
|
|
Loading…
Reference in New Issue