diff --git a/src/video_core/renderer_opengl/gl_arb_decompiler.cpp b/src/video_core/renderer_opengl/gl_arb_decompiler.cpp index c06e838f7f..3b61c9e218 100644 --- a/src/video_core/renderer_opengl/gl_arb_decompiler.cpp +++ b/src/video_core/renderer_opengl/gl_arb_decompiler.cpp @@ -913,11 +913,19 @@ void ARBDecompiler::DeclareCompute() { const ComputeInfo& info = registry.GetComputeInfo(); AddLine("GROUP_SIZE {} {} {};", info.workgroup_size[0], info.workgroup_size[1], info.workgroup_size[2]); - if (info.shared_memory_size_in_words > 0) { - const u32 size_in_bytes = info.shared_memory_size_in_words * 4; - AddLine("SHARED_MEMORY {};", size_in_bytes); - AddLine("SHARED shared_mem[] = {{program.sharedmem}};"); + if (info.shared_memory_size_in_words == 0) { + return; } + const u32 limit = device.GetMaxComputeSharedMemorySize(); + u32 size_in_bytes = info.shared_memory_size_in_words * 4; + if (size_in_bytes > limit) { + LOG_ERROR(Render_OpenGL, "Shared memory size {} is clamped to host's limit {}", + size_in_bytes, limit); + size_in_bytes = limit; + } + + AddLine("SHARED_MEMORY {};", size_in_bytes); + AddLine("SHARED shared_mem[] = {{program.sharedmem}};"); } void ARBDecompiler::DeclareInputAttributes() { diff --git a/src/video_core/renderer_opengl/gl_device.cpp b/src/video_core/renderer_opengl/gl_device.cpp index 630acb73b2..e7d95149f0 100644 --- a/src/video_core/renderer_opengl/gl_device.cpp +++ b/src/video_core/renderer_opengl/gl_device.cpp @@ -212,6 +212,7 @@ Device::Device() shader_storage_alignment = GetInteger(GL_SHADER_STORAGE_BUFFER_OFFSET_ALIGNMENT); max_vertex_attributes = GetInteger(GL_MAX_VERTEX_ATTRIBS); max_varyings = GetInteger(GL_MAX_VARYING_VECTORS); + max_compute_shared_memory_size = GetInteger(GL_MAX_COMPUTE_SHARED_MEMORY_SIZE); has_warp_intrinsics = GLAD_GL_NV_gpu_shader5 && GLAD_GL_NV_shader_thread_group && GLAD_GL_NV_shader_thread_shuffle; has_shader_ballot = GLAD_GL_ARB_shader_ballot; @@ -250,6 +251,7 @@ Device::Device(std::nullptr_t) { shader_storage_alignment = 4; max_vertex_attributes = 16; max_varyings = 15; + max_compute_shared_memory_size = 0x10000; has_warp_intrinsics = true; has_shader_ballot = true; has_vertex_viewport_layer = true; diff --git a/src/video_core/renderer_opengl/gl_device.h b/src/video_core/renderer_opengl/gl_device.h index 94d38d7d14..8a4b6b9fcb 100644 --- a/src/video_core/renderer_opengl/gl_device.h +++ b/src/video_core/renderer_opengl/gl_device.h @@ -52,6 +52,10 @@ public: return max_varyings; } + u32 GetMaxComputeSharedMemorySize() const { + return max_compute_shared_memory_size; + } + bool HasWarpIntrinsics() const { return has_warp_intrinsics; } @@ -118,6 +122,7 @@ private: std::size_t shader_storage_alignment{}; u32 max_vertex_attributes{}; u32 max_varyings{}; + u32 max_compute_shared_memory_size{}; bool has_warp_intrinsics{}; bool has_shader_ballot{}; bool has_vertex_viewport_layer{}; diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp index 2c49aeaacc..6a9602ff80 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp @@ -602,8 +602,15 @@ private: return; } const auto& info = registry.GetComputeInfo(); - if (const u32 size = info.shared_memory_size_in_words; size > 0) { - code.AddLine("shared uint smem[{}];", size); + if (u32 size = info.shared_memory_size_in_words * 4; size > 0) { + const u32 limit = device.GetMaxComputeSharedMemorySize(); + if (size > limit) { + LOG_ERROR(Render_OpenGL, "Shared memory size {} is clamped to host's limit {}", + size, limit); + size = limit; + } + + code.AddLine("shared uint smem[{}];", size / 4); code.AddNewLine(); } code.AddLine("layout (local_size_x = {}, local_size_y = {}, local_size_z = {}) in;", diff --git a/src/video_core/renderer_vulkan/vk_device.h b/src/video_core/renderer_vulkan/vk_device.h index ae5c21baa1..529744f2d1 100644 --- a/src/video_core/renderer_vulkan/vk_device.h +++ b/src/video_core/renderer_vulkan/vk_device.h @@ -122,6 +122,11 @@ public: return properties.limits.maxPushConstantsSize; } + /// Returns the maximum size for shared memory. + u32 GetMaxComputeSharedMemorySize() const { + return properties.limits.maxComputeSharedMemorySize; + } + /// Returns true if ASTC is natively supported. bool IsOptimalAstcSupported() const { return is_optimal_astc_supported; diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp index 97429cc59f..694452fd83 100644 --- a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp +++ b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp @@ -685,13 +685,19 @@ private: } t_smem_uint = TypePointer(spv::StorageClass::Workgroup, t_uint); - const u32 smem_size = specialization.shared_memory_size; + u32 smem_size = specialization.shared_memory_size * 4; if (smem_size == 0) { // Avoid declaring an empty array. return; } - const auto element_count = static_cast(Common::AlignUp(smem_size, 4) / 4); - const Id type_array = TypeArray(t_uint, Constant(t_uint, element_count)); + const u32 limit = device.GetMaxComputeSharedMemorySize(); + if (smem_size > limit) { + LOG_ERROR(Render_Vulkan, "Shared memory size {} is clamped to host's limit {}", + smem_size, limit); + smem_size = limit; + } + + const Id type_array = TypeArray(t_uint, Constant(t_uint, smem_size / 4)); const Id type_pointer = TypePointer(spv::StorageClass::Workgroup, type_array); Name(type_pointer, "SharedMemory");