diff --git a/Source/Core/VideoBackends/Vulkan/ShaderCompiler.cpp b/Source/Core/VideoBackends/Vulkan/ShaderCompiler.cpp
index abe6df2653..a9c1fa2c19 100644
--- a/Source/Core/VideoBackends/Vulkan/ShaderCompiler.cpp
+++ b/Source/Core/VideoBackends/Vulkan/ShaderCompiler.cpp
@@ -17,6 +17,7 @@
 #include "ShaderLang.h"
 #include "disassemble.h"
 
+#include "Common/CommonFuncs.h"
 #include "Common/FileUtil.h"
 #include "Common/Logging/Log.h"
 #include "Common/MsgHandler.h"
@@ -99,6 +100,21 @@ static const char COMPUTE_SHADER_HEADER[] = R"(
   #define frac fract
   #define lerp mix
 )";
+// GLSL prologue spliced in after the stage header when the device supports the subgroup
+// operations checked by VulkanContext::PopulateShaderSubgroupSupport(). Each directive has to
+// stay on its own line inside the raw string.
+static const char SUBGROUP_HELPER_HEADER[] = R"(
+  #extension GL_KHR_shader_subgroup_basic : enable
+  #extension GL_KHR_shader_subgroup_arithmetic : enable
+  #extension GL_KHR_shader_subgroup_ballot : enable
+
+  #define SUPPORTS_SUBGROUP_REDUCTION 1
+  #define CAN_USE_SUBGROUP_REDUCTION true
+  #define IS_HELPER_INVOCATION gl_HelperInvocation
+  #define IS_FIRST_ACTIVE_INVOCATION (gl_SubgroupInvocationID == subgroupBallotFindLSB(subgroupBallot(true)))
+  #define SUBGROUP_MIN(value) value = subgroupMin(value)
+  #define SUBGROUP_MAX(value) value = subgroupMax(value)
+)";
 
 bool CompileShaderToSPV(SPIRVCodeVector* out_code, EShLanguage stage, const char* stage_filename,
                         const char* source_code, size_t source_code_length, const char* header,
@@ -120,13 +136,21 @@ bool CompileShaderToSPV(SPIRVCodeVector* out_code, EShLanguage stage, const char
   int pass_source_code_length = static_cast<int>(source_code_length);
   if (header_length > 0)
   {
-    full_source_code.reserve(header_length + source_code_length);
+    constexpr size_t subgroup_helper_header_length = ArraySize(SUBGROUP_HELPER_HEADER) - 1;
+    full_source_code.reserve(header_length + subgroup_helper_header_length + source_code_length);
     full_source_code.append(header, header_length);
+    // The helper macros go between the stage header and the shader body.
+    if (g_vulkan_context->SupportsShaderSubgroupOperations())
+      full_source_code.append(SUBGROUP_HELPER_HEADER, subgroup_helper_header_length);
     full_source_code.append(source_code, source_code_length);
     pass_source_code = full_source_code.c_str();
     pass_source_code_length = static_cast<int>(full_source_code.length());
   }
 
+  // Sub-group operations require Vulkan 1.1 and SPIR-V 1.3.
+  if (g_vulkan_context->SupportsShaderSubgroupOperations())
+    shader->setEnvTarget(glslang::EShTargetSpv, glslang::EShTargetSpv_1_3);
+
   shader->setStringsWithLengths(&pass_source_code, &pass_source_code_length, 1);
 
   auto DumpBadShader = [&](const char* msg) {
diff --git a/Source/Core/VideoBackends/Vulkan/VulkanContext.cpp b/Source/Core/VideoBackends/Vulkan/VulkanContext.cpp
index 51c0cde329..52038e39dc 100644
--- a/Source/Core/VideoBackends/Vulkan/VulkanContext.cpp
+++ b/Source/Core/VideoBackends/Vulkan/VulkanContext.cpp
@@ -384,6 +384,7 @@ std::unique_ptr<VulkanContext> VulkanContext::Create(VkInstance instance, VkPhys
 
   // Initialize DriverDetails so that we can check for bugs to disable features if needed.
   context->InitDriverDetails();
+  context->PopulateShaderSubgroupSupport();
 
   // Enable debug reports if the "Host GPU" log category is enabled.
   if (enable_debug_reports)
@@ -864,4 +865,38 @@ void VulkanContext::InitDriverDetails()
                       static_cast<double>(m_device_properties.driverVersion),
                       DriverDetails::Family::UNKNOWN);
 }
+
+// Queries the physical device's subgroup properties and records whether shaders may use the
+// subgroup helper macros. Leaves the defaults (size 1, unsupported) in place when the device
+// cannot be queried.
+void VulkanContext::PopulateShaderSubgroupSupport()
+{
+  // vkGetPhysicalDeviceProperties2 is core in Vulkan 1.1. A non-null entry point only reflects
+  // the loader/instance, so also check the version reported by the physical device itself
+  // (m_device_properties is already relied upon by InitDriverDetails(), which runs first).
+  if (!vkGetPhysicalDeviceProperties2 || m_device_properties.apiVersion < VK_API_VERSION_1_1)
+    return;
+
+  VkPhysicalDeviceSubgroupProperties subgroup_properties = {};
+  subgroup_properties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SUBGROUP_PROPERTIES;
+
+  VkPhysicalDeviceProperties2 device_properties_2 = {};
+  device_properties_2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2;
+  device_properties_2.pNext = &subgroup_properties;
+
+  vkGetPhysicalDeviceProperties2(m_physical_device, &device_properties_2);
+
+  m_shader_subgroup_size = subgroup_properties.subgroupSize;
+
+  // We require basic ops (for gl_SubgroupInvocationID), ballot (for subgroupBallot,
+  // subgroupBallotFindLSB), and arithmetic (for subgroupMin/subgroupMax).
+  constexpr VkSubgroupFeatureFlags required_operations = VK_SUBGROUP_FEATURE_BASIC_BIT |
+                                                         VK_SUBGROUP_FEATURE_ARITHMETIC_BIT |
+                                                         VK_SUBGROUP_FEATURE_BALLOT_BIT;
+  const bool has_required_operations =
+      (subgroup_properties.supportedOperations & required_operations) == required_operations;
+  const bool usable_in_fragment_stage =
+      (subgroup_properties.supportedStages & VK_SHADER_STAGE_FRAGMENT_BIT) != 0;
+  m_supports_shader_subgroup_operations = has_required_operations && usable_in_fragment_stage;
+}
 }  // namespace Vulkan
diff --git a/Source/Core/VideoBackends/Vulkan/VulkanContext.h b/Source/Core/VideoBackends/Vulkan/VulkanContext.h
index 3f4492bc4a..dcd9584e50 100644
--- a/Source/Core/VideoBackends/Vulkan/VulkanContext.h
+++ b/Source/Core/VideoBackends/Vulkan/VulkanContext.h
@@ -80,6 +80,8 @@ public:
   {
     return m_device_features.occlusionQueryPrecise == VK_TRUE;
   }
+  u32 GetShaderSubgroupSize() const { return m_shader_subgroup_size; }
+  bool SupportsShaderSubgroupOperations() const { return m_supports_shader_subgroup_operations; }
 
   // Helpers for getting constants
   VkDeviceSize GetUniformBufferAlignment() const
@@ -112,6 +114,7 @@ private:
   bool SelectDeviceFeatures();
   bool CreateDevice(VkSurfaceKHR surface, bool enable_validation_layer);
   void InitDriverDetails();
+  void PopulateShaderSubgroupSupport();
 
   VkInstance m_instance = VK_NULL_HANDLE;
   VkPhysicalDevice m_physical_device = VK_NULL_HANDLE;
@@ -128,6 +131,10 @@ private:
   VkPhysicalDeviceFeatures m_device_features = {};
   VkPhysicalDeviceProperties m_device_properties = {};
   VkPhysicalDeviceMemoryProperties m_device_memory_properties = {};
+
+  // Set by PopulateShaderSubgroupSupport(); these defaults remain if the query is skipped.
+  u32 m_shader_subgroup_size = 1;
+  bool m_supports_shader_subgroup_operations = false;
 };
 
 extern std::unique_ptr<VulkanContext> g_vulkan_context;
diff --git a/Source/Core/VideoBackends/Vulkan/VulkanEntryPoints.inl b/Source/Core/VideoBackends/Vulkan/VulkanEntryPoints.inl
index c40483e10e..2a4c4bda24 100644
--- a/Source/Core/VideoBackends/Vulkan/VulkanEntryPoints.inl
+++ b/Source/Core/VideoBackends/Vulkan/VulkanEntryPoints.inl
@@ -59,5 +59,6 @@ VULKAN_INSTANCE_ENTRY_POINT(vkCreateMacOSSurfaceMVK, false)
 VULKAN_INSTANCE_ENTRY_POINT(vkCreateDebugReportCallbackEXT, false)
 VULKAN_INSTANCE_ENTRY_POINT(vkDestroyDebugReportCallbackEXT, false)
 VULKAN_INSTANCE_ENTRY_POINT(vkDebugReportMessageEXT, false)
+VULKAN_INSTANCE_ENTRY_POINT(vkGetPhysicalDeviceProperties2, false)
 
 #endif  // VULKAN_INSTANCE_ENTRY_POINT