From 86da28257026e8f23b1fdcbda5630b8980105313 Mon Sep 17 00:00:00 2001 From: Stenzek Date: Fri, 22 Mar 2019 20:39:11 +1000 Subject: [PATCH] OGL: Support subgroup reduction operations via GL_NV_shader_thread_shuffle --- .../VideoBackends/OGL/ProgramShaderCache.cpp | 29 +++++++++++++++++-- Source/Core/VideoBackends/OGL/Render.cpp | 3 ++ Source/Core/VideoBackends/OGL/Render.h | 1 + 3 files changed, 31 insertions(+), 2 deletions(-) diff --git a/Source/Core/VideoBackends/OGL/ProgramShaderCache.cpp b/Source/Core/VideoBackends/OGL/ProgramShaderCache.cpp index e06491b986..2dc1ef318c 100644 --- a/Source/Core/VideoBackends/OGL/ProgramShaderCache.cpp +++ b/Source/Core/VideoBackends/OGL/ProgramShaderCache.cpp @@ -719,6 +719,29 @@ void ProgramShaderCache::CreateHeader() break; } + std::string shader_shuffle_string; + if (g_ogl_config.bSupportsShaderThreadShuffleNV) + { + shader_shuffle_string = R"( +#extension GL_NV_shader_thread_group : enable +#extension GL_NV_shader_thread_shuffle : enable +#define SUPPORTS_SUBGROUP_REDUCTION 1 + +// The xor shuffle below produces incorrect results if all threads in a warp are not active. 
+#define CAN_USE_SUBGROUP_REDUCTION (ballotThreadNV(true) == 0xFFFFFFFFu) + +#define IS_HELPER_INVOCATION gl_HelperThreadNV +#define IS_FIRST_ACTIVE_INVOCATION (gl_ThreadInWarpNV == findLSB(ballotThreadNV(!gl_HelperThreadNV))) +#define SUBGROUP_REDUCTION(func, value) value = func(value, shuffleXorNV(value, 16, 32)); \ + value = func(value, shuffleXorNV(value, 8, 32)); \ + value = func(value, shuffleXorNV(value, 4, 32)); \ + value = func(value, shuffleXorNV(value, 2, 32)); \ + value = func(value, shuffleXorNV(value, 1, 32)); +#define SUBGROUP_MIN(value) SUBGROUP_REDUCTION(min, value) +#define SUBGROUP_MAX(value) SUBGROUP_REDUCTION(max, value) +)"; + } + s_glsl_header = StringFromFormat( "%s\n" "%s\n" // ubo @@ -737,6 +760,7 @@ void ProgramShaderCache::CreateHeader() "%s\n" // ES dual source blend "%s\n" // shader image load store "%s\n" // shader framebuffer fetch + "%s\n" // shader thread shuffle // Precision defines for GLSL ES "%s\n" @@ -815,8 +839,9 @@ void ProgramShaderCache::CreateHeader() ((!is_glsles && v < Glsl430) || (is_glsles && v < GlslEs310)) ? "#extension GL_ARB_shader_image_load_store : enable" : "", - framebuffer_fetch_string.c_str(), is_glsles ? "precision highp float;" : "", - is_glsles ? "precision highp int;" : "", is_glsles ? "precision highp sampler2DArray;" : "", + framebuffer_fetch_string.c_str(), shader_shuffle_string.c_str(), + is_glsles ? "precision highp float;" : "", is_glsles ? "precision highp int;" : "", + is_glsles ? "precision highp sampler2DArray;" : "", (is_glsles && g_ActiveConfig.backend_info.bSupportsPaletteConversion) ? 
"precision highp usamplerBuffer;" : "", diff --git a/Source/Core/VideoBackends/OGL/Render.cpp b/Source/Core/VideoBackends/OGL/Render.cpp index fca1de7e7b..06e5135191 100644 --- a/Source/Core/VideoBackends/OGL/Render.cpp +++ b/Source/Core/VideoBackends/OGL/Render.cpp @@ -661,6 +661,9 @@ Renderer::Renderer(std::unique_ptr<GLContext> main_gl_context, float backbuffer_ if (g_ogl_config.max_samples < 1 || !g_ogl_config.bSupportsMSAA) g_ogl_config.max_samples = 1; + g_ogl_config.bSupportsShaderThreadShuffleNV = + GLExtensions::Supports("GL_NV_shader_thread_shuffle"); + // We require texel buffers, image load store, and compute shaders to enable GPU texture decoding. // If the driver doesn't expose the extensions, but supports GL4.3/GLES3.1, it will still be // enabled in the version check below. diff --git a/Source/Core/VideoBackends/OGL/Render.h b/Source/Core/VideoBackends/OGL/Render.h index 442a31d5c0..0fb6c4e93e 100644 --- a/Source/Core/VideoBackends/OGL/Render.h +++ b/Source/Core/VideoBackends/OGL/Render.h @@ -70,6 +70,7 @@ struct VideoConfig bool bSupportsBitfield; bool bSupportsTextureSubImage; EsFbFetchType SupportedFramebufferFetch; + bool bSupportsShaderThreadShuffleNV; const char* gl_vendor; const char* gl_renderer;