OGL: Support subgroup reduction operations via GL_NV_shader_thread_shuffle
This commit is contained in:
parent
95c7b5c635
commit
86da282570
|
@ -719,6 +719,29 @@ void ProgramShaderCache::CreateHeader()
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
std::string shader_shuffle_string;
|
||||||
|
if (g_ogl_config.bSupportsShaderThreadShuffleNV)
|
||||||
|
{
|
||||||
|
shader_shuffle_string = R"(
|
||||||
|
#extension GL_NV_shader_thread_group : enable
|
||||||
|
#extension GL_NV_shader_thread_shuffle : enable
|
||||||
|
#define SUPPORTS_SUBGROUP_REDUCTION 1
|
||||||
|
|
||||||
|
// The xor shuffle below produces incorrect results if all threads in a warp are not active.
|
||||||
|
#define CAN_USE_SUBGROUP_REDUCTION (ballotThreadNV(true) == 0xFFFFFFFFu)
|
||||||
|
|
||||||
|
#define IS_HELPER_INVOCATION gl_HelperThreadNV
|
||||||
|
#define IS_FIRST_ACTIVE_INVOCATION (gl_ThreadInWarpNV == findLSB(ballotThreadNV(!gl_HelperThreadNV)))
|
||||||
|
#define SUBGROUP_REDUCTION(func, value) value = func(value, shuffleXorNV(value, 16, 32)); \
|
||||||
|
value = func(value, shuffleXorNV(value, 8, 32)); \
|
||||||
|
value = func(value, shuffleXorNV(value, 4, 32)); \
|
||||||
|
value = func(value, shuffleXorNV(value, 2, 32)); \
|
||||||
|
value = func(value, shuffleXorNV(value, 1, 32));
|
||||||
|
#define SUBGROUP_MIN(value) SUBGROUP_REDUCTION(min, value)
|
||||||
|
#define SUBGROUP_MAX(value) SUBGROUP_REDUCTION(max, value)
|
||||||
|
)";
|
||||||
|
}
|
||||||
|
|
||||||
s_glsl_header = StringFromFormat(
|
s_glsl_header = StringFromFormat(
|
||||||
"%s\n"
|
"%s\n"
|
||||||
"%s\n" // ubo
|
"%s\n" // ubo
|
||||||
|
@ -737,6 +760,7 @@ void ProgramShaderCache::CreateHeader()
|
||||||
"%s\n" // ES dual source blend
|
"%s\n" // ES dual source blend
|
||||||
"%s\n" // shader image load store
|
"%s\n" // shader image load store
|
||||||
"%s\n" // shader framebuffer fetch
|
"%s\n" // shader framebuffer fetch
|
||||||
|
"%s\n" // shader thread shuffle
|
||||||
|
|
||||||
// Precision defines for GLSL ES
|
// Precision defines for GLSL ES
|
||||||
"%s\n"
|
"%s\n"
|
||||||
|
@ -815,8 +839,9 @@ void ProgramShaderCache::CreateHeader()
|
||||||
((!is_glsles && v < Glsl430) || (is_glsles && v < GlslEs310)) ?
|
((!is_glsles && v < Glsl430) || (is_glsles && v < GlslEs310)) ?
|
||||||
"#extension GL_ARB_shader_image_load_store : enable" :
|
"#extension GL_ARB_shader_image_load_store : enable" :
|
||||||
"",
|
"",
|
||||||
framebuffer_fetch_string.c_str(), is_glsles ? "precision highp float;" : "",
|
framebuffer_fetch_string.c_str(), shader_shuffle_string.c_str(),
|
||||||
is_glsles ? "precision highp int;" : "", is_glsles ? "precision highp sampler2DArray;" : "",
|
is_glsles ? "precision highp float;" : "", is_glsles ? "precision highp int;" : "",
|
||||||
|
is_glsles ? "precision highp sampler2DArray;" : "",
|
||||||
(is_glsles && g_ActiveConfig.backend_info.bSupportsPaletteConversion) ?
|
(is_glsles && g_ActiveConfig.backend_info.bSupportsPaletteConversion) ?
|
||||||
"precision highp usamplerBuffer;" :
|
"precision highp usamplerBuffer;" :
|
||||||
"",
|
"",
|
||||||
|
|
|
@ -661,6 +661,9 @@ Renderer::Renderer(std::unique_ptr<GLContext> main_gl_context, float backbuffer_
|
||||||
if (g_ogl_config.max_samples < 1 || !g_ogl_config.bSupportsMSAA)
|
if (g_ogl_config.max_samples < 1 || !g_ogl_config.bSupportsMSAA)
|
||||||
g_ogl_config.max_samples = 1;
|
g_ogl_config.max_samples = 1;
|
||||||
|
|
||||||
|
g_ogl_config.bSupportsShaderThreadShuffleNV =
|
||||||
|
GLExtensions::Supports("GL_NV_shader_thread_shuffle");
|
||||||
|
|
||||||
// We require texel buffers, image load store, and compute shaders to enable GPU texture decoding.
|
// We require texel buffers, image load store, and compute shaders to enable GPU texture decoding.
|
||||||
// If the driver doesn't expose the extensions, but supports GL4.3/GLES3.1, it will still be
|
// If the driver doesn't expose the extensions, but supports GL4.3/GLES3.1, it will still be
|
||||||
// enabled in the version check below.
|
// enabled in the version check below.
|
||||||
|
|
|
@ -70,6 +70,7 @@ struct VideoConfig
|
||||||
bool bSupportsBitfield;
|
bool bSupportsBitfield;
|
||||||
bool bSupportsTextureSubImage;
|
bool bSupportsTextureSubImage;
|
||||||
EsFbFetchType SupportedFramebufferFetch;
|
EsFbFetchType SupportedFramebufferFetch;
|
||||||
|
bool bSupportsShaderThreadShuffleNV;
|
||||||
|
|
||||||
const char* gl_vendor;
|
const char* gl_vendor;
|
||||||
const char* gl_renderer;
|
const char* gl_renderer;
|
||||||
|
|
Loading…
Reference in New Issue