Merge pull request #11523 from degasus/OGL_KHR_subgroup
VideoBackend/OGL: Prefer KHR_shader_subgroup over NV_shader_thread.
This commit is contained in:
commit
258151fe5a
|
@ -37,6 +37,7 @@
|
||||||
#include "Common/GL/GLExtensions/EXT_texture_filter_anisotropic.h"
|
#include "Common/GL/GLExtensions/EXT_texture_filter_anisotropic.h"
|
||||||
#include "Common/GL/GLExtensions/HP_occlusion_test.h"
|
#include "Common/GL/GLExtensions/HP_occlusion_test.h"
|
||||||
#include "Common/GL/GLExtensions/KHR_debug.h"
|
#include "Common/GL/GLExtensions/KHR_debug.h"
|
||||||
|
#include "Common/GL/GLExtensions/KHR_shader_subgroup.h"
|
||||||
#include "Common/GL/GLExtensions/NV_depth_buffer_float.h"
|
#include "Common/GL/GLExtensions/NV_depth_buffer_float.h"
|
||||||
#include "Common/GL/GLExtensions/NV_occlusion_query_samples.h"
|
#include "Common/GL/GLExtensions/NV_occlusion_query_samples.h"
|
||||||
#include "Common/GL/GLExtensions/NV_primitive_restart.h"
|
#include "Common/GL/GLExtensions/NV_primitive_restart.h"
|
||||||
|
|
|
@ -0,0 +1,19 @@
|
||||||
|
/*
|
||||||
|
** Copyright (c) 2013-2015 The Khronos Group Inc.
|
||||||
|
** SPDX-License-Identifier: MIT
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include "Common/GL/GLExtensions/gl_common.h"
|
||||||
|
|
||||||
|
#define GL_SUBGROUP_SIZE_KHR 0x9532
|
||||||
|
#define GL_SUBGROUP_SUPPORTED_STAGES_KHR 0x9533
|
||||||
|
#define GL_SUBGROUP_SUPPORTED_FEATURES_KHR 0x9534
|
||||||
|
#define GL_SUBGROUP_QUAD_ALL_STAGES_KHR 0x9535
|
||||||
|
#define GL_SUBGROUP_FEATURE_BASIC_BIT_KHR 0x00000001
|
||||||
|
#define GL_SUBGROUP_FEATURE_VOTE_BIT_KHR 0x00000002
|
||||||
|
#define GL_SUBGROUP_FEATURE_ARITHMETIC_BIT_KHR 0x00000004
|
||||||
|
#define GL_SUBGROUP_FEATURE_BALLOT_BIT_KHR 0x00000008
|
||||||
|
#define GL_SUBGROUP_FEATURE_SHUFFLE_BIT_KHR 0x00000010
|
||||||
|
#define GL_SUBGROUP_FEATURE_SHUFFLE_RELATIVE_BIT_KHR 0x00000020
|
||||||
|
#define GL_SUBGROUP_FEATURE_CLUSTERED_BIT_KHR 0x00000040
|
||||||
|
#define GL_SUBGROUP_FEATURE_QUAD_BIT_KHR 0x00000080
|
|
@ -386,7 +386,6 @@ static const std::string_view SUBGROUP_HELPER_HEADER = R"(
|
||||||
#extension GL_KHR_shader_subgroup_ballot : enable
|
#extension GL_KHR_shader_subgroup_ballot : enable
|
||||||
|
|
||||||
#define SUPPORTS_SUBGROUP_REDUCTION 1
|
#define SUPPORTS_SUBGROUP_REDUCTION 1
|
||||||
#define CAN_USE_SUBGROUP_REDUCTION true
|
|
||||||
#define IS_HELPER_INVOCATION gl_HelperInvocation
|
#define IS_HELPER_INVOCATION gl_HelperInvocation
|
||||||
#define IS_FIRST_ACTIVE_INVOCATION (subgroupElect())
|
#define IS_FIRST_ACTIVE_INVOCATION (subgroupElect())
|
||||||
#define SUBGROUP_MIN(value) value = subgroupMin(value)
|
#define SUBGROUP_MIN(value) value = subgroupMin(value)
|
||||||
|
|
|
@ -489,7 +489,14 @@ bool PopulateConfig(GLContext* m_main_gl_context)
|
||||||
else if (GLExtensions::Version() >= 430)
|
else if (GLExtensions::Version() >= 430)
|
||||||
{
|
{
|
||||||
// TODO: We should really parse the GL_SHADING_LANGUAGE_VERSION token.
|
// TODO: We should really parse the GL_SHADING_LANGUAGE_VERSION token.
|
||||||
g_ogl_config.eSupportedGLSLVersion = Glsl430;
|
if (GLExtensions::Version() >= 450)
|
||||||
|
{
|
||||||
|
g_ogl_config.eSupportedGLSLVersion = Glsl450;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
g_ogl_config.eSupportedGLSLVersion = Glsl430;
|
||||||
|
}
|
||||||
g_ogl_config.bSupportsTextureStorage = true;
|
g_ogl_config.bSupportsTextureStorage = true;
|
||||||
g_ogl_config.bSupportsImageLoadStore = true;
|
g_ogl_config.bSupportsImageLoadStore = true;
|
||||||
g_Config.backend_info.bSupportsSSAA = true;
|
g_Config.backend_info.bSupportsSSAA = true;
|
||||||
|
@ -531,8 +538,23 @@ bool PopulateConfig(GLContext* m_main_gl_context)
|
||||||
if (g_ogl_config.max_samples < 1 || !g_ogl_config.bSupportsMSAA)
|
if (g_ogl_config.max_samples < 1 || !g_ogl_config.bSupportsMSAA)
|
||||||
g_ogl_config.max_samples = 1;
|
g_ogl_config.max_samples = 1;
|
||||||
|
|
||||||
g_ogl_config.bSupportsShaderThreadShuffleNV =
|
const bool bSupportsIsHelperInvocation = g_ogl_config.bIsES ?
|
||||||
GLExtensions::Supports("GL_NV_shader_thread_shuffle");
|
g_ogl_config.eSupportedGLSLVersion >= GlslEs320 :
|
||||||
|
g_ogl_config.eSupportedGLSLVersion >= Glsl450;
|
||||||
|
g_ogl_config.bSupportsKHRShaderSubgroup =
|
||||||
|
GLExtensions::Supports("GL_KHR_shader_subgroup") && bSupportsIsHelperInvocation;
|
||||||
|
if (g_ogl_config.bSupportsKHRShaderSubgroup)
|
||||||
|
{
|
||||||
|
// Check for the features: basic + arithmetic + ballot
|
||||||
|
GLint supported_features = 0;
|
||||||
|
glGetIntegerv(GL_SUBGROUP_SUPPORTED_FEATURES_KHR, &supported_features);
|
||||||
|
if (~supported_features &
|
||||||
|
(GL_SUBGROUP_FEATURE_BASIC_BIT_KHR | GL_SUBGROUP_FEATURE_ARITHMETIC_BIT_KHR |
|
||||||
|
GL_SUBGROUP_FEATURE_BALLOT_BIT_KHR))
|
||||||
|
{
|
||||||
|
g_ogl_config.bSupportsKHRShaderSubgroup = false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// We require texel buffers, image load store, and compute shaders to enable GPU texture decoding.
|
// We require texel buffers, image load store, and compute shaders to enable GPU texture decoding.
|
||||||
// If the driver doesn't expose the extensions, but supports GL4.3/GLES3.1, it will still be
|
// If the driver doesn't expose the extensions, but supports GL4.3/GLES3.1, it will still be
|
||||||
|
|
|
@ -15,8 +15,9 @@ enum GlslVersion
|
||||||
Glsl140,
|
Glsl140,
|
||||||
Glsl150,
|
Glsl150,
|
||||||
Glsl330,
|
Glsl330,
|
||||||
Glsl400, // and above
|
Glsl400, // and above
|
||||||
Glsl430,
|
Glsl430, // 430 - 440
|
||||||
|
Glsl450, // 450 and above
|
||||||
GlslEs300, // GLES 3.0
|
GlslEs300, // GLES 3.0
|
||||||
GlslEs310, // GLES 3.1
|
GlslEs310, // GLES 3.1
|
||||||
GlslEs320, // GLES 3.2
|
GlslEs320, // GLES 3.2
|
||||||
|
@ -61,7 +62,7 @@ struct VideoConfig
|
||||||
bool bSupportsBitfield;
|
bool bSupportsBitfield;
|
||||||
bool bSupportsTextureSubImage;
|
bool bSupportsTextureSubImage;
|
||||||
EsFbFetchType SupportedFramebufferFetch;
|
EsFbFetchType SupportedFramebufferFetch;
|
||||||
bool bSupportsShaderThreadShuffleNV;
|
bool bSupportsKHRShaderSubgroup; // basic + arithmetic + ballot
|
||||||
|
|
||||||
const char* gl_vendor;
|
const char* gl_vendor;
|
||||||
const char* gl_renderer;
|
const char* gl_renderer;
|
||||||
|
|
|
@ -78,6 +78,8 @@ static std::string GetGLSLVersionString()
|
||||||
return "#version 400";
|
return "#version 400";
|
||||||
case Glsl430:
|
case Glsl430:
|
||||||
return "#version 430";
|
return "#version 430";
|
||||||
|
case Glsl450:
|
||||||
|
return "#version 450";
|
||||||
default:
|
default:
|
||||||
// Shouldn't ever hit this
|
// Shouldn't ever hit this
|
||||||
return "#version ERROR";
|
return "#version ERROR";
|
||||||
|
@ -720,25 +722,18 @@ void ProgramShaderCache::CreateHeader()
|
||||||
}
|
}
|
||||||
|
|
||||||
std::string shader_shuffle_string;
|
std::string shader_shuffle_string;
|
||||||
if (g_ogl_config.bSupportsShaderThreadShuffleNV)
|
if (g_ogl_config.bSupportsKHRShaderSubgroup)
|
||||||
{
|
{
|
||||||
shader_shuffle_string = R"(
|
shader_shuffle_string = R"(
|
||||||
#extension GL_NV_shader_thread_group : enable
|
#extension GL_KHR_shader_subgroup_basic : enable
|
||||||
#extension GL_NV_shader_thread_shuffle : enable
|
#extension GL_KHR_shader_subgroup_arithmetic : enable
|
||||||
|
#extension GL_KHR_shader_subgroup_ballot : enable
|
||||||
|
|
||||||
#define SUPPORTS_SUBGROUP_REDUCTION 1
|
#define SUPPORTS_SUBGROUP_REDUCTION 1
|
||||||
|
#define IS_HELPER_INVOCATION gl_HelperInvocation
|
||||||
// The xor shuffle below produces incorrect results if all threads in a warp are not active.
|
#define IS_FIRST_ACTIVE_INVOCATION (subgroupElect())
|
||||||
#define CAN_USE_SUBGROUP_REDUCTION (ballotThreadNV(true) == 0xFFFFFFFFu)
|
#define SUBGROUP_MIN(value) value = subgroupMin(value)
|
||||||
|
#define SUBGROUP_MAX(value) value = subgroupMax(value)
|
||||||
#define IS_HELPER_INVOCATION gl_HelperThreadNV
|
|
||||||
#define IS_FIRST_ACTIVE_INVOCATION (gl_ThreadInWarpNV == findLSB(ballotThreadNV(!gl_HelperThreadNV)))
|
|
||||||
#define SUBGROUP_REDUCTION(func, value) value = func(value, shuffleXorNV(value, 16, 32)); \
|
|
||||||
value = func(value, shuffleXorNV(value, 8, 32)); \
|
|
||||||
value = func(value, shuffleXorNV(value, 4, 32)); \
|
|
||||||
value = func(value, shuffleXorNV(value, 2, 32)); \
|
|
||||||
value = func(value, shuffleXorNV(value, 1, 32));
|
|
||||||
#define SUBGROUP_MIN(value) SUBGROUP_REDUCTION(min, value)
|
|
||||||
#define SUBGROUP_MAX(value) SUBGROUP_REDUCTION(max, value)
|
|
||||||
)";
|
)";
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -81,9 +81,8 @@ static const char SUBGROUP_HELPER_HEADER[] = R"(
|
||||||
#extension GL_KHR_shader_subgroup_ballot : enable
|
#extension GL_KHR_shader_subgroup_ballot : enable
|
||||||
|
|
||||||
#define SUPPORTS_SUBGROUP_REDUCTION 1
|
#define SUPPORTS_SUBGROUP_REDUCTION 1
|
||||||
#define CAN_USE_SUBGROUP_REDUCTION true
|
|
||||||
#define IS_HELPER_INVOCATION gl_HelperInvocation
|
#define IS_HELPER_INVOCATION gl_HelperInvocation
|
||||||
#define IS_FIRST_ACTIVE_INVOCATION (gl_SubgroupInvocationID == subgroupBallotFindLSB(subgroupBallot(!gl_HelperInvocation)))
|
#define IS_FIRST_ACTIVE_INVOCATION (subgroupElect())
|
||||||
#define SUBGROUP_MIN(value) value = subgroupMin(value)
|
#define SUBGROUP_MIN(value) value = subgroupMin(value)
|
||||||
#define SUBGROUP_MAX(value) value = subgroupMax(value)
|
#define SUBGROUP_MAX(value) value = subgroupMax(value)
|
||||||
)";
|
)";
|
||||||
|
|
|
@ -457,15 +457,12 @@ void UpdateBoundingBox(float2 rawpos) {{
|
||||||
int2 pos_br = pos | 1; // round up to odd
|
int2 pos_br = pos | 1; // round up to odd
|
||||||
|
|
||||||
#ifdef SUPPORTS_SUBGROUP_REDUCTION
|
#ifdef SUPPORTS_SUBGROUP_REDUCTION
|
||||||
if (CAN_USE_SUBGROUP_REDUCTION) {{
|
if (!IS_HELPER_INVOCATION)
|
||||||
int2 min_pos = IS_HELPER_INVOCATION ? int2(2147483647, 2147483647) : pos_tl;
|
{{
|
||||||
int2 max_pos = IS_HELPER_INVOCATION ? int2(-2147483648, -2147483648) : pos_br;
|
SUBGROUP_MIN(pos_tl);
|
||||||
SUBGROUP_MIN(min_pos);
|
SUBGROUP_MAX(pos_br);
|
||||||
SUBGROUP_MAX(max_pos);
|
|
||||||
if (IS_FIRST_ACTIVE_INVOCATION)
|
if (IS_FIRST_ACTIVE_INVOCATION)
|
||||||
UpdateBoundingBoxBuffer(min_pos, max_pos);
|
UpdateBoundingBoxBuffer(pos_tl, pos_br);
|
||||||
}} else {{
|
|
||||||
UpdateBoundingBoxBuffer(pos_tl, pos_br);
|
|
||||||
}}
|
}}
|
||||||
#else
|
#else
|
||||||
UpdateBoundingBoxBuffer(pos_tl, pos_br);
|
UpdateBoundingBoxBuffer(pos_tl, pos_br);
|
||||||
|
|
Loading…
Reference in New Issue