From 2b00447a43f54a17228a9c73b16183543369624f Mon Sep 17 00:00:00 2001 From: Gregory Hainaut Date: Sun, 5 Jun 2016 16:42:52 +0200 Subject: [PATCH] glsl: optimize the number of active constant buffers Increase the performance on the free driver (Nouveau). Currently the driver validates all UBOs when only one is updated, so it is clearly a bad idea to put all UBOs in a single common header. --- plugins/GSdx/res/fxaa.fx | 6 +++ plugins/GSdx/res/glsl/common_header.glsl | 32 ++++++--------- plugins/GSdx/res/glsl/interlace.glsl | 6 +++ plugins/GSdx/res/glsl/merge.glsl | 5 +++ plugins/GSdx/res/glsl/tfx_fs.glsl | 3 ++ plugins/GSdx/res/glsl_source.h | 52 +++++++++++++++--------- 6 files changed, 64 insertions(+), 40 deletions(-) diff --git a/plugins/GSdx/res/fxaa.fx b/plugins/GSdx/res/fxaa.fx index 9e1b4f1e30..74f3a59ee6 100644 --- a/plugins/GSdx/res/fxaa.fx +++ b/plugins/GSdx/res/fxaa.fx @@ -21,6 +21,12 @@ in SHADER layout(location = 0) out vec4 SV_Target0; +layout(std140, binding = 14) uniform cb14 +{ + vec2 _xyFrame; + vec4 _rcpFrame; +}; + #else #if (SHADER_MODEL >= 0x400) diff --git a/plugins/GSdx/res/glsl/common_header.glsl b/plugins/GSdx/res/glsl/common_header.glsl index e0e5d01eca..3f56e15181 100644 --- a/plugins/GSdx/res/glsl/common_header.glsl +++ b/plugins/GSdx/res/glsl/common_header.glsl @@ -41,29 +41,18 @@ out gl_PerVertex { ////////////////////////////////////////////////////////////////////// // Constant Buffer Definition ////////////////////////////////////////////////////////////////////// -layout(std140, binding = 10) uniform cb10 -{ - vec4 BGColor; -}; - -layout(std140, binding = 11) uniform cb11 -{ - vec2 ZrH; - float hH; -}; - -layout(std140, binding = 14) uniform cb14 -{ - vec2 _xyFrame; - vec4 _rcpFrame; -}; +// Performance note, some drivers (nouveau) will validate all Constant Buffers +// even if only one was updated. 
+#ifdef FRAGMENT_SHADER layout(std140, binding = 15) uniform cb15 { ivec4 ScalingFactor; ivec4 ChannelShuffle; }; +#endif +#if defined(VERTEX_SHADER) || defined(GEOMETRY_SHADER) layout(std140, binding = 20) uniform cb20 { vec2 VertexScale; @@ -72,7 +61,9 @@ layout(std140, binding = 20) uniform cb20 uint cb20_pad; vec2 PointSize; }; +#endif +#if defined(VERTEX_SHADER) || defined(FRAGMENT_SHADER) layout(std140, binding = 21) uniform cb21 { vec3 FogColor; @@ -95,11 +86,12 @@ layout(std140, binding = 21) uniform cb21 vec2 TextureScale; vec2 TC_OffsetHack; }; +#endif -layout(std140, binding = 22) uniform cb22 -{ - vec4 rt_size; -}; +//layout(std140, binding = 22) uniform cb22 +//{ +// vec4 rt_size; +//}; ////////////////////////////////////////////////////////////////////// // Default Sampler diff --git a/plugins/GSdx/res/glsl/interlace.glsl b/plugins/GSdx/res/glsl/interlace.glsl index d6fd4b2893..1a38667890 100644 --- a/plugins/GSdx/res/glsl/interlace.glsl +++ b/plugins/GSdx/res/glsl/interlace.glsl @@ -8,6 +8,12 @@ in SHADER #ifdef FRAGMENT_SHADER +layout(std140, binding = 11) uniform cb11 +{ + vec2 ZrH; + float hH; +}; + layout(location = 0) out vec4 SV_Target0; // TODO ensure that clip (discard) is < 0 and not <= 0 ??? diff --git a/plugins/GSdx/res/glsl/merge.glsl b/plugins/GSdx/res/glsl/merge.glsl index 5ca06101e8..0b058d6ccb 100644 --- a/plugins/GSdx/res/glsl/merge.glsl +++ b/plugins/GSdx/res/glsl/merge.glsl @@ -8,6 +8,11 @@ in SHADER #ifdef FRAGMENT_SHADER +layout(std140, binding = 10) uniform cb10 +{ + vec4 BGColor; +}; + layout(location = 0) out vec4 SV_Target0; void ps_main0() diff --git a/plugins/GSdx/res/glsl/tfx_fs.glsl b/plugins/GSdx/res/glsl/tfx_fs.glsl index 6a25b1498c..cd290bfe2c 100644 --- a/plugins/GSdx/res/glsl/tfx_fs.glsl +++ b/plugins/GSdx/res/glsl/tfx_fs.glsl @@ -44,6 +44,9 @@ layout(binding = 4) uniform sampler2D RawTextureSampler; #ifndef DISABLE_GL42_image #if PS_DATE > 0 +// Performance note: images mustn't be declared if they are unused. 
Otherwise it will +// require extra shader validation. + // FIXME how to declare memory access layout(r32i, binding = 2) uniform iimage2D img_prim_min; // WARNING: diff --git a/plugins/GSdx/res/glsl_source.h b/plugins/GSdx/res/glsl_source.h index 9500a89b47..c3beda937d 100644 --- a/plugins/GSdx/res/glsl_source.h +++ b/plugins/GSdx/res/glsl_source.h @@ -66,29 +66,18 @@ static const char* const common_header_glsl = "//////////////////////////////////////////////////////////////////////\n" "// Constant Buffer Definition\n" "//////////////////////////////////////////////////////////////////////\n" - "layout(std140, binding = 10) uniform cb10\n" - "{\n" - " vec4 BGColor;\n" - "};\n" - "\n" - "layout(std140, binding = 11) uniform cb11\n" - "{\n" - " vec2 ZrH;\n" - " float hH;\n" - "};\n" - "\n" - "layout(std140, binding = 14) uniform cb14\n" - "{\n" - " vec2 _xyFrame;\n" - " vec4 _rcpFrame;\n" - "};\n" + "// Performance note, some drivers (nouveau) will validate all Constant Buffers\n" + "// even if only one was updated.\n" "\n" + "#ifdef FRAGMENT_SHADER\n" "layout(std140, binding = 15) uniform cb15\n" "{\n" " ivec4 ScalingFactor;\n" " ivec4 ChannelShuffle;\n" "};\n" + "#endif\n" "\n" + "#if defined(VERTEX_SHADER) || defined(GEOMETRY_SHADER)\n" "layout(std140, binding = 20) uniform cb20\n" "{\n" " vec2 VertexScale;\n" @@ -97,7 +86,9 @@ static const char* const common_header_glsl = " uint cb20_pad;\n" " vec2 PointSize;\n" "};\n" + "#endif\n" "\n" + "#if defined(VERTEX_SHADER) || defined(FRAGMENT_SHADER)\n" "layout(std140, binding = 21) uniform cb21\n" "{\n" " vec3 FogColor;\n" @@ -120,11 +111,12 @@ static const char* const common_header_glsl = " vec2 TextureScale;\n" " vec2 TC_OffsetHack;\n" "};\n" + "#endif\n" "\n" - "layout(std140, binding = 22) uniform cb22\n" - "{\n" - " vec4 rt_size;\n" - "};\n" + "//layout(std140, binding = 22) uniform cb22\n" + "//{\n" + "// vec4 rt_size;\n" + "//};\n" "\n" "//////////////////////////////////////////////////////////////////////\n" 
"// Default Sampler\n" @@ -529,6 +521,12 @@ static const char* const interlace_glsl = "\n" "#ifdef FRAGMENT_SHADER\n" "\n" + "layout(std140, binding = 11) uniform cb11\n" + "{\n" + " vec2 ZrH;\n" + " float hH;\n" + "};\n" + "\n" "layout(location = 0) out vec4 SV_Target0;\n" "\n" "// TODO ensure that clip (discard) is < 0 and not <= 0 ???\n" @@ -582,6 +580,11 @@ static const char* const merge_glsl = "\n" "#ifdef FRAGMENT_SHADER\n" "\n" + "layout(std140, binding = 10) uniform cb10\n" + "{\n" + " vec4 BGColor;\n" + "};\n" + "\n" "layout(location = 0) out vec4 SV_Target0;\n" "\n" "void ps_main0()\n" @@ -884,6 +887,9 @@ static const char* const tfx_fs_all_glsl = "\n" "#ifndef DISABLE_GL42_image\n" "#if PS_DATE > 0\n" + "// Performance note: images mustn't be declared if they are unused. Otherwise it will\n" + "// require extra shader validation.\n" + "\n" "// FIXME how to declare memory access\n" "layout(r32i, binding = 2) uniform iimage2D img_prim_min;\n" "// WARNING:\n" @@ -1654,6 +1660,12 @@ static const char* const fxaa_fx = "\n" "layout(location = 0) out vec4 SV_Target0;\n" "\n" + "layout(std140, binding = 14) uniform cb14\n" + "{\n" + " vec2 _xyFrame;\n" + " vec4 _rcpFrame;\n" + "};\n" + "\n" "#else\n" "\n" "#if (SHADER_MODEL >= 0x400)\n"