glsl: disable computing of extra alpha coeff in SW blending

Hmm, I'm curious about the impact of enabling this code only when it is actually used.
2015-07-17 17:53:00 +02:00 · 2015-07-17 17:53:00 +02:00 · e3751f6cd9
parent 784822a5c2
commit e3751f6cd9
2 changed files with 36 additions and 0 deletions
--- a/plugins/GSdx/res/glsl/tfx_fs.glsl
+++ b/plugins/GSdx/res/glsl/tfx_fs.glsl
@ -35,7 +35,9 @@ in SHADER

 // Same buffer but 2 colors for dual source blending
 layout(location = 0, index = 0) out vec4 SV_Target0;
+#if !SW_BLEND
 layout(location = 0, index = 1) out vec4 SV_Target1;
+#endif

 #ifdef ENABLE_BINDLESS_TEX
 layout(bindless_sampler, location = 0) uniform sampler2D TextureSampler;
@ -218,6 +220,7 @@ mat4 sample_4p(uvec4 u)

 vec4 sample_color(vec2 st, float q)
 {
+	//FIXME: maybe we can set gl_Position.w = q in VS
 #if (PS_FST == 0)
 	st /= q;
 #endif
@ -287,6 +290,7 @@ vec4 sample_color(vec2 st, float q)
 	return t;
 }

+// FIXME Precompute the factor 255/128 in VS
 #ifndef SUBROUTINE_GL40
 vec4 tfx(vec4 t, vec4 c)
 {
@ -552,17 +556,29 @@ void ps_main()
 	c.rb = c.rr;
 #endif

+	// FIXME precompute my_TA & 0x80
+
 	// Write GA part. Mask will take care of the correct destination
+	// Note: GLSL 4.50/GL_EXT_shader_integer_mix supports a mix instruction to select a component.
+	// However, Nvidia emulates it with an if (at least on the Kepler arch) ...
 #if PS_READ_BA
 	if (bool(denorm_c.a & 0x80u))
 		c.ga = vec2(float((denorm_c.a & 0x7Fu) | (denorm_TA.y & 0x80u)) / 255.0f);
 	else
 		c.ga = vec2(float((denorm_c.a & 0x7Fu) | (denorm_TA.x & 0x80u)) / 255.0f);
+
 #else
 	if (bool(denorm_c.g & 0x80u))
 		c.ga = vec2(float((denorm_c.g & 0x7Fu) | (denorm_TA.y & 0x80u)) / 255.0f);
 	else
 		c.ga = vec2(float((denorm_c.g & 0x7Fu) | (denorm_TA.x & 0x80u)) / 255.0f);
+
+	// Nice idea but step/mix requires 4 instructions
+	// set / trunc / I2F / Mad
+	//
+	// float sel = step(128.0f/255.0f, c.g);
+	// vec2 c_shuffle = vec2((denorm_c.gg & 0x7Fu) | (denorm_TA & 0x80u)) / 255.0f;
+	// c.ga = mix(c_shuffle.xx, c_shuffle.yy, sel);
 #endif

 #endif
@ -601,7 +617,9 @@ void ps_main()
 	ps_fbmask(c);

 	SV_Target0 = c;
+#if !SW_BLEND
 	SV_Target1 = vec4(alpha_blend);
+#endif
 }

 #endif
--- a/plugins/GSdx/res/glsl_source.h
+++ b/plugins/GSdx/res/glsl_source.h
@ -928,7 +928,9 @@ static const char* tfx_fs_all_glsl =
 	"\n"
 	"// Same buffer but 2 colors for dual source blending\n"
 	"layout(location = 0, index = 0) out vec4 SV_Target0;\n"
+	"#if !SW_BLEND\n"
 	"layout(location = 0, index = 1) out vec4 SV_Target1;\n"
+	"#endif\n"
 	"\n"
 	"#ifdef ENABLE_BINDLESS_TEX\n"
 	"layout(bindless_sampler, location = 0) uniform sampler2D TextureSampler;\n"
@ -1111,6 +1113,7 @@ static const char* tfx_fs_all_glsl =
 	"\n"
 	"vec4 sample_color(vec2 st, float q)\n"
 	"{\n"
+	"	//FIXME: maybe we can set gl_Position.w = q in VS\n"
 	"#if (PS_FST == 0)\n"
 	"	st /= q;\n"
 	"#endif\n"
@ -1180,6 +1183,7 @@ static const char* tfx_fs_all_glsl =
 	"	return t;\n"
 	"}\n"
 	"\n"
+	"// FIXME Precompute the factor 255/128 in VS\n"
 	"#ifndef SUBROUTINE_GL40\n"
 	"vec4 tfx(vec4 t, vec4 c)\n"
 	"{\n"
@ -1445,17 +1449,29 @@ static const char* tfx_fs_all_glsl =
 	"	c.rb = c.rr;\n"
 	"#endif\n"
 	"\n"
+	"	// FIXME precompute my_TA & 0x80\n"
+	"\n"
 	"	// Write GA part. Mask will take care of the correct destination\n"
+	"	// Note: GLSL 4.50/GL_EXT_shader_integer_mix support a mix instruction to select a component\\n\"\n"
+	"	// However Nvidia emulate it with an if (at least on kepler arch) ...\\n\"\n"
 	"#if PS_READ_BA\n"
 	"	if (bool(denorm_c.a & 0x80u))\n"
 	"		c.ga = vec2(float((denorm_c.a & 0x7Fu) | (denorm_TA.y & 0x80u)) / 255.0f);\n"
 	"	else\n"
 	"		c.ga = vec2(float((denorm_c.a & 0x7Fu) | (denorm_TA.x & 0x80u)) / 255.0f);\n"
+	"\n"
 	"#else\n"
 	"	if (bool(denorm_c.g & 0x80u))\n"
 	"		c.ga = vec2(float((denorm_c.g & 0x7Fu) | (denorm_TA.y & 0x80u)) / 255.0f);\n"
 	"	else\n"
 	"		c.ga = vec2(float((denorm_c.g & 0x7Fu) | (denorm_TA.x & 0x80u)) / 255.0f);\n"
+	"\n"
+	"	// Nice idea but step/mix requires 4 instructions\n"
+	"	// set / trunc / I2F / Mad\n"
+	"	//\n"
+	"	// float sel = step(128.0f/255.0f, c.g);\n"
+	"	// vec2 c_shuffle = vec2((denorm_c.gg & 0x7Fu) | (denorm_TA & 0x80u)) / 255.0f;\n"
+	"	// c.ga = mix(c_shuffle.xx, c_shuffle.yy, sel);\n"
 	"#endif\n"
 	"\n"
 	"#endif\n"
@ -1494,7 +1510,9 @@ static const char* tfx_fs_all_glsl =
 	"	ps_fbmask(c);\n"
 	"\n"
 	"	SV_Target0 = c;\n"
+	"#if !SW_BLEND\n"
 	"	SV_Target1 = vec4(alpha_blend);\n"
+	"#endif\n"
 	"}\n"
 	"\n"
 	"#endif\n"