glsl: add various comments for future ideas

For example, GL4 GPUs support special bit operations.
2015-07-18 11:22:08 +02:00 · 2015-07-18 11:22:08 +02:00 · 036cb229a3
parent 6c1c857024
commit 036cb229a3
3 changed files with 36 additions and 6 deletions
--- a/plugins/GSdx/res/glsl/convert.glsl
+++ b/plugins/GSdx/res/glsl/convert.glsl
@ -135,6 +135,7 @@ void ps_main1()
 	// shift Alpha: -7 + 15
    highp uvec4 i = uvec4(c * vec4(1/8.0f, 4.0f, 128.0f, 256.0f)); // Shift value

+	// bit field operation requires GL4 HW. Could be nice to merge it with step/mix below
    SV_Target1 = (i.r & uint(0x001f)) | (i.g & uint(0x03e0)) | (i.b & uint(0x7c00)) | (i.a & uint(0x8000));

 #else
@ -146,6 +147,7 @@ void ps_main1()

 	highp uvec4 i = uvec4(c * vec4(uint(0x001f), uint(0x03e0), uint(0x7c00), uint(0x8000)));

+	// bit field operation requires GL4 HW. Could be nice to merge it with step/mix below
    SV_Target1 = (i.x & uint(0x001f)) | (i.y & uint(0x03e0)) | (i.z & uint(0x7c00)) | (i.w & uint(0x8000));
 #endif

--- a/plugins/GSdx/res/glsl/tfx_fs.glsl
+++ b/plugins/GSdx/res/glsl/tfx_fs.glsl
@ -1,6 +1,7 @@
 //#version 420 // Keep it for text editor detection

-// note lerp => mix
+// Required for bit operations
+//#extension GL_ARB_gpu_shader5 : enable

 #define FMT_32 0
 #define FMT_24 1
@ -159,7 +160,8 @@ mat4 sample_4c(vec4 uv)
 {
 	mat4 c;

-	// FIXME investigate texture gather (filtering impact?)
+    // Note: texture gather can't be used because of special clamping/wrapping
+    // Also it doesn't support lod
 	c[0] = sample_c(uv.xy);
 	c[1] = sample_c(uv.zy);
 	c[2] = sample_c(uv.xw);
@ -177,7 +179,8 @@ uvec4 sample_4_index(vec4 uv)
 	//
 	// Or we have an old RT (ie RGBA8) that contains index (4/8) in the alpha channel

-	// FIXME investigate texture gather (filtering impact?)
+    // Note: texture gather can't be used because of special clamping/wrapping
+    // Also it doesn't support lod
 	c.x = sample_c(uv.xy).a;
 	c.y = sample_c(uv.zy).a;
 	c.z = sample_c(uv.xw).a;
@ -266,10 +269,15 @@ vec4 sample_color(vec2 st, float q)
 	// PERF: see the impact of the exansion before/after the interpolation
 	for (int i = 0; i < 4; i++)
 	{
+        // PERF note: using a dot product reduces the number of instructions by 1,
+        // but I'm not sure it is equivalent, nor that it is actually faster.
+        //float sum = dot(c[i].rgb, vec3(1.0f));
 #if ((PS_FMT & ~FMT_PAL) == FMT_24)
 		c[i].a = ( (PS_AEM == 0) || any(bvec3(c[i].rgb))  ) ? TA.x : 0.0f;
+		//c[i].a = ( (PS_AEM == 0) || (sum > 0.0f) ) ? TA.x : 0.0f;
 #elif ((PS_FMT & ~FMT_PAL) == FMT_16)
 		c[i].a = c[i].a >= 0.5 ? TA.y : ( (PS_AEM == 0) || any(bvec3(c[i].rgb)) ) ? TA.x : 0.0f;
+		//c[i].a = c[i].a >= 0.5 ? TA.y : ( (PS_AEM == 0) || (sum > 0.0f) ) ? TA.x : 0.0f;
 #endif
 	}

@ -540,6 +548,11 @@ void ps_main()
 	// Note: GLSL 4.50/GL_EXT_shader_integer_mix support a mix instruction to select a component\n"
 	// However Nvidia emulate it with an if (at least on kepler arch) ...\n"
 #if PS_READ_BA
+	// bit field operation requires GL4 HW. Could be nice to merge it with step/mix below
+	// uint my_ta = (bool(bitfieldExtract(denorm_c.a, 7, 1))) ? denorm_TA.y : denorm_TA.x;
+	// denorm_c.a = bitfieldInsert(denorm_c.a, bitfieldExtract(my_ta, 7, 1), 7, 1);
+	// c.ga = vec2(float(denorm_c.a)/ 255.0f);
+
 	if (bool(denorm_c.a & 0x80u))
 		c.ga = vec2(float((denorm_c.a & 0x7Fu) | (denorm_TA.y & 0x80u)) / 255.0f);
 	else
--- a/plugins/GSdx/res/glsl_source.h
+++ b/plugins/GSdx/res/glsl_source.h
@ -160,6 +160,7 @@ static const char* convert_glsl =
 	"	// shift Alpha: -7 + 15\n"
 	"    highp uvec4 i = uvec4(c * vec4(1/8.0f, 4.0f, 128.0f, 256.0f)); // Shift value\n"
 	"\n"
+	"	// bit field operation requires GL4 HW. Could be nice to merge it with step/mix below\n"
 	"    SV_Target1 = (i.r & uint(0x001f)) | (i.g & uint(0x03e0)) | (i.b & uint(0x7c00)) | (i.a & uint(0x8000));\n"
 	"\n"
 	"#else\n"
@ -171,6 +172,7 @@ static const char* convert_glsl =
 	"\n"
 	"	highp uvec4 i = uvec4(c * vec4(uint(0x001f), uint(0x03e0), uint(0x7c00), uint(0x8000)));\n"
 	"\n"
+	"	// bit field operation requires GL4 HW. Could be nice to merge it with step/mix below\n"
 	"    SV_Target1 = (i.x & uint(0x001f)) | (i.y & uint(0x03e0)) | (i.z & uint(0x7c00)) | (i.w & uint(0x8000));\n"
 	"#endif\n"
 	"\n"
@ -861,7 +863,8 @@ static const char* tfx_vgs_glsl =
 static const char* tfx_fs_all_glsl =
 	"//#version 420 // Keep it for text editor detection\n"
 	"\n"
-	"// note lerp => mix\n"
+	"// Require for bit operation\n"
+	"//#extension GL_ARB_gpu_shader5 : enable\n"
 	"\n"
 	"#define FMT_32 0\n"
 	"#define FMT_24 1\n"
@ -1020,7 +1023,8 @@ static const char* tfx_fs_all_glsl =
 	"{\n"
 	"	mat4 c;\n"
 	"\n"
-	"	// FIXME investigate texture gather (filtering impact?)\n"
+	"    // Note: texture gather can't be used because of special clamping/wrapping\n"
+	"    // Also it doesn't support lod\n"
 	"	c[0] = sample_c(uv.xy);\n"
 	"	c[1] = sample_c(uv.zy);\n"
 	"	c[2] = sample_c(uv.xw);\n"
@ -1038,7 +1042,8 @@ static const char* tfx_fs_all_glsl =
 	"	//\n"
 	"	// Or we have an old RT (ie RGBA8) that contains index (4/8) in the alpha channel\n"
 	"\n"
-	"	// FIXME investigate texture gather (filtering impact?)\n"
+	"    // Note: texture gather can't be used because of special clamping/wrapping\n"
+	"    // Also it doesn't support lod\n"
 	"	c.x = sample_c(uv.xy).a;\n"
 	"	c.y = sample_c(uv.zy).a;\n"
 	"	c.z = sample_c(uv.xw).a;\n"
@ -1127,10 +1132,15 @@ static const char* tfx_fs_all_glsl =
 	"	// PERF: see the impact of the exansion before/after the interpolation\n"
 	"	for (int i = 0; i < 4; i++)\n"
 	"	{\n"
+	"        // PERF note: using dot produce reduces by 1 the number of instruction\n"
+	"        // but I'm not it is equivalent neither faster.\n"
+	"        //float sum = dot(c[i].rgb, vec3(1.0f));\n"
 	"#if ((PS_FMT & ~FMT_PAL) == FMT_24)\n"
 	"		c[i].a = ( (PS_AEM == 0) || any(bvec3(c[i].rgb))  ) ? TA.x : 0.0f;\n"
+	"		//c[i].a = ( (PS_AEM == 0) || (sum > 0.0f) ) ? TA.x : 0.0f;\n"
 	"#elif ((PS_FMT & ~FMT_PAL) == FMT_16)\n"
 	"		c[i].a = c[i].a >= 0.5 ? TA.y : ( (PS_AEM == 0) || any(bvec3(c[i].rgb)) ) ? TA.x : 0.0f;\n"
+	"		//c[i].a = c[i].a >= 0.5 ? TA.y : ( (PS_AEM == 0) || (sum > 0.0f) ) ? TA.x : 0.0f;\n"
 	"#endif\n"
 	"	}\n"
 	"\n"
@ -1401,6 +1411,11 @@ static const char* tfx_fs_all_glsl =
 	"	// Note: GLSL 4.50/GL_EXT_shader_integer_mix support a mix instruction to select a component\\n\"\n"
 	"	// However Nvidia emulate it with an if (at least on kepler arch) ...\\n\"\n"
 	"#if PS_READ_BA\n"
+	"	// bit field operation requires GL4 HW. Could be nice to merge it with step/mix below\n"
+	"	// uint my_ta = (bool(bitfieldExtract(denorm_c.a, 7, 1))) ? denorm_TA.y : denorm_TA.x;\n"
+	"	// denorm_c.a = bitfieldInsert(denorm_c.a, bitfieldExtract(my_ta, 7, 1), 7, 1);\n"
+	"	// c.ga = vec2(float(denorm_c.a)/ 255.0f);\n"
+	"\n"
 	"	if (bool(denorm_c.a & 0x80u))\n"
 	"		c.ga = vec2(float((denorm_c.a & 0x7Fu) | (denorm_TA.y & 0x80u)) / 255.0f);\n"
 	"	else\n"