glsl: add various comments for future ideas

For example, GL4 GPUs support special bit-field operations.
This commit is contained in:
Gregory Hainaut 2015-07-18 11:22:08 +02:00
parent 6c1c857024
commit 036cb229a3
3 changed files with 36 additions and 6 deletions

View File

@ -135,6 +135,7 @@ void ps_main1()
// shift Alpha: -7 + 15
highp uvec4 i = uvec4(c * vec4(1/8.0f, 4.0f, 128.0f, 256.0f)); // Shift value
// bit field operation requires GL4 HW. Could be nice to merge it with step/mix below
SV_Target1 = (i.r & uint(0x001f)) | (i.g & uint(0x03e0)) | (i.b & uint(0x7c00)) | (i.a & uint(0x8000));
#else
@ -146,6 +147,7 @@ void ps_main1()
highp uvec4 i = uvec4(c * vec4(uint(0x001f), uint(0x03e0), uint(0x7c00), uint(0x8000)));
// bit field operation requires GL4 HW. Could be nice to merge it with step/mix below
SV_Target1 = (i.x & uint(0x001f)) | (i.y & uint(0x03e0)) | (i.z & uint(0x7c00)) | (i.w & uint(0x8000));
#endif

View File

@ -1,6 +1,7 @@
//#version 420 // Keep it for text editor detection
// note lerp => mix
// Required for bit operations
//#extension GL_ARB_gpu_shader5 : enable
#define FMT_32 0
#define FMT_24 1
@ -159,7 +160,8 @@ mat4 sample_4c(vec4 uv)
{
mat4 c;
// FIXME investigate texture gather (filtering impact?)
// Note: texture gather can't be used because of special clamping/wrapping
// Also it doesn't support lod
c[0] = sample_c(uv.xy);
c[1] = sample_c(uv.zy);
c[2] = sample_c(uv.xw);
@ -177,7 +179,8 @@ uvec4 sample_4_index(vec4 uv)
//
// Or we have an old RT (ie RGBA8) that contains index (4/8) in the alpha channel
// FIXME investigate texture gather (filtering impact?)
// Note: texture gather can't be used because of special clamping/wrapping
// Also it doesn't support lod
c.x = sample_c(uv.xy).a;
c.y = sample_c(uv.zy).a;
c.z = sample_c(uv.xw).a;
@ -266,10 +269,15 @@ vec4 sample_color(vec2 st, float q)
// PERF: see the impact of the expansion before/after the interpolation
for (int i = 0; i < 4; i++)
{
// PERF note: using a dot product reduces the number of instructions by 1,
// but I'm not sure it is equivalent, nor that it is faster.
//float sum = dot(c[i].rgb, vec3(1.0f));
#if ((PS_FMT & ~FMT_PAL) == FMT_24)
c[i].a = ( (PS_AEM == 0) || any(bvec3(c[i].rgb)) ) ? TA.x : 0.0f;
//c[i].a = ( (PS_AEM == 0) || (sum > 0.0f) ) ? TA.x : 0.0f;
#elif ((PS_FMT & ~FMT_PAL) == FMT_16)
c[i].a = c[i].a >= 0.5 ? TA.y : ( (PS_AEM == 0) || any(bvec3(c[i].rgb)) ) ? TA.x : 0.0f;
//c[i].a = c[i].a >= 0.5 ? TA.y : ( (PS_AEM == 0) || (sum > 0.0f) ) ? TA.x : 0.0f;
#endif
}
@ -540,6 +548,11 @@ void ps_main()
// Note: GLSL 4.50/GL_EXT_shader_integer_mix supports a mix instruction to select a component
// However Nvidia emulates it with an if (at least on the Kepler arch) ...
#if PS_READ_BA
// bit field operation requires GL4 HW. Could be nice to merge it with step/mix below
// uint my_ta = (bool(bitfieldExtract(denorm_c.a, 7, 1))) ? denorm_TA.y : denorm_TA.x;
// denorm_c.a = bitfieldInsert(denorm_c.a, bitfieldExtract(my_ta, 7, 1), 7, 1);
// c.ga = vec2(float(denorm_c.a)/ 255.0f);
if (bool(denorm_c.a & 0x80u))
c.ga = vec2(float((denorm_c.a & 0x7Fu) | (denorm_TA.y & 0x80u)) / 255.0f);
else

View File

@ -160,6 +160,7 @@ static const char* convert_glsl =
" // shift Alpha: -7 + 15\n"
" highp uvec4 i = uvec4(c * vec4(1/8.0f, 4.0f, 128.0f, 256.0f)); // Shift value\n"
"\n"
" // bit field operation requires GL4 HW. Could be nice to merge it with step/mix below\n"
" SV_Target1 = (i.r & uint(0x001f)) | (i.g & uint(0x03e0)) | (i.b & uint(0x7c00)) | (i.a & uint(0x8000));\n"
"\n"
"#else\n"
@ -171,6 +172,7 @@ static const char* convert_glsl =
"\n"
" highp uvec4 i = uvec4(c * vec4(uint(0x001f), uint(0x03e0), uint(0x7c00), uint(0x8000)));\n"
"\n"
" // bit field operation requires GL4 HW. Could be nice to merge it with step/mix below\n"
" SV_Target1 = (i.x & uint(0x001f)) | (i.y & uint(0x03e0)) | (i.z & uint(0x7c00)) | (i.w & uint(0x8000));\n"
"#endif\n"
"\n"
@ -861,7 +863,8 @@ static const char* tfx_vgs_glsl =
static const char* tfx_fs_all_glsl =
"//#version 420 // Keep it for text editor detection\n"
"\n"
"// note lerp => mix\n"
"// Require for bit operation\n"
"//#extension GL_ARB_gpu_shader5 : enable\n"
"\n"
"#define FMT_32 0\n"
"#define FMT_24 1\n"
@ -1020,7 +1023,8 @@ static const char* tfx_fs_all_glsl =
"{\n"
" mat4 c;\n"
"\n"
" // FIXME investigate texture gather (filtering impact?)\n"
" // Note: texture gather can't be used because of special clamping/wrapping\n"
" // Also it doesn't support lod\n"
" c[0] = sample_c(uv.xy);\n"
" c[1] = sample_c(uv.zy);\n"
" c[2] = sample_c(uv.xw);\n"
@ -1038,7 +1042,8 @@ static const char* tfx_fs_all_glsl =
" //\n"
" // Or we have an old RT (ie RGBA8) that contains index (4/8) in the alpha channel\n"
"\n"
" // FIXME investigate texture gather (filtering impact?)\n"
" // Note: texture gather can't be used because of special clamping/wrapping\n"
" // Also it doesn't support lod\n"
" c.x = sample_c(uv.xy).a;\n"
" c.y = sample_c(uv.zy).a;\n"
" c.z = sample_c(uv.xw).a;\n"
@ -1127,10 +1132,15 @@ static const char* tfx_fs_all_glsl =
" // PERF: see the impact of the exansion before/after the interpolation\n"
" for (int i = 0; i < 4; i++)\n"
" {\n"
" // PERF note: using dot produce reduces by 1 the number of instruction\n"
" // but I'm not it is equivalent neither faster.\n"
" //float sum = dot(c[i].rgb, vec3(1.0f));\n"
"#if ((PS_FMT & ~FMT_PAL) == FMT_24)\n"
" c[i].a = ( (PS_AEM == 0) || any(bvec3(c[i].rgb)) ) ? TA.x : 0.0f;\n"
" //c[i].a = ( (PS_AEM == 0) || (sum > 0.0f) ) ? TA.x : 0.0f;\n"
"#elif ((PS_FMT & ~FMT_PAL) == FMT_16)\n"
" c[i].a = c[i].a >= 0.5 ? TA.y : ( (PS_AEM == 0) || any(bvec3(c[i].rgb)) ) ? TA.x : 0.0f;\n"
" //c[i].a = c[i].a >= 0.5 ? TA.y : ( (PS_AEM == 0) || (sum > 0.0f) ) ? TA.x : 0.0f;\n"
"#endif\n"
" }\n"
"\n"
@ -1401,6 +1411,11 @@ static const char* tfx_fs_all_glsl =
" // Note: GLSL 4.50/GL_EXT_shader_integer_mix support a mix instruction to select a component\\n\"\n"
" // However Nvidia emulate it with an if (at least on kepler arch) ...\\n\"\n"
"#if PS_READ_BA\n"
" // bit field operation requires GL4 HW. Could be nice to merge it with step/mix below\n"
" // uint my_ta = (bool(bitfieldExtract(denorm_c.a, 7, 1))) ? denorm_TA.y : denorm_TA.x;\n"
" // denorm_c.a = bitfieldInsert(denorm_c.a, bitfieldExtract(my_ta, 7, 1), 7, 1);\n"
" // c.ga = vec2(float(denorm_c.a)/ 255.0f);\n"
"\n"
" if (bool(denorm_c.a & 0x80u))\n"
" c.ga = vec2(float((denorm_c.a & 0x7Fu) | (denorm_TA.y & 0x80u)) / 255.0f);\n"
" else\n"