mirror of https://github.com/PCSX2/pcsx2.git
glsl: add various comments for future ideas
For example, GL4 GPUs support special bit operations
This commit is contained in:
parent
6c1c857024
commit
036cb229a3
|
@ -135,6 +135,7 @@ void ps_main1()
|
||||||
// shift Alpha: -7 + 15
|
// shift Alpha: -7 + 15
|
||||||
highp uvec4 i = uvec4(c * vec4(1/8.0f, 4.0f, 128.0f, 256.0f)); // Shift value
|
highp uvec4 i = uvec4(c * vec4(1/8.0f, 4.0f, 128.0f, 256.0f)); // Shift value
|
||||||
|
|
||||||
|
// bit field operation requires GL4 HW. Could be nice to merge it with step/mix below
|
||||||
SV_Target1 = (i.r & uint(0x001f)) | (i.g & uint(0x03e0)) | (i.b & uint(0x7c00)) | (i.a & uint(0x8000));
|
SV_Target1 = (i.r & uint(0x001f)) | (i.g & uint(0x03e0)) | (i.b & uint(0x7c00)) | (i.a & uint(0x8000));
|
||||||
|
|
||||||
#else
|
#else
|
||||||
|
@ -146,6 +147,7 @@ void ps_main1()
|
||||||
|
|
||||||
highp uvec4 i = uvec4(c * vec4(uint(0x001f), uint(0x03e0), uint(0x7c00), uint(0x8000)));
|
highp uvec4 i = uvec4(c * vec4(uint(0x001f), uint(0x03e0), uint(0x7c00), uint(0x8000)));
|
||||||
|
|
||||||
|
// bit field operation requires GL4 HW. Could be nice to merge it with step/mix below
|
||||||
SV_Target1 = (i.x & uint(0x001f)) | (i.y & uint(0x03e0)) | (i.z & uint(0x7c00)) | (i.w & uint(0x8000));
|
SV_Target1 = (i.x & uint(0x001f)) | (i.y & uint(0x03e0)) | (i.z & uint(0x7c00)) | (i.w & uint(0x8000));
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|
|
@ -1,6 +1,7 @@
|
||||||
//#version 420 // Keep it for text editor detection
|
//#version 420 // Keep it for text editor detection
|
||||||
|
|
||||||
// note lerp => mix
|
// Required for bit operations
|
||||||
|
//#extension GL_ARB_gpu_shader5 : enable
|
||||||
|
|
||||||
#define FMT_32 0
|
#define FMT_32 0
|
||||||
#define FMT_24 1
|
#define FMT_24 1
|
||||||
|
@ -159,7 +160,8 @@ mat4 sample_4c(vec4 uv)
|
||||||
{
|
{
|
||||||
mat4 c;
|
mat4 c;
|
||||||
|
|
||||||
// FIXME investigate texture gather (filtering impact?)
|
// Note: texture gather can't be used because of special clamping/wrapping
|
||||||
|
// Also it doesn't support lod
|
||||||
c[0] = sample_c(uv.xy);
|
c[0] = sample_c(uv.xy);
|
||||||
c[1] = sample_c(uv.zy);
|
c[1] = sample_c(uv.zy);
|
||||||
c[2] = sample_c(uv.xw);
|
c[2] = sample_c(uv.xw);
|
||||||
|
@ -177,7 +179,8 @@ uvec4 sample_4_index(vec4 uv)
|
||||||
//
|
//
|
||||||
// Or we have an old RT (ie RGBA8) that contains index (4/8) in the alpha channel
|
// Or we have an old RT (ie RGBA8) that contains index (4/8) in the alpha channel
|
||||||
|
|
||||||
// FIXME investigate texture gather (filtering impact?)
|
// Note: texture gather can't be used because of special clamping/wrapping
|
||||||
|
// Also it doesn't support lod
|
||||||
c.x = sample_c(uv.xy).a;
|
c.x = sample_c(uv.xy).a;
|
||||||
c.y = sample_c(uv.zy).a;
|
c.y = sample_c(uv.zy).a;
|
||||||
c.z = sample_c(uv.xw).a;
|
c.z = sample_c(uv.xw).a;
|
||||||
|
@ -266,10 +269,15 @@ vec4 sample_color(vec2 st, float q)
|
||||||
// PERF: see the impact of the expansion before/after the interpolation
|
// PERF: see the impact of the expansion before/after the interpolation
|
||||||
for (int i = 0; i < 4; i++)
|
for (int i = 0; i < 4; i++)
|
||||||
{
|
{
|
||||||
|
// PERF note: using a dot product reduces the number of instructions by 1
|
||||||
|
// but I'm not sure it is equivalent or faster.
|
||||||
|
//float sum = dot(c[i].rgb, vec3(1.0f));
|
||||||
#if ((PS_FMT & ~FMT_PAL) == FMT_24)
|
#if ((PS_FMT & ~FMT_PAL) == FMT_24)
|
||||||
c[i].a = ( (PS_AEM == 0) || any(bvec3(c[i].rgb)) ) ? TA.x : 0.0f;
|
c[i].a = ( (PS_AEM == 0) || any(bvec3(c[i].rgb)) ) ? TA.x : 0.0f;
|
||||||
|
//c[i].a = ( (PS_AEM == 0) || (sum > 0.0f) ) ? TA.x : 0.0f;
|
||||||
#elif ((PS_FMT & ~FMT_PAL) == FMT_16)
|
#elif ((PS_FMT & ~FMT_PAL) == FMT_16)
|
||||||
c[i].a = c[i].a >= 0.5 ? TA.y : ( (PS_AEM == 0) || any(bvec3(c[i].rgb)) ) ? TA.x : 0.0f;
|
c[i].a = c[i].a >= 0.5 ? TA.y : ( (PS_AEM == 0) || any(bvec3(c[i].rgb)) ) ? TA.x : 0.0f;
|
||||||
|
//c[i].a = c[i].a >= 0.5 ? TA.y : ( (PS_AEM == 0) || (sum > 0.0f) ) ? TA.x : 0.0f;
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -540,6 +548,11 @@ void ps_main()
|
||||||
// Note: GLSL 4.50/GL_EXT_shader_integer_mix support a mix instruction to select a component\n"
|
// Note: GLSL 4.50/GL_EXT_shader_integer_mix support a mix instruction to select a component\n"
|
||||||
// However Nvidia emulate it with an if (at least on kepler arch) ...\n"
|
// However Nvidia emulate it with an if (at least on kepler arch) ...\n"
|
||||||
#if PS_READ_BA
|
#if PS_READ_BA
|
||||||
|
// bit field operation requires GL4 HW. Could be nice to merge it with step/mix below
|
||||||
|
// uint my_ta = (bool(bitfieldExtract(denorm_c.a, 7, 1))) ? denorm_TA.y : denorm_TA.x;
|
||||||
|
// denorm_c.a = bitfieldInsert(denorm_c.a, bitfieldExtract(my_ta, 7, 1), 7, 1);
|
||||||
|
// c.ga = vec2(float(denorm_c.a)/ 255.0f);
|
||||||
|
|
||||||
if (bool(denorm_c.a & 0x80u))
|
if (bool(denorm_c.a & 0x80u))
|
||||||
c.ga = vec2(float((denorm_c.a & 0x7Fu) | (denorm_TA.y & 0x80u)) / 255.0f);
|
c.ga = vec2(float((denorm_c.a & 0x7Fu) | (denorm_TA.y & 0x80u)) / 255.0f);
|
||||||
else
|
else
|
||||||
|
|
|
@ -160,6 +160,7 @@ static const char* convert_glsl =
|
||||||
" // shift Alpha: -7 + 15\n"
|
" // shift Alpha: -7 + 15\n"
|
||||||
" highp uvec4 i = uvec4(c * vec4(1/8.0f, 4.0f, 128.0f, 256.0f)); // Shift value\n"
|
" highp uvec4 i = uvec4(c * vec4(1/8.0f, 4.0f, 128.0f, 256.0f)); // Shift value\n"
|
||||||
"\n"
|
"\n"
|
||||||
|
" // bit field operation requires GL4 HW. Could be nice to merge it with step/mix below\n"
|
||||||
" SV_Target1 = (i.r & uint(0x001f)) | (i.g & uint(0x03e0)) | (i.b & uint(0x7c00)) | (i.a & uint(0x8000));\n"
|
" SV_Target1 = (i.r & uint(0x001f)) | (i.g & uint(0x03e0)) | (i.b & uint(0x7c00)) | (i.a & uint(0x8000));\n"
|
||||||
"\n"
|
"\n"
|
||||||
"#else\n"
|
"#else\n"
|
||||||
|
@ -171,6 +172,7 @@ static const char* convert_glsl =
|
||||||
"\n"
|
"\n"
|
||||||
" highp uvec4 i = uvec4(c * vec4(uint(0x001f), uint(0x03e0), uint(0x7c00), uint(0x8000)));\n"
|
" highp uvec4 i = uvec4(c * vec4(uint(0x001f), uint(0x03e0), uint(0x7c00), uint(0x8000)));\n"
|
||||||
"\n"
|
"\n"
|
||||||
|
" // bit field operation requires GL4 HW. Could be nice to merge it with step/mix below\n"
|
||||||
" SV_Target1 = (i.x & uint(0x001f)) | (i.y & uint(0x03e0)) | (i.z & uint(0x7c00)) | (i.w & uint(0x8000));\n"
|
" SV_Target1 = (i.x & uint(0x001f)) | (i.y & uint(0x03e0)) | (i.z & uint(0x7c00)) | (i.w & uint(0x8000));\n"
|
||||||
"#endif\n"
|
"#endif\n"
|
||||||
"\n"
|
"\n"
|
||||||
|
@ -861,7 +863,8 @@ static const char* tfx_vgs_glsl =
|
||||||
static const char* tfx_fs_all_glsl =
|
static const char* tfx_fs_all_glsl =
|
||||||
"//#version 420 // Keep it for text editor detection\n"
|
"//#version 420 // Keep it for text editor detection\n"
|
||||||
"\n"
|
"\n"
|
||||||
"// note lerp => mix\n"
|
"// Require for bit operation\n"
|
||||||
|
"//#extension GL_ARB_gpu_shader5 : enable\n"
|
||||||
"\n"
|
"\n"
|
||||||
"#define FMT_32 0\n"
|
"#define FMT_32 0\n"
|
||||||
"#define FMT_24 1\n"
|
"#define FMT_24 1\n"
|
||||||
|
@ -1020,7 +1023,8 @@ static const char* tfx_fs_all_glsl =
|
||||||
"{\n"
|
"{\n"
|
||||||
" mat4 c;\n"
|
" mat4 c;\n"
|
||||||
"\n"
|
"\n"
|
||||||
" // FIXME investigate texture gather (filtering impact?)\n"
|
" // Note: texture gather can't be used because of special clamping/wrapping\n"
|
||||||
|
" // Also it doesn't support lod\n"
|
||||||
" c[0] = sample_c(uv.xy);\n"
|
" c[0] = sample_c(uv.xy);\n"
|
||||||
" c[1] = sample_c(uv.zy);\n"
|
" c[1] = sample_c(uv.zy);\n"
|
||||||
" c[2] = sample_c(uv.xw);\n"
|
" c[2] = sample_c(uv.xw);\n"
|
||||||
|
@ -1038,7 +1042,8 @@ static const char* tfx_fs_all_glsl =
|
||||||
" //\n"
|
" //\n"
|
||||||
" // Or we have an old RT (ie RGBA8) that contains index (4/8) in the alpha channel\n"
|
" // Or we have an old RT (ie RGBA8) that contains index (4/8) in the alpha channel\n"
|
||||||
"\n"
|
"\n"
|
||||||
" // FIXME investigate texture gather (filtering impact?)\n"
|
" // Note: texture gather can't be used because of special clamping/wrapping\n"
|
||||||
|
" // Also it doesn't support lod\n"
|
||||||
" c.x = sample_c(uv.xy).a;\n"
|
" c.x = sample_c(uv.xy).a;\n"
|
||||||
" c.y = sample_c(uv.zy).a;\n"
|
" c.y = sample_c(uv.zy).a;\n"
|
||||||
" c.z = sample_c(uv.xw).a;\n"
|
" c.z = sample_c(uv.xw).a;\n"
|
||||||
|
@ -1127,10 +1132,15 @@ static const char* tfx_fs_all_glsl =
|
||||||
" // PERF: see the impact of the exansion before/after the interpolation\n"
|
" // PERF: see the impact of the exansion before/after the interpolation\n"
|
||||||
" for (int i = 0; i < 4; i++)\n"
|
" for (int i = 0; i < 4; i++)\n"
|
||||||
" {\n"
|
" {\n"
|
||||||
|
" // PERF note: using dot produce reduces by 1 the number of instruction\n"
|
||||||
|
" // but I'm not it is equivalent neither faster.\n"
|
||||||
|
" //float sum = dot(c[i].rgb, vec3(1.0f));\n"
|
||||||
"#if ((PS_FMT & ~FMT_PAL) == FMT_24)\n"
|
"#if ((PS_FMT & ~FMT_PAL) == FMT_24)\n"
|
||||||
" c[i].a = ( (PS_AEM == 0) || any(bvec3(c[i].rgb)) ) ? TA.x : 0.0f;\n"
|
" c[i].a = ( (PS_AEM == 0) || any(bvec3(c[i].rgb)) ) ? TA.x : 0.0f;\n"
|
||||||
|
" //c[i].a = ( (PS_AEM == 0) || (sum > 0.0f) ) ? TA.x : 0.0f;\n"
|
||||||
"#elif ((PS_FMT & ~FMT_PAL) == FMT_16)\n"
|
"#elif ((PS_FMT & ~FMT_PAL) == FMT_16)\n"
|
||||||
" c[i].a = c[i].a >= 0.5 ? TA.y : ( (PS_AEM == 0) || any(bvec3(c[i].rgb)) ) ? TA.x : 0.0f;\n"
|
" c[i].a = c[i].a >= 0.5 ? TA.y : ( (PS_AEM == 0) || any(bvec3(c[i].rgb)) ) ? TA.x : 0.0f;\n"
|
||||||
|
" //c[i].a = c[i].a >= 0.5 ? TA.y : ( (PS_AEM == 0) || (sum > 0.0f) ) ? TA.x : 0.0f;\n"
|
||||||
"#endif\n"
|
"#endif\n"
|
||||||
" }\n"
|
" }\n"
|
||||||
"\n"
|
"\n"
|
||||||
|
@ -1401,6 +1411,11 @@ static const char* tfx_fs_all_glsl =
|
||||||
" // Note: GLSL 4.50/GL_EXT_shader_integer_mix support a mix instruction to select a component\\n\"\n"
|
" // Note: GLSL 4.50/GL_EXT_shader_integer_mix support a mix instruction to select a component\\n\"\n"
|
||||||
" // However Nvidia emulate it with an if (at least on kepler arch) ...\\n\"\n"
|
" // However Nvidia emulate it with an if (at least on kepler arch) ...\\n\"\n"
|
||||||
"#if PS_READ_BA\n"
|
"#if PS_READ_BA\n"
|
||||||
|
" // bit field operation requires GL4 HW. Could be nice to merge it with step/mix below\n"
|
||||||
|
" // uint my_ta = (bool(bitfieldExtract(denorm_c.a, 7, 1))) ? denorm_TA.y : denorm_TA.x;\n"
|
||||||
|
" // denorm_c.a = bitfieldInsert(denorm_c.a, bitfieldExtract(my_ta, 7, 1), 7, 1);\n"
|
||||||
|
" // c.ga = vec2(float(denorm_c.a)/ 255.0f);\n"
|
||||||
|
"\n"
|
||||||
" if (bool(denorm_c.a & 0x80u))\n"
|
" if (bool(denorm_c.a & 0x80u))\n"
|
||||||
" c.ga = vec2(float((denorm_c.a & 0x7Fu) | (denorm_TA.y & 0x80u)) / 255.0f);\n"
|
" c.ga = vec2(float((denorm_c.a & 0x7Fu) | (denorm_TA.y & 0x80u)) / 255.0f);\n"
|
||||||
" else\n"
|
" else\n"
|
||||||
|
|
Loading…
Reference in New Issue