diff --git a/plugins/GSdx/res/glsl/convert.glsl b/plugins/GSdx/res/glsl/convert.glsl index f7027a0a2f..f3ef9887b6 100644 --- a/plugins/GSdx/res/glsl/convert.glsl +++ b/plugins/GSdx/res/glsl/convert.glsl @@ -135,6 +135,7 @@ void ps_main1() // shift Alpha: -7 + 15 highp uvec4 i = uvec4(c * vec4(1/8.0f, 4.0f, 128.0f, 256.0f)); // Shift value + // bit field operation requires GL4 HW. Could be nice to merge it with step/mix below SV_Target1 = (i.r & uint(0x001f)) | (i.g & uint(0x03e0)) | (i.b & uint(0x7c00)) | (i.a & uint(0x8000)); #else @@ -146,6 +147,7 @@ void ps_main1() highp uvec4 i = uvec4(c * vec4(uint(0x001f), uint(0x03e0), uint(0x7c00), uint(0x8000))); + // bit field operation requires GL4 HW. Could be nice to merge it with step/mix below SV_Target1 = (i.x & uint(0x001f)) | (i.y & uint(0x03e0)) | (i.z & uint(0x7c00)) | (i.w & uint(0x8000)); #endif diff --git a/plugins/GSdx/res/glsl/tfx_fs.glsl b/plugins/GSdx/res/glsl/tfx_fs.glsl index 14916a2d5e..27f5065d0c 100644 --- a/plugins/GSdx/res/glsl/tfx_fs.glsl +++ b/plugins/GSdx/res/glsl/tfx_fs.glsl @@ -1,6 +1,7 @@ //#version 420 // Keep it for text editor detection -// note lerp => mix +// Require for bit operation +//#extension GL_ARB_gpu_shader5 : enable #define FMT_32 0 #define FMT_24 1 @@ -159,7 +160,8 @@ mat4 sample_4c(vec4 uv) { mat4 c; - // FIXME investigate texture gather (filtering impact?) + // Note: texture gather can't be used because of special clamping/wrapping + // Also it doesn't support lod c[0] = sample_c(uv.xy); c[1] = sample_c(uv.zy); c[2] = sample_c(uv.xw); @@ -177,7 +179,8 @@ uvec4 sample_4_index(vec4 uv) // // Or we have an old RT (ie RGBA8) that contains index (4/8) in the alpha channel - // FIXME investigate texture gather (filtering impact?) + // Note: texture gather can't be used because of special clamping/wrapping + // Also it doesn't support lod c.x = sample_c(uv.xy).a; c.y = sample_c(uv.zy).a; c.z = sample_c(uv.xw).a; @@ -266,10 +269,15 @@ vec4 sample_color(vec2 st, float q) // PERF: see the impact of the exansion before/after the interpolation for (int i = 0; i < 4; i++) { + // PERF note: using dot produce reduces by 1 the number of instruction + // but I'm not it is equivalent neither faster. + //float sum = dot(c[i].rgb, vec3(1.0f)); #if ((PS_FMT & ~FMT_PAL) == FMT_24) c[i].a = ( (PS_AEM == 0) || any(bvec3(c[i].rgb)) ) ? TA.x : 0.0f; + //c[i].a = ( (PS_AEM == 0) || (sum > 0.0f) ) ? TA.x : 0.0f; #elif ((PS_FMT & ~FMT_PAL) == FMT_16) c[i].a = c[i].a >= 0.5 ? TA.y : ( (PS_AEM == 0) || any(bvec3(c[i].rgb)) ) ? TA.x : 0.0f; + //c[i].a = c[i].a >= 0.5 ? TA.y : ( (PS_AEM == 0) || (sum > 0.0f) ) ? TA.x : 0.0f; #endif } @@ -540,6 +548,11 @@ void ps_main() // Note: GLSL 4.50/GL_EXT_shader_integer_mix support a mix instruction to select a component\n" // However Nvidia emulate it with an if (at least on kepler arch) ...\n" #if PS_READ_BA + // bit field operation requires GL4 HW. Could be nice to merge it with step/mix below + // uint my_ta = (bool(bitfieldExtract(denorm_c.a, 7, 1))) ? denorm_TA.y : denorm_TA.x; + // denorm_c.a = bitfieldInsert(denorm_c.a, bitfieldExtract(my_ta, 7, 1), 7, 1); + // c.ga = vec2(float(denorm_c.a)/ 255.0f); + if (bool(denorm_c.a & 0x80u)) c.ga = vec2(float((denorm_c.a & 0x7Fu) | (denorm_TA.y & 0x80u)) / 255.0f); else diff --git a/plugins/GSdx/res/glsl_source.h b/plugins/GSdx/res/glsl_source.h index 06ec947f1d..b6b220dd09 100644 --- a/plugins/GSdx/res/glsl_source.h +++ b/plugins/GSdx/res/glsl_source.h @@ -160,6 +160,7 @@ static const char* convert_glsl = " // shift Alpha: -7 + 15\n" " highp uvec4 i = uvec4(c * vec4(1/8.0f, 4.0f, 128.0f, 256.0f)); // Shift value\n" "\n" + " // bit field operation requires GL4 HW. Could be nice to merge it with step/mix below\n" " SV_Target1 = (i.r & uint(0x001f)) | (i.g & uint(0x03e0)) | (i.b & uint(0x7c00)) | (i.a & uint(0x8000));\n" "\n" "#else\n" @@ -171,6 +172,7 @@ static const char* convert_glsl = "\n" " highp uvec4 i = uvec4(c * vec4(uint(0x001f), uint(0x03e0), uint(0x7c00), uint(0x8000)));\n" "\n" + " // bit field operation requires GL4 HW. Could be nice to merge it with step/mix below\n" " SV_Target1 = (i.x & uint(0x001f)) | (i.y & uint(0x03e0)) | (i.z & uint(0x7c00)) | (i.w & uint(0x8000));\n" "#endif\n" "\n" @@ -861,7 +863,8 @@ static const char* tfx_vgs_glsl = static const char* tfx_fs_all_glsl = "//#version 420 // Keep it for text editor detection\n" "\n" - "// note lerp => mix\n" + "// Require for bit operation\n" + "//#extension GL_ARB_gpu_shader5 : enable\n" "\n" "#define FMT_32 0\n" "#define FMT_24 1\n" @@ -1020,7 +1023,8 @@ static const char* tfx_fs_all_glsl = "{\n" " mat4 c;\n" "\n" - " // FIXME investigate texture gather (filtering impact?)\n" + " // Note: texture gather can't be used because of special clamping/wrapping\n" + " // Also it doesn't support lod\n" " c[0] = sample_c(uv.xy);\n" " c[1] = sample_c(uv.zy);\n" " c[2] = sample_c(uv.xw);\n" @@ -1038,7 +1042,8 @@ static const char* tfx_fs_all_glsl = " //\n" " // Or we have an old RT (ie RGBA8) that contains index (4/8) in the alpha channel\n" "\n" - " // FIXME investigate texture gather (filtering impact?)\n" + " // Note: texture gather can't be used because of special clamping/wrapping\n" + " // Also it doesn't support lod\n" " c.x = sample_c(uv.xy).a;\n" " c.y = sample_c(uv.zy).a;\n" " c.z = sample_c(uv.xw).a;\n" @@ -1127,10 +1132,15 @@ static const char* tfx_fs_all_glsl = " // PERF: see the impact of the exansion before/after the interpolation\n" " for (int i = 0; i < 4; i++)\n" " {\n" + " // PERF note: using dot produce reduces by 1 the number of instruction\n" + " // but I'm not it is equivalent neither faster.\n" + " //float sum = dot(c[i].rgb, vec3(1.0f));\n" "#if ((PS_FMT & ~FMT_PAL) == FMT_24)\n" " c[i].a = ( (PS_AEM == 0) || any(bvec3(c[i].rgb)) ) ? TA.x : 0.0f;\n" + " //c[i].a = ( (PS_AEM == 0) || (sum > 0.0f) ) ? TA.x : 0.0f;\n" "#elif ((PS_FMT & ~FMT_PAL) == FMT_16)\n" " c[i].a = c[i].a >= 0.5 ? TA.y : ( (PS_AEM == 0) || any(bvec3(c[i].rgb)) ) ? TA.x : 0.0f;\n" + " //c[i].a = c[i].a >= 0.5 ? TA.y : ( (PS_AEM == 0) || (sum > 0.0f) ) ? TA.x : 0.0f;\n" "#endif\n" " }\n" "\n" @@ -1401,6 +1411,11 @@ static const char* tfx_fs_all_glsl = " // Note: GLSL 4.50/GL_EXT_shader_integer_mix support a mix instruction to select a component\\n\"\n" " // However Nvidia emulate it with an if (at least on kepler arch) ...\\n\"\n" "#if PS_READ_BA\n" + " // bit field operation requires GL4 HW. Could be nice to merge it with step/mix below\n" + " // uint my_ta = (bool(bitfieldExtract(denorm_c.a, 7, 1))) ? denorm_TA.y : denorm_TA.x;\n" + " // denorm_c.a = bitfieldInsert(denorm_c.a, bitfieldExtract(my_ta, 7, 1), 7, 1);\n" + " // c.ga = vec2(float(denorm_c.a)/ 255.0f);\n" + "\n" " if (bool(denorm_c.a & 0x80u))\n" " c.ga = vec2(float((denorm_c.a & 0x7Fu) | (denorm_TA.y & 0x80u)) / 255.0f);\n" " else\n"