glsl: don't use normalized value for color range

Overall, the shader uses fewer instructions (except in the blending part).

It would also make it possible to improve the rounding of color values.
This commit is contained in:
Gregory Hainaut 2015-07-18 13:40:10 +02:00
parent 57394a03e0
commit c701ab4368
3 changed files with 145 additions and 147 deletions

View File

@ -632,7 +632,7 @@ void GSRendererOGL::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sour
{
ps_sel.fog = 1;
ps_cb.FogColor_AREF = GSVector4::rgba32(env.FOGCOL.u32[0]) / 255;
ps_cb.FogColor_AREF = GSVector4::rgba32(env.FOGCOL.u32[0]);
}
if (context->TEST.ATE)

View File

@ -288,38 +288,41 @@ vec4 sample_color(vec2 st, float q)
return trunc(t * 255.0f);
}
vec4 tfx(vec4 t, vec4 c)
vec4 tfx(vec4 T, vec4 C)
{
vec4 c_out;
vec4 FxT = trunc(trunc(c) * t / 128.0f);
vec4 C_out;
vec4 FxT = trunc(trunc(C) * T / 128.0f);
#if (PS_TFX == 0)
c_out = FxT;
C_out = FxT;
#elif (PS_TFX == 1)
c_out = t;
C_out = T;
#elif (PS_TFX == 2)
c_out.rgb = FxT.rgb + c.a;
c_out.a = t.a + c.a;
C_out.rgb = FxT.rgb + C.a;
C_out.a = T.a + C.a;
#elif (PS_TFX == 3)
c_out.rgb = FxT.rgb + c.a;
c_out.a = t.a;
C_out.rgb = FxT.rgb + C.a;
C_out.a = T.a;
#else
c_out = c;
C_out = C;
#endif
#if (PS_TCC == 0)
c_out.a = c.a;
C_out.a = C.a;
#endif
// Normalize the value
c_out /= 255.0f;
#if (PS_TFX == 0) || (PS_TFX == 2) || (PS_TFX == 3)
// Clamp only when it is useful
C_out = min(C_out, 255.0f);
#endif
return clamp(c_out, vec4(0.0f), vec4(1.0f));
return C_out;
}
void atst(vec4 c)
void atst(vec4 C)
{
float a = trunc(c.a * 255.0 + 0.01);
// FIXME use integer cmp
float a = C.a;
#if (PS_ATST == 0) // never
discard;
@ -346,72 +349,72 @@ void atst(vec4 c)
#endif
}
void colclip(inout vec4 c)
void colclip(inout vec4 C)
{
#if (PS_COLCLIP == 2)
c.rgb = 256.0f/255.0f - c.rgb;
C.rgb = 256.0f - C.rgb;
#endif
#if (PS_COLCLIP == 1 || PS_COLCLIP == 2)
bvec3 factor = lessThan(c.rgb, vec3(128.0f/255.0f));
c.rgb *= vec3(factor);
bvec3 factor = lessThan(C.rgb, vec3(128.0f));
C.rgb *= vec3(factor);
#endif
}
void fog(inout vec4 c, float f)
void fog(inout vec4 C, float f)
{
#if PS_FOG != 0
c.rgb = mix(FogColor, c.rgb, f);
C.rgb = trunc(mix(FogColor, C.rgb, f));
#endif
}
vec4 ps_color()
{
vec4 t = sample_color(PSin_t.xy, PSin_t.w);
vec4 T = sample_color(PSin_t.xy, PSin_t.w);
#if PS_IIP == 1
vec4 c = tfx(t, PSin_c);
vec4 C = tfx(T, PSin_c);
#else
vec4 c = tfx(t, PSin_fc);
vec4 C = tfx(T, PSin_fc);
#endif
atst(c);
atst(C);
fog(c, PSin_t.z);
fog(C, PSin_t.z);
colclip(c);
colclip(C);
#if (PS_CLR1 != 0) // needed for Cd * (As/Ad/F + 1) blending modes
c.rgb = vec3(1.0f, 1.0f, 1.0f);
C.rgb = vec3(255.0f);
#endif
return c;
return C;
}
void ps_fbmask(inout vec4 c)
void ps_fbmask(inout vec4 C)
{
// FIXME do I need special case for 16 bits
#if PS_FBMASK
vec4 rt = texelFetch(RtSampler, ivec2(gl_FragCoord.xy), 0);
uvec4 denorm_rt = uvec4(rt * 255.0f + 0.5f);
uvec4 denorm_c = uvec4(c * 255.0f + 0.5f);
c = vec4((denorm_c & ~FbMask) | (denorm_rt & FbMask)) / 255.0f;
vec4 RT = trunc(texelFetch(RtSampler, ivec2(gl_FragCoord.xy), 0) * 255.0f + 0.1f);
C = vec4((uvec4(C) & ~FbMask) | (uvec4(RT) & FbMask)) / 255.0f;
#endif
}
void ps_blend(inout vec4 c, in float As)
void ps_blend(inout vec4 Color, float As)
{
#if SW_BLEND
vec4 rt = texelFetch(RtSampler, ivec2(gl_FragCoord.xy), 0);
vec4 RT = trunc(texelFetch(RtSampler, ivec2(gl_FragCoord.xy), 0) * 255.0f + 0.1f);
#if PS_DFMT == FMT_24
float Ad = 1.0f;
#else
// FIXME FMT_16 case
// FIXME Ad or Ad * 2?
float Ad = rt.a * 255.0f / 128.0f;
float Ad = RT.a / 128.0f;
#endif
// Let the compiler do its jobs !
vec3 Cd = rt.rgb;
vec3 Cs = c.rgb;
vec3 Cd = RT.rgb;
vec3 Cs = Color.rgb;
#if PS_BLEND_A == 0
vec3 A = Cs;
@ -446,9 +449,9 @@ void ps_blend(inout vec4 c, in float As)
#endif
#if PS_BLEND_A == PS_BLEND_B
c.rgb = D;
Color.rgb = D;
#else
c.rgb = ((A - B) * C) + D;
Color.rgb = ((A - B) * C) + D;
#endif
// FIXME dithering
@ -456,7 +459,7 @@ void ps_blend(inout vec4 c, in float As)
// Correct the Color value based on the output format
#if PS_COLCLIP != 3
// Standard Clamp
c.rgb = clamp(c.rgb, vec3(0.0f), vec3(1.0f));
Color.rgb = clamp(Color.rgb, vec3(0.0f), vec3(255.0f));
#endif
// Warning: normally blending equation is mult(A, B) = A * B >> 7. GPU have the full accuracy
@ -465,15 +468,11 @@ void ps_blend(inout vec4 c, in float As)
#if PS_DFMT == FMT_16
// In 16 bits format, only 5 bits of colors are used. It impacts shadows computation of Castlevania
// Basically we want to do 'c.rgb &= 0xF8' in denormalized mode
c.rgb = vec3(uvec3(c.rgb * 255.0f) & uvec3(0xF8)) / 255.0f;
Color.rgb = vec3(uvec3(Color.rgb) & uvec3(0xF8));
#elif PS_COLCLIP == 3
// Basically we want to do 'c.rgb &= 0xFF' in denormalized mode
c.rgb = vec3(uvec3(c.rgb * 255.0f) & uvec3(0xFF)) / 255.0f;
Color.rgb = vec3(uvec3(Color.rgb) & uvec3(0xFF));
#endif
// Don't compile => unable to find compatible overloaded function "mod(vec3)"
//c.rgb = mod((c.rgb * 255.0f) + 256.5f) / 255.0f;
#endif
}
@ -515,29 +514,29 @@ void ps_main()
}
#endif
vec4 c = ps_color();
vec4 C = ps_color();
#if (APITRACE_DEBUG & 1) == 1
c.r = 1.0f;
C.r = 255f;
#endif
#if (APITRACE_DEBUG & 2) == 2
c.g = 1.0f;
C.g = 255f;
#endif
#if (APITRACE_DEBUG & 4) == 4
c.b = 1.0f;
C.b = 255f;
#endif
#if (APITRACE_DEBUG & 8) == 8
c.a = 0.5f;
C.a = 128f;
#endif
#if PS_SHUFFLE
uvec4 denorm_c = uvec4(c * 255.0f + 0.5f);
uvec4 denorm_c = uvec4(C);
uvec2 denorm_TA = uvec2(vec2(TA.xy) * 255.0f + 0.5f);
// Write RB part. Mask will take care of the correct destination
#if PS_READ_BA
c.rb = c.bb;
C.rb = C.bb;
#else
c.rb = c.rr;
C.rb = C.rr;
#endif
// FIXME precompute my_TA & 0x80
@ -549,63 +548,63 @@ void ps_main()
// bit field operation requires GL4 HW. Could be nice to merge it with step/mix below
// uint my_ta = (bool(bitfieldExtract(denorm_c.a, 7, 1))) ? denorm_TA.y : denorm_TA.x;
// denorm_c.a = bitfieldInsert(denorm_c.a, bitfieldExtract(my_ta, 7, 1), 7, 1);
// c.ga = vec2(float(denorm_c.a)/ 255.0f);
// c.ga = vec2(float(denorm_c.a));
if (bool(denorm_c.a & 0x80u))
c.ga = vec2(float((denorm_c.a & 0x7Fu) | (denorm_TA.y & 0x80u)) / 255.0f);
C.ga = vec2(float((denorm_c.a & 0x7Fu) | (denorm_TA.y & 0x80u)));
else
c.ga = vec2(float((denorm_c.a & 0x7Fu) | (denorm_TA.x & 0x80u)) / 255.0f);
C.ga = vec2(float((denorm_c.a & 0x7Fu) | (denorm_TA.x & 0x80u)));
#else
if (bool(denorm_c.g & 0x80u))
c.ga = vec2(float((denorm_c.g & 0x7Fu) | (denorm_TA.y & 0x80u)) / 255.0f);
C.ga = vec2(float((denorm_c.g & 0x7Fu) | (denorm_TA.y & 0x80u)));
else
c.ga = vec2(float((denorm_c.g & 0x7Fu) | (denorm_TA.x & 0x80u)) / 255.0f);
C.ga = vec2(float((denorm_c.g & 0x7Fu) | (denorm_TA.x & 0x80u)));
// Nice idea but step/mix requires 4 instructions
// set / trunc / I2F / Mad
//
// float sel = step(128.0f/255.0f, c.g);
// vec2 c_shuffle = vec2((denorm_c.gg & 0x7Fu) | (denorm_TA & 0x80u)) / 255.0f;
// float sel = step(128.0f, c.g);
// vec2 c_shuffle = vec2((denorm_c.gg & 0x7Fu) | (denorm_TA & 0x80u));
// c.ga = mix(c_shuffle.xx, c_shuffle.yy, sel);
#endif
#endif
// Must be done before alpha correction
float alpha_blend = c.a * 255.0f / 128.0f;
float alpha_blend = C.a / 128.0f;
// Correct the ALPHA value based on the output format
// FIXME add support of alpha mask to replace properly PS_AOUT
#if (PS_DFMT == FMT_16) || (PS_AOUT)
float a = 128.0f / 255.0; // alpha output will be 0x80
c.a = (PS_FBA != 0) ? a : step(0.5, c.a) * a;
float A_one = 128.0f; // alpha output will be 0x80
C.a = (PS_FBA != 0) ? A_one : step(128.0f, C.a) * A_one;
#elif (PS_DFMT == FMT_32) && (PS_FBA != 0)
if(c.a < 0.5) c.a += 128.0f/255.0f;
if(C.a < 128.0f) C.a += 128.0f;
#endif
// Get first primitive that will write a failling alpha value
#if PS_DATE == 1 && !defined(DISABLE_GL42_image)
// DATM == 0
// Pixel with alpha equal to 1 will failed (128-255)
if (c.a > 127.5f / 255.0f) {
if (C.a > 127.5f) {
imageAtomicMin(img_prim_min, ivec2(gl_FragCoord.xy), gl_PrimitiveID);
return;
}
#elif PS_DATE == 2 && !defined(DISABLE_GL42_image)
// DATM == 1
// Pixel with alpha equal to 0 will failed (0-127)
if (c.a < 127.5f / 255.0f) {
if (C.a < 127.5f) {
imageAtomicMin(img_prim_min, ivec2(gl_FragCoord.xy), gl_PrimitiveID);
return;
}
#endif
ps_blend(c, alpha_blend);
ps_blend(C, alpha_blend);
ps_fbmask(c);
ps_fbmask(C);
SV_Target0 = c;
SV_Target0 = C / 255.0f;
SV_Target1 = vec4(alpha_blend);
}

View File

@ -1151,38 +1151,41 @@ static const char* tfx_fs_all_glsl =
" return trunc(t * 255.0f);\n"
"}\n"
"\n"
"vec4 tfx(vec4 t, vec4 c)\n"
"vec4 tfx(vec4 T, vec4 C)\n"
"{\n"
" vec4 c_out;\n"
" vec4 FxT = trunc(trunc(c) * t / 128.0f);\n"
" vec4 C_out;\n"
" vec4 FxT = trunc(trunc(C) * T / 128.0f);\n"
"\n"
"#if (PS_TFX == 0)\n"
" c_out = FxT;\n"
" C_out = FxT;\n"
"#elif (PS_TFX == 1)\n"
" c_out = t;\n"
" C_out = T;\n"
"#elif (PS_TFX == 2)\n"
" c_out.rgb = FxT.rgb + c.a;\n"
" c_out.a = t.a + c.a;\n"
" C_out.rgb = FxT.rgb + C.a;\n"
" C_out.a = T.a + C.a;\n"
"#elif (PS_TFX == 3)\n"
" c_out.rgb = FxT.rgb + c.a;\n"
" c_out.a = t.a;\n"
" C_out.rgb = FxT.rgb + C.a;\n"
" C_out.a = T.a;\n"
"#else\n"
" c_out = c;\n"
" C_out = C;\n"
"#endif\n"
"\n"
"#if (PS_TCC == 0)\n"
" c_out.a = c.a;\n"
" C_out.a = C.a;\n"
"#endif\n"
"\n"
" // Normalize the value\n"
" c_out /= 255.0f;\n"
"#if (PS_TFX == 0) || (PS_TFX == 2) || (PS_TFX == 3)\n"
" // Clamp only when it is useful\n"
" C_out = min(C_out, 255.0f);\n"
"#endif\n"
"\n"
" return clamp(c_out, vec4(0.0f), vec4(1.0f));\n"
" return C_out;\n"
"}\n"
"\n"
"void atst(vec4 c)\n"
"void atst(vec4 C)\n"
"{\n"
" float a = trunc(c.a * 255.0 + 0.01);\n"
" // FIXME use integer cmp\n"
" float a = C.a;\n"
"\n"
"#if (PS_ATST == 0) // never\n"
" discard;\n"
@ -1209,72 +1212,72 @@ static const char* tfx_fs_all_glsl =
"#endif\n"
"}\n"
"\n"
"void colclip(inout vec4 c)\n"
"void colclip(inout vec4 C)\n"
"{\n"
"#if (PS_COLCLIP == 2)\n"
" c.rgb = 256.0f/255.0f - c.rgb;\n"
" C.rgb = 256.0f - C.rgb;\n"
"#endif\n"
"#if (PS_COLCLIP == 1 || PS_COLCLIP == 2)\n"
" bvec3 factor = lessThan(c.rgb, vec3(128.0f/255.0f));\n"
" c.rgb *= vec3(factor);\n"
" bvec3 factor = lessThan(C.rgb, vec3(128.0f));\n"
" C.rgb *= vec3(factor);\n"
"#endif\n"
"}\n"
"\n"
"void fog(inout vec4 c, float f)\n"
"void fog(inout vec4 C, float f)\n"
"{\n"
"#if PS_FOG != 0\n"
" c.rgb = mix(FogColor, c.rgb, f);\n"
" C.rgb = trunc(mix(FogColor, C.rgb, f));\n"
"#endif\n"
"}\n"
"\n"
"vec4 ps_color()\n"
"{\n"
" vec4 t = sample_color(PSin_t.xy, PSin_t.w);\n"
" vec4 T = sample_color(PSin_t.xy, PSin_t.w);\n"
"\n"
"#if PS_IIP == 1\n"
" vec4 c = tfx(t, PSin_c);\n"
" vec4 C = tfx(T, PSin_c);\n"
"#else\n"
" vec4 c = tfx(t, PSin_fc);\n"
" vec4 C = tfx(T, PSin_fc);\n"
"#endif\n"
"\n"
" atst(c);\n"
" atst(C);\n"
"\n"
" fog(c, PSin_t.z);\n"
" fog(C, PSin_t.z);\n"
"\n"
" colclip(c);\n"
" colclip(C);\n"
"\n"
"#if (PS_CLR1 != 0) // needed for Cd * (As/Ad/F + 1) blending modes\n"
" c.rgb = vec3(1.0f, 1.0f, 1.0f);\n"
" C.rgb = vec3(255.0f);\n"
"#endif\n"
"\n"
" return c;\n"
" return C;\n"
"}\n"
"\n"
"void ps_fbmask(inout vec4 c)\n"
"void ps_fbmask(inout vec4 C)\n"
"{\n"
" // FIXME do I need special case for 16 bits\n"
"#if PS_FBMASK\n"
" vec4 rt = texelFetch(RtSampler, ivec2(gl_FragCoord.xy), 0);\n"
" uvec4 denorm_rt = uvec4(rt * 255.0f + 0.5f);\n"
" uvec4 denorm_c = uvec4(c * 255.0f + 0.5f);\n"
" c = vec4((denorm_c & ~FbMask) | (denorm_rt & FbMask)) / 255.0f;\n"
" vec4 RT = trunc(texelFetch(RtSampler, ivec2(gl_FragCoord.xy), 0) * 255.0f + 0.1f);\n"
" C = vec4((uvec4(C) & ~FbMask) | (uvec4(RT) & FbMask)) / 255.0f;\n"
"#endif\n"
"}\n"
"\n"
"void ps_blend(inout vec4 c, in float As)\n"
"void ps_blend(inout vec4 Color, float As)\n"
"{\n"
"#if SW_BLEND\n"
" vec4 rt = texelFetch(RtSampler, ivec2(gl_FragCoord.xy), 0);\n"
" vec4 RT = trunc(texelFetch(RtSampler, ivec2(gl_FragCoord.xy), 0) * 255.0f + 0.1f);\n"
"\n"
"#if PS_DFMT == FMT_24\n"
" float Ad = 1.0f;\n"
"#else\n"
" // FIXME FMT_16 case\n"
" // FIXME Ad or Ad * 2?\n"
" float Ad = rt.a * 255.0f / 128.0f;\n"
" float Ad = RT.a / 128.0f;\n"
"#endif\n"
"\n"
" // Let the compiler do its jobs !\n"
" vec3 Cd = rt.rgb;\n"
" vec3 Cs = c.rgb;\n"
" vec3 Cd = RT.rgb;\n"
" vec3 Cs = Color.rgb;\n"
"\n"
"#if PS_BLEND_A == 0\n"
" vec3 A = Cs;\n"
@ -1309,9 +1312,9 @@ static const char* tfx_fs_all_glsl =
"#endif\n"
"\n"
"#if PS_BLEND_A == PS_BLEND_B\n"
" c.rgb = D;\n"
" Color.rgb = D;\n"
"#else\n"
" c.rgb = ((A - B) * C) + D;\n"
" Color.rgb = ((A - B) * C) + D;\n"
"#endif\n"
"\n"
" // FIXME dithering\n"
@ -1319,7 +1322,7 @@ static const char* tfx_fs_all_glsl =
" // Correct the Color value based on the output format\n"
"#if PS_COLCLIP != 3\n"
" // Standard Clamp\n"
" c.rgb = clamp(c.rgb, vec3(0.0f), vec3(1.0f));\n"
" Color.rgb = clamp(Color.rgb, vec3(0.0f), vec3(255.0f));\n"
"#endif\n"
"\n"
" // Warning: normally blending equation is mult(A, B) = A * B >> 7. GPU have the full accuracy\n"
@ -1328,15 +1331,11 @@ static const char* tfx_fs_all_glsl =
"#if PS_DFMT == FMT_16\n"
" // In 16 bits format, only 5 bits of colors are used. It impacts shadows computation of Castlevania\n"
"\n"
" // Basically we want to do 'c.rgb &= 0xF8' in denormalized mode\n"
" c.rgb = vec3(uvec3(c.rgb * 255.0f) & uvec3(0xF8)) / 255.0f;\n"
" Color.rgb = vec3(uvec3(Color.rgb) & uvec3(0xF8));\n"
"#elif PS_COLCLIP == 3\n"
" // Basically we want to do 'c.rgb &= 0xFF' in denormalized mode\n"
" c.rgb = vec3(uvec3(c.rgb * 255.0f) & uvec3(0xFF)) / 255.0f;\n"
" Color.rgb = vec3(uvec3(Color.rgb) & uvec3(0xFF));\n"
"#endif\n"
"\n"
" // Don't compile => unable to find compatible overloaded function \"mod(vec3)\"\n"
" //c.rgb = mod((c.rgb * 255.0f) + 256.5f) / 255.0f;\n"
"#endif\n"
"}\n"
"\n"
@ -1378,29 +1377,29 @@ static const char* tfx_fs_all_glsl =
" }\n"
"#endif\n"
"\n"
" vec4 c = ps_color();\n"
" vec4 C = ps_color();\n"
"#if (APITRACE_DEBUG & 1) == 1\n"
" c.r = 1.0f;\n"
" C.r = 255f;\n"
"#endif\n"
"#if (APITRACE_DEBUG & 2) == 2\n"
" c.g = 1.0f;\n"
" C.g = 255f;\n"
"#endif\n"
"#if (APITRACE_DEBUG & 4) == 4\n"
" c.b = 1.0f;\n"
" C.b = 255f;\n"
"#endif\n"
"#if (APITRACE_DEBUG & 8) == 8\n"
" c.a = 0.5f;\n"
" C.a = 128f;\n"
"#endif\n"
"\n"
"#if PS_SHUFFLE\n"
" uvec4 denorm_c = uvec4(c * 255.0f + 0.5f);\n"
" uvec4 denorm_c = uvec4(C);\n"
" uvec2 denorm_TA = uvec2(vec2(TA.xy) * 255.0f + 0.5f);\n"
"\n"
" // Write RB part. Mask will take care of the correct destination\n"
"#if PS_READ_BA\n"
" c.rb = c.bb;\n"
" C.rb = C.bb;\n"
"#else\n"
" c.rb = c.rr;\n"
" C.rb = C.rr;\n"
"#endif\n"
"\n"
" // FIXME precompute my_TA & 0x80\n"
@ -1412,63 +1411,63 @@ static const char* tfx_fs_all_glsl =
" // bit field operation requires GL4 HW. Could be nice to merge it with step/mix below\n"
" // uint my_ta = (bool(bitfieldExtract(denorm_c.a, 7, 1))) ? denorm_TA.y : denorm_TA.x;\n"
" // denorm_c.a = bitfieldInsert(denorm_c.a, bitfieldExtract(my_ta, 7, 1), 7, 1);\n"
" // c.ga = vec2(float(denorm_c.a)/ 255.0f);\n"
" // c.ga = vec2(float(denorm_c.a));\n"
"\n"
" if (bool(denorm_c.a & 0x80u))\n"
" c.ga = vec2(float((denorm_c.a & 0x7Fu) | (denorm_TA.y & 0x80u)) / 255.0f);\n"
" C.ga = vec2(float((denorm_c.a & 0x7Fu) | (denorm_TA.y & 0x80u)));\n"
" else\n"
" c.ga = vec2(float((denorm_c.a & 0x7Fu) | (denorm_TA.x & 0x80u)) / 255.0f);\n"
" C.ga = vec2(float((denorm_c.a & 0x7Fu) | (denorm_TA.x & 0x80u)));\n"
"\n"
"#else\n"
" if (bool(denorm_c.g & 0x80u))\n"
" c.ga = vec2(float((denorm_c.g & 0x7Fu) | (denorm_TA.y & 0x80u)) / 255.0f);\n"
" C.ga = vec2(float((denorm_c.g & 0x7Fu) | (denorm_TA.y & 0x80u)));\n"
" else\n"
" c.ga = vec2(float((denorm_c.g & 0x7Fu) | (denorm_TA.x & 0x80u)) / 255.0f);\n"
" C.ga = vec2(float((denorm_c.g & 0x7Fu) | (denorm_TA.x & 0x80u)));\n"
"\n"
" // Nice idea but step/mix requires 4 instructions\n"
" // set / trunc / I2F / Mad\n"
" //\n"
" // float sel = step(128.0f/255.0f, c.g);\n"
" // vec2 c_shuffle = vec2((denorm_c.gg & 0x7Fu) | (denorm_TA & 0x80u)) / 255.0f;\n"
" // float sel = step(128.0f, c.g);\n"
" // vec2 c_shuffle = vec2((denorm_c.gg & 0x7Fu) | (denorm_TA & 0x80u));\n"
" // c.ga = mix(c_shuffle.xx, c_shuffle.yy, sel);\n"
"#endif\n"
"\n"
"#endif\n"
"\n"
" // Must be done before alpha correction\n"
" float alpha_blend = c.a * 255.0f / 128.0f;\n"
" float alpha_blend = C.a / 128.0f;\n"
"\n"
" // Correct the ALPHA value based on the output format\n"
" // FIXME add support of alpha mask to replace properly PS_AOUT\n"
"#if (PS_DFMT == FMT_16) || (PS_AOUT)\n"
" float a = 128.0f / 255.0; // alpha output will be 0x80\n"
" c.a = (PS_FBA != 0) ? a : step(0.5, c.a) * a;\n"
" float A_one = 128.0f; // alpha output will be 0x80\n"
" C.a = (PS_FBA != 0) ? A_one : step(128.0f, C.a) * A_one;\n"
"#elif (PS_DFMT == FMT_32) && (PS_FBA != 0)\n"
" if(c.a < 0.5) c.a += 128.0f/255.0f;\n"
" if(C.a < 128.0f) C.a += 128.0f;\n"
"#endif\n"
"\n"
" // Get first primitive that will write a failling alpha value\n"
"#if PS_DATE == 1 && !defined(DISABLE_GL42_image)\n"
" // DATM == 0\n"
" // Pixel with alpha equal to 1 will failed (128-255)\n"
" if (c.a > 127.5f / 255.0f) {\n"
" if (C.a > 127.5f) {\n"
" imageAtomicMin(img_prim_min, ivec2(gl_FragCoord.xy), gl_PrimitiveID);\n"
" return;\n"
" }\n"
"#elif PS_DATE == 2 && !defined(DISABLE_GL42_image)\n"
" // DATM == 1\n"
" // Pixel with alpha equal to 0 will failed (0-127)\n"
" if (c.a < 127.5f / 255.0f) {\n"
" if (C.a < 127.5f) {\n"
" imageAtomicMin(img_prim_min, ivec2(gl_FragCoord.xy), gl_PrimitiveID);\n"
" return;\n"
" }\n"
"#endif\n"
"\n"
" ps_blend(c, alpha_blend);\n"
" ps_blend(C, alpha_blend);\n"
"\n"
" ps_fbmask(c);\n"
" ps_fbmask(C);\n"
"\n"
" SV_Target0 = c;\n"
" SV_Target0 = C / 255.0f;\n"
" SV_Target1 = vec4(alpha_blend);\n"
"}\n"
"\n"