mirror of https://github.com/PCSX2/pcsx2.git
glsl: don't use normalized value for color range
Globally, the shader uses fewer instructions (except in the blending part). It would also allow improving the rounding of colors.
This commit is contained in:
parent
57394a03e0
commit
c701ab4368
|
@ -632,7 +632,7 @@ void GSRendererOGL::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sour
|
|||
{
|
||||
ps_sel.fog = 1;
|
||||
|
||||
ps_cb.FogColor_AREF = GSVector4::rgba32(env.FOGCOL.u32[0]) / 255;
|
||||
ps_cb.FogColor_AREF = GSVector4::rgba32(env.FOGCOL.u32[0]);
|
||||
}
|
||||
|
||||
if (context->TEST.ATE)
|
||||
|
|
|
@ -288,38 +288,41 @@ vec4 sample_color(vec2 st, float q)
|
|||
return trunc(t * 255.0f);
|
||||
}
|
||||
|
||||
vec4 tfx(vec4 t, vec4 c)
|
||||
vec4 tfx(vec4 T, vec4 C)
|
||||
{
|
||||
vec4 c_out;
|
||||
vec4 FxT = trunc(trunc(c) * t / 128.0f);
|
||||
vec4 C_out;
|
||||
vec4 FxT = trunc(trunc(C) * T / 128.0f);
|
||||
|
||||
#if (PS_TFX == 0)
|
||||
c_out = FxT;
|
||||
C_out = FxT;
|
||||
#elif (PS_TFX == 1)
|
||||
c_out = t;
|
||||
C_out = T;
|
||||
#elif (PS_TFX == 2)
|
||||
c_out.rgb = FxT.rgb + c.a;
|
||||
c_out.a = t.a + c.a;
|
||||
C_out.rgb = FxT.rgb + C.a;
|
||||
C_out.a = T.a + C.a;
|
||||
#elif (PS_TFX == 3)
|
||||
c_out.rgb = FxT.rgb + c.a;
|
||||
c_out.a = t.a;
|
||||
C_out.rgb = FxT.rgb + C.a;
|
||||
C_out.a = T.a;
|
||||
#else
|
||||
c_out = c;
|
||||
C_out = C;
|
||||
#endif
|
||||
|
||||
#if (PS_TCC == 0)
|
||||
c_out.a = c.a;
|
||||
C_out.a = C.a;
|
||||
#endif
|
||||
|
||||
// Normalize the value
|
||||
c_out /= 255.0f;
|
||||
#if (PS_TFX == 0) || (PS_TFX == 2) || (PS_TFX == 3)
|
||||
// Clamp only when it is useful
|
||||
C_out = min(C_out, 255.0f);
|
||||
#endif
|
||||
|
||||
return clamp(c_out, vec4(0.0f), vec4(1.0f));
|
||||
return C_out;
|
||||
}
|
||||
|
||||
void atst(vec4 c)
|
||||
void atst(vec4 C)
|
||||
{
|
||||
float a = trunc(c.a * 255.0 + 0.01);
|
||||
// FIXME use integer cmp
|
||||
float a = C.a;
|
||||
|
||||
#if (PS_ATST == 0) // never
|
||||
discard;
|
||||
|
@ -346,72 +349,72 @@ void atst(vec4 c)
|
|||
#endif
|
||||
}
|
||||
|
||||
void colclip(inout vec4 c)
|
||||
void colclip(inout vec4 C)
|
||||
{
|
||||
#if (PS_COLCLIP == 2)
|
||||
c.rgb = 256.0f/255.0f - c.rgb;
|
||||
C.rgb = 256.0f - C.rgb;
|
||||
#endif
|
||||
#if (PS_COLCLIP == 1 || PS_COLCLIP == 2)
|
||||
bvec3 factor = lessThan(c.rgb, vec3(128.0f/255.0f));
|
||||
c.rgb *= vec3(factor);
|
||||
bvec3 factor = lessThan(C.rgb, vec3(128.0f));
|
||||
C.rgb *= vec3(factor);
|
||||
#endif
|
||||
}
|
||||
|
||||
void fog(inout vec4 c, float f)
|
||||
void fog(inout vec4 C, float f)
|
||||
{
|
||||
#if PS_FOG != 0
|
||||
c.rgb = mix(FogColor, c.rgb, f);
|
||||
C.rgb = trunc(mix(FogColor, C.rgb, f));
|
||||
#endif
|
||||
}
|
||||
|
||||
vec4 ps_color()
|
||||
{
|
||||
vec4 t = sample_color(PSin_t.xy, PSin_t.w);
|
||||
vec4 T = sample_color(PSin_t.xy, PSin_t.w);
|
||||
|
||||
#if PS_IIP == 1
|
||||
vec4 c = tfx(t, PSin_c);
|
||||
vec4 C = tfx(T, PSin_c);
|
||||
#else
|
||||
vec4 c = tfx(t, PSin_fc);
|
||||
vec4 C = tfx(T, PSin_fc);
|
||||
#endif
|
||||
|
||||
atst(c);
|
||||
atst(C);
|
||||
|
||||
fog(c, PSin_t.z);
|
||||
fog(C, PSin_t.z);
|
||||
|
||||
colclip(c);
|
||||
colclip(C);
|
||||
|
||||
#if (PS_CLR1 != 0) // needed for Cd * (As/Ad/F + 1) blending modes
|
||||
c.rgb = vec3(1.0f, 1.0f, 1.0f);
|
||||
C.rgb = vec3(255.0f);
|
||||
#endif
|
||||
|
||||
return c;
|
||||
return C;
|
||||
}
|
||||
|
||||
void ps_fbmask(inout vec4 c)
|
||||
void ps_fbmask(inout vec4 C)
|
||||
{
|
||||
// FIXME do I need special case for 16 bits
|
||||
#if PS_FBMASK
|
||||
vec4 rt = texelFetch(RtSampler, ivec2(gl_FragCoord.xy), 0);
|
||||
uvec4 denorm_rt = uvec4(rt * 255.0f + 0.5f);
|
||||
uvec4 denorm_c = uvec4(c * 255.0f + 0.5f);
|
||||
c = vec4((denorm_c & ~FbMask) | (denorm_rt & FbMask)) / 255.0f;
|
||||
vec4 RT = trunc(texelFetch(RtSampler, ivec2(gl_FragCoord.xy), 0) * 255.0f + 0.1f);
|
||||
C = vec4((uvec4(C) & ~FbMask) | (uvec4(RT) & FbMask)) / 255.0f;
|
||||
#endif
|
||||
}
|
||||
|
||||
void ps_blend(inout vec4 c, in float As)
|
||||
void ps_blend(inout vec4 Color, float As)
|
||||
{
|
||||
#if SW_BLEND
|
||||
vec4 rt = texelFetch(RtSampler, ivec2(gl_FragCoord.xy), 0);
|
||||
vec4 RT = trunc(texelFetch(RtSampler, ivec2(gl_FragCoord.xy), 0) * 255.0f + 0.1f);
|
||||
|
||||
#if PS_DFMT == FMT_24
|
||||
float Ad = 1.0f;
|
||||
#else
|
||||
// FIXME FMT_16 case
|
||||
// FIXME Ad or Ad * 2?
|
||||
float Ad = rt.a * 255.0f / 128.0f;
|
||||
float Ad = RT.a / 128.0f;
|
||||
#endif
|
||||
|
||||
// Let the compiler do its jobs !
|
||||
vec3 Cd = rt.rgb;
|
||||
vec3 Cs = c.rgb;
|
||||
vec3 Cd = RT.rgb;
|
||||
vec3 Cs = Color.rgb;
|
||||
|
||||
#if PS_BLEND_A == 0
|
||||
vec3 A = Cs;
|
||||
|
@ -446,9 +449,9 @@ void ps_blend(inout vec4 c, in float As)
|
|||
#endif
|
||||
|
||||
#if PS_BLEND_A == PS_BLEND_B
|
||||
c.rgb = D;
|
||||
Color.rgb = D;
|
||||
#else
|
||||
c.rgb = ((A - B) * C) + D;
|
||||
Color.rgb = ((A - B) * C) + D;
|
||||
#endif
|
||||
|
||||
// FIXME dithering
|
||||
|
@ -456,7 +459,7 @@ void ps_blend(inout vec4 c, in float As)
|
|||
// Correct the Color value based on the output format
|
||||
#if PS_COLCLIP != 3
|
||||
// Standard Clamp
|
||||
c.rgb = clamp(c.rgb, vec3(0.0f), vec3(1.0f));
|
||||
Color.rgb = clamp(Color.rgb, vec3(0.0f), vec3(255.0f));
|
||||
#endif
|
||||
|
||||
// Warning: normally blending equation is mult(A, B) = A * B >> 7. GPU have the full accuracy
|
||||
|
@ -465,15 +468,11 @@ void ps_blend(inout vec4 c, in float As)
|
|||
#if PS_DFMT == FMT_16
|
||||
// In 16 bits format, only 5 bits of colors are used. It impacts shadows computation of Castlevania
|
||||
|
||||
// Basically we want to do 'c.rgb &= 0xF8' in denormalized mode
|
||||
c.rgb = vec3(uvec3(c.rgb * 255.0f) & uvec3(0xF8)) / 255.0f;
|
||||
Color.rgb = vec3(uvec3(Color.rgb) & uvec3(0xF8));
|
||||
#elif PS_COLCLIP == 3
|
||||
// Basically we want to do 'c.rgb &= 0xFF' in denormalized mode
|
||||
c.rgb = vec3(uvec3(c.rgb * 255.0f) & uvec3(0xFF)) / 255.0f;
|
||||
Color.rgb = vec3(uvec3(Color.rgb) & uvec3(0xFF));
|
||||
#endif
|
||||
|
||||
// Don't compile => unable to find compatible overloaded function "mod(vec3)"
|
||||
//c.rgb = mod((c.rgb * 255.0f) + 256.5f) / 255.0f;
|
||||
#endif
|
||||
}
|
||||
|
||||
|
@ -515,29 +514,29 @@ void ps_main()
|
|||
}
|
||||
#endif
|
||||
|
||||
vec4 c = ps_color();
|
||||
vec4 C = ps_color();
|
||||
#if (APITRACE_DEBUG & 1) == 1
|
||||
c.r = 1.0f;
|
||||
C.r = 255f;
|
||||
#endif
|
||||
#if (APITRACE_DEBUG & 2) == 2
|
||||
c.g = 1.0f;
|
||||
C.g = 255f;
|
||||
#endif
|
||||
#if (APITRACE_DEBUG & 4) == 4
|
||||
c.b = 1.0f;
|
||||
C.b = 255f;
|
||||
#endif
|
||||
#if (APITRACE_DEBUG & 8) == 8
|
||||
c.a = 0.5f;
|
||||
C.a = 128f;
|
||||
#endif
|
||||
|
||||
#if PS_SHUFFLE
|
||||
uvec4 denorm_c = uvec4(c * 255.0f + 0.5f);
|
||||
uvec4 denorm_c = uvec4(C);
|
||||
uvec2 denorm_TA = uvec2(vec2(TA.xy) * 255.0f + 0.5f);
|
||||
|
||||
// Write RB part. Mask will take care of the correct destination
|
||||
#if PS_READ_BA
|
||||
c.rb = c.bb;
|
||||
C.rb = C.bb;
|
||||
#else
|
||||
c.rb = c.rr;
|
||||
C.rb = C.rr;
|
||||
#endif
|
||||
|
||||
// FIXME precompute my_TA & 0x80
|
||||
|
@ -549,63 +548,63 @@ void ps_main()
|
|||
// bit field operation requires GL4 HW. Could be nice to merge it with step/mix below
|
||||
// uint my_ta = (bool(bitfieldExtract(denorm_c.a, 7, 1))) ? denorm_TA.y : denorm_TA.x;
|
||||
// denorm_c.a = bitfieldInsert(denorm_c.a, bitfieldExtract(my_ta, 7, 1), 7, 1);
|
||||
// c.ga = vec2(float(denorm_c.a)/ 255.0f);
|
||||
// c.ga = vec2(float(denorm_c.a));
|
||||
|
||||
if (bool(denorm_c.a & 0x80u))
|
||||
c.ga = vec2(float((denorm_c.a & 0x7Fu) | (denorm_TA.y & 0x80u)) / 255.0f);
|
||||
C.ga = vec2(float((denorm_c.a & 0x7Fu) | (denorm_TA.y & 0x80u)));
|
||||
else
|
||||
c.ga = vec2(float((denorm_c.a & 0x7Fu) | (denorm_TA.x & 0x80u)) / 255.0f);
|
||||
C.ga = vec2(float((denorm_c.a & 0x7Fu) | (denorm_TA.x & 0x80u)));
|
||||
|
||||
#else
|
||||
if (bool(denorm_c.g & 0x80u))
|
||||
c.ga = vec2(float((denorm_c.g & 0x7Fu) | (denorm_TA.y & 0x80u)) / 255.0f);
|
||||
C.ga = vec2(float((denorm_c.g & 0x7Fu) | (denorm_TA.y & 0x80u)));
|
||||
else
|
||||
c.ga = vec2(float((denorm_c.g & 0x7Fu) | (denorm_TA.x & 0x80u)) / 255.0f);
|
||||
C.ga = vec2(float((denorm_c.g & 0x7Fu) | (denorm_TA.x & 0x80u)));
|
||||
|
||||
// Nice idea but step/mix requires 4 instructions
|
||||
// set / trunc / I2F / Mad
|
||||
//
|
||||
// float sel = step(128.0f/255.0f, c.g);
|
||||
// vec2 c_shuffle = vec2((denorm_c.gg & 0x7Fu) | (denorm_TA & 0x80u)) / 255.0f;
|
||||
// float sel = step(128.0f, c.g);
|
||||
// vec2 c_shuffle = vec2((denorm_c.gg & 0x7Fu) | (denorm_TA & 0x80u));
|
||||
// c.ga = mix(c_shuffle.xx, c_shuffle.yy, sel);
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
||||
// Must be done before alpha correction
|
||||
float alpha_blend = c.a * 255.0f / 128.0f;
|
||||
float alpha_blend = C.a / 128.0f;
|
||||
|
||||
// Correct the ALPHA value based on the output format
|
||||
// FIXME add support of alpha mask to replace properly PS_AOUT
|
||||
#if (PS_DFMT == FMT_16) || (PS_AOUT)
|
||||
float a = 128.0f / 255.0; // alpha output will be 0x80
|
||||
c.a = (PS_FBA != 0) ? a : step(0.5, c.a) * a;
|
||||
float A_one = 128.0f; // alpha output will be 0x80
|
||||
C.a = (PS_FBA != 0) ? A_one : step(128.0f, C.a) * A_one;
|
||||
#elif (PS_DFMT == FMT_32) && (PS_FBA != 0)
|
||||
if(c.a < 0.5) c.a += 128.0f/255.0f;
|
||||
if(C.a < 128.0f) C.a += 128.0f;
|
||||
#endif
|
||||
|
||||
// Get first primitive that will write a failling alpha value
|
||||
#if PS_DATE == 1 && !defined(DISABLE_GL42_image)
|
||||
// DATM == 0
|
||||
// Pixel with alpha equal to 1 will failed (128-255)
|
||||
if (c.a > 127.5f / 255.0f) {
|
||||
if (C.a > 127.5f) {
|
||||
imageAtomicMin(img_prim_min, ivec2(gl_FragCoord.xy), gl_PrimitiveID);
|
||||
return;
|
||||
}
|
||||
#elif PS_DATE == 2 && !defined(DISABLE_GL42_image)
|
||||
// DATM == 1
|
||||
// Pixel with alpha equal to 0 will failed (0-127)
|
||||
if (c.a < 127.5f / 255.0f) {
|
||||
if (C.a < 127.5f) {
|
||||
imageAtomicMin(img_prim_min, ivec2(gl_FragCoord.xy), gl_PrimitiveID);
|
||||
return;
|
||||
}
|
||||
#endif
|
||||
|
||||
ps_blend(c, alpha_blend);
|
||||
ps_blend(C, alpha_blend);
|
||||
|
||||
ps_fbmask(c);
|
||||
ps_fbmask(C);
|
||||
|
||||
SV_Target0 = c;
|
||||
SV_Target0 = C / 255.0f;
|
||||
SV_Target1 = vec4(alpha_blend);
|
||||
}
|
||||
|
||||
|
|
|
@ -1151,38 +1151,41 @@ static const char* tfx_fs_all_glsl =
|
|||
" return trunc(t * 255.0f);\n"
|
||||
"}\n"
|
||||
"\n"
|
||||
"vec4 tfx(vec4 t, vec4 c)\n"
|
||||
"vec4 tfx(vec4 T, vec4 C)\n"
|
||||
"{\n"
|
||||
" vec4 c_out;\n"
|
||||
" vec4 FxT = trunc(trunc(c) * t / 128.0f);\n"
|
||||
" vec4 C_out;\n"
|
||||
" vec4 FxT = trunc(trunc(C) * T / 128.0f);\n"
|
||||
"\n"
|
||||
"#if (PS_TFX == 0)\n"
|
||||
" c_out = FxT;\n"
|
||||
" C_out = FxT;\n"
|
||||
"#elif (PS_TFX == 1)\n"
|
||||
" c_out = t;\n"
|
||||
" C_out = T;\n"
|
||||
"#elif (PS_TFX == 2)\n"
|
||||
" c_out.rgb = FxT.rgb + c.a;\n"
|
||||
" c_out.a = t.a + c.a;\n"
|
||||
" C_out.rgb = FxT.rgb + C.a;\n"
|
||||
" C_out.a = T.a + C.a;\n"
|
||||
"#elif (PS_TFX == 3)\n"
|
||||
" c_out.rgb = FxT.rgb + c.a;\n"
|
||||
" c_out.a = t.a;\n"
|
||||
" C_out.rgb = FxT.rgb + C.a;\n"
|
||||
" C_out.a = T.a;\n"
|
||||
"#else\n"
|
||||
" c_out = c;\n"
|
||||
" C_out = C;\n"
|
||||
"#endif\n"
|
||||
"\n"
|
||||
"#if (PS_TCC == 0)\n"
|
||||
" c_out.a = c.a;\n"
|
||||
" C_out.a = C.a;\n"
|
||||
"#endif\n"
|
||||
"\n"
|
||||
" // Normalize the value\n"
|
||||
" c_out /= 255.0f;\n"
|
||||
"#if (PS_TFX == 0) || (PS_TFX == 2) || (PS_TFX == 3)\n"
|
||||
" // Clamp only when it is useful\n"
|
||||
" C_out = min(C_out, 255.0f);\n"
|
||||
"#endif\n"
|
||||
"\n"
|
||||
" return clamp(c_out, vec4(0.0f), vec4(1.0f));\n"
|
||||
" return C_out;\n"
|
||||
"}\n"
|
||||
"\n"
|
||||
"void atst(vec4 c)\n"
|
||||
"void atst(vec4 C)\n"
|
||||
"{\n"
|
||||
" float a = trunc(c.a * 255.0 + 0.01);\n"
|
||||
" // FIXME use integer cmp\n"
|
||||
" float a = C.a;\n"
|
||||
"\n"
|
||||
"#if (PS_ATST == 0) // never\n"
|
||||
" discard;\n"
|
||||
|
@ -1209,72 +1212,72 @@ static const char* tfx_fs_all_glsl =
|
|||
"#endif\n"
|
||||
"}\n"
|
||||
"\n"
|
||||
"void colclip(inout vec4 c)\n"
|
||||
"void colclip(inout vec4 C)\n"
|
||||
"{\n"
|
||||
"#if (PS_COLCLIP == 2)\n"
|
||||
" c.rgb = 256.0f/255.0f - c.rgb;\n"
|
||||
" C.rgb = 256.0f - C.rgb;\n"
|
||||
"#endif\n"
|
||||
"#if (PS_COLCLIP == 1 || PS_COLCLIP == 2)\n"
|
||||
" bvec3 factor = lessThan(c.rgb, vec3(128.0f/255.0f));\n"
|
||||
" c.rgb *= vec3(factor);\n"
|
||||
" bvec3 factor = lessThan(C.rgb, vec3(128.0f));\n"
|
||||
" C.rgb *= vec3(factor);\n"
|
||||
"#endif\n"
|
||||
"}\n"
|
||||
"\n"
|
||||
"void fog(inout vec4 c, float f)\n"
|
||||
"void fog(inout vec4 C, float f)\n"
|
||||
"{\n"
|
||||
"#if PS_FOG != 0\n"
|
||||
" c.rgb = mix(FogColor, c.rgb, f);\n"
|
||||
" C.rgb = trunc(mix(FogColor, C.rgb, f));\n"
|
||||
"#endif\n"
|
||||
"}\n"
|
||||
"\n"
|
||||
"vec4 ps_color()\n"
|
||||
"{\n"
|
||||
" vec4 t = sample_color(PSin_t.xy, PSin_t.w);\n"
|
||||
" vec4 T = sample_color(PSin_t.xy, PSin_t.w);\n"
|
||||
"\n"
|
||||
"#if PS_IIP == 1\n"
|
||||
" vec4 c = tfx(t, PSin_c);\n"
|
||||
" vec4 C = tfx(T, PSin_c);\n"
|
||||
"#else\n"
|
||||
" vec4 c = tfx(t, PSin_fc);\n"
|
||||
" vec4 C = tfx(T, PSin_fc);\n"
|
||||
"#endif\n"
|
||||
"\n"
|
||||
" atst(c);\n"
|
||||
" atst(C);\n"
|
||||
"\n"
|
||||
" fog(c, PSin_t.z);\n"
|
||||
" fog(C, PSin_t.z);\n"
|
||||
"\n"
|
||||
" colclip(c);\n"
|
||||
" colclip(C);\n"
|
||||
"\n"
|
||||
"#if (PS_CLR1 != 0) // needed for Cd * (As/Ad/F + 1) blending modes\n"
|
||||
" c.rgb = vec3(1.0f, 1.0f, 1.0f);\n"
|
||||
" C.rgb = vec3(255.0f);\n"
|
||||
"#endif\n"
|
||||
"\n"
|
||||
" return c;\n"
|
||||
" return C;\n"
|
||||
"}\n"
|
||||
"\n"
|
||||
"void ps_fbmask(inout vec4 c)\n"
|
||||
"void ps_fbmask(inout vec4 C)\n"
|
||||
"{\n"
|
||||
" // FIXME do I need special case for 16 bits\n"
|
||||
"#if PS_FBMASK\n"
|
||||
" vec4 rt = texelFetch(RtSampler, ivec2(gl_FragCoord.xy), 0);\n"
|
||||
" uvec4 denorm_rt = uvec4(rt * 255.0f + 0.5f);\n"
|
||||
" uvec4 denorm_c = uvec4(c * 255.0f + 0.5f);\n"
|
||||
" c = vec4((denorm_c & ~FbMask) | (denorm_rt & FbMask)) / 255.0f;\n"
|
||||
" vec4 RT = trunc(texelFetch(RtSampler, ivec2(gl_FragCoord.xy), 0) * 255.0f + 0.1f);\n"
|
||||
" C = vec4((uvec4(C) & ~FbMask) | (uvec4(RT) & FbMask)) / 255.0f;\n"
|
||||
"#endif\n"
|
||||
"}\n"
|
||||
"\n"
|
||||
"void ps_blend(inout vec4 c, in float As)\n"
|
||||
"void ps_blend(inout vec4 Color, float As)\n"
|
||||
"{\n"
|
||||
"#if SW_BLEND\n"
|
||||
" vec4 rt = texelFetch(RtSampler, ivec2(gl_FragCoord.xy), 0);\n"
|
||||
" vec4 RT = trunc(texelFetch(RtSampler, ivec2(gl_FragCoord.xy), 0) * 255.0f + 0.1f);\n"
|
||||
"\n"
|
||||
"#if PS_DFMT == FMT_24\n"
|
||||
" float Ad = 1.0f;\n"
|
||||
"#else\n"
|
||||
" // FIXME FMT_16 case\n"
|
||||
" // FIXME Ad or Ad * 2?\n"
|
||||
" float Ad = rt.a * 255.0f / 128.0f;\n"
|
||||
" float Ad = RT.a / 128.0f;\n"
|
||||
"#endif\n"
|
||||
"\n"
|
||||
" // Let the compiler do its jobs !\n"
|
||||
" vec3 Cd = rt.rgb;\n"
|
||||
" vec3 Cs = c.rgb;\n"
|
||||
" vec3 Cd = RT.rgb;\n"
|
||||
" vec3 Cs = Color.rgb;\n"
|
||||
"\n"
|
||||
"#if PS_BLEND_A == 0\n"
|
||||
" vec3 A = Cs;\n"
|
||||
|
@ -1309,9 +1312,9 @@ static const char* tfx_fs_all_glsl =
|
|||
"#endif\n"
|
||||
"\n"
|
||||
"#if PS_BLEND_A == PS_BLEND_B\n"
|
||||
" c.rgb = D;\n"
|
||||
" Color.rgb = D;\n"
|
||||
"#else\n"
|
||||
" c.rgb = ((A - B) * C) + D;\n"
|
||||
" Color.rgb = ((A - B) * C) + D;\n"
|
||||
"#endif\n"
|
||||
"\n"
|
||||
" // FIXME dithering\n"
|
||||
|
@ -1319,7 +1322,7 @@ static const char* tfx_fs_all_glsl =
|
|||
" // Correct the Color value based on the output format\n"
|
||||
"#if PS_COLCLIP != 3\n"
|
||||
" // Standard Clamp\n"
|
||||
" c.rgb = clamp(c.rgb, vec3(0.0f), vec3(1.0f));\n"
|
||||
" Color.rgb = clamp(Color.rgb, vec3(0.0f), vec3(255.0f));\n"
|
||||
"#endif\n"
|
||||
"\n"
|
||||
" // Warning: normally blending equation is mult(A, B) = A * B >> 7. GPU have the full accuracy\n"
|
||||
|
@ -1328,15 +1331,11 @@ static const char* tfx_fs_all_glsl =
|
|||
"#if PS_DFMT == FMT_16\n"
|
||||
" // In 16 bits format, only 5 bits of colors are used. It impacts shadows computation of Castlevania\n"
|
||||
"\n"
|
||||
" // Basically we want to do 'c.rgb &= 0xF8' in denormalized mode\n"
|
||||
" c.rgb = vec3(uvec3(c.rgb * 255.0f) & uvec3(0xF8)) / 255.0f;\n"
|
||||
" Color.rgb = vec3(uvec3(Color.rgb) & uvec3(0xF8));\n"
|
||||
"#elif PS_COLCLIP == 3\n"
|
||||
" // Basically we want to do 'c.rgb &= 0xFF' in denormalized mode\n"
|
||||
" c.rgb = vec3(uvec3(c.rgb * 255.0f) & uvec3(0xFF)) / 255.0f;\n"
|
||||
" Color.rgb = vec3(uvec3(Color.rgb) & uvec3(0xFF));\n"
|
||||
"#endif\n"
|
||||
"\n"
|
||||
" // Don't compile => unable to find compatible overloaded function \"mod(vec3)\"\n"
|
||||
" //c.rgb = mod((c.rgb * 255.0f) + 256.5f) / 255.0f;\n"
|
||||
"#endif\n"
|
||||
"}\n"
|
||||
"\n"
|
||||
|
@ -1378,29 +1377,29 @@ static const char* tfx_fs_all_glsl =
|
|||
" }\n"
|
||||
"#endif\n"
|
||||
"\n"
|
||||
" vec4 c = ps_color();\n"
|
||||
" vec4 C = ps_color();\n"
|
||||
"#if (APITRACE_DEBUG & 1) == 1\n"
|
||||
" c.r = 1.0f;\n"
|
||||
" C.r = 255f;\n"
|
||||
"#endif\n"
|
||||
"#if (APITRACE_DEBUG & 2) == 2\n"
|
||||
" c.g = 1.0f;\n"
|
||||
" C.g = 255f;\n"
|
||||
"#endif\n"
|
||||
"#if (APITRACE_DEBUG & 4) == 4\n"
|
||||
" c.b = 1.0f;\n"
|
||||
" C.b = 255f;\n"
|
||||
"#endif\n"
|
||||
"#if (APITRACE_DEBUG & 8) == 8\n"
|
||||
" c.a = 0.5f;\n"
|
||||
" C.a = 128f;\n"
|
||||
"#endif\n"
|
||||
"\n"
|
||||
"#if PS_SHUFFLE\n"
|
||||
" uvec4 denorm_c = uvec4(c * 255.0f + 0.5f);\n"
|
||||
" uvec4 denorm_c = uvec4(C);\n"
|
||||
" uvec2 denorm_TA = uvec2(vec2(TA.xy) * 255.0f + 0.5f);\n"
|
||||
"\n"
|
||||
" // Write RB part. Mask will take care of the correct destination\n"
|
||||
"#if PS_READ_BA\n"
|
||||
" c.rb = c.bb;\n"
|
||||
" C.rb = C.bb;\n"
|
||||
"#else\n"
|
||||
" c.rb = c.rr;\n"
|
||||
" C.rb = C.rr;\n"
|
||||
"#endif\n"
|
||||
"\n"
|
||||
" // FIXME precompute my_TA & 0x80\n"
|
||||
|
@ -1412,63 +1411,63 @@ static const char* tfx_fs_all_glsl =
|
|||
" // bit field operation requires GL4 HW. Could be nice to merge it with step/mix below\n"
|
||||
" // uint my_ta = (bool(bitfieldExtract(denorm_c.a, 7, 1))) ? denorm_TA.y : denorm_TA.x;\n"
|
||||
" // denorm_c.a = bitfieldInsert(denorm_c.a, bitfieldExtract(my_ta, 7, 1), 7, 1);\n"
|
||||
" // c.ga = vec2(float(denorm_c.a)/ 255.0f);\n"
|
||||
" // c.ga = vec2(float(denorm_c.a));\n"
|
||||
"\n"
|
||||
" if (bool(denorm_c.a & 0x80u))\n"
|
||||
" c.ga = vec2(float((denorm_c.a & 0x7Fu) | (denorm_TA.y & 0x80u)) / 255.0f);\n"
|
||||
" C.ga = vec2(float((denorm_c.a & 0x7Fu) | (denorm_TA.y & 0x80u)));\n"
|
||||
" else\n"
|
||||
" c.ga = vec2(float((denorm_c.a & 0x7Fu) | (denorm_TA.x & 0x80u)) / 255.0f);\n"
|
||||
" C.ga = vec2(float((denorm_c.a & 0x7Fu) | (denorm_TA.x & 0x80u)));\n"
|
||||
"\n"
|
||||
"#else\n"
|
||||
" if (bool(denorm_c.g & 0x80u))\n"
|
||||
" c.ga = vec2(float((denorm_c.g & 0x7Fu) | (denorm_TA.y & 0x80u)) / 255.0f);\n"
|
||||
" C.ga = vec2(float((denorm_c.g & 0x7Fu) | (denorm_TA.y & 0x80u)));\n"
|
||||
" else\n"
|
||||
" c.ga = vec2(float((denorm_c.g & 0x7Fu) | (denorm_TA.x & 0x80u)) / 255.0f);\n"
|
||||
" C.ga = vec2(float((denorm_c.g & 0x7Fu) | (denorm_TA.x & 0x80u)));\n"
|
||||
"\n"
|
||||
" // Nice idea but step/mix requires 4 instructions\n"
|
||||
" // set / trunc / I2F / Mad\n"
|
||||
" //\n"
|
||||
" // float sel = step(128.0f/255.0f, c.g);\n"
|
||||
" // vec2 c_shuffle = vec2((denorm_c.gg & 0x7Fu) | (denorm_TA & 0x80u)) / 255.0f;\n"
|
||||
" // float sel = step(128.0f, c.g);\n"
|
||||
" // vec2 c_shuffle = vec2((denorm_c.gg & 0x7Fu) | (denorm_TA & 0x80u));\n"
|
||||
" // c.ga = mix(c_shuffle.xx, c_shuffle.yy, sel);\n"
|
||||
"#endif\n"
|
||||
"\n"
|
||||
"#endif\n"
|
||||
"\n"
|
||||
" // Must be done before alpha correction\n"
|
||||
" float alpha_blend = c.a * 255.0f / 128.0f;\n"
|
||||
" float alpha_blend = C.a / 128.0f;\n"
|
||||
"\n"
|
||||
" // Correct the ALPHA value based on the output format\n"
|
||||
" // FIXME add support of alpha mask to replace properly PS_AOUT\n"
|
||||
"#if (PS_DFMT == FMT_16) || (PS_AOUT)\n"
|
||||
" float a = 128.0f / 255.0; // alpha output will be 0x80\n"
|
||||
" c.a = (PS_FBA != 0) ? a : step(0.5, c.a) * a;\n"
|
||||
" float A_one = 128.0f; // alpha output will be 0x80\n"
|
||||
" C.a = (PS_FBA != 0) ? A_one : step(128.0f, C.a) * A_one;\n"
|
||||
"#elif (PS_DFMT == FMT_32) && (PS_FBA != 0)\n"
|
||||
" if(c.a < 0.5) c.a += 128.0f/255.0f;\n"
|
||||
" if(C.a < 128.0f) C.a += 128.0f;\n"
|
||||
"#endif\n"
|
||||
"\n"
|
||||
" // Get first primitive that will write a failling alpha value\n"
|
||||
"#if PS_DATE == 1 && !defined(DISABLE_GL42_image)\n"
|
||||
" // DATM == 0\n"
|
||||
" // Pixel with alpha equal to 1 will failed (128-255)\n"
|
||||
" if (c.a > 127.5f / 255.0f) {\n"
|
||||
" if (C.a > 127.5f) {\n"
|
||||
" imageAtomicMin(img_prim_min, ivec2(gl_FragCoord.xy), gl_PrimitiveID);\n"
|
||||
" return;\n"
|
||||
" }\n"
|
||||
"#elif PS_DATE == 2 && !defined(DISABLE_GL42_image)\n"
|
||||
" // DATM == 1\n"
|
||||
" // Pixel with alpha equal to 0 will failed (0-127)\n"
|
||||
" if (c.a < 127.5f / 255.0f) {\n"
|
||||
" if (C.a < 127.5f) {\n"
|
||||
" imageAtomicMin(img_prim_min, ivec2(gl_FragCoord.xy), gl_PrimitiveID);\n"
|
||||
" return;\n"
|
||||
" }\n"
|
||||
"#endif\n"
|
||||
"\n"
|
||||
" ps_blend(c, alpha_blend);\n"
|
||||
" ps_blend(C, alpha_blend);\n"
|
||||
"\n"
|
||||
" ps_fbmask(c);\n"
|
||||
" ps_fbmask(C);\n"
|
||||
"\n"
|
||||
" SV_Target0 = c;\n"
|
||||
" SV_Target0 = C / 255.0f;\n"
|
||||
" SV_Target1 = vec4(alpha_blend);\n"
|
||||
"}\n"
|
||||
"\n"
|
||||
|
|
Loading…
Reference in New Issue