From bfadd884c99935872a773b89b8eb1839754654c0 Mon Sep 17 00:00:00 2001 From: Gregory Hainaut Date: Fri, 21 Aug 2015 00:33:45 +0200 Subject: [PATCH] glsl: expand tab into space The mix of the 2 was awful --- plugins/GSdx/res/glsl/tfx_fs.glsl | 474 +++++++++++++-------------- plugins/GSdx/res/glsl/tfx_vgs.glsl | 26 +- plugins/GSdx/res/glsl_source.h | 500 ++++++++++++++--------------- 3 files changed, 500 insertions(+), 500 deletions(-) diff --git a/plugins/GSdx/res/glsl/tfx_fs.glsl b/plugins/GSdx/res/glsl/tfx_fs.glsl index f7fb201dd5..754ffbf9b0 100644 --- a/plugins/GSdx/res/glsl/tfx_fs.glsl +++ b/plugins/GSdx/res/glsl/tfx_fs.glsl @@ -26,9 +26,9 @@ in SHADER { - vec4 t; - vec4 c; - flat vec4 fc; + vec4 t; + vec4 c; + flat vec4 fc; } PSin; #define PSin_t (PSin.t) @@ -62,234 +62,234 @@ layout(r32i, binding = 2) coherent uniform iimage2D img_prim_min; // Warning duplicated in both GLSL file layout(std140, binding = 21) uniform cb21 { - vec3 FogColor; - float AREF; + vec3 FogColor; + float AREF; - vec4 WH; + vec4 WH; - vec2 _pad0; - vec2 TA; + vec2 _pad0; + vec2 TA; - uvec4 MskFix; + uvec4 MskFix; - uvec4 FbMask; + uvec4 FbMask; - vec3 _pad1; - float Af; + vec3 _pad1; + float Af; - vec4 HalfTexel; + vec4 HalfTexel; - vec4 MinMax; + vec4 MinMax; - vec2 TextureScale; - vec2 TC_OffsetHack; + vec2 TextureScale; + vec2 TC_OffsetHack; }; vec4 sample_c(vec2 uv) { - return texture(TextureSampler, uv); + return texture(TextureSampler, uv); } vec4 sample_p(float idx) { - return texture(PaletteSampler, vec2(idx, 0.0f)); + return texture(PaletteSampler, vec2(idx, 0.0f)); } vec4 clamp_wrap_uv(vec4 uv) { - vec4 uv_out = uv; + vec4 uv_out = uv; #if PS_WMS == PS_WMT #if PS_WMS == 2 - uv_out = clamp(uv, MinMax.xyxy, MinMax.zwzw); + uv_out = clamp(uv, MinMax.xyxy, MinMax.zwzw); #elif PS_WMS == 3 - uv_out = vec4((ivec4(uv * WH.xyxy) & ivec4(MskFix.xyxy)) | ivec4(MskFix.zwzw)) / WH.xyxy; + uv_out = vec4((ivec4(uv * WH.xyxy) & ivec4(MskFix.xyxy)) | ivec4(MskFix.zwzw)) / WH.xyxy; #endif #else // PS_WMS != PS_WMT #if PS_WMS == 2 - uv_out.xz = clamp(uv.xz, MinMax.xx, MinMax.zz); + uv_out.xz = clamp(uv.xz, MinMax.xx, MinMax.zz); #elif PS_WMS == 3 - uv_out.xz = vec2((ivec2(uv.xz * WH.xx) & ivec2(MskFix.xx)) | ivec2(MskFix.zz)) / WH.xx; + uv_out.xz = vec2((ivec2(uv.xz * WH.xx) & ivec2(MskFix.xx)) | ivec2(MskFix.zz)) / WH.xx; #endif #if PS_WMT == 2 - uv_out.yw = clamp(uv.yw, MinMax.yy, MinMax.ww); + uv_out.yw = clamp(uv.yw, MinMax.yy, MinMax.ww); #elif PS_WMT == 3 - uv_out.yw = vec2((ivec2(uv.yw * WH.yy) & ivec2(MskFix.yy)) | ivec2(MskFix.ww)) / WH.yy; + uv_out.yw = vec2((ivec2(uv.yw * WH.yy) & ivec2(MskFix.yy)) | ivec2(MskFix.ww)) / WH.yy; #endif #endif - return uv_out; + return uv_out; } mat4 sample_4c(vec4 uv) { - mat4 c; + mat4 c; // Note: texture gather can't be used because of special clamping/wrapping // Also it doesn't support lod - c[0] = sample_c(uv.xy); - c[1] = sample_c(uv.zy); - c[2] = sample_c(uv.xw); - c[3] = sample_c(uv.zw); + c[0] = sample_c(uv.xy); + c[1] = sample_c(uv.zy); + c[2] = sample_c(uv.xw); + c[3] = sample_c(uv.zw); - return c; + return c; } vec4 sample_4_index(vec4 uv) { - vec4 c; + vec4 c; - // Either GSdx will send a texture that contains a single channel - // in this case the red channel is remapped as alpha channel - // - // Or we have an old RT (ie RGBA8) that contains index (4/8) in the alpha channel + // Either GSdx will send a texture that contains a single channel + // in this case the red channel is remapped as alpha channel + // + // Or we have an old RT (ie RGBA8) that contains index (4/8) in the alpha channel // Note: texture gather can't be used because of special clamping/wrapping // Also it doesn't support lod - c.x = sample_c(uv.xy).a; - c.y = sample_c(uv.zy).a; - c.z = sample_c(uv.xw).a; - c.w = sample_c(uv.zw).a; + c.x = sample_c(uv.xy).a; + c.y = sample_c(uv.zy).a; + c.z = sample_c(uv.xw).a; + c.w = sample_c(uv.zw).a; - uvec4 i = uvec4(c * 255.0f + 0.5f); // Denormalize value + uvec4 i = uvec4(c * 255.0f + 0.5f); // Denormalize value #if PS_IFMT == 1 - // 4HH - return vec4(i >> 4u) / 255.0f; + // 4HH + return vec4(i >> 4u) / 255.0f; #elif PS_IFMT == 2 - // 4HL - return vec4(i & 0xFu) / 255.0f; + // 4HL + return vec4(i & 0xFu) / 255.0f; #else - // Most of texture will hit this code so keep normalized float value + // Most of texture will hit this code so keep normalized float value - // 8 bits - return c; + // 8 bits + return c; #endif } mat4 sample_4p(vec4 u) { - mat4 c; + mat4 c; - c[0] = sample_p(u.x); - c[1] = sample_p(u.y); - c[2] = sample_p(u.z); - c[3] = sample_p(u.w); + c[0] = sample_p(u.x); + c[1] = sample_p(u.y); + c[2] = sample_p(u.z); + c[3] = sample_p(u.w); - return c; + return c; } vec4 sample_color(vec2 st, float q) { - //FIXME: maybe we can set gl_Position.w = q in VS + //FIXME: maybe we can set gl_Position.w = q in VS #if (PS_FST == 0) - st /= q; + st /= q; #endif #if (PS_TCOFFSETHACK == 1) - st += TC_OffsetHack.xy; + st += TC_OffsetHack.xy; #endif - vec4 t; - mat4 c; - vec2 dd; + vec4 t; + mat4 c; + vec2 dd; // FIXME I'm not sure this condition is useful (I think code will be optimized) #if (PS_LTF == 0 && PS_FMT == FMT_32 && PS_WMS < 2 && PS_WMT < 2) - // No software LTF and pure 32 bits RGBA texure without special texture wrapping - c[0] = sample_c(st); + // No software LTF and pure 32 bits RGBA texure without special texture wrapping + c[0] = sample_c(st); #ifdef TEX_COORD_DEBUG - c[0].rg = st.xy; + c[0].rg = st.xy; #endif #else - vec4 uv; + vec4 uv; - if(PS_LTF != 0) - { - uv = st.xyxy + HalfTexel; - dd = fract(uv.xy * WH.zw); - } - else - { - uv = st.xyxy; - } + if(PS_LTF != 0) + { + uv = st.xyxy + HalfTexel; + dd = fract(uv.xy * WH.zw); + } + else + { + uv = st.xyxy; + } - uv = clamp_wrap_uv(uv); + uv = clamp_wrap_uv(uv); - if((PS_FMT & FMT_PAL) != 0) - { - c = sample_4p(sample_4_index(uv)); - } - else - { - c = sample_4c(uv); - } + if((PS_FMT & FMT_PAL) != 0) + { + c = sample_4p(sample_4_index(uv)); + } + else + { + c = sample_4c(uv); + } #ifdef TEX_COORD_DEBUG - c[0].rg = uv.xy; - c[1].rg = uv.xy; - c[2].rg = uv.xy; - c[3].rg = uv.xy; + c[0].rg = uv.xy; + c[1].rg = uv.xy; + c[2].rg = uv.xy; + c[3].rg = uv.xy; #endif #endif - // PERF: see the impact of the exansion before/after the interpolation - for (int i = 0; i < 4; i++) - { + // PERF: see the impact of the exansion before/after the interpolation + for (int i = 0; i < 4; i++) + { // PERF note: using dot product reduces by 1 the number of instruction // but I'm not sure it is equivalent neither faster. //float sum = dot(c[i].rgb, vec3(1.0f)); #if ((PS_FMT & ~FMT_PAL) == FMT_24) - c[i].a = ( (PS_AEM == 0) || any(bvec3(c[i].rgb)) ) ? TA.x : 0.0f; - //c[i].a = ( (PS_AEM == 0) || (sum > 0.0f) ) ? TA.x : 0.0f; + c[i].a = ( (PS_AEM == 0) || any(bvec3(c[i].rgb)) ) ? TA.x : 0.0f; + //c[i].a = ( (PS_AEM == 0) || (sum > 0.0f) ) ? TA.x : 0.0f; #elif ((PS_FMT & ~FMT_PAL) == FMT_16) - c[i].a = c[i].a >= 0.5 ? TA.y : ( (PS_AEM == 0) || any(bvec3(c[i].rgb)) ) ? TA.x : 0.0f; - //c[i].a = c[i].a >= 0.5 ? TA.y : ( (PS_AEM == 0) || (sum > 0.0f) ) ? TA.x : 0.0f; + c[i].a = c[i].a >= 0.5 ? TA.y : ( (PS_AEM == 0) || any(bvec3(c[i].rgb)) ) ? TA.x : 0.0f; + //c[i].a = c[i].a >= 0.5 ? TA.y : ( (PS_AEM == 0) || (sum > 0.0f) ) ? TA.x : 0.0f; #endif - } + } #if(PS_LTF != 0) - t = mix(mix(c[0], c[1], dd.x), mix(c[2], c[3], dd.x), dd.y); + t = mix(mix(c[0], c[1], dd.x), mix(c[2], c[3], dd.x), dd.y); #else - t = c[0]; + t = c[0]; #endif - // The 0.05f helps to fix the overbloom of sotc - // I think the issue is related to the rounding of texture coodinate. The linear (from fixed unit) - // interpolation could be slightly below the correct one. - return trunc(t * 255.0f + 0.05f); + // The 0.05f helps to fix the overbloom of sotc + // I think the issue is related to the rounding of texture coodinate. The linear (from fixed unit) + // interpolation could be slightly below the correct one. + return trunc(t * 255.0f + 0.05f); } vec4 tfx(vec4 T, vec4 C) { - vec4 C_out; - vec4 FxT = trunc(trunc(C) * T / 128.0f); + vec4 C_out; + vec4 FxT = trunc(trunc(C) * T / 128.0f); #if (PS_TFX == 0) - C_out = FxT; + C_out = FxT; #elif (PS_TFX == 1) - C_out = T; + C_out = T; #elif (PS_TFX == 2) - C_out.rgb = FxT.rgb + C.a; - C_out.a = T.a + C.a; + C_out.rgb = FxT.rgb + C.a; + C_out.a = T.a + C.a; #elif (PS_TFX == 3) - C_out.rgb = FxT.rgb + C.a; - C_out.a = T.a; + C_out.rgb = FxT.rgb + C.a; + C_out.a = T.a; #else - C_out = C; + C_out = C; #endif #if (PS_TCC == 0) @@ -297,96 +297,96 @@ vec4 tfx(vec4 T, vec4 C) #endif #if (PS_TFX == 0) || (PS_TFX == 2) || (PS_TFX == 3) - // Clamp only when it is useful - C_out = min(C_out, 255.0f); + // Clamp only when it is useful + C_out = min(C_out, 255.0f); #endif - return C_out; + return C_out; } void atst(vec4 C) { - // FIXME use integer cmp - float a = C.a; + // FIXME use integer cmp + float a = C.a; #if (PS_ATST == 0) // never - discard; + discard; #elif (PS_ATST == 1) // always - // nothing to do + // nothing to do #elif (PS_ATST == 2) // l - if ((AREF - a - 0.5f) < 0.0f) - discard; + if ((AREF - a - 0.5f) < 0.0f) + discard; #elif (PS_ATST == 3 ) // le - if ((AREF - a + 0.5f) < 0.0f) - discard; + if ((AREF - a + 0.5f) < 0.0f) + discard; #elif (PS_ATST == 4) // e - if ((0.5f - abs(a - AREF)) < 0.0f) - discard; + if ((0.5f - abs(a - AREF)) < 0.0f) + discard; #elif (PS_ATST == 5) // ge - if ((a-AREF + 0.5f) < 0.0f) - discard; + if ((a-AREF + 0.5f) < 0.0f) + discard; #elif (PS_ATST == 6) // g - if ((a-AREF - 0.5f) < 0.0f) - discard; + if ((a-AREF - 0.5f) < 0.0f) + discard; #elif (PS_ATST == 7) // ne - if ((abs(a - AREF) - 0.5f) < 0.0f) - discard; + if ((abs(a - AREF) - 0.5f) < 0.0f) + discard; #endif } void fog(inout vec4 C, float f) { #if PS_FOG != 0 - C.rgb = trunc(mix(FogColor, C.rgb, f)); + C.rgb = trunc(mix(FogColor, C.rgb, f)); #endif } vec4 ps_color() { - vec4 T = sample_color(PSin_t.xy, PSin_t.w); + vec4 T = sample_color(PSin_t.xy, PSin_t.w); #if PS_IIP == 1 - vec4 C = tfx(T, PSin_c); + vec4 C = tfx(T, PSin_c); #else - vec4 C = tfx(T, PSin_fc); + vec4 C = tfx(T, PSin_fc); #endif - atst(C); + atst(C); - fog(C, PSin_t.z); + fog(C, PSin_t.z); #if (PS_CLR1 != 0) // needed for Cd * (As/Ad/F + 1) blending modes - C.rgb = vec3(255.0f); + C.rgb = vec3(255.0f); #endif - return C; + return C; } void ps_fbmask(inout vec4 C) { - // FIXME do I need special case for 16 bits + // FIXME do I need special case for 16 bits #if PS_FBMASK - vec4 RT = trunc(texelFetch(RtSampler, ivec2(gl_FragCoord.xy), 0) * 255.0f + 0.1f); - C = vec4((uvec4(C) & ~FbMask) | (uvec4(RT) & FbMask)); + vec4 RT = trunc(texelFetch(RtSampler, ivec2(gl_FragCoord.xy), 0) * 255.0f + 0.1f); + C = vec4((uvec4(C) & ~FbMask) | (uvec4(RT) & FbMask)); #endif } void ps_blend(inout vec4 Color, float As) { #if SW_BLEND - vec4 RT = trunc(texelFetch(RtSampler, ivec2(gl_FragCoord.xy), 0) * 255.0f + 0.1f); + vec4 RT = trunc(texelFetch(RtSampler, ivec2(gl_FragCoord.xy), 0) * 255.0f + 0.1f); #if PS_DFMT == FMT_24 - float Ad = 1.0f; + float Ad = 1.0f; #else - // FIXME FMT_16 case - // FIXME Ad or Ad * 2? - float Ad = RT.a / 128.0f; + // FIXME FMT_16 case + // FIXME Ad or Ad * 2? + float Ad = RT.a / 128.0f; #endif - // Let the compiler do its jobs ! - vec3 Cd = RT.rgb; - vec3 Cs = Color.rgb; + // Let the compiler do its jobs ! + vec3 Cd = RT.rgb; + vec3 Cs = Color.rgb; #if PS_BLEND_A == 0 vec3 A = Cs; @@ -426,26 +426,26 @@ void ps_blend(inout vec4 Color, float As) Color.rgb = trunc((A - B) * C + D); #endif - // FIXME dithering + // FIXME dithering - // Correct the Color value based on the output format + // Correct the Color value based on the output format #if PS_COLCLIP == 0 && PS_HDR == 0 - // Standard Clamp - Color.rgb = clamp(Color.rgb, vec3(0.0f), vec3(255.0f)); + // Standard Clamp + Color.rgb = clamp(Color.rgb, vec3(0.0f), vec3(255.0f)); #endif - // FIXME rouding of negative float? - // compiler uses trunc but it might need floor + // FIXME rouding of negative float? + // compiler uses trunc but it might need floor // Warning: normally blending equation is mult(A, B) = A * B >> 7. GPU have the full accuracy // GS: Color = 1, Alpha = 255 => output 1 // GPU: Color = 1/255, Alpha = 255/255 * 255/128 => output 1.9921875 #if PS_DFMT == FMT_16 - // In 16 bits format, only 5 bits of colors are used. It impacts shadows computation of Castlevania + // In 16 bits format, only 5 bits of colors are used. It impacts shadows computation of Castlevania - Color.rgb = vec3(ivec3(Color.rgb) & ivec3(0xF8)); + Color.rgb = vec3(ivec3(Color.rgb) & ivec3(0xF8)); #elif PS_COLCLIP == 1 && PS_HDR == 0 - Color.rgb = vec3(ivec3(Color.rgb) & ivec3(0xFF)); + Color.rgb = vec3(ivec3(Color.rgb) & ivec3(0xFF)); #endif #endif @@ -456,141 +456,141 @@ void ps_main() #if ((PS_DATE & 3) == 1 || (PS_DATE & 3) == 2) && !defined(DISABLE_GL42_image) #if PS_WRITE_RG == 1 - // Pseudo 16 bits access. - float rt_a = texelFetch(RtSampler, ivec2(gl_FragCoord.xy), 0).g; + // Pseudo 16 bits access. + float rt_a = texelFetch(RtSampler, ivec2(gl_FragCoord.xy), 0).g; #else - float rt_a = texelFetch(RtSampler, ivec2(gl_FragCoord.xy), 0).a; + float rt_a = texelFetch(RtSampler, ivec2(gl_FragCoord.xy), 0).a; #endif #if (PS_DATE & 3) == 1 - // DATM == 0: Pixel with alpha equal to 1 will failed - bool bad = (127.5f / 255.0f) < rt_a; + // DATM == 0: Pixel with alpha equal to 1 will failed + bool bad = (127.5f / 255.0f) < rt_a; #elif (PS_DATE & 3) == 2 - // DATM == 1: Pixel with alpha equal to 0 will failed - bool bad = rt_a < (127.5f / 255.0f); + // DATM == 1: Pixel with alpha equal to 0 will failed + bool bad = rt_a < (127.5f / 255.0f); #endif - if (bad) { + if (bad) { #if PS_DATE >= 5 - discard; + discard; #else - imageStore(img_prim_min, ivec2(gl_FragCoord.xy), ivec4(-1)); - return; + imageStore(img_prim_min, ivec2(gl_FragCoord.xy), ivec4(-1)); + return; #endif - } + } #endif #if PS_DATE == 3 && !defined(DISABLE_GL42_image) - int stencil_ceil = imageLoad(img_prim_min, ivec2(gl_FragCoord.xy)).r; - // Note gl_PrimitiveID == stencil_ceil will be the primitive that will update - // the bad alpha value so we must keep it. + int stencil_ceil = imageLoad(img_prim_min, ivec2(gl_FragCoord.xy)).r; + // Note gl_PrimitiveID == stencil_ceil will be the primitive that will update + // the bad alpha value so we must keep it. - if (gl_PrimitiveID > stencil_ceil) { - discard; - } + if (gl_PrimitiveID > stencil_ceil) { + discard; + } #endif - vec4 C = ps_color(); + vec4 C = ps_color(); #if (APITRACE_DEBUG & 1) == 1 - C.r = 255f; + C.r = 255f; #endif #if (APITRACE_DEBUG & 2) == 2 - C.g = 255f; + C.g = 255f; #endif #if (APITRACE_DEBUG & 4) == 4 - C.b = 255f; + C.b = 255f; #endif #if (APITRACE_DEBUG & 8) == 8 - C.a = 128f; + C.a = 128f; #endif #if PS_SHUFFLE - uvec4 denorm_c = uvec4(C); - uvec2 denorm_TA = uvec2(vec2(TA.xy) * 255.0f + 0.5f); + uvec4 denorm_c = uvec4(C); + uvec2 denorm_TA = uvec2(vec2(TA.xy) * 255.0f + 0.5f); - // Write RB part. Mask will take care of the correct destination + // Write RB part. Mask will take care of the correct destination #if PS_READ_BA - C.rb = C.bb; + C.rb = C.bb; #else - C.rb = C.rr; + C.rb = C.rr; #endif - // FIXME precompute my_TA & 0x80 + // FIXME precompute my_TA & 0x80 - // Write GA part. Mask will take care of the correct destination - // Note: GLSL 4.50/GL_EXT_shader_integer_mix support a mix instruction to select a component\n" - // However Nvidia emulate it with an if (at least on kepler arch) ...\n" + // Write GA part. Mask will take care of the correct destination + // Note: GLSL 4.50/GL_EXT_shader_integer_mix support a mix instruction to select a component\n" + // However Nvidia emulate it with an if (at least on kepler arch) ...\n" #if PS_READ_BA - // bit field operation requires GL4 HW. Could be nice to merge it with step/mix below - // uint my_ta = (bool(bitfieldExtract(denorm_c.a, 7, 1))) ? denorm_TA.y : denorm_TA.x; - // denorm_c.a = bitfieldInsert(denorm_c.a, bitfieldExtract(my_ta, 7, 1), 7, 1); - // c.ga = vec2(float(denorm_c.a)); + // bit field operation requires GL4 HW. Could be nice to merge it with step/mix below + // uint my_ta = (bool(bitfieldExtract(denorm_c.a, 7, 1))) ? denorm_TA.y : denorm_TA.x; + // denorm_c.a = bitfieldInsert(denorm_c.a, bitfieldExtract(my_ta, 7, 1), 7, 1); + // c.ga = vec2(float(denorm_c.a)); - if (bool(denorm_c.a & 0x80u)) - C.ga = vec2(float((denorm_c.a & 0x7Fu) | (denorm_TA.y & 0x80u))); - else - C.ga = vec2(float((denorm_c.a & 0x7Fu) | (denorm_TA.x & 0x80u))); + if (bool(denorm_c.a & 0x80u)) + C.ga = vec2(float((denorm_c.a & 0x7Fu) | (denorm_TA.y & 0x80u))); + else + C.ga = vec2(float((denorm_c.a & 0x7Fu) | (denorm_TA.x & 0x80u))); #else - if (bool(denorm_c.g & 0x80u)) - C.ga = vec2(float((denorm_c.g & 0x7Fu) | (denorm_TA.y & 0x80u))); - else - C.ga = vec2(float((denorm_c.g & 0x7Fu) | (denorm_TA.x & 0x80u))); + if (bool(denorm_c.g & 0x80u)) + C.ga = vec2(float((denorm_c.g & 0x7Fu) | (denorm_TA.y & 0x80u))); + else + C.ga = vec2(float((denorm_c.g & 0x7Fu) | (denorm_TA.x & 0x80u))); - // Nice idea but step/mix requires 4 instructions - // set / trunc / I2F / Mad - // - // float sel = step(128.0f, c.g); - // vec2 c_shuffle = vec2((denorm_c.gg & 0x7Fu) | (denorm_TA & 0x80u)); - // c.ga = mix(c_shuffle.xx, c_shuffle.yy, sel); + // Nice idea but step/mix requires 4 instructions + // set / trunc / I2F / Mad + // + // float sel = step(128.0f, c.g); + // vec2 c_shuffle = vec2((denorm_c.gg & 0x7Fu) | (denorm_TA & 0x80u)); + // c.ga = mix(c_shuffle.xx, c_shuffle.yy, sel); #endif #endif - // Must be done before alpha correction - float alpha_blend = C.a / 128.0f; + // Must be done before alpha correction + float alpha_blend = C.a / 128.0f; - // Correct the ALPHA value based on the output format + // Correct the ALPHA value based on the output format #if (PS_DFMT == FMT_16) - float A_one = 128.0f; // alpha output will be 0x80 - C.a = (PS_FBA != 0) ? A_one : step(128.0f, C.a) * A_one; + float A_one = 128.0f; // alpha output will be 0x80 + C.a = (PS_FBA != 0) ? A_one : step(128.0f, C.a) * A_one; #elif (PS_DFMT == FMT_32) && (PS_FBA != 0) - if(C.a < 128.0f) C.a += 128.0f; + if(C.a < 128.0f) C.a += 128.0f; #endif - // Get first primitive that will write a failling alpha value + // Get first primitive that will write a failling alpha value #if PS_DATE == 1 && !defined(DISABLE_GL42_image) - // DATM == 0 - // Pixel with alpha equal to 1 will failed (128-255) - if (C.a > 127.5f) { - imageAtomicMin(img_prim_min, ivec2(gl_FragCoord.xy), gl_PrimitiveID); - return; - } + // DATM == 0 + // Pixel with alpha equal to 1 will failed (128-255) + if (C.a > 127.5f) { + imageAtomicMin(img_prim_min, ivec2(gl_FragCoord.xy), gl_PrimitiveID); + return; + } #elif PS_DATE == 2 && !defined(DISABLE_GL42_image) - // DATM == 1 - // Pixel with alpha equal to 0 will failed (0-127) - if (C.a < 127.5f) { - imageAtomicMin(img_prim_min, ivec2(gl_FragCoord.xy), gl_PrimitiveID); - return; - } + // DATM == 1 + // Pixel with alpha equal to 0 will failed (0-127) + if (C.a < 127.5f) { + imageAtomicMin(img_prim_min, ivec2(gl_FragCoord.xy), gl_PrimitiveID); + return; + } #endif - ps_blend(C, alpha_blend); + ps_blend(C, alpha_blend); - ps_fbmask(C); + ps_fbmask(C); #if PS_HDR == 1 - // Use negative value to avoid overflow of the texture (in accumulation mode) - // Note: code were initially done for an Half-Float texture. Due to overflow - // the texture was upgraded to a full float. Maybe this code is useless now! - // Good testcase is castlevania - if (any(greaterThan(C.rgb, vec3(128.0f)))) { - C.rgb = (C.rgb - 256.0f); - } + // Use negative value to avoid overflow of the texture (in accumulation mode) + // Note: code were initially done for an Half-Float texture. Due to overflow + // the texture was upgraded to a full float. Maybe this code is useless now! + // Good testcase is castlevania + if (any(greaterThan(C.rgb, vec3(128.0f)))) { + C.rgb = (C.rgb - 256.0f); + } #endif - SV_Target0 = C / 255.0f; - SV_Target1 = vec4(alpha_blend); + SV_Target0 = C / 255.0f; + SV_Target1 = vec4(alpha_blend); } #endif diff --git a/plugins/GSdx/res/glsl/tfx_vgs.glsl b/plugins/GSdx/res/glsl/tfx_vgs.glsl index bea88bab03..4f32bf8fac 100644 --- a/plugins/GSdx/res/glsl/tfx_vgs.glsl +++ b/plugins/GSdx/res/glsl/tfx_vgs.glsl @@ -11,27 +11,27 @@ layout(std140, binding = 20) uniform cb20 // Warning duplicated in both GLSL file layout(std140, binding = 21) uniform cb21 { - vec3 FogColor; - float AREF; + vec3 FogColor; + float AREF; - vec4 WH; + vec4 WH; - vec2 _pad0; - vec2 TA; + vec2 _pad0; + vec2 TA; - uvec4 MskFix; + uvec4 MskFix; - uvec4 FbMask; + uvec4 FbMask; - vec3 _pad1; - float Af; + vec3 _pad1; + float Af; - vec4 HalfTexel; + vec4 HalfTexel; - vec4 MinMax; + vec4 MinMax; - vec2 TextureScale; - vec2 TC_OffsetHack; + vec2 TextureScale; + vec2 TC_OffsetHack; }; #ifdef VERTEX_SHADER diff --git a/plugins/GSdx/res/glsl_source.h b/plugins/GSdx/res/glsl_source.h index ed28dd519f..960bce4a07 100644 --- a/plugins/GSdx/res/glsl_source.h +++ b/plugins/GSdx/res/glsl_source.h @@ -617,27 +617,27 @@ static const char* tfx_vgs_glsl = "// Warning duplicated in both GLSL file\n" "layout(std140, binding = 21) uniform cb21\n" "{\n" - " vec3 FogColor;\n" - " float AREF;\n" + " vec3 FogColor;\n" + " float AREF;\n" "\n" - " vec4 WH;\n" + " vec4 WH;\n" "\n" - " vec2 _pad0;\n" - " vec2 TA;\n" + " vec2 _pad0;\n" + " vec2 TA;\n" "\n" - " uvec4 MskFix;\n" + " uvec4 MskFix;\n" "\n" - " uvec4 FbMask;\n" + " uvec4 FbMask;\n" "\n" - " vec3 _pad1;\n" - " float Af;\n" + " vec3 _pad1;\n" + " float Af;\n" "\n" - " vec4 HalfTexel;\n" + " vec4 HalfTexel;\n" "\n" - " vec4 MinMax;\n" + " vec4 MinMax;\n" "\n" - " vec2 TextureScale;\n" - " vec2 TC_OffsetHack;\n" + " vec2 TextureScale;\n" + " vec2 TC_OffsetHack;\n" "};\n" "\n" "#ifdef VERTEX_SHADER\n" @@ -911,9 +911,9 @@ static const char* tfx_fs_all_glsl = "\n" "in SHADER\n" "{\n" - " vec4 t;\n" - " vec4 c;\n" - " flat vec4 fc;\n" + " vec4 t;\n" + " vec4 c;\n" + " flat vec4 fc;\n" "} PSin;\n" "\n" "#define PSin_t (PSin.t)\n" @@ -947,234 +947,234 @@ static const char* tfx_fs_all_glsl = "// Warning duplicated in both GLSL file\n" "layout(std140, binding = 21) uniform cb21\n" "{\n" - " vec3 FogColor;\n" - " float AREF;\n" + " vec3 FogColor;\n" + " float AREF;\n" "\n" - " vec4 WH;\n" + " vec4 WH;\n" "\n" - " vec2 _pad0;\n" - " vec2 TA;\n" + " vec2 _pad0;\n" + " vec2 TA;\n" "\n" - " uvec4 MskFix;\n" + " uvec4 MskFix;\n" "\n" - " uvec4 FbMask;\n" + " uvec4 FbMask;\n" "\n" - " vec3 _pad1;\n" - " float Af;\n" + " vec3 _pad1;\n" + " float Af;\n" "\n" - " vec4 HalfTexel;\n" + " vec4 HalfTexel;\n" "\n" - " vec4 MinMax;\n" + " vec4 MinMax;\n" "\n" - " vec2 TextureScale;\n" - " vec2 TC_OffsetHack;\n" + " vec2 TextureScale;\n" + " vec2 TC_OffsetHack;\n" "};\n" "\n" "vec4 sample_c(vec2 uv)\n" "{\n" - " return texture(TextureSampler, uv);\n" + " return texture(TextureSampler, uv);\n" "}\n" "\n" "vec4 sample_p(float idx)\n" "{\n" - " return texture(PaletteSampler, vec2(idx, 0.0f));\n" + " return texture(PaletteSampler, vec2(idx, 0.0f));\n" "}\n" "\n" "vec4 clamp_wrap_uv(vec4 uv)\n" "{\n" - " vec4 uv_out = uv;\n" + " vec4 uv_out = uv;\n" "\n" "#if PS_WMS == PS_WMT\n" "\n" "#if PS_WMS == 2\n" - " uv_out = clamp(uv, MinMax.xyxy, MinMax.zwzw);\n" + " uv_out = clamp(uv, MinMax.xyxy, MinMax.zwzw);\n" "#elif PS_WMS == 3\n" - " uv_out = vec4((ivec4(uv * WH.xyxy) & ivec4(MskFix.xyxy)) | ivec4(MskFix.zwzw)) / WH.xyxy;\n" + " uv_out = vec4((ivec4(uv * WH.xyxy) & ivec4(MskFix.xyxy)) | ivec4(MskFix.zwzw)) / WH.xyxy;\n" "#endif\n" "\n" "#else // PS_WMS != PS_WMT\n" "\n" "#if PS_WMS == 2\n" - " uv_out.xz = clamp(uv.xz, MinMax.xx, MinMax.zz);\n" + " uv_out.xz = clamp(uv.xz, MinMax.xx, MinMax.zz);\n" "\n" "#elif PS_WMS == 3\n" - " uv_out.xz = vec2((ivec2(uv.xz * WH.xx) & ivec2(MskFix.xx)) | ivec2(MskFix.zz)) / WH.xx;\n" + " uv_out.xz = vec2((ivec2(uv.xz * WH.xx) & ivec2(MskFix.xx)) | ivec2(MskFix.zz)) / WH.xx;\n" "\n" "#endif\n" "\n" "#if PS_WMT == 2\n" - " uv_out.yw = clamp(uv.yw, MinMax.yy, MinMax.ww);\n" + " uv_out.yw = clamp(uv.yw, MinMax.yy, MinMax.ww);\n" "\n" "#elif PS_WMT == 3\n" "\n" - " uv_out.yw = vec2((ivec2(uv.yw * WH.yy) & ivec2(MskFix.yy)) | ivec2(MskFix.ww)) / WH.yy;\n" + " uv_out.yw = vec2((ivec2(uv.yw * WH.yy) & ivec2(MskFix.yy)) | ivec2(MskFix.ww)) / WH.yy;\n" "#endif\n" "\n" "#endif\n" "\n" - " return uv_out;\n" + " return uv_out;\n" "}\n" "\n" "mat4 sample_4c(vec4 uv)\n" "{\n" - " mat4 c;\n" + " mat4 c;\n" "\n" " // Note: texture gather can't be used because of special clamping/wrapping\n" " // Also it doesn't support lod\n" - " c[0] = sample_c(uv.xy);\n" - " c[1] = sample_c(uv.zy);\n" - " c[2] = sample_c(uv.xw);\n" - " c[3] = sample_c(uv.zw);\n" + " c[0] = sample_c(uv.xy);\n" + " c[1] = sample_c(uv.zy);\n" + " c[2] = sample_c(uv.xw);\n" + " c[3] = sample_c(uv.zw);\n" "\n" - " return c;\n" + " return c;\n" "}\n" "\n" "vec4 sample_4_index(vec4 uv)\n" "{\n" - " vec4 c;\n" + " vec4 c;\n" "\n" - " // Either GSdx will send a texture that contains a single channel\n" - " // in this case the red channel is remapped as alpha channel\n" - " //\n" - " // Or we have an old RT (ie RGBA8) that contains index (4/8) in the alpha channel\n" + " // Either GSdx will send a texture that contains a single channel\n" + " // in this case the red channel is remapped as alpha channel\n" + " //\n" + " // Or we have an old RT (ie RGBA8) that contains index (4/8) in the alpha channel\n" "\n" " // Note: texture gather can't be used because of special clamping/wrapping\n" " // Also it doesn't support lod\n" - " c.x = sample_c(uv.xy).a;\n" - " c.y = sample_c(uv.zy).a;\n" - " c.z = sample_c(uv.xw).a;\n" - " c.w = sample_c(uv.zw).a;\n" + " c.x = sample_c(uv.xy).a;\n" + " c.y = sample_c(uv.zy).a;\n" + " c.z = sample_c(uv.xw).a;\n" + " c.w = sample_c(uv.zw).a;\n" "\n" - " uvec4 i = uvec4(c * 255.0f + 0.5f); // Denormalize value\n" + " uvec4 i = uvec4(c * 255.0f + 0.5f); // Denormalize value\n" "\n" "#if PS_IFMT == 1\n" - " // 4HH\n" - " return vec4(i >> 4u) / 255.0f;\n" + " // 4HH\n" + " return vec4(i >> 4u) / 255.0f;\n" "\n" "#elif PS_IFMT == 2\n" - " // 4HL\n" - " return vec4(i & 0xFu) / 255.0f;\n" + " // 4HL\n" + " return vec4(i & 0xFu) / 255.0f;\n" "\n" "#else\n" - " // Most of texture will hit this code so keep normalized float value\n" + " // Most of texture will hit this code so keep normalized float value\n" "\n" - " // 8 bits\n" - " return c;\n" + " // 8 bits\n" + " return c;\n" "#endif\n" "\n" "}\n" "\n" "mat4 sample_4p(vec4 u)\n" "{\n" - " mat4 c;\n" + " mat4 c;\n" "\n" - " c[0] = sample_p(u.x);\n" - " c[1] = sample_p(u.y);\n" - " c[2] = sample_p(u.z);\n" - " c[3] = sample_p(u.w);\n" + " c[0] = sample_p(u.x);\n" + " c[1] = sample_p(u.y);\n" + " c[2] = sample_p(u.z);\n" + " c[3] = sample_p(u.w);\n" "\n" - " return c;\n" + " return c;\n" "}\n" "\n" "vec4 sample_color(vec2 st, float q)\n" "{\n" - " //FIXME: maybe we can set gl_Position.w = q in VS\n" + " //FIXME: maybe we can set gl_Position.w = q in VS\n" "#if (PS_FST == 0)\n" - " st /= q;\n" + " st /= q;\n" "#endif\n" "\n" "#if (PS_TCOFFSETHACK == 1)\n" - " st += TC_OffsetHack.xy;\n" + " st += TC_OffsetHack.xy;\n" "#endif\n" "\n" - " vec4 t;\n" - " mat4 c;\n" - " vec2 dd;\n" + " vec4 t;\n" + " mat4 c;\n" + " vec2 dd;\n" "\n" " // FIXME I'm not sure this condition is useful (I think code will be optimized)\n" "#if (PS_LTF == 0 && PS_FMT == FMT_32 && PS_WMS < 2 && PS_WMT < 2)\n" - " // No software LTF and pure 32 bits RGBA texure without special texture wrapping\n" - " c[0] = sample_c(st);\n" + " // No software LTF and pure 32 bits RGBA texure without special texture wrapping\n" + " c[0] = sample_c(st);\n" "#ifdef TEX_COORD_DEBUG\n" - " c[0].rg = st.xy;\n" + " c[0].rg = st.xy;\n" "#endif\n" "\n" "#else\n" - " vec4 uv;\n" + " vec4 uv;\n" "\n" - " if(PS_LTF != 0)\n" - " {\n" - " uv = st.xyxy + HalfTexel;\n" - " dd = fract(uv.xy * WH.zw);\n" - " }\n" - " else\n" - " {\n" - " uv = st.xyxy;\n" - " }\n" + " if(PS_LTF != 0)\n" + " {\n" + " uv = st.xyxy + HalfTexel;\n" + " dd = fract(uv.xy * WH.zw);\n" + " }\n" + " else\n" + " {\n" + " uv = st.xyxy;\n" + " }\n" "\n" - " uv = clamp_wrap_uv(uv);\n" + " uv = clamp_wrap_uv(uv);\n" "\n" - " if((PS_FMT & FMT_PAL) != 0)\n" - " {\n" - " c = sample_4p(sample_4_index(uv));\n" - " }\n" - " else\n" - " {\n" - " c = sample_4c(uv);\n" - " }\n" + " if((PS_FMT & FMT_PAL) != 0)\n" + " {\n" + " c = sample_4p(sample_4_index(uv));\n" + " }\n" + " else\n" + " {\n" + " c = sample_4c(uv);\n" + " }\n" "#ifdef TEX_COORD_DEBUG\n" - " c[0].rg = uv.xy;\n" - " c[1].rg = uv.xy;\n" - " c[2].rg = uv.xy;\n" - " c[3].rg = uv.xy;\n" + " c[0].rg = uv.xy;\n" + " c[1].rg = uv.xy;\n" + " c[2].rg = uv.xy;\n" + " c[3].rg = uv.xy;\n" "#endif\n" "\n" "#endif\n" "\n" - " // PERF: see the impact of the exansion before/after the interpolation\n" - " for (int i = 0; i < 4; i++)\n" - " {\n" + " // PERF: see the impact of the exansion before/after the interpolation\n" + " for (int i = 0; i < 4; i++)\n" + " {\n" " // PERF note: using dot product reduces by 1 the number of instruction\n" " // but I'm not sure it is equivalent neither faster.\n" " //float sum = dot(c[i].rgb, vec3(1.0f));\n" "#if ((PS_FMT & ~FMT_PAL) == FMT_24)\n" - " c[i].a = ( (PS_AEM == 0) || any(bvec3(c[i].rgb)) ) ? TA.x : 0.0f;\n" - " //c[i].a = ( (PS_AEM == 0) || (sum > 0.0f) ) ? TA.x : 0.0f;\n" + " c[i].a = ( (PS_AEM == 0) || any(bvec3(c[i].rgb)) ) ? TA.x : 0.0f;\n" + " //c[i].a = ( (PS_AEM == 0) || (sum > 0.0f) ) ? TA.x : 0.0f;\n" "#elif ((PS_FMT & ~FMT_PAL) == FMT_16)\n" - " c[i].a = c[i].a >= 0.5 ? TA.y : ( (PS_AEM == 0) || any(bvec3(c[i].rgb)) ) ? TA.x : 0.0f;\n" - " //c[i].a = c[i].a >= 0.5 ? TA.y : ( (PS_AEM == 0) || (sum > 0.0f) ) ? TA.x : 0.0f;\n" + " c[i].a = c[i].a >= 0.5 ? TA.y : ( (PS_AEM == 0) || any(bvec3(c[i].rgb)) ) ? TA.x : 0.0f;\n" + " //c[i].a = c[i].a >= 0.5 ? TA.y : ( (PS_AEM == 0) || (sum > 0.0f) ) ? TA.x : 0.0f;\n" "#endif\n" - " }\n" + " }\n" "\n" "#if(PS_LTF != 0)\n" - " t = mix(mix(c[0], c[1], dd.x), mix(c[2], c[3], dd.x), dd.y);\n" + " t = mix(mix(c[0], c[1], dd.x), mix(c[2], c[3], dd.x), dd.y);\n" "#else\n" - " t = c[0];\n" + " t = c[0];\n" "#endif\n" "\n" - " // The 0.05f helps to fix the overbloom of sotc\n" - " // I think the issue is related to the rounding of texture coodinate. The linear (from fixed unit)\n" - " // interpolation could be slightly below the correct one.\n" - " return trunc(t * 255.0f + 0.05f);\n" + " // The 0.05f helps to fix the overbloom of sotc\n" + " // I think the issue is related to the rounding of texture coodinate. The linear (from fixed unit)\n" + " // interpolation could be slightly below the correct one.\n" + " return trunc(t * 255.0f + 0.05f);\n" "}\n" "\n" "vec4 tfx(vec4 T, vec4 C)\n" "{\n" - " vec4 C_out;\n" - " vec4 FxT = trunc(trunc(C) * T / 128.0f);\n" + " vec4 C_out;\n" + " vec4 FxT = trunc(trunc(C) * T / 128.0f);\n" "\n" "#if (PS_TFX == 0)\n" - " C_out = FxT;\n" + " C_out = FxT;\n" "#elif (PS_TFX == 1)\n" - " C_out = T;\n" + " C_out = T;\n" "#elif (PS_TFX == 2)\n" - " C_out.rgb = FxT.rgb + C.a;\n" - " C_out.a = T.a + C.a;\n" + " C_out.rgb = FxT.rgb + C.a;\n" + " C_out.a = T.a + C.a;\n" "#elif (PS_TFX == 3)\n" - " C_out.rgb = FxT.rgb + C.a;\n" - " C_out.a = T.a;\n" + " C_out.rgb = FxT.rgb + C.a;\n" + " C_out.a = T.a;\n" "#else\n" - " C_out = C;\n" + " C_out = C;\n" "#endif\n" "\n" "#if (PS_TCC == 0)\n" @@ -1182,96 +1182,96 @@ static const char* tfx_fs_all_glsl = "#endif\n" "\n" "#if (PS_TFX == 0) || (PS_TFX == 2) || (PS_TFX == 3)\n" - " // Clamp only when it is useful\n" - " C_out = min(C_out, 255.0f);\n" + " // Clamp only when it is useful\n" + " C_out = min(C_out, 255.0f);\n" "#endif\n" "\n" - " return C_out;\n" + " return C_out;\n" "}\n" "\n" "void atst(vec4 C)\n" "{\n" - " // FIXME use integer cmp\n" - " float a = C.a;\n" + " // FIXME use integer cmp\n" + " float a = C.a;\n" "\n" "#if (PS_ATST == 0) // never\n" - " discard;\n" + " discard;\n" "#elif (PS_ATST == 1) // always\n" - " // nothing to do\n" + " // nothing to do\n" "#elif (PS_ATST == 2) // l\n" - " if ((AREF - a - 0.5f) < 0.0f)\n" - " discard;\n" + " if ((AREF - a - 0.5f) < 0.0f)\n" + " discard;\n" "#elif (PS_ATST == 3 ) // le\n" - " if ((AREF - a + 0.5f) < 0.0f)\n" - " discard;\n" + " if ((AREF - a + 0.5f) < 0.0f)\n" + " discard;\n" "#elif (PS_ATST == 4) // e\n" - " if ((0.5f - abs(a - AREF)) < 0.0f)\n" - " discard;\n" + " if ((0.5f - abs(a - AREF)) < 0.0f)\n" + " discard;\n" "#elif (PS_ATST == 5) // ge\n" - " if ((a-AREF + 0.5f) < 0.0f)\n" - " discard;\n" + " if ((a-AREF + 0.5f) < 0.0f)\n" + " discard;\n" "#elif (PS_ATST == 6) // g\n" - " if ((a-AREF - 0.5f) < 0.0f)\n" - " discard;\n" + " if ((a-AREF - 0.5f) < 0.0f)\n" + " discard;\n" "#elif (PS_ATST == 7) // ne\n" - " if ((abs(a - AREF) - 0.5f) < 0.0f)\n" - " discard;\n" + " if ((abs(a - AREF) - 0.5f) < 0.0f)\n" + " discard;\n" "#endif\n" "}\n" "\n" "void fog(inout vec4 C, float f)\n" "{\n" "#if PS_FOG != 0\n" - " C.rgb = trunc(mix(FogColor, C.rgb, f));\n" + " C.rgb = trunc(mix(FogColor, C.rgb, f));\n" "#endif\n" "}\n" "\n" "vec4 ps_color()\n" "{\n" - " vec4 T = sample_color(PSin_t.xy, PSin_t.w);\n" + " vec4 T = sample_color(PSin_t.xy, PSin_t.w);\n" "\n" "#if PS_IIP == 1\n" - " vec4 C = tfx(T, PSin_c);\n" + " vec4 C = tfx(T, PSin_c);\n" "#else\n" - " vec4 C = tfx(T, PSin_fc);\n" + " vec4 C = tfx(T, PSin_fc);\n" "#endif\n" "\n" - " atst(C);\n" + " atst(C);\n" "\n" - " fog(C, PSin_t.z);\n" + " fog(C, PSin_t.z);\n" "\n" "#if (PS_CLR1 != 0) // needed for Cd * (As/Ad/F + 1) blending modes\n" - " C.rgb = vec3(255.0f);\n" + " C.rgb = vec3(255.0f);\n" "#endif\n" "\n" - " return C;\n" + " return C;\n" "}\n" "\n" "void ps_fbmask(inout vec4 C)\n" "{\n" - " // FIXME do I need special case for 16 bits\n" + " // FIXME do I need special case for 16 bits\n" "#if PS_FBMASK\n" - " vec4 RT = trunc(texelFetch(RtSampler, ivec2(gl_FragCoord.xy), 0) * 255.0f + 0.1f);\n" - " C = vec4((uvec4(C) & ~FbMask) | (uvec4(RT) & FbMask));\n" + " vec4 RT = trunc(texelFetch(RtSampler, ivec2(gl_FragCoord.xy), 0) * 255.0f + 0.1f);\n" + " C = vec4((uvec4(C) & ~FbMask) | (uvec4(RT) & FbMask));\n" "#endif\n" "}\n" "\n" "void ps_blend(inout vec4 Color, float As)\n" "{\n" "#if SW_BLEND\n" - " vec4 RT = trunc(texelFetch(RtSampler, ivec2(gl_FragCoord.xy), 0) * 255.0f + 0.1f);\n" + " vec4 RT = trunc(texelFetch(RtSampler, ivec2(gl_FragCoord.xy), 0) * 255.0f + 0.1f);\n" "\n" "#if PS_DFMT == FMT_24\n" - " float Ad = 1.0f;\n" + " float Ad = 1.0f;\n" "#else\n" - " // FIXME FMT_16 case\n" - " // FIXME Ad or Ad * 2?\n" - " float Ad = RT.a / 128.0f;\n" + " // FIXME FMT_16 case\n" + " // FIXME Ad or Ad * 2?\n" + " float Ad = RT.a / 128.0f;\n" "#endif\n" "\n" - " // Let the compiler do its jobs !\n" - " vec3 Cd = RT.rgb;\n" - " vec3 Cs = Color.rgb;\n" + " // Let the compiler do its jobs !\n" + " vec3 Cd = RT.rgb;\n" + " vec3 Cs = Color.rgb;\n" "\n" "#if PS_BLEND_A == 0\n" " vec3 A = Cs;\n" @@ -1311,26 +1311,26 @@ static const char* tfx_fs_all_glsl = " Color.rgb = trunc((A - B) * C + D);\n" "#endif\n" "\n" - " // FIXME dithering\n" + " // FIXME dithering\n" "\n" - " // Correct the Color value based on the output format\n" + " // Correct the Color value based on the output format\n" "#if PS_COLCLIP == 0 && PS_HDR == 0\n" - " // Standard Clamp\n" - " Color.rgb = clamp(Color.rgb, vec3(0.0f), vec3(255.0f));\n" + " // Standard Clamp\n" + " Color.rgb = clamp(Color.rgb, vec3(0.0f), vec3(255.0f));\n" "#endif\n" "\n" - " // FIXME rouding of negative float?\n" - " // compiler uses trunc but it might need floor\n" + " // FIXME rouding of negative float?\n" + " // compiler uses trunc but it might need floor\n" "\n" " // Warning: normally blending equation is mult(A, B) = A * B >> 7. GPU have the full accuracy\n" " // GS: Color = 1, Alpha = 255 => output 1\n" " // GPU: Color = 1/255, Alpha = 255/255 * 255/128 => output 1.9921875\n" "#if PS_DFMT == FMT_16\n" - " // In 16 bits format, only 5 bits of colors are used. It impacts shadows computation of Castlevania\n" + " // In 16 bits format, only 5 bits of colors are used. It impacts shadows computation of Castlevania\n" "\n" - " Color.rgb = vec3(ivec3(Color.rgb) & ivec3(0xF8));\n" + " Color.rgb = vec3(ivec3(Color.rgb) & ivec3(0xF8));\n" "#elif PS_COLCLIP == 1 && PS_HDR == 0\n" - " Color.rgb = vec3(ivec3(Color.rgb) & ivec3(0xFF));\n" + " Color.rgb = vec3(ivec3(Color.rgb) & ivec3(0xFF));\n" "#endif\n" "\n" "#endif\n" @@ -1341,141 +1341,141 @@ static const char* tfx_fs_all_glsl = "#if ((PS_DATE & 3) == 1 || (PS_DATE & 3) == 2) && !defined(DISABLE_GL42_image)\n" "\n" "#if PS_WRITE_RG == 1\n" - " // Pseudo 16 bits access.\n" - " float rt_a = texelFetch(RtSampler, ivec2(gl_FragCoord.xy), 0).g;\n" + " // Pseudo 16 bits access.\n" + " float rt_a = texelFetch(RtSampler, ivec2(gl_FragCoord.xy), 0).g;\n" "#else\n" - " float rt_a = texelFetch(RtSampler, ivec2(gl_FragCoord.xy), 0).a;\n" + " float rt_a = texelFetch(RtSampler, ivec2(gl_FragCoord.xy), 0).a;\n" "#endif\n" "\n" "#if (PS_DATE & 3) == 1\n" - " // DATM == 0: Pixel with alpha equal to 1 will failed\n" - " bool bad = (127.5f / 255.0f) < rt_a;\n" + " // DATM == 0: Pixel with alpha equal to 1 will failed\n" + " bool bad = (127.5f / 255.0f) < rt_a;\n" "#elif (PS_DATE & 3) == 2\n" - " // DATM == 1: Pixel with alpha equal to 0 will failed\n" - " bool bad = rt_a < (127.5f / 255.0f);\n" + " // DATM == 1: Pixel with alpha equal to 0 will failed\n" + " bool bad = rt_a < (127.5f / 255.0f);\n" "#endif\n" "\n" - " if (bad) {\n" + " if (bad) {\n" "#if PS_DATE >= 5\n" - " discard;\n" + " discard;\n" "#else\n" - " imageStore(img_prim_min, ivec2(gl_FragCoord.xy), ivec4(-1));\n" - " return;\n" + " imageStore(img_prim_min, ivec2(gl_FragCoord.xy), ivec4(-1));\n" + " return;\n" "#endif\n" - " }\n" + " }\n" "\n" "#endif\n" "\n" "#if PS_DATE == 3 && !defined(DISABLE_GL42_image)\n" - " int stencil_ceil = imageLoad(img_prim_min, ivec2(gl_FragCoord.xy)).r;\n" - " // Note gl_PrimitiveID == stencil_ceil will be the primitive that will update\n" - " // the bad alpha value so we must keep it.\n" + " int stencil_ceil = imageLoad(img_prim_min, ivec2(gl_FragCoord.xy)).r;\n" + " // Note gl_PrimitiveID == stencil_ceil will be the primitive that will update\n" + " // the bad alpha value so we must keep it.\n" "\n" - " if (gl_PrimitiveID > stencil_ceil) {\n" - " discard;\n" - " }\n" + " if (gl_PrimitiveID > stencil_ceil) {\n" + " discard;\n" + " }\n" "#endif\n" "\n" - " vec4 C = ps_color();\n" + " vec4 C = ps_color();\n" "#if (APITRACE_DEBUG & 1) == 1\n" - " C.r = 255f;\n" + " C.r = 255f;\n" "#endif\n" "#if (APITRACE_DEBUG & 2) == 2\n" - " C.g = 255f;\n" + " C.g = 255f;\n" "#endif\n" "#if (APITRACE_DEBUG & 4) == 4\n" - " C.b = 255f;\n" + " C.b = 255f;\n" "#endif\n" "#if (APITRACE_DEBUG & 8) == 8\n" - " C.a = 128f;\n" + " C.a = 128f;\n" "#endif\n" "\n" "#if PS_SHUFFLE\n" - " uvec4 denorm_c = uvec4(C);\n" - " uvec2 denorm_TA = uvec2(vec2(TA.xy) * 255.0f + 0.5f);\n" + " uvec4 denorm_c = uvec4(C);\n" + " uvec2 denorm_TA = uvec2(vec2(TA.xy) * 255.0f + 0.5f);\n" "\n" - " // Write RB part. Mask will take care of the correct destination\n" + " // Write RB part. Mask will take care of the correct destination\n" "#if PS_READ_BA\n" - " C.rb = C.bb;\n" + " C.rb = C.bb;\n" "#else\n" - " C.rb = C.rr;\n" + " C.rb = C.rr;\n" "#endif\n" "\n" - " // FIXME precompute my_TA & 0x80\n" + " // FIXME precompute my_TA & 0x80\n" "\n" - " // Write GA part. Mask will take care of the correct destination\n" - " // Note: GLSL 4.50/GL_EXT_shader_integer_mix support a mix instruction to select a component\\n\"\n" - " // However Nvidia emulate it with an if (at least on kepler arch) ...\\n\"\n" + " // Write GA part. Mask will take care of the correct destination\n" + " // Note: GLSL 4.50/GL_EXT_shader_integer_mix support a mix instruction to select a component\\n\"\n" + " // However Nvidia emulate it with an if (at least on kepler arch) ...\\n\"\n" "#if PS_READ_BA\n" - " // bit field operation requires GL4 HW. Could be nice to merge it with step/mix below\n" - " // uint my_ta = (bool(bitfieldExtract(denorm_c.a, 7, 1))) ? denorm_TA.y : denorm_TA.x;\n" - " // denorm_c.a = bitfieldInsert(denorm_c.a, bitfieldExtract(my_ta, 7, 1), 7, 1);\n" - " // c.ga = vec2(float(denorm_c.a));\n" + " // bit field operation requires GL4 HW. Could be nice to merge it with step/mix below\n" + " // uint my_ta = (bool(bitfieldExtract(denorm_c.a, 7, 1))) ? denorm_TA.y : denorm_TA.x;\n" + " // denorm_c.a = bitfieldInsert(denorm_c.a, bitfieldExtract(my_ta, 7, 1), 7, 1);\n" + " // c.ga = vec2(float(denorm_c.a));\n" "\n" - " if (bool(denorm_c.a & 0x80u))\n" - " C.ga = vec2(float((denorm_c.a & 0x7Fu) | (denorm_TA.y & 0x80u)));\n" - " else\n" - " C.ga = vec2(float((denorm_c.a & 0x7Fu) | (denorm_TA.x & 0x80u)));\n" + " if (bool(denorm_c.a & 0x80u))\n" + " C.ga = vec2(float((denorm_c.a & 0x7Fu) | (denorm_TA.y & 0x80u)));\n" + " else\n" + " C.ga = vec2(float((denorm_c.a & 0x7Fu) | (denorm_TA.x & 0x80u)));\n" "\n" "#else\n" - " if (bool(denorm_c.g & 0x80u))\n" - " C.ga = vec2(float((denorm_c.g & 0x7Fu) | (denorm_TA.y & 0x80u)));\n" - " else\n" - " C.ga = vec2(float((denorm_c.g & 0x7Fu) | (denorm_TA.x & 0x80u)));\n" + " if (bool(denorm_c.g & 0x80u))\n" + " C.ga = vec2(float((denorm_c.g & 0x7Fu) | (denorm_TA.y & 0x80u)));\n" + " else\n" + " C.ga = vec2(float((denorm_c.g & 0x7Fu) | (denorm_TA.x & 0x80u)));\n" "\n" - " // Nice idea but step/mix requires 4 instructions\n" - " // set / trunc / I2F / Mad\n" - " //\n" - " // float sel = step(128.0f, c.g);\n" - " // vec2 c_shuffle = vec2((denorm_c.gg & 0x7Fu) | (denorm_TA & 0x80u));\n" - " // c.ga = mix(c_shuffle.xx, c_shuffle.yy, sel);\n" + " // Nice idea but step/mix requires 4 instructions\n" + " // set / trunc / I2F / Mad\n" + " //\n" + " // float sel = step(128.0f, c.g);\n" + " // vec2 c_shuffle = vec2((denorm_c.gg & 0x7Fu) | (denorm_TA & 0x80u));\n" + " // c.ga = mix(c_shuffle.xx, c_shuffle.yy, sel);\n" "#endif\n" "\n" "#endif\n" "\n" - " // Must be done before alpha correction\n" - " float alpha_blend = C.a / 128.0f;\n" + " // Must be done before alpha correction\n" + " float alpha_blend = C.a / 128.0f;\n" "\n" - " // Correct the ALPHA value based on the output format\n" + " // Correct the ALPHA value based on the output format\n" "#if (PS_DFMT == FMT_16)\n" - " float A_one = 128.0f; // alpha output will be 0x80\n" - " C.a = (PS_FBA != 0) ? A_one : step(128.0f, C.a) * A_one;\n" + " float A_one = 128.0f; // alpha output will be 0x80\n" + " C.a = (PS_FBA != 0) ? A_one : step(128.0f, C.a) * A_one;\n" "#elif (PS_DFMT == FMT_32) && (PS_FBA != 0)\n" - " if(C.a < 128.0f) C.a += 128.0f;\n" + " if(C.a < 128.0f) C.a += 128.0f;\n" "#endif\n" "\n" - " // Get first primitive that will write a failling alpha value\n" + " // Get first primitive that will write a failling alpha value\n" "#if PS_DATE == 1 && !defined(DISABLE_GL42_image)\n" - " // DATM == 0\n" - " // Pixel with alpha equal to 1 will failed (128-255)\n" - " if (C.a > 127.5f) {\n" - " imageAtomicMin(img_prim_min, ivec2(gl_FragCoord.xy), gl_PrimitiveID);\n" - " return;\n" - " }\n" + " // DATM == 0\n" + " // Pixel with alpha equal to 1 will failed (128-255)\n" + " if (C.a > 127.5f) {\n" + " imageAtomicMin(img_prim_min, ivec2(gl_FragCoord.xy), gl_PrimitiveID);\n" + " return;\n" + " }\n" "#elif PS_DATE == 2 && !defined(DISABLE_GL42_image)\n" - " // DATM == 1\n" - " // Pixel with alpha equal to 0 will failed (0-127)\n" - " if (C.a < 127.5f) {\n" - " imageAtomicMin(img_prim_min, ivec2(gl_FragCoord.xy), gl_PrimitiveID);\n" - " return;\n" - " }\n" + " // DATM == 1\n" + " // Pixel with alpha equal to 0 will failed (0-127)\n" + " if (C.a < 127.5f) {\n" + " imageAtomicMin(img_prim_min, ivec2(gl_FragCoord.xy), gl_PrimitiveID);\n" + " return;\n" + " }\n" "#endif\n" "\n" - " ps_blend(C, alpha_blend);\n" + " ps_blend(C, alpha_blend);\n" "\n" - " ps_fbmask(C);\n" + " ps_fbmask(C);\n" "\n" "#if PS_HDR == 1\n" - " // Use negative value to avoid overflow of the texture (in accumulation mode)\n" - " // Note: code were initially done for an Half-Float texture. Due to overflow\n" - " // the texture was upgraded to a full float. Maybe this code is useless now!\n" - " // Good testcase is castlevania\n" - " if (any(greaterThan(C.rgb, vec3(128.0f)))) {\n" - " C.rgb = (C.rgb - 256.0f);\n" - " }\n" + " // Use negative value to avoid overflow of the texture (in accumulation mode)\n" + " // Note: code were initially done for an Half-Float texture. Due to overflow\n" + " // the texture was upgraded to a full float. Maybe this code is useless now!\n" + " // Good testcase is castlevania\n" + " if (any(greaterThan(C.rgb, vec3(128.0f)))) {\n" + " C.rgb = (C.rgb - 256.0f);\n" + " }\n" "#endif\n" - " SV_Target0 = C / 255.0f;\n" - " SV_Target1 = vec4(alpha_blend);\n" + " SV_Target0 = C / 255.0f;\n" + " SV_Target1 = vec4(alpha_blend);\n" "}\n" "\n" "#endif\n"