pcsx2/plugins/GSdx/res/glsl_source.h

2273 lines
68 KiB
C

/*
* This file was generated by glsl2h.pl script
*
* This Program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2, or (at your option)
* any later version.
*
* This Program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with GNU Make; see the file COPYING. If not, write to
* the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
* http://www.gnu.org/copyleft/gpl.html
*
*/
#pragma once
#include "stdafx.h"
/*
 * common_header_glsl: GLSL declarations shared by the other shader strings
 * in this file (generated from the .glsl source by glsl2h.pl).
 *
 * Contents, each section guarded by a stage macro:
 *   - gl_PerVertex interface blocks   (VERTEX_SHADER, GEOMETRY_SHADER)
 *   - uniform block cb15, binding 15  (FRAGMENT_SHADER)
 *   - uniform block cb20, binding 20  (VERTEX_SHADER or GEOMETRY_SHADER)
 *   - uniform block cb21, binding 21  (VERTEX_SHADER or FRAGMENT_SHADER)
 *   - sampler TextureSampler, unit 0  (FRAGMENT_SHADER)
 *
 * The version directive on the first line is commented out, and the stage
 * macros and pGL_ES are never defined here, so all of them have to be
 * supplied by whatever code assembles the final shader text.
 * NOTE(review): presumably this string is prepended to the other *_glsl
 * strings, which use TextureSampler and the cb* members without declaring
 * them - confirm against the shader loader.
 */
static const char* const common_header_glsl =
"//#version 420 // Keep it for editor detection\n"
"\n"
"//////////////////////////////////////////////////////////////////////\n"
"// Common Interface Definition\n"
"//////////////////////////////////////////////////////////////////////\n"
"\n"
/* Vertex stage: redeclares the built-in gl_PerVertex output block.
 * gl_ClipDistance is only declared when pGL_ES evaluates to 0. */
"#ifdef VERTEX_SHADER\n"
"\n"
"out gl_PerVertex {\n"
" vec4 gl_Position;\n"
" float gl_PointSize;\n"
"#if !pGL_ES\n"
" float gl_ClipDistance[1];\n"
"#endif\n"
"};\n"
"\n"
"#endif\n"
"\n"
"\n"
"\n"
/* Geometry stage: redeclares gl_PerVertex both as the gl_in[] input array
 * and as the output block, with the same members as the vertex stage. */
"#ifdef GEOMETRY_SHADER\n"
"\n"
"in gl_PerVertex {\n"
" vec4 gl_Position;\n"
" float gl_PointSize;\n"
"#if !pGL_ES\n"
" float gl_ClipDistance[1];\n"
"#endif\n"
"} gl_in[];\n"
"\n"
"out gl_PerVertex {\n"
" vec4 gl_Position;\n"
" float gl_PointSize;\n"
"#if !pGL_ES\n"
" float gl_ClipDistance[1];\n"
"#endif\n"
"};\n"
"\n"
"#endif\n"
"\n"
"//////////////////////////////////////////////////////////////////////\n"
"// Constant Buffer Definition\n"
"//////////////////////////////////////////////////////////////////////\n"
"// Performance note, some drivers (nouveau) will validate all Constant Buffers\n"
"// even if only one was updated.\n"
"\n"
/* cb15: fragment-only. ScalingFactor is read by convert_glsl (ps_main17). */
"#ifdef FRAGMENT_SHADER\n"
"layout(std140, binding = 15) uniform cb15\n"
"{\n"
" ivec4 ScalingFactor;\n"
" ivec4 ChannelShuffle;\n"
"};\n"
"#endif\n"
"\n"
/* cb20: vertex/geometry data. VertexScale, VertexOffset, DepthMask and
 * PointSize are read by tfx_vgs_glsl; cb20_pad is an explicit padding
 * member. The host-side structure must match this std140 layout
 * (host code is not visible here). */
"#if defined(VERTEX_SHADER) || defined(GEOMETRY_SHADER)\n"
"layout(std140, binding = 20) uniform cb20\n"
"{\n"
" vec2 VertexScale;\n"
" vec2 VertexOffset;\n"
" uint DepthMask;\n"
" uint cb20_pad;\n"
" vec2 PointSize;\n"
"};\n"
"#endif\n"
"\n"
/* cb21: shared by the vertex and fragment stages. TextureScale is read by
 * the vertex shader in tfx_vgs_glsl; the remaining members are read by the
 * fragment shader in tfx_fs_all_glsl. */
"#if defined(VERTEX_SHADER) || defined(FRAGMENT_SHADER)\n"
"layout(std140, binding = 21) uniform cb21\n"
"{\n"
" vec3 FogColor;\n"
" float AREF;\n"
"\n"
" vec4 WH;\n"
"\n"
" vec2 TA;\n"
" //float _pad0;\n"
" int Uber_ATST;\n"
" float Af;\n"
"\n"
" uvec4 MskFix;\n"
"\n"
" uvec4 FbMask;\n"
"\n"
" vec4 HalfTexel;\n"
"\n"
" vec4 MinMax;\n"
"\n"
" vec2 TextureScale;\n"
" vec2 TC_OffsetHack;\n"
"};\n"
"#endif\n"
"\n"
/* cb22 is kept only as commented-out GLSL; nothing is declared for it. */
"//layout(std140, binding = 22) uniform cb22\n"
"//{\n"
"// vec4 rt_size;\n"
"//};\n"
"\n"
"//////////////////////////////////////////////////////////////////////\n"
"// Default Sampler\n"
"//////////////////////////////////////////////////////////////////////\n"
/* Default 2D sampler on texture unit 0, fragment stage only. */
"#ifdef FRAGMENT_SHADER\n"
"\n"
"layout(binding = 0) uniform sampler2D TextureSampler;\n"
"\n"
"#endif\n"
;
/*
 * convert_glsl: GLSL source of the convert program (generated from the
 * .glsl source by glsl2h.pl).
 *
 * Vertex stage (VERTEX_SHADER): vs_main forwards POSITION and TEXCOORD0.
 * Fragment stage (FRAGMENT_SHADER): a family of entry points ps_main0 ..
 * ps_main17. Each one is compiled in only when a macro with the same name
 * is defined, so the code that builds the shader selects exactly one.
 *
 *   ps_main0   plain copy of the sampled texel
 *   ps_main1   RGBA8 colour packed into a 16 bit A1-BGR5 uint
 *   ps_main2/3 alpha test around 127.5/255 using discard, no colour output
 *   ps_main4   round(colour * 255) modulo 256, renormalised
 *   ps_main5   scanline mask, ps_main6 diagonal mask, ps_main8 triangular mask
 *   ps_main7   luminance written to alpha
 *   ps_main9   cosine-weighted scanline filter
 *   ps_main10  float depth to 32 bit uint
 *   ps_main11/12  float depth to RGBA8 / RGB5A1 colour
 *   ps_main13..16 colour back to float depth (32, 24, 16 bit, RGB5A1)
 *   ps_main17  RGBA texels repacked as 8 bit indices
 *
 * TextureSampler and ScalingFactor are used but not declared in this
 * string; common_header_glsl declares both.
 */
static const char* const convert_glsl =
"//#version 420 // Keep it for editor detection\n"
"\n"
"\n"
/* Vertex stage: position is extended with z = 0.5 and w = 1.0 and written
 * both to the SHADER block and to gl_Position. */
"#ifdef VERTEX_SHADER\n"
"\n"
"layout(location = 0) in vec2 POSITION;\n"
"layout(location = 1) in vec2 TEXCOORD0;\n"
"\n"
"// FIXME set the interpolation (don't know what dx do)\n"
"// flat means that there is no interpolation. The value given to the fragment shader is based on the provoking vertex conventions.\n"
"//\n"
"// noperspective means that there will be linear interpolation in window-space. This is usually not what you want, but it can have its uses.\n"
"//\n"
"// smooth, the default, means to do perspective-correct interpolation.\n"
"//\n"
"// The centroid qualifier only matters when multisampling. If this qualifier is not present, then the value is interpolated to the pixel's center, anywhere in the pixel, or to one of the pixel's samples. This sample may lie outside of the actual primitive being rendered, since a primitive can cover only part of a pixel's area. The centroid qualifier is used to prevent this; the interpolation point must fall within both the pixel's area and the primitive's area.\n"
"out SHADER\n"
"{\n"
" vec4 p;\n"
" vec2 t;\n"
"} VSout;\n"
"\n"
"void vs_main()\n"
"{\n"
" VSout.p = vec4(POSITION, 0.5f, 1.0f);\n"
" VSout.t = TEXCOORD0;\n"
" gl_Position = vec4(POSITION, 0.5f, 1.0f); // NOTE I don't know if it is possible to merge POSITION_OUT and gl_Position\n"
"}\n"
"\n"
"#endif\n"
"\n"
/* Fragment stage starts here and runs to the end of the string. */
"#ifdef FRAGMENT_SHADER\n"
"\n"
"in SHADER\n"
"{\n"
" vec4 p;\n"
" vec2 t;\n"
"} PSin;\n"
"\n"
/* Output type depends on the entry point: ps_main1 and ps_main10 write an
 * unsigned integer (SV_Target1), every other entry point writes a vec4
 * (SV_Target0). Both use colour location 0. */
"// Give a different name so I remember there is a special case!\n"
"#if defined(ps_main1) || defined(ps_main10)\n"
"layout(location = 0) out uint SV_Target1;\n"
"#else\n"
"layout(location = 0) out vec4 SV_Target0;\n"
"#endif\n"
"\n"
/* sample_c: filtered lookup of TextureSampler at the interpolated texcoord. */
"vec4 sample_c()\n"
"{\n"
" return texture(TextureSampler, PSin.t);\n"
"}\n"
"\n"
/* ps_crt: multiplies the sampled colour by clamp(mask[i] + 0.5, 0, 1), so a
 * mask component of 1 keeps the channel and 0 halves it. Called by
 * ps_main6 and ps_main8 with i in 0..2 (both reduce modulo 3). */
"vec4 ps_crt(uint i)\n"
"{\n"
" vec4 mask[4] = vec4[4]\n"
" (\n"
" vec4(1, 0, 0, 0),\n"
" vec4(0, 1, 0, 0),\n"
" vec4(0, 0, 1, 0),\n"
" vec4(1, 1, 1, 0)\n"
" );\n"
" return sample_c() * clamp((mask[i] + 0.5f), 0.0f, 1.0f);\n"
"}\n"
"\n"
"#ifdef ps_main0\n"
"void ps_main0()\n"
"{\n"
" SV_Target0 = sample_c();\n"
"}\n"
"#endif\n"
"\n"
/* ps_main1: the first preprocessor branch is disabled with a literal 0, so
 * the branch after the else directive (marked as likely wrong by its own
 * comment) is the code that is actually compiled. */
"#ifdef ps_main1\n"
"void ps_main1()\n"
"{\n"
" // Input Color is RGBA8\n"
"\n"
" // We want to output a pixel on the PSMCT16* format\n"
" // A1-BGR5\n"
"\n"
"#if 0\n"
" // Note: dot is a good idea from pseudo. However we must be careful about float accuraccy.\n"
" // Here a global idea example:\n"
" //\n"
" // SV_Target1 = dot(round(sample_c() * vec4(31.f, 31.f, 31.f, 1.f)), vec4(1.f, 32.f, 1024.f, 32768.f));\n"
" //\n"
"\n"
" // For me this code is more accurate but it will require some tests\n"
"\n"
" vec4 c = sample_c() * 255.0f + 0.5f; // Denormalize value to avoid float precision issue\n"
"\n"
" // shift Red: -3\n"
" // shift Green: -3 + 5\n"
" // shift Blue: -3 + 10\n"
" // shift Alpha: -7 + 15\n"
" highp uvec4 i = uvec4(c * vec4(1/8.0f, 4.0f, 128.0f, 256.0f)); // Shift value\n"
"\n"
" // bit field operation requires GL4 HW. Could be nice to merge it with step/mix below\n"
" SV_Target1 = (i.r & uint(0x001f)) | (i.g & uint(0x03e0)) | (i.b & uint(0x7c00)) | (i.a & uint(0x8000));\n"
"\n"
"#else\n"
" // Old code which is likely wrong.\n"
"\n"
" vec4 c = sample_c();\n"
"\n"
" c.a *= 256.0f / 127.0f; // hm, 0.5 won't give us 1.0 if we just multiply with 2\n"
"\n"
" highp uvec4 i = uvec4(c * vec4(uint(0x001f), uint(0x03e0), uint(0x7c00), uint(0x8000)));\n"
"\n"
" // bit field operation requires GL4 HW.\n"
" SV_Target1 = (i.x & uint(0x001f)) | (i.y & uint(0x03e0)) | (i.z & uint(0x7c00)) | (i.w & uint(0x8000));\n"
"#endif\n"
"\n"
"\n"
"}\n"
"#endif\n"
"\n"
/* ps_main10: scales the red channel of the sampled texel by 2^32 and
 * converts it to uint. */
"#ifdef ps_main10\n"
"void ps_main10()\n"
"{\n"
" // Convert a GL_FLOAT32 depth texture into a 32 bits UINT texture\n"
" SV_Target1 = uint(exp2(32.0f) * sample_c().r);\n"
"}\n"
"#endif\n"
"\n"
/* ps_main11: splits the depth value into four byte-sized fractions using
 * fract() at scales 2^24, 2^16, 2^8 and 2^0, removes the carried-over part
 * of the neighbouring component, then rescales by 256/255. */
"#ifdef ps_main11\n"
"void ps_main11()\n"
"{\n"
" // Convert a GL_FLOAT32 depth texture into a RGBA color texture\n"
" const vec4 bitSh = vec4(exp2(24.0f), exp2(16.0f), exp2(8.0f), exp2(0.0f));\n"
" const vec4 bitMsk = vec4(0.0, 1.0/256.0, 1.0/256.0, 1.0/256.0);\n"
"\n"
" vec4 res = fract(vec4(sample_c().r) * bitSh);\n"
"\n"
" SV_Target0 = (res - res.xxyz * bitMsk) * 256.0f/255.0f;\n"
"}\n"
"#endif\n"
"\n"
/* ps_main12: integer variant; extracts three 5 bit fields and one 1 bit
 * field from the scaled depth and divides the 5 bit fields by 32. */
"#ifdef ps_main12\n"
"void ps_main12()\n"
"{\n"
" // Convert a GL_FLOAT32 (only 16 lsb) depth into a RGB5A1 color texture\n"
" const vec4 bitSh = vec4(exp2(32.0f), exp2(27.0f), exp2(22.0f), exp2(17.0f));\n"
" const uvec4 bitMsk = uvec4(0x1F, 0x1F, 0x1F, 0x1);\n"
" uvec4 color = uvec4(vec4(sample_c().r) * bitSh) & bitMsk;\n"
"\n"
" SV_Target0 = vec4(color) / vec4(32.0f, 32.0f, 32.0f, 1.0f);\n"
"}\n"
"#endif\n"
"\n"
/* ps_main13 .. ps_main15: rebuild gl_FragDepth as a weighted sum (dot
 * product) of 4, 3 or 2 colour channels; weights are 255 * 2^-32,
 * 255 * 2^-24, 255 * 2^-16 and 255 * 2^-8. */
"#ifdef ps_main13\n"
"void ps_main13()\n"
"{\n"
" // Convert a RRGBA texture into a float depth texture\n"
" // FIXME: I'm afraid of the accuracy\n"
" const vec4 bitSh = vec4(exp2(-32.0f), exp2(-24.0f), exp2(-16.0f), exp2(-8.0f)) * vec4(255.0);\n"
" gl_FragDepth = dot(sample_c(), bitSh);\n"
"}\n"
"#endif\n"
"\n"
"#ifdef ps_main14\n"
"void ps_main14()\n"
"{\n"
" // Same as above but without the alpha channel (24 bits Z)\n"
"\n"
" // Convert a RRGBA texture into a float depth texture\n"
" // FIXME: I'm afraid of the accuracy\n"
" const vec3 bitSh = vec3(exp2(-32.0f), exp2(-24.0f), exp2(-16.0f)) * vec3(255.0);\n"
" gl_FragDepth = dot(sample_c().rgb, bitSh);\n"
"}\n"
"#endif\n"
"\n"
"#ifdef ps_main15\n"
"void ps_main15()\n"
"{\n"
" // Same as above but without the A/B channels (16 bits Z)\n"
"\n"
" // Convert a RRGBA texture into a float depth texture\n"
" // FIXME: I'm afraid of the accuracy\n"
" const vec2 bitSh = vec2(exp2(-32.0f), exp2(-24.0f)) * vec2(255.0);\n"
" gl_FragDepth = dot(sample_c().rg, bitSh);\n"
"}\n"
"#endif\n"
"\n"
/* ps_main16: truncates each channel to 5 bits (alpha to 1 bit) before
 * weighting, so the low bits of the RGBA8 storage do not leak into depth. */
"#ifdef ps_main16\n"
"void ps_main16()\n"
"{\n"
" // Convert a RGB5A1 (saved as RGBA8) color to a 16 bit Z\n"
" // FIXME: I'm afraid of the accuracy\n"
" const vec4 bitSh = vec4(exp2(-32.0f), exp2(-27.0f), exp2(-22.0f), exp2(-17.0f));\n"
" // Trunc color to drop useless lsb\n"
" vec4 color = trunc(sample_c() * vec4(255.0f) / vec4(8.0f, 8.0f, 8.0f, 128.0f));\n"
" gl_FragDepth = dot(vec4(color), bitSh);\n"
"}\n"
"#endif\n"
"\n"
/* ps_main17: computes two source texel positions (txN, txH) from the
 * fragment coordinate, scales them by ScalingFactor, fetches both texels
 * with texelFetch, and picks one channel of one of them depending on the
 * fragment position inside a 16x16 tile (sel). ONLY_BLUE is commented out
 * inside the shader, so the full channel selection is what gets compiled
 * unless the macro is defined externally. */
"#ifdef ps_main17\n"
"void ps_main17()\n"
"{\n"
"\n"
" // Potential speed optimization. There is a high probability that\n"
" // game only want to extract a single channel (blue). It will allow\n"
" // to remove most of the conditional operation and yield a +2/3 fps\n"
" // boost on MGS3\n"
" //\n"
" // Hypothesis wrong in Prince of Persia ... Seriously WTF !\n"
" //#define ONLY_BLUE;\n"
"\n"
" // Convert a RGBA texture into a 8 bits packed texture\n"
" // Input column: 8x2 RGBA pixels\n"
" // 0: 8 RGBA\n"
" // 1: 8 RGBA\n"
" // Output column: 16x4 Index pixels\n"
" // 0: 8 R | 8 B\n"
" // 1: 8 R | 8 B\n"
" // 2: 8 G | 8 A\n"
" // 3: 8 G | 8 A\n"
" float c;\n"
"\n"
" uvec2 sel = uvec2(gl_FragCoord.xy) % uvec2(16u, 16u);\n"
" ivec2 tb = ((ivec2(gl_FragCoord.xy) & ~ivec2(15, 3)) >> 1);\n"
"\n"
" int ty = tb.y | (int(gl_FragCoord.y) & 1);\n"
" int txN = tb.x | (int(gl_FragCoord.x) & 7);\n"
" int txH = tb.x | ((int(gl_FragCoord.x) + 4) & 7);\n"
"\n"
" txN *= ScalingFactor.x;\n"
" txH *= ScalingFactor.x;\n"
" ty *= ScalingFactor.y;\n"
"\n"
" // TODO investigate texture gather\n"
" vec4 cN = texelFetch(TextureSampler, ivec2(txN, ty), 0);\n"
" vec4 cH = texelFetch(TextureSampler, ivec2(txH, ty), 0);\n"
"\n"
"\n"
" if ((sel.y & 4u) == 0u) {\n"
" // Column 0 and 2\n"
"#ifdef ONLY_BLUE\n"
" c = cN.b;\n"
"#else\n"
" if ((sel.y & 3u) < 2u) {\n"
" // first 2 lines of the col\n"
" if (sel.x < 8u)\n"
" c = cN.r;\n"
" else\n"
" c = cN.b;\n"
" } else {\n"
" if (sel.x < 8u)\n"
" c = cH.g;\n"
" else\n"
" c = cH.a;\n"
" }\n"
"#endif\n"
" } else {\n"
"#ifdef ONLY_BLUE\n"
" c = cH.b;\n"
"#else\n"
" // Column 1 and 3\n"
" if ((sel.y & 3u) < 2u) {\n"
" // first 2 lines of the col\n"
" if (sel.x < 8u)\n"
" c = cH.r;\n"
" else\n"
" c = cH.b;\n"
" } else {\n"
" if (sel.x < 8u)\n"
" c = cN.g;\n"
" else\n"
" c = cN.a;\n"
" }\n"
"#endif\n"
" }\n"
"\n"
"\n"
" SV_Target0 = vec4(c);\n"
"}\n"
"#endif\n"
"\n"
/* ps_main7: keeps RGB and stores a weighted RGB sum (0.299, 0.587, 0.114)
 * in the alpha channel. */
"#ifdef ps_main7\n"
"void ps_main7()\n"
"{\n"
" vec4 c = sample_c();\n"
"\n"
" c.a = dot(c.rgb, vec3(0.299, 0.587, 0.114));\n"
"\n"
" SV_Target0 = c;\n"
"}\n"
"#endif\n"
"\n"
/* ps_main5: even rows (p.y % 2 == 0) use mask 0 and keep RGB at full
 * strength; odd rows use mask 1 and are halved.
 * NOTE(review): ps_scanlines initialises its array with a brace list while
 * ps_crt uses the vec4[4](...) constructor form. Brace initialisers need
 * GLSL 4.20 or ARB_shading_language_420pack - confirm the targeted GLSL
 * version supports them. */
"#ifdef ps_main5\n"
"vec4 ps_scanlines(uint i)\n"
"{\n"
" vec4 mask[2] =\n"
" {\n"
" vec4(1, 1, 1, 0),\n"
" vec4(0, 0, 0, 0)\n"
" };\n"
"\n"
" return sample_c() * clamp((mask[i] + 0.5f), 0.0f, 1.0f);\n"
"}\n"
"\n"
"void ps_main5() // scanlines\n"
"{\n"
" highp uvec4 p = uvec4(gl_FragCoord);\n"
"\n"
" vec4 c = ps_scanlines(p.y % 2u);\n"
"\n"
" SV_Target0 = c;\n"
"}\n"
"#endif\n"
"\n"
/* ps_main6 / ps_main8: choose one of the first three ps_crt masks from the
 * integer fragment coordinate. */
"#ifdef ps_main6\n"
"void ps_main6() // diagonal\n"
"{\n"
" highp uvec4 p = uvec4(gl_FragCoord);\n"
"\n"
" vec4 c = ps_crt((p.x + (p.y % 3u)) % 3u);\n"
"\n"
" SV_Target0 = c;\n"
"}\n"
"#endif\n"
"\n"
"#ifdef ps_main8\n"
"void ps_main8() // triangular\n"
"{\n"
" highp uvec4 p = uvec4(gl_FragCoord);\n"
"\n"
" vec4 c = ps_crt(((p.x + ((p.y >> 1u) & 1u) * 3u) >> 1u) % 3u);\n"
"\n"
" SV_Target0 = c;\n"
"}\n"
"#endif\n"
"\n"
/* ps_main9: either a plain sample, or a sample taken at the centre of the
 * source row and scaled by 0.9 - 0.4 * cos(2 * PI * t.y * texdim.y).
 * NOTE(review): the branch condition multiplies dFdy(t.y) by t.y itself,
 * while the else branch works in units of texdim.y - confirm which operand
 * is intended before relying on the first branch being reachable. */
"#ifdef ps_main9\n"
"void ps_main9()\n"
"{\n"
"\n"
" const float PI = 3.14159265359f;\n"
"\n"
" vec2 texdim = vec2(textureSize(TextureSampler, 0));\n"
"\n"
" vec4 c;\n"
" if (dFdy(PSin.t.y) * PSin.t.y > 0.5f) {\n"
" c = sample_c();\n"
" } else {\n"
" float factor = (0.9f - 0.4f * cos(2.0f * PI * PSin.t.y * texdim.y));\n"
" c = factor * texture(TextureSampler, vec2(PSin.t.x, (floor(PSin.t.y * texdim.y) + 0.5f) / texdim.y));\n"
" }\n"
"\n"
" SV_Target0 = c;\n"
"}\n"
"#endif\n"
"\n"
/* ps_main2 / ps_main3: write no colour; they only discard fragments whose
 * sampled alpha is below (ps_main2) or above (ps_main3) 127.5/255. */
"// Used for DATE (stencil)\n"
"// DATM == 1\n"
"#ifdef ps_main2\n"
"void ps_main2()\n"
"{\n"
" if(sample_c().a < (127.5f / 255.0f)) // >= 0x80 pass\n"
" discard;\n"
"}\n"
"#endif\n"
"\n"
"// Used for DATE (stencil)\n"
"// DATM == 0\n"
"#ifdef ps_main3\n"
"void ps_main3()\n"
"{\n"
" if((127.5f / 255.0f) < sample_c().a) // < 0x80 pass (== 0x80 should not pass)\n"
" discard;\n"
"}\n"
"#endif\n"
"\n"
/* ps_main4: rounds each channel to an integer in 0..255 scale, wraps it
 * modulo 256 and renormalises by 255. */
"#ifdef ps_main4\n"
"void ps_main4()\n"
"{\n"
" SV_Target0 = mod(round(sample_c() * 255.0f), 256.0f) / 255.0f;\n"
"}\n"
"#endif\n"
"\n"
"#endif\n"
;
/*
 * interlace_glsl: GLSL source of the interlace fragment shaders (generated
 * from the .glsl source by glsl2h.pl).
 *
 * Uniform block cb11 (binding 11) supplies ZrH and hH. All entry points
 * write SV_Target0:
 *   ps_main0  keeps fragments with fract(PSin.t.y * hH) >= 0.5, discards the rest
 *   ps_main1  keeps fragments with fract(PSin.t.y * hH) <= 0.5, discards the rest
 *   ps_main2  weighted blend (1, 2, 1) / 4 of the texels at t - ZrH, t, t + ZrH
 *   ps_main3  plain copy of the sampled texel
 *
 * ps_main0 and ps_main1 perform the texture lookup BEFORE the conditional
 * discard, as their in-shader comment requests. The discard differs between
 * rows of the same fragment quad, and texture() relies on implicit
 * derivatives, which are undefined once control flow has diverged. Surviving
 * fragments produce the same output as before; discarded fragments still
 * write nothing.
 *
 * TextureSampler is used but not declared in this string;
 * common_header_glsl declares it.
 * NOTE(review): the SHADER input block sits outside the FRAGMENT_SHADER
 * guard, so this text is only valid when compiled for the fragment stage.
 */
static const char* const interlace_glsl =
"//#version 420 // Keep it for editor detection\n"
"\n"
"in SHADER\n"
"{\n"
" vec4 p;\n"
" vec2 t;\n"
"} PSin;\n"
"\n"
"#ifdef FRAGMENT_SHADER\n"
"\n"
"layout(std140, binding = 11) uniform cb11\n"
"{\n"
" vec2 ZrH;\n"
" float hH;\n"
"};\n"
"\n"
"layout(location = 0) out vec4 SV_Target0;\n"
"\n"
"// TODO ensure that clip (discard) is < 0 and not <= 0 ???\n"
"void ps_main0()\n"
"{\n"
" // I'm not sure it impact us but be safe to lookup texture before conditional if\n"
" // see: http://www.opengl.org/wiki/GLSL_Sampler#Non-uniform_flow_control\n"
" vec4 c = texture(TextureSampler, PSin.t);\n"
" if (fract(PSin.t.y * hH) - 0.5 < 0.0)\n"
" discard;\n"
"\n"
" SV_Target0 = c;\n"
"}\n"
"\n"
"void ps_main1()\n"
"{\n"
" // I'm not sure it impact us but be safe to lookup texture before conditional if\n"
" // see: http://www.opengl.org/wiki/GLSL_Sampler#Non-uniform_flow_control\n"
" vec4 c = texture(TextureSampler, PSin.t);\n"
" if (0.5 - fract(PSin.t.y * hH) < 0.0)\n"
" discard;\n"
"\n"
" SV_Target0 = c;\n"
"}\n"
"\n"
"void ps_main2()\n"
"{\n"
" vec4 c0 = texture(TextureSampler, PSin.t - ZrH);\n"
" vec4 c1 = texture(TextureSampler, PSin.t);\n"
" vec4 c2 = texture(TextureSampler, PSin.t + ZrH);\n"
"\n"
" SV_Target0 = (c0 + c1 * 2.0f + c2) / 4.0f;\n"
"}\n"
"\n"
"void ps_main3()\n"
"{\n"
" SV_Target0 = texture(TextureSampler, PSin.t);\n"
"}\n"
"\n"
"#endif\n"
;
/*
 * merge_glsl: GLSL source of the merge fragment shaders (generated from
 * the .glsl source by glsl2h.pl).
 *
 * Uniform block cb10 (binding 10) supplies BGColor. Both entry points
 * sample TextureSampler at PSin.t and write SV_Target0:
 *   ps_main0  doubles the sampled alpha (no clamp in the shader)
 *   ps_main1  replaces the sampled alpha with BGColor.a
 *
 * TextureSampler is used but not declared in this string;
 * common_header_glsl declares it.
 * NOTE(review): the SHADER input block sits outside the FRAGMENT_SHADER
 * guard (shadeboost_glsl keeps it inside). That is only valid if this text
 * is never compiled for another stage - confirm against the shader loader.
 */
static const char* const merge_glsl =
"//#version 420 // Keep it for editor detection\n"
"\n"
"in SHADER\n"
"{\n"
" vec4 p;\n"
" vec2 t;\n"
"} PSin;\n"
"\n"
"#ifdef FRAGMENT_SHADER\n"
"\n"
"layout(std140, binding = 10) uniform cb10\n"
"{\n"
" vec4 BGColor;\n"
"};\n"
"\n"
"layout(location = 0) out vec4 SV_Target0;\n"
"\n"
"void ps_main0()\n"
"{\n"
" vec4 c = texture(TextureSampler, PSin.t);\n"
" // Note: clamping will be done by fixed unit\n"
" c.a *= 2.0f;\n"
" SV_Target0 = c;\n"
"}\n"
"\n"
"void ps_main1()\n"
"{\n"
" vec4 c = texture(TextureSampler, PSin.t);\n"
" c.a = BGColor.a;\n"
" SV_Target0 = c;\n"
"}\n"
"\n"
"#endif\n"
;
/*
 * shadeboost_glsl: GLSL source of the shade boost fragment shader
 * (generated from the .glsl source by glsl2h.pl).
 *
 * ps_main samples TextureSampler at PSin.t and passes the texel through
 * ContrastSaturationBrightness, which only changes the RGB channels.
 *
 * The macros SB_SATURATION, SB_BRIGHTNESS and SB_CONTRAST are not defined
 * in this string and must be supplied when the shader is compiled. Each is
 * divided by 50.0, so a macro value of 50 yields a factor of 1.0.
 *
 * Order of operations in ContrastSaturationBrightness:
 *   1. brightness: rgb * brt
 *   2. saturation: mix(luminance, brightened rgb, sat)
 *   3. contrast:   mix(0.5 grey, saturated rgb, con)
 *
 * TextureSampler is used but not declared in this string;
 * common_header_glsl declares it.
 */
static const char* const shadeboost_glsl =
"//#version 420 // Keep it for editor detection\n"
"\n"
"/*\n"
"** Contrast, saturation, brightness\n"
"** Code of this function is from TGM's shader pack\n"
"** http://irrlicht.sourceforge.net/phpBB2/viewtopic.php?t=21057\n"
"** TGM's author comment about the license (included in the previous link)\n"
"** \"do with it, what you want! its total free!\n"
"** (but would be nice, if you say that you used my shaders :wink: ) but not necessary\"\n"
"*/\n"
"\n"
"#ifdef FRAGMENT_SHADER\n"
"\n"
"in SHADER\n"
"{\n"
" vec4 p;\n"
" vec2 t;\n"
"} PSin;\n"
"\n"
"layout(location = 0) out vec4 SV_Target0;\n"
"\n"
"// For all settings: 1.0 = 100% 0.5=50% 1.5 = 150%\n"
"vec4 ContrastSaturationBrightness(vec4 color)\n"
"{\n"
" const float sat = SB_SATURATION / 50.0;\n"
" const float brt = SB_BRIGHTNESS / 50.0;\n"
" const float con = SB_CONTRAST / 50.0;\n"
"\n"
" // Increase or decrease these values to adjust r, g and b color channels separately\n"
" const float AvgLumR = 0.5;\n"
" const float AvgLumG = 0.5;\n"
" const float AvgLumB = 0.5;\n"
"\n"
" const vec3 LumCoeff = vec3(0.2125, 0.7154, 0.0721);\n"
"\n"
" vec3 AvgLumin = vec3(AvgLumR, AvgLumG, AvgLumB);\n"
" vec3 brtColor = color.rgb * brt;\n"
" float dot_intensity = dot(brtColor, LumCoeff);\n"
" vec3 intensity = vec3(dot_intensity, dot_intensity, dot_intensity);\n"
" vec3 satColor = mix(intensity, brtColor, sat);\n"
" vec3 conColor = mix(AvgLumin, satColor, con);\n"
"\n"
" color.rgb = conColor;\n"
" return color;\n"
"}\n"
"\n"
"\n"
"void ps_main()\n"
"{\n"
" vec4 c = texture(TextureSampler, PSin.t);\n"
" SV_Target0 = ContrastSaturationBrightness(c);\n"
"}\n"
"\n"
"\n"
"#endif\n"
;
/*
 * tfx_vgs_glsl: GLSL source of the tfx vertex and geometry shaders
 * (generated from the .glsl source by glsl2h.pl).
 *
 * Vertex stage (VERTEX_SHADER): vs_main builds gl_Position from the integer
 * position i_p and depth i_z, and fills the SHADER output block with
 * texture coordinates and colours.
 *
 * Geometry stage (GEOMETRY_SHADER): gs_main expands its input primitive
 * into two triangles (six vertices, two strips of three). The macro
 * GS_POINT selects the variant: 1 takes a single point, any other value
 * takes a line whose two vertices are opposite corners of a rectangle.
 * GS_POINT is not defined in this string and must be supplied at compile
 * time.
 *
 * VertexScale, VertexOffset, DepthMask, PointSize and TextureScale are used
 * but not declared here; common_header_glsl declares them in cb20 / cb21.
 */
static const char* const tfx_vgs_glsl =
"//#version 420 // Keep it for text editor detection\n"
"\n"
/* Vertex inputs use attribute locations 0 and 2..7; location 1 is not
 * declared in this shader. */
"#ifdef VERTEX_SHADER\n"
"layout(location = 0) in vec2 i_st;\n"
"layout(location = 2) in vec4 i_c;\n"
"layout(location = 3) in float i_q;\n"
"layout(location = 4) in uvec2 i_p;\n"
"layout(location = 5) in uint i_z;\n"
"layout(location = 6) in uvec2 i_uv;\n"
"layout(location = 7) in vec4 i_f;\n"
"\n"
/* Varyings: t_float = (st.x, st.y, i_f.x, q); t_int = (uv * TextureScale,
 * uv); c = interpolated colour; fc = the same colour, flat-shaded. */
"out SHADER\n"
"{\n"
" vec4 t_float;\n"
" vec4 t_int;\n"
" vec4 c;\n"
" flat vec4 fc;\n"
"} VSout;\n"
"\n"
/* 2^-32, used to scale the 32 bit integer depth into a float. */
"const float exp_min32 = exp2(-32.0f);\n"
"\n"
/* texture_coord: writes every texture-related varying except t_float.z,
 * which vs_main fills afterwards. */
"void texture_coord()\n"
"{\n"
" vec2 uv = vec2(i_uv);\n"
"\n"
" // Float coordinate\n"
" VSout.t_float.xy = i_st;\n"
" VSout.t_float.w = i_q;\n"
"\n"
" // Integer coordinate => normalized\n"
" VSout.t_int.xy = uv * TextureScale;\n"
" // Integer coordinate => integral\n"
" VSout.t_int.zw = uv;\n"
"}\n"
"\n"
/* vs_main: depth is masked with DepthMask before scaling; position is
 * shifted by -0.05 and then mapped with VertexScale / VertexOffset. */
"void vs_main()\n"
"{\n"
" highp uint z = i_z & DepthMask;\n"
"\n"
" // pos -= 0.05 (1/320 pixel) helps avoiding rounding problems (integral part of pos is usually 5 digits, 0.05 is about as low as we can go)\n"
" // example: ceil(afterseveralvertextransformations(y = 133)) => 134 => line 133 stays empty\n"
" // input granularity is 1/16 pixel, anything smaller than that won't step drawing up/left by one pixel\n"
" // example: 133.0625 (133 + 1/16) should start from line 134, ceil(133.0625 - 0.05) still above 133\n"
" vec4 p;\n"
"\n"
" p.xy = vec2(i_p) - vec2(0.05f, 0.05f);\n"
" p.xy = p.xy * VertexScale - VertexOffset;\n"
" p.w = 1.0f;\n"
" p.z = float(z) * exp_min32;\n"
"\n"
" gl_Position = p;\n"
"\n"
" texture_coord();\n"
"\n"
" VSout.c = i_c;\n"
" VSout.fc = i_c;\n"
" VSout.t_float.z = i_f.x; // pack for with texture\n"
"}\n"
"\n"
"#endif\n"
"\n"
/* Geometry stage: input and output blocks mirror the vertex stage output. */
"#ifdef GEOMETRY_SHADER\n"
"\n"
"in SHADER\n"
"{\n"
" vec4 t_float;\n"
" vec4 t_int;\n"
" vec4 c;\n"
" flat vec4 fc;\n"
"} GSin[];\n"
"\n"
"out SHADER\n"
"{\n"
" vec4 t_float;\n"
" vec4 t_int;\n"
" vec4 c;\n"
" flat vec4 fc;\n"
"} GSout;\n"
"\n"
/* Local copy of the interpolated varyings; the flat colour fc is not part
 * of it because out_vertex always takes fc straight from GSin. */
"struct vertex\n"
"{\n"
" vec4 t_float;\n"
" vec4 t_int;\n"
" vec4 c;\n"
"};\n"
"\n"
/* out_vertex: copies v into GSout, sets the flat colour from input vertex 0
 * (point variant) or input vertex 1 (line variant), forwards the primitive
 * id and emits the vertex. The caller must set gl_Position beforehand. */
"void out_vertex(in vertex v)\n"
"{\n"
" GSout.t_float = v.t_float;\n"
" GSout.t_int = v.t_int;\n"
" GSout.c = v.c;\n"
" // Flat output\n"
"#if GS_POINT == 1\n"
" GSout.fc = GSin[0].fc;\n"
"#else\n"
" GSout.fc = GSin[1].fc;\n"
"#endif\n"
" gl_PrimitiveID = gl_PrimitiveIDIn;\n"
" EmitVertex();\n"
"}\n"
"\n"
"#if GS_POINT == 1\n"
"layout(points) in;\n"
"#else\n"
"layout(lines) in;\n"
"#endif\n"
"layout(triangle_strip, max_vertices = 6) out;\n"
"\n"
/* Point variant: the input position is one corner and position + PointSize
 * the opposite corner; all six emitted vertices share the varyings of the
 * input point. */
"#if GS_POINT == 1\n"
"\n"
"void gs_main()\n"
"{\n"
" // Transform a point to a NxN sprite\n"
" vertex point = vertex(GSin[0].t_float, GSin[0].t_int, GSin[0].c);\n"
"\n"
" // Get new position\n"
" vec4 lt_p = gl_in[0].gl_Position;\n"
" vec4 rb_p = gl_in[0].gl_Position + vec4(PointSize.x, PointSize.y, 0.0f, 0.0f);\n"
" vec4 lb_p = rb_p;\n"
" vec4 rt_p = rb_p;\n"
" lb_p.x = lt_p.x;\n"
" rt_p.y = lt_p.y;\n"
"\n"
" // Triangle 1\n"
" gl_Position = lt_p;\n"
" out_vertex(point);\n"
"\n"
" gl_Position = lb_p;\n"
" out_vertex(point);\n"
"\n"
" gl_Position = rt_p;\n"
" out_vertex(point);\n"
" EndPrimitive();\n"
"\n"
" // Triangle 2\n"
" gl_Position = lb_p;\n"
" out_vertex(point);\n"
"\n"
" gl_Position = rt_p;\n"
" out_vertex(point);\n"
"\n"
" gl_Position = rb_p;\n"
" out_vertex(point);\n"
" EndPrimitive();\n"
"}\n"
"\n"
"#else\n"
"\n"
/* Line variant: input vertex 0 is treated as the left-top corner and input
 * vertex 1 as the right-bottom corner. Depth, t_float.zw and colour of the
 * left-top corner are overwritten with the right-bottom values, so they are
 * constant over the rectangle. The two missing corners are derived by
 * mixing x and y components of the two given corners. */
"void gs_main()\n"
"{\n"
" // left top => GSin[0];\n"
" // right bottom => GSin[1];\n"
" vertex rb = vertex(GSin[1].t_float, GSin[1].t_int, GSin[1].c);\n"
" vertex lt = vertex(GSin[0].t_float, GSin[0].t_int, GSin[0].c);\n"
"\n"
" vec4 rb_p = gl_in[1].gl_Position;\n"
" vec4 lb_p = rb_p;\n"
" vec4 rt_p = rb_p;\n"
" vec4 lt_p = gl_in[0].gl_Position;\n"
"\n"
" // flat depth\n"
" lt_p.z = rb_p.z;\n"
" // flat fog and texture perspective\n"
" lt.t_float.zw = rb.t_float.zw;\n"
" // flat color\n"
" lt.c = rb.c;\n"
"\n"
" // Swap texture and position coordinate\n"
" vertex lb = rb;\n"
" lb.t_float.x = lt.t_float.x;\n"
" lb.t_int.x = lt.t_int.x;\n"
" lb.t_int.z = lt.t_int.z;\n"
" lb_p.x = lt_p.x;\n"
"\n"
" vertex rt = rb;\n"
" rt_p.y = lt_p.y;\n"
" rt.t_float.y = lt.t_float.y;\n"
" rt.t_int.y = lt.t_int.y;\n"
" rt.t_int.w = lt.t_int.w;\n"
"\n"
" // Triangle 1\n"
" gl_Position = lt_p;\n"
" out_vertex(lt);\n"
"\n"
" gl_Position = lb_p;\n"
" out_vertex(lb);\n"
"\n"
" gl_Position = rt_p;\n"
" out_vertex(rt);\n"
" EndPrimitive();\n"
"\n"
" // Triangle 2\n"
" gl_Position = lb_p;\n"
" out_vertex(lb);\n"
"\n"
" gl_Position = rt_p;\n"
" out_vertex(rt);\n"
"\n"
" gl_Position = rb_p;\n"
" out_vertex(rb);\n"
" EndPrimitive();\n"
"}\n"
"\n"
"#endif\n"
"\n"
"#endif\n"
;
static const char* const tfx_fs_all_glsl =
"//#version 420 // Keep it for text editor detection\n"
"\n"
"// Require for bit operation\n"
"//#extension GL_ARB_gpu_shader5 : enable\n"
"\n"
"#define FMT_32 0\n"
"#define FMT_24 1\n"
"#define FMT_16 2\n"
"\n"
"#define PS_PAL_FMT (PS_TEX_FMT >> 2)\n"
"#define PS_AEM_FMT (PS_TEX_FMT & 3)\n"
"\n"
"// APITRACE_DEBUG enables forced pixel output to easily detect\n"
"// the fragment computed by primitive\n"
"#define APITRACE_DEBUG 0\n"
"// TEX_COORD_DEBUG output the uv coordinate as color. It is useful\n"
"// to detect bad sampling due to upscaling\n"
"//#define TEX_COORD_DEBUG\n"
"// Just copy directly the texture coordinate\n"
"#ifdef TEX_COORD_DEBUG\n"
"#define PS_TFX 1\n"
"#define PS_TCC 1\n"
"#endif\n"
"\n"
"#define SW_BLEND (PS_BLEND_A || PS_BLEND_B || PS_BLEND_D)\n"
"\n"
"#ifdef FRAGMENT_SHADER\n"
"\n"
"in SHADER\n"
"{\n"
" vec4 t_float;\n"
" vec4 t_int;\n"
" vec4 c;\n"
" flat vec4 fc;\n"
"} PSin;\n"
"\n"
"// Same buffer but 2 colors for dual source blending\n"
"layout(location = 0, index = 0) out vec4 SV_Target0;\n"
"layout(location = 0, index = 1) out vec4 SV_Target1;\n"
"\n"
"layout(binding = 1) uniform sampler2D PaletteSampler;\n"
"layout(binding = 3) uniform sampler2D RtSampler; // note 2 already use by the image below\n"
"layout(binding = 4) uniform sampler2D RawTextureSampler;\n"
"\n"
"#ifndef DISABLE_GL42_image\n"
"#if PS_DATE > 0\n"
"// Performance note: images mustn't be declared if they are unused. Otherwise it will\n"
"// require extra shader validation.\n"
"\n"
"// FIXME how to declare memory access\n"
"layout(r32i, binding = 2) uniform iimage2D img_prim_min;\n"
"// WARNING:\n"
"// You can't enable it if you discard the fragment. The depth is still\n"
"// updated (shadow in Shin Megami Tensei Nocturne)\n"
"//\n"
"// early_fragment_tests must still be enabled in the first pass of the 2 passes algo\n"
"// First pass search the first primitive that will write the bad alpha value. Value\n"
"// won't be written if the fragment fails the depth test.\n"
"//\n"
"// In theory the best solution will be do\n"
"// 1/ copy the depth buffer\n"
"// 2/ do the full depth (current depth writes are disabled)\n"
"// 3/ restore the depth buffer for 2nd pass\n"
"// Of course, it is likely too costly.\n"
"#if PS_DATE == 1 || PS_DATE == 2\n"
"layout(early_fragment_tests) in;\n"
"#endif\n"
"\n"
"// I don't remember why I set this parameter but it is surely useless\n"
"//layout(pixel_center_integer) in vec4 gl_FragCoord;\n"
"#endif\n"
"#else\n"
"// use basic stencil\n"
"#endif\n"
"\n"
"vec4 sample_c(vec2 uv)\n"
"{\n"
" return texture(TextureSampler, uv);\n"
"}\n"
"\n"
"vec4 sample_p(float idx)\n"
"{\n"
" return texture(PaletteSampler, vec2(idx, 0.0f));\n"
"}\n"
"\n"
"vec4 clamp_wrap_uv(vec4 uv)\n"
"{\n"
" vec4 uv_out = uv;\n"
"\n"
"#if PS_WMS == PS_WMT\n"
"\n"
"#if PS_WMS == 2\n"
" uv_out = clamp(uv, MinMax.xyxy, MinMax.zwzw);\n"
"#elif PS_WMS == 3\n"
" uv_out = vec4((ivec4(uv * WH.xyxy) & ivec4(MskFix.xyxy)) | ivec4(MskFix.zwzw)) / WH.xyxy;\n"
"#endif\n"
"\n"
"#else // PS_WMS != PS_WMT\n"
"\n"
"#if PS_WMS == 2\n"
" uv_out.xz = clamp(uv.xz, MinMax.xx, MinMax.zz);\n"
"\n"
"#elif PS_WMS == 3\n"
" uv_out.xz = vec2((ivec2(uv.xz * WH.xx) & ivec2(MskFix.xx)) | ivec2(MskFix.zz)) / WH.xx;\n"
"\n"
"#endif\n"
"\n"
"#if PS_WMT == 2\n"
" uv_out.yw = clamp(uv.yw, MinMax.yy, MinMax.ww);\n"
"\n"
"#elif PS_WMT == 3\n"
"\n"
" uv_out.yw = vec2((ivec2(uv.yw * WH.yy) & ivec2(MskFix.yy)) | ivec2(MskFix.ww)) / WH.yy;\n"
"#endif\n"
"\n"
"#endif\n"
"\n"
" return uv_out;\n"
"}\n"
"\n"
"mat4 sample_4c(vec4 uv)\n"
"{\n"
" mat4 c;\n"
"\n"
" // Note: texture gather can't be used because of special clamping/wrapping\n"
" // Also it doesn't support lod\n"
" c[0] = sample_c(uv.xy);\n"
" c[1] = sample_c(uv.zy);\n"
" c[2] = sample_c(uv.xw);\n"
" c[3] = sample_c(uv.zw);\n"
"\n"
" return c;\n"
"}\n"
"\n"
"vec4 sample_4_index(vec4 uv)\n"
"{\n"
" vec4 c;\n"
"\n"
" // Either GSdx will send a texture that contains a single channel\n"
" // in this case the red channel is remapped as alpha channel\n"
" //\n"
" // Or we have an old RT (ie RGBA8) that contains index (4/8) in the alpha channel\n"
"\n"
" // Note: texture gather can't be used because of special clamping/wrapping\n"
" // Also it doesn't support lod\n"
" c.x = sample_c(uv.xy).a;\n"
" c.y = sample_c(uv.zy).a;\n"
" c.z = sample_c(uv.xw).a;\n"
" c.w = sample_c(uv.zw).a;\n"
"\n"
" uvec4 i = uvec4(c * 255.0f + 0.5f); // Denormalize value\n"
"\n"
"#if PS_PAL_FMT == 1\n"
" // 4HL\n"
" return vec4(i & 0xFu) / 255.0f;\n"
"\n"
"#elif PS_PAL_FMT == 2\n"
" // 4HH\n"
" return vec4(i >> 4u) / 255.0f;\n"
"\n"
"#else\n"
" // Most of texture will hit this code so keep normalized float value\n"
"\n"
" // 8 bits\n"
" return c;\n"
"#endif\n"
"\n"
"}\n"
"\n"
"mat4 sample_4p(vec4 u)\n"
"{\n"
" mat4 c;\n"
"\n"
" c[0] = sample_p(u.x);\n"
" c[1] = sample_p(u.y);\n"
" c[2] = sample_p(u.z);\n"
" c[3] = sample_p(u.w);\n"
"\n"
" return c;\n"
"}\n"
"\n"
"int fetch_raw_depth()\n"
"{\n"
" return int(texelFetch(RawTextureSampler, ivec2(gl_FragCoord.xy), 0).r * exp2(32.0f));\n"
"}\n"
"\n"
"vec4 fetch_raw_color()\n"
"{\n"
" return texelFetch(RawTextureSampler, ivec2(gl_FragCoord.xy), 0);\n"
"}\n"
"\n"
"vec4 fetch_c(ivec2 uv)\n"
"{\n"
" return texelFetch(TextureSampler, ivec2(uv), 0);\n"
"}\n"
"\n"
"//////////////////////////////////////////////////////////////////////\n"
"// Depth sampling\n"
"//////////////////////////////////////////////////////////////////////\n"
"ivec2 clamp_wrap_uv_depth(ivec2 uv)\n"
"{\n"
" ivec2 uv_out = uv;\n"
"\n"
" // Keep the full precision\n"
" // It allow to multiply the ScalingFactor before the 1/16 coeff\n"
" ivec4 mask = ivec4(MskFix) << 4;\n"
"\n"
"#if PS_WMS == PS_WMT\n"
"\n"
"#if PS_WMS == 2\n"
" uv_out = clamp(uv, mask.xy, mask.zw);\n"
"#elif PS_WMS == 3\n"
" uv_out = (uv & mask.xy) | mask.zw;\n"
"#endif\n"
"\n"
"#else // PS_WMS != PS_WMT\n"
"\n"
"#if PS_WMS == 2\n"
" uv_out.x = clamp(uv.x, mask.x, mask.z);\n"
"#elif PS_WMS == 3\n"
" uv_out.x = (uv.x & mask.x) | mask.z;\n"
"#endif\n"
"\n"
"#if PS_WMT == 2\n"
" uv_out.y = clamp(uv.y, mask.y, mask.w);\n"
"#elif PS_WMT == 3\n"
" uv_out.y = (uv.y & mask.y) | mask.w;\n"
"#endif\n"
"\n"
"#endif\n"
"\n"
" return uv_out;\n"
"}\n"
"\n"
"vec4 sample_depth(vec2 st)\n"
"{\n"
" vec2 uv_f = vec2(clamp_wrap_uv_depth(ivec2(st))) * vec2(ScalingFactor.xy) * vec2(1.0f/16.0f);\n"
" ivec2 uv = ivec2(uv_f);\n"
"\n"
" vec4 t = vec4(0.0f);\n"
"#if PS_TALES_OF_ABYSS_HLE == 1\n"
" // Warning: UV can't be used in channel effect\n"
" int depth = fetch_raw_depth();\n"
"\n"
" // Convert msb based on the palette\n"
" t = texelFetch(PaletteSampler, ivec2((depth >> 8) & 0xFF, 0), 0) * 255.0f;\n"
"\n"
"#elif PS_URBAN_CHAOS_HLE == 1\n"
" // Depth buffer is read as a RGB5A1 texture. The game try to extract the green channel.\n"
" // So it will do a first channel trick to extract lsb, value is right-shifted.\n"
" // Then a new channel trick to extract msb which will shifted to the left.\n"
" // OpenGL uses a FLOAT32 format for the depth so it requires a couple of conversion.\n"
" // To be faster both steps (msb&lsb) are done in a single pass.\n"
"\n"
" // Warning: UV can't be used in channel effect\n"
" int depth = fetch_raw_depth();\n"
"\n"
" // Convert lsb based on the palette\n"
" t = texelFetch(PaletteSampler, ivec2((depth & 0xFF), 0), 0) * 255.0f;\n"
"\n"
" // Msb is easier\n"
" float green = float((depth >> 8) & 0xFF) * 36.0f;\n"
" green = min(green, 255.0f);\n"
"\n"
" t.g += green;\n"
"\n"
"\n"
"#elif PS_DEPTH_FMT == 1\n"
" // Based on ps_main11 of convert\n"
"\n"
" // Convert a GL_FLOAT32 depth texture into a RGBA color texture\n"
" const vec4 bitSh = vec4(exp2(24.0f), exp2(16.0f), exp2(8.0f), exp2(0.0f));\n"
" const vec4 bitMsk = vec4(0.0, 1.0/256.0, 1.0/256.0, 1.0/256.0);\n"
"\n"
" vec4 res = fract(vec4(fetch_c(uv).r) * bitSh);\n"
"\n"
" t = (res - res.xxyz * bitMsk) * 256.0f;\n"
"\n"
"#elif PS_DEPTH_FMT == 2\n"
" // Based on ps_main12 of convert\n"
"\n"
" // Convert a GL_FLOAT32 (only 16 lsb) depth into a RGB5A1 color texture\n"
" const vec4 bitSh = vec4(exp2(32.0f), exp2(27.0f), exp2(22.0f), exp2(17.0f));\n"
" const uvec4 bitMsk = uvec4(0x1F, 0x1F, 0x1F, 0x1);\n"
" uvec4 color = uvec4(vec4(fetch_c(uv).r) * bitSh) & bitMsk;\n"
"\n"
" t = vec4(color) * vec4(8.0f, 8.0f, 8.0f, 128.0f);\n"
"\n"
"#elif PS_DEPTH_FMT == 3\n"
" // Convert a RGBA/RGB5A1 color texture into a RGBA/RGB5A1 color texture\n"
" t = fetch_c(uv) * 255.0f;\n"
"\n"
"#endif\n"
"\n"
"\n"
" // warning t ranges from 0 to 255\n"
"#if (PS_AEM_FMT == FMT_24)\n"
" t.a = ( (PS_AEM == 0) || any(bvec3(t.rgb)) ) ? 255.0f * TA.x : 0.0f;\n"
"#elif (PS_AEM_FMT == FMT_16)\n"
" t.a = t.a >= 128.0f ? 255.0f * TA.y : ( (PS_AEM == 0) || any(bvec3(t.rgb)) ) ? 255.0f * TA.x : 0.0f;\n"
"#endif\n"
"\n"
"\n"
" return t;\n"
"}\n"
"\n"
"//////////////////////////////////////////////////////////////////////\n"
"// Fetch a Single Channel\n"
"//////////////////////////////////////////////////////////////////////\n"
"vec4 fetch_red()\n"
"{\n"
"#if PS_DEPTH_FMT == 1 || PS_DEPTH_FMT == 2\n"
" int depth = (fetch_raw_depth()) & 0xFF;\n"
" vec4 rt = vec4(depth) / 255.0f;\n"
"#else\n"
" vec4 rt = fetch_raw_color();\n"
"#endif\n"
" return sample_p(rt.r) * 255.0f;\n"
"}\n"
"\n"
"vec4 fetch_blue()\n"
"{\n"
"#if PS_DEPTH_FMT == 1 || PS_DEPTH_FMT == 2\n"
" int depth = (fetch_raw_depth() >> 16) & 0xFF;\n"
" vec4 rt = vec4(depth) / 255.0f;\n"
"#else\n"
" vec4 rt = fetch_raw_color();\n"
"#endif\n"
" return sample_p(rt.b) * 255.0f;\n"
"}\n"
"\n"
"vec4 fetch_green()\n"
"{\n"
" vec4 rt = fetch_raw_color();\n"
" return sample_p(rt.g) * 255.0f;\n"
"}\n"
"\n"
"vec4 fetch_alpha()\n"
"{\n"
" vec4 rt = fetch_raw_color();\n"
" return sample_p(rt.a) * 255.0f;\n"
"}\n"
"\n"
"vec4 fetch_rgb()\n"
"{\n"
" vec4 rt = fetch_raw_color();\n"
" vec4 c = vec4(sample_p(rt.r).r, sample_p(rt.g).g, sample_p(rt.b).b, 1.0f);\n"
" return c * 255.0f;\n"
"}\n"
"\n"
"vec4 fetch_gXbY()\n"
"{\n"
"#if PS_DEPTH_FMT == 1 || PS_DEPTH_FMT == 2\n"
" int depth = fetch_raw_depth();\n"
" int bg = (depth >> (8 + ChannelShuffle.w)) & 0xFF;\n"
" return vec4(bg);\n"
"#else\n"
" ivec4 rt = ivec4(fetch_raw_color() * 255.0f);\n"
" int green = (rt.g >> ChannelShuffle.w) & ChannelShuffle.z;\n"
" int blue = (rt.b << ChannelShuffle.y) & ChannelShuffle.x;\n"
" return vec4(green | blue);\n"
"#endif\n"
"}\n"
"\n"
"//////////////////////////////////////////////////////////////////////\n"
"\n"
"vec4 sample_color(vec2 st)\n"
"{\n"
"#if (PS_TCOFFSETHACK == 1)\n"
" st += TC_OffsetHack.xy;\n"
"#endif\n"
"\n"
" vec4 t;\n"
" mat4 c;\n"
" vec2 dd;\n"
"\n"
" // FIXME I'm not sure this condition is useful (I think code will be optimized)\n"
"#if (PS_LTF == 0 && PS_AEM_FMT == FMT_32 && PS_PAL_FMT == 0 && PS_WMS < 2 && PS_WMT < 2)\n"
" // No software LTF and pure 32 bits RGBA texure without special texture wrapping\n"
" c[0] = sample_c(st);\n"
"#ifdef TEX_COORD_DEBUG\n"
" c[0].rg = st.xy;\n"
"#endif\n"
"\n"
"#else\n"
" vec4 uv;\n"
"\n"
" if(PS_LTF != 0)\n"
" {\n"
" uv = st.xyxy + HalfTexel;\n"
" dd = fract(uv.xy * WH.zw);\n"
"#if (PS_FST == 0)\n"
" // Background in Shin Megami Tensei Lucifers\n"
" // I suspect that uv isn't a standard number, so fract is outside of the [0;1] range\n"
" // Note: it is free on GPU but let's do it only for float coordinate\n"
" // Strangely Dx doesn't suffer from this issue.\n"
" dd = clamp(dd, vec2(0.0f), vec2(1.0f));\n"
"#endif\n"
" }\n"
" else\n"
" {\n"
" uv = st.xyxy;\n"
" }\n"
"\n"
" uv = clamp_wrap_uv(uv);\n"
"\n"
"#if PS_PAL_FMT != 0\n"
" c = sample_4p(sample_4_index(uv));\n"
"#else\n"
" c = sample_4c(uv);\n"
"#endif\n"
"\n"
"#ifdef TEX_COORD_DEBUG\n"
" c[0].rg = uv.xy;\n"
" c[1].rg = uv.xy;\n"
" c[2].rg = uv.xy;\n"
" c[3].rg = uv.xy;\n"
"#endif\n"
"\n"
"#endif\n"
"\n"
" // PERF note: using dot product reduces by 1 the number of instruction\n"
" // but I'm not sure it is equivalent neither faster.\n"
" for (int i = 0; i < 4; i++)\n"
" {\n"
" //float sum = dot(c[i].rgb, vec3(1.0f));\n"
"#if (PS_AEM_FMT == FMT_24)\n"
" c[i].a = ( (PS_AEM == 0) || any(bvec3(c[i].rgb)) ) ? TA.x : 0.0f;\n"
" //c[i].a = ( (PS_AEM == 0) || (sum > 0.0f) ) ? TA.x : 0.0f;\n"
"#elif (PS_AEM_FMT == FMT_16)\n"
" c[i].a = c[i].a >= 0.5 ? TA.y : ( (PS_AEM == 0) || any(bvec3(c[i].rgb)) ) ? TA.x : 0.0f;\n"
" //c[i].a = c[i].a >= 0.5 ? TA.y : ( (PS_AEM == 0) || (sum > 0.0f) ) ? TA.x : 0.0f;\n"
"#endif\n"
" }\n"
"\n"
"#if(PS_LTF != 0)\n"
" t = mix(mix(c[0], c[1], dd.x), mix(c[2], c[3], dd.x), dd.y);\n"
"#else\n"
" t = c[0];\n"
"#endif\n"
"\n"
" // The 0.05f helps to fix the overbloom of sotc\n"
" // I think the issue is related to the rounding of texture coodinate. The linear (from fixed unit)\n"
" // interpolation could be slightly below the correct one.\n"
" return trunc(t * 255.0f + 0.05f);\n"
"}\n"
"\n"
"vec4 tfx(vec4 T, vec4 C)\n"
"{\n"
" vec4 C_out;\n"
" vec4 FxT = trunc(trunc(C) * T / 128.0f);\n"
"\n"
"#if (PS_TFX == 0)\n"
" C_out = FxT;\n"
"#elif (PS_TFX == 1)\n"
" C_out = T;\n"
"#elif (PS_TFX == 2)\n"
" C_out.rgb = FxT.rgb + C.a;\n"
" C_out.a = T.a + C.a;\n"
"#elif (PS_TFX == 3)\n"
" C_out.rgb = FxT.rgb + C.a;\n"
" C_out.a = T.a;\n"
"#else\n"
" C_out = C;\n"
"#endif\n"
"\n"
"#if (PS_TCC == 0)\n"
" C_out.a = C.a;\n"
"#endif\n"
"\n"
"#if (PS_TFX == 0) || (PS_TFX == 2) || (PS_TFX == 3)\n"
" // Clamp only when it is useful\n"
" C_out = min(C_out, 255.0f);\n"
"#endif\n"
"\n"
" return C_out;\n"
"}\n"
"\n"
"void atst(vec4 C)\n"
"{\n"
" float a = C.a;\n"
"\n"
"#if 0\n"
" switch(Uber_ATST) {\n"
" case 0:\n"
" break;\n"
" case 1:\n"
" if (a > AREF) discard;\n"
" break;\n"
" case 2:\n"
" if (a < AREF) discard;\n"
" break;\n"
" case 3:\n"
" if (abs(a - AREF) > 0.5f) discard;\n"
" break;\n"
" case 4:\n"
" if (abs(a - AREF) < 0.5f) discard;\n"
" break;\n"
" }\n"
"\n"
"\n"
"#endif\n"
"\n"
"#if 1\n"
"\n"
"#if (PS_ATST == 0)\n"
" // nothing to do\n"
"#elif (PS_ATST == 1)\n"
" if (a > AREF) discard;\n"
"#elif (PS_ATST == 2)\n"
" if (a < AREF) discard;\n"
"#elif (PS_ATST == 3)\n"
" if (abs(a - AREF) > 0.5f) discard;\n"
"#elif (PS_ATST == 4)\n"
" if (abs(a - AREF) < 0.5f) discard;\n"
"#endif\n"
"\n"
"#endif\n"
"\n"
"}\n"
"\n"
"void fog(inout vec4 C, float f)\n"
"{\n"
"#if PS_FOG != 0\n"
" C.rgb = trunc(mix(FogColor, C.rgb, f));\n"
"#endif\n"
"}\n"
"\n"
"vec4 ps_color()\n"
"{\n"
" //FIXME: maybe we can set gl_Position.w = q in VS\n"
"#if (PS_FST == 0)\n"
" vec2 st = PSin.t_float.xy / vec2(PSin.t_float.w);\n"
"#else\n"
" // Note xy are normalized coordinate\n"
" vec2 st = PSin.t_int.xy;\n"
"#endif\n"
"\n"
"#if PS_CHANNEL_FETCH == 1\n"
" vec4 T = fetch_red();\n"
"#elif PS_CHANNEL_FETCH == 2\n"
" vec4 T = fetch_green();\n"
"#elif PS_CHANNEL_FETCH == 3\n"
" vec4 T = fetch_blue();\n"
"#elif PS_CHANNEL_FETCH == 4\n"
" vec4 T = fetch_alpha();\n"
"#elif PS_CHANNEL_FETCH == 6\n"
" vec4 T = fetch_gXbY();\n"
"#elif PS_CHANNEL_FETCH == 7\n"
" vec4 T = fetch_rgb();\n"
"#elif PS_DEPTH_FMT > 0\n"
" // Integral coordinate\n"
" vec4 T = sample_depth(PSin.t_int.zw);\n"
"#else\n"
" vec4 T = sample_color(st);\n"
"#endif\n"
"\n"
"#if PS_IIP == 1\n"
" vec4 C = tfx(T, PSin.c);\n"
"#else\n"
" vec4 C = tfx(T, PSin.fc);\n"
"#endif\n"
"\n"
" atst(C);\n"
"\n"
" fog(C, PSin.t_float.z);\n"
"\n"
"#if (PS_CLR1 != 0) // needed for Cd * (As/Ad/F + 1) blending modes\n"
" C.rgb = vec3(255.0f);\n"
"#endif\n"
"\n"
" return C;\n"
"}\n"
"\n"
"void ps_fbmask(inout vec4 C)\n"
"{\n"
" // FIXME do I need special case for 16 bits\n"
"#if PS_FBMASK\n"
" vec4 RT = trunc(texelFetch(RtSampler, ivec2(gl_FragCoord.xy), 0) * 255.0f + 0.1f);\n"
" C = vec4((uvec4(C) & ~FbMask) | (uvec4(RT) & FbMask));\n"
"#endif\n"
"}\n"
"\n"
"void ps_blend(inout vec4 Color, float As)\n"
"{\n"
"#if SW_BLEND\n"
" vec4 RT = trunc(texelFetch(RtSampler, ivec2(gl_FragCoord.xy), 0) * 255.0f + 0.1f);\n"
"\n"
"#if PS_DFMT == FMT_24\n"
" float Ad = 1.0f;\n"
"#else\n"
" // FIXME FMT_16 case\n"
" // FIXME Ad or Ad * 2?\n"
" float Ad = RT.a / 128.0f;\n"
"#endif\n"
"\n"
" // Let the compiler do its jobs !\n"
" vec3 Cd = RT.rgb;\n"
" vec3 Cs = Color.rgb;\n"
"\n"
"#if PS_BLEND_A == 0\n"
" vec3 A = Cs;\n"
"#elif PS_BLEND_A == 1\n"
" vec3 A = Cd;\n"
"#else\n"
" vec3 A = vec3(0.0f);\n"
"#endif\n"
"\n"
"#if PS_BLEND_B == 0\n"
" vec3 B = Cs;\n"
"#elif PS_BLEND_B == 1\n"
" vec3 B = Cd;\n"
"#else\n"
" vec3 B = vec3(0.0f);\n"
"#endif\n"
"\n"
"#if PS_BLEND_C == 0\n"
" float C = As;\n"
"#elif PS_BLEND_C == 1\n"
" float C = Ad;\n"
"#else\n"
" float C = Af;\n"
"#endif\n"
"\n"
"#if PS_BLEND_D == 0\n"
" vec3 D = Cs;\n"
"#elif PS_BLEND_D == 1\n"
" vec3 D = Cd;\n"
"#else\n"
" vec3 D = vec3(0.0f);\n"
"#endif\n"
"\n"
"#if PS_BLEND_A == PS_BLEND_B\n"
" Color.rgb = D;\n"
"#else\n"
" Color.rgb = trunc((A - B) * C + D);\n"
"#endif\n"
"\n"
" // FIXME dithering\n"
"\n"
" // Correct the Color value based on the output format\n"
"#if PS_COLCLIP == 0 && PS_HDR == 0\n"
" // Standard Clamp\n"
" Color.rgb = clamp(Color.rgb, vec3(0.0f), vec3(255.0f));\n"
"#endif\n"
"\n"
" // FIXME rouding of negative float?\n"
" // compiler uses trunc but it might need floor\n"
"\n"
" // Warning: normally blending equation is mult(A, B) = A * B >> 7. GPU have the full accuracy\n"
" // GS: Color = 1, Alpha = 255 => output 1\n"
" // GPU: Color = 1/255, Alpha = 255/255 * 255/128 => output 1.9921875\n"
"#if PS_DFMT == FMT_16\n"
" // In 16 bits format, only 5 bits of colors are used. It impacts shadows computation of Castlevania\n"
"\n"
" Color.rgb = vec3(ivec3(Color.rgb) & ivec3(0xF8));\n"
"#elif PS_COLCLIP == 1 && PS_HDR == 0\n"
" Color.rgb = vec3(ivec3(Color.rgb) & ivec3(0xFF));\n"
"#endif\n"
"\n"
"#endif\n"
"}\n"
"\n"
"void ps_main()\n"
"{\n"
"#if ((PS_DATE & 3) == 1 || (PS_DATE & 3) == 2)\n"
"\n"
"#if PS_WRITE_RG == 1\n"
" // Pseudo 16 bits access.\n"
" float rt_a = texelFetch(RtSampler, ivec2(gl_FragCoord.xy), 0).g;\n"
"#else\n"
" float rt_a = texelFetch(RtSampler, ivec2(gl_FragCoord.xy), 0).a;\n"
"#endif\n"
"\n"
"#if (PS_DATE & 3) == 1\n"
" // DATM == 0: Pixel with alpha equal to 1 will failed\n"
" bool bad = (127.5f / 255.0f) < rt_a;\n"
"#elif (PS_DATE & 3) == 2\n"
" // DATM == 1: Pixel with alpha equal to 0 will failed\n"
" bool bad = rt_a < (127.5f / 255.0f);\n"
"#endif\n"
"\n"
" if (bad) {\n"
"#if PS_DATE >= 5 || defined(DISABLE_GL42_image)\n"
" discard;\n"
"#else\n"
" imageStore(img_prim_min, ivec2(gl_FragCoord.xy), ivec4(-1));\n"
" return;\n"
"#endif\n"
" }\n"
"\n"
"#endif\n"
"\n"
"#if PS_DATE == 3 && !defined(DISABLE_GL42_image)\n"
" int stencil_ceil = imageLoad(img_prim_min, ivec2(gl_FragCoord.xy)).r;\n"
" // Note gl_PrimitiveID == stencil_ceil will be the primitive that will update\n"
" // the bad alpha value so we must keep it.\n"
"\n"
" if (gl_PrimitiveID > stencil_ceil) {\n"
" discard;\n"
" }\n"
"#endif\n"
"\n"
" vec4 C = ps_color();\n"
"#if (APITRACE_DEBUG & 1) == 1\n"
" C.r = 255f;\n"
"#endif\n"
"#if (APITRACE_DEBUG & 2) == 2\n"
" C.g = 255f;\n"
"#endif\n"
"#if (APITRACE_DEBUG & 4) == 4\n"
" C.b = 255f;\n"
"#endif\n"
"#if (APITRACE_DEBUG & 8) == 8\n"
" C.a = 128f;\n"
"#endif\n"
"\n"
"#if PS_SHUFFLE\n"
" uvec4 denorm_c = uvec4(C);\n"
" uvec2 denorm_TA = uvec2(vec2(TA.xy) * 255.0f + 0.5f);\n"
"\n"
" // Write RB part. Mask will take care of the correct destination\n"
"#if PS_READ_BA\n"
" C.rb = C.bb;\n"
"#else\n"
" C.rb = C.rr;\n"
"#endif\n"
"\n"
" // FIXME precompute my_TA & 0x80\n"
"\n"
" // Write GA part. Mask will take care of the correct destination\n"
" // Note: GLSL 4.50/GL_EXT_shader_integer_mix support a mix instruction to select a component\\n\"\n"
" // However Nvidia emulate it with an if (at least on kepler arch) ...\\n\"\n"
"#if PS_READ_BA\n"
" // bit field operation requires GL4 HW. Could be nice to merge it with step/mix below\n"
" // uint my_ta = (bool(bitfieldExtract(denorm_c.a, 7, 1))) ? denorm_TA.y : denorm_TA.x;\n"
" // denorm_c.a = bitfieldInsert(denorm_c.a, bitfieldExtract(my_ta, 7, 1), 7, 1);\n"
" // c.ga = vec2(float(denorm_c.a));\n"
"\n"
" if (bool(denorm_c.a & 0x80u))\n"
" C.ga = vec2(float((denorm_c.a & 0x7Fu) | (denorm_TA.y & 0x80u)));\n"
" else\n"
" C.ga = vec2(float((denorm_c.a & 0x7Fu) | (denorm_TA.x & 0x80u)));\n"
"\n"
"#else\n"
" if (bool(denorm_c.g & 0x80u))\n"
" C.ga = vec2(float((denorm_c.g & 0x7Fu) | (denorm_TA.y & 0x80u)));\n"
" else\n"
" C.ga = vec2(float((denorm_c.g & 0x7Fu) | (denorm_TA.x & 0x80u)));\n"
"\n"
" // Nice idea but step/mix requires 4 instructions\n"
" // set / trunc / I2F / Mad\n"
" //\n"
" // float sel = step(128.0f, c.g);\n"
" // vec2 c_shuffle = vec2((denorm_c.gg & 0x7Fu) | (denorm_TA & 0x80u));\n"
" // c.ga = mix(c_shuffle.xx, c_shuffle.yy, sel);\n"
"#endif\n"
"\n"
"#endif\n"
"\n"
" // Must be done before alpha correction\n"
" float alpha_blend = C.a / 128.0f;\n"
"\n"
" // Correct the ALPHA value based on the output format\n"
"#if (PS_DFMT == FMT_16)\n"
" float A_one = 128.0f; // alpha output will be 0x80\n"
" C.a = (PS_FBA != 0) ? A_one : step(128.0f, C.a) * A_one;\n"
"#elif (PS_DFMT == FMT_32) && (PS_FBA != 0)\n"
" if(C.a < 128.0f) C.a += 128.0f;\n"
"#endif\n"
"\n"
" // Get first primitive that will write a failling alpha value\n"
"#if PS_DATE == 1 && !defined(DISABLE_GL42_image)\n"
" // DATM == 0\n"
" // Pixel with alpha equal to 1 will failed (128-255)\n"
" if (C.a > 127.5f) {\n"
" imageAtomicMin(img_prim_min, ivec2(gl_FragCoord.xy), gl_PrimitiveID);\n"
" }\n"
" return;\n"
"#elif PS_DATE == 2 && !defined(DISABLE_GL42_image)\n"
" // DATM == 1\n"
" // Pixel with alpha equal to 0 will failed (0-127)\n"
" if (C.a < 127.5f) {\n"
" imageAtomicMin(img_prim_min, ivec2(gl_FragCoord.xy), gl_PrimitiveID);\n"
" }\n"
" return;\n"
"#endif\n"
"\n"
" ps_blend(C, alpha_blend);\n"
"\n"
" ps_fbmask(C);\n"
"\n"
"#if PS_HDR == 1\n"
" // Use negative value to avoid overflow of the texture (in accumulation mode)\n"
" // Note: code were initially done for an Half-Float texture. Due to overflow\n"
" // the texture was upgraded to a full float. Maybe this code is useless now!\n"
" // Good testcase is castlevania\n"
" if (any(greaterThan(C.rgb, vec3(128.0f)))) {\n"
" C.rgb = (C.rgb - 256.0f);\n"
" }\n"
"#endif\n"
" SV_Target0 = C / 255.0f;\n"
" SV_Target1 = vec4(alpha_blend);\n"
"}\n"
"\n"
"#endif\n"
;
/*
 * fxaa_fx - FXAA ("Fast Approximate Anti Aliasing") pixel shader source,
 * stored as a single string.
 *
 * One text serves two shading languages, selected by macros that are not
 * defined inside this string (presumably injected by whoever compiles it -
 * confirm against the caller):
 *   - FXAA_GLSL_130 == 1 : GLSL path. Declares the PSin input block, the
 *     SV_Target0 output and the cb14 uniform block, and maps
 *     float2/float3/float4/int2 onto vec2/vec3/vec4/ivec2.
 *   - SHADER_MODEL defined : HLSL path. SHADER_MODEL >= 0x500 selects the
 *     FXAA_HLSL_5 macro set, >= 0x400 selects FXAA_HLSL_4, and anything
 *     lower falls back to FXAA_HLSL_3.
 * If neither macro is defined, the outer #if removes the whole shader.
 *
 * Both paths expose the entry point ps_main, which samples the texture and
 * then calls PreGammaPass followed by FxaaPass.
 *
 * Tunables are plain #defines inside the text: FxaaSubpixMax,
 * FxaaEarlyExit, FxaaEdgeThreshold, FxaaEdgeThresholdMin and the
 * FXAA_QUALITY__P0..P12 search step sizes.
 *
 * NOTE(review): the GLSL path uses TextureSampler without declaring it in
 * this string; it is presumably declared by a header prepended at compile
 * time - confirm.
 * NOTE(review): FxaaPass overwrites its FxaaColor argument in every
 * branch, so the value produced by PreGammaPass does not reach the output.
 * NOTE(review): with FXAA_GATHER4_ALPHA == 1, six of the eight neighbour
 * lumas are gathered from the texture's alpha channel while the centre
 * luma is computed from RGB - verify the input texture carries luma in
 * alpha.
 *
 * The file header states that this file is generated by glsl2h.pl, so
 * lasting changes presumably belong in the shader source it is generated
 * from rather than in this string.
 */
static const char* const fxaa_fx =
"#if defined(SHADER_MODEL) || defined(FXAA_GLSL_130)\n"
"\n"
"#ifndef FXAA_GLSL_130\n"
" #define FXAA_GLSL_130 0\n"
"#endif\n"
"\n"
"#define UHQ_FXAA 1 //High Quality Fast Approximate Anti Aliasing. Adapted for GSdx from Timothy Lottes FXAA 3.11.\n"
"#define FxaaSubpixMax 0.0 //[0.00 to 1.00] Amount of subpixel aliasing removal. 0.00: Edge only antialiasing (no blurring)\n"
"#define FxaaEarlyExit 1 //[0 or 1] Use Fxaa early exit pathing. When disabled, the entire scene is antialiased(FSAA). 0 is off, 1 is on.\n"
"\n"
/* Interface declarations: GLSL block first, HLSL resources and structs in the #else branch. */
"/*------------------------------------------------------------------------------\n"
" [GLOBALS|FUNCTIONS]\n"
"------------------------------------------------------------------------------*/\n"
"#if (FXAA_GLSL_130 == 1)\n"
"\n"
"in SHADER\n"
"{\n"
" vec4 p;\n"
" vec2 t;\n"
"} PSin;\n"
"\n"
"layout(location = 0) out vec4 SV_Target0;\n"
"\n"
"layout(std140, binding = 14) uniform cb14\n"
"{\n"
" vec2 _xyFrame;\n"
" vec4 _rcpFrame;\n"
"};\n"
"\n"
"#else\n"
"\n"
"#if (SHADER_MODEL >= 0x400)\n"
"Texture2D Texture : register(t0);\n"
"SamplerState TextureSampler : register(s0);\n"
"#else\n"
"texture2D Texture : register(t0);\n"
"sampler2D TextureSampler : register(s0);\n"
"#define SamplerState sampler2D\n"
"#endif\n"
"\n"
"cbuffer cb0\n"
"{\n"
" float4 _rcpFrame : register(c0);\n"
"};\n"
"\n"
"struct VS_INPUT\n"
"{\n"
" float4 p : POSITION;\n"
" float2 t : TEXCOORD0;\n"
"};\n"
"\n"
"struct VS_OUTPUT\n"
"{\n"
" #if (SHADER_MODEL >= 0x400)\n"
" float4 p : SV_Position;\n"
" #else\n"
" float4 p : TEXCOORD1;\n"
" #endif\n"
" float2 t : TEXCOORD0;\n"
"};\n"
"\n"
"struct PS_OUTPUT\n"
"{\n"
" #if (SHADER_MODEL >= 0x400)\n"
" float4 c : SV_Target0;\n"
" #else\n"
" float4 c : COLOR0;\n"
" #endif\n"
"};\n"
"\n"
"#endif\n"
"\n"
/* Target selection (FXAA_HLSL_x / FXAA_GATHER4_ALPHA) and the Fxaa* texture-access macros for each target. */
"/*------------------------------------------------------------------------------\n"
" [FXAA CODE SECTION]\n"
"------------------------------------------------------------------------------*/\n"
"\n"
"#if (SHADER_MODEL >= 0x500)\n"
"#define FXAA_HLSL_5 1\n"
"#define FXAA_GATHER4_ALPHA 1\n"
"#elif (SHADER_MODEL >= 0x400)\n"
"#define FXAA_HLSL_4 1\n"
"#define FXAA_GATHER4_ALPHA 0\n"
"#elif (FXAA_GLSL_130 == 1)\n"
"#define FXAA_GATHER4_ALPHA 1\n"
"#else\n"
"#define FXAA_HLSL_3 1\n"
"#define FXAA_GATHER4_ALPHA 0\n"
"#endif\n"
"\n"
"#if (FXAA_HLSL_5 == 1)\n"
"struct FxaaTex { SamplerState smpl; Texture2D tex; };\n"
"#define FxaaTexTop(t, p) t.tex.SampleLevel(t.smpl, p, 0.0)\n"
"#define FxaaTexOff(t, p, o, r) t.tex.SampleLevel(t.smpl, p, 0.0, o)\n"
"#define FxaaTexAlpha4(t, p) t.tex.GatherAlpha(t.smpl, p)\n"
"#define FxaaTexOffAlpha4(t, p, o) t.tex.GatherAlpha(t.smpl, p, o)\n"
"#define FxaaDiscard clip(-1)\n"
"#define FxaaSat(x) saturate(x)\n"
"\n"
"#elif (FXAA_HLSL_4 == 1)\n"
"struct FxaaTex { SamplerState smpl; Texture2D tex; };\n"
"#define FxaaTexTop(t, p) t.tex.SampleLevel(t.smpl, p, 0.0)\n"
"#define FxaaTexOff(t, p, o, r) t.tex.SampleLevel(t.smpl, p, 0.0, o)\n"
"#define FxaaDiscard clip(-1)\n"
"#define FxaaSat(x) saturate(x)\n"
"\n"
"#elif (FXAA_HLSL_3 == 1)\n"
"#define FxaaTex sampler2D\n"
"#define int2 float2\n"
"#define FxaaSat(x) saturate(x)\n"
"#define FxaaTexTop(t, p) tex2Dlod(t, float4(p, 0.0, 0.0))\n"
"#define FxaaTexOff(t, p, o, r) tex2Dlod(t, float4(p + (o * r), 0, 0))\n"
"\n"
"#elif (FXAA_GLSL_130 == 1)\n"
"\n"
"#define int2 ivec2\n"
"#define float2 vec2\n"
"#define float3 vec3\n"
"#define float4 vec4\n"
"#define FxaaDiscard discard\n"
"#define FxaaSat(x) clamp(x, 0.0, 1.0)\n"
"#define FxaaTex sampler2D\n"
"#define FxaaTexTop(t, p) textureLod(t, p, 0.0)\n"
"#define FxaaTexOff(t, p, o, r) textureLodOffset(t, p, 0.0, o)\n"
"#if (FXAA_GATHER4_ALPHA == 1)\n"
"// use #extension GL_ARB_gpu_shader5 : enable\n"
"#define FxaaTexAlpha4(t, p) textureGather(t, p, 3)\n"
"#define FxaaTexOffAlpha4(t, p, o) textureGatherOffset(t, p, o, 3)\n"
"#endif\n"
"\n"
"#endif\n"
"\n"
"#define FxaaEdgeThreshold 0.063\n"
"#define FxaaEdgeThresholdMin 0.00\n"
"#define FXAA_QUALITY__P0 1.0\n"
"#define FXAA_QUALITY__P1 1.5\n"
"#define FXAA_QUALITY__P2 2.0\n"
"#define FXAA_QUALITY__P3 2.0\n"
"#define FXAA_QUALITY__P4 2.0\n"
"#define FXAA_QUALITY__P5 2.0\n"
"#define FXAA_QUALITY__P6 2.0\n"
"#define FXAA_QUALITY__P7 2.0\n"
"#define FXAA_QUALITY__P8 2.0\n"
"#define FXAA_QUALITY__P9 2.0\n"
"#define FXAA_QUALITY__P10 4.0\n"
"#define FXAA_QUALITY__P11 8.0\n"
"#define FXAA_QUALITY__P12 8.0\n"
"\n"
"/*------------------------------------------------------------------------------\n"
" [GAMMA PREPASS CODE SECTION]\n"
"------------------------------------------------------------------------------*/\n"
"float RGBLuminance(float3 color)\n"
"{\n"
" const float3 lumCoeff = float3(0.2126729, 0.7151522, 0.0721750);\n"
" return dot(color.rgb, lumCoeff);\n"
"}\n"
"\n"
"#if (FXAA_GLSL_130 == 0)\n"
"#define PixelSize float2(_rcpFrame.x, _rcpFrame.y)\n"
"#endif\n"
"\n"
"\n"
"float3 RGBGammaToLinear(float3 color, float gamma)\n"
"{\n"
" color = FxaaSat(color);\n"
" color.r = (color.r <= 0.0404482362771082) ?\n"
" color.r / 12.92 : pow((color.r + 0.055) / 1.055, gamma);\n"
" color.g = (color.g <= 0.0404482362771082) ?\n"
" color.g / 12.92 : pow((color.g + 0.055) / 1.055, gamma);\n"
" color.b = (color.b <= 0.0404482362771082) ?\n"
" color.b / 12.92 : pow((color.b + 0.055) / 1.055, gamma);\n"
"\n"
" return color;\n"
"}\n"
"\n"
"float3 LinearToRGBGamma(float3 color, float gamma)\n"
"{\n"
" color = FxaaSat(color);\n"
" color.r = (color.r <= 0.00313066844250063) ?\n"
" color.r * 12.92 : 1.055 * pow(color.r, 1.0 / gamma) - 0.055;\n"
" color.g = (color.g <= 0.00313066844250063) ?\n"
" color.g * 12.92 : 1.055 * pow(color.g, 1.0 / gamma) - 0.055;\n"
" color.b = (color.b <= 0.00313066844250063) ?\n"
" color.b * 12.92 : 1.055 * pow(color.b, 1.0 / gamma) - 0.055;\n"
"\n"
" return color;\n"
"}\n"
"\n"
"float4 PreGammaPass(float4 color, float2 uv0)\n"
"{\n"
" #if (SHADER_MODEL >= 0x400)\n"
" color = Texture.Sample(TextureSampler, uv0);\n"
" #elif (FXAA_GLSL_130 == 1)\n"
" color = texture(TextureSampler, uv0);\n"
" #else\n"
" color = tex2D(TextureSampler, uv0);\n"
" #endif\n"
"\n"
" const float GammaConst = 2.233;\n"
" color.rgb = RGBGammaToLinear(color.rgb, GammaConst);\n"
" color.rgb = LinearToRGBGamma(color.rgb, GammaConst);\n"
" color.a = RGBLuminance(color.rgb);\n"
"\n"
" return color;\n"
"}\n"
"\n"
"\n"
"/*------------------------------------------------------------------------------\n"
" [FXAA CODE SECTION]\n"
"------------------------------------------------------------------------------*/\n"
"\n"
"float FxaaLuma(float4 rgba)\n"
"{ \n"
" rgba.w = RGBLuminance(rgba.xyz);\n"
" return rgba.w; \n"
"}\n"
"\n"
/* Core FXAA routine: local contrast test, edge orientation, search along the edge, final resample. */
"float4 FxaaPixelShader(float2 pos, FxaaTex tex, float2 fxaaRcpFrame, float fxaaSubpix, float fxaaEdgeThreshold, float fxaaEdgeThresholdMin)\n"
"{\n"
" float2 posM;\n"
" posM.x = pos.x;\n"
" posM.y = pos.y;\n"
"\n"
" #if (FXAA_GATHER4_ALPHA == 1)\n"
" float4 rgbyM = FxaaTexTop(tex, posM);\n"
" float4 luma4A = FxaaTexAlpha4(tex, posM);\n"
" float4 luma4B = FxaaTexOffAlpha4(tex, posM, int2(-1, -1));\n"
" rgbyM.w = RGBLuminance(rgbyM.xyz);\n"
"\n"
" #define lumaM rgbyM.w\n"
" #define lumaE luma4A.z\n"
" #define lumaS luma4A.x\n"
" #define lumaSE luma4A.y\n"
" #define lumaNW luma4B.w\n"
" #define lumaN luma4B.z\n"
" #define lumaW luma4B.x\n"
" \n"
" #else\n"
" float4 rgbyM = FxaaTexTop(tex, posM);\n"
" rgbyM.w = RGBLuminance(rgbyM.xyz);\n"
" #define lumaM rgbyM.w\n"
"\n"
" float lumaS = FxaaLuma(FxaaTexOff(tex, posM, int2( 0, 1), fxaaRcpFrame.xy));\n"
" float lumaE = FxaaLuma(FxaaTexOff(tex, posM, int2( 1, 0), fxaaRcpFrame.xy));\n"
" float lumaN = FxaaLuma(FxaaTexOff(tex, posM, int2( 0,-1), fxaaRcpFrame.xy));\n"
" float lumaW = FxaaLuma(FxaaTexOff(tex, posM, int2(-1, 0), fxaaRcpFrame.xy));\n"
" #endif\n"
"\n"
" float maxSM = max(lumaS, lumaM);\n"
" float minSM = min(lumaS, lumaM);\n"
" float maxESM = max(lumaE, maxSM);\n"
" float minESM = min(lumaE, minSM);\n"
" float maxWN = max(lumaN, lumaW);\n"
" float minWN = min(lumaN, lumaW);\n"
"\n"
" float rangeMax = max(maxWN, maxESM);\n"
" float rangeMin = min(minWN, minESM);\n"
" float range = rangeMax - rangeMin;\n"
" float rangeMaxScaled = rangeMax * fxaaEdgeThreshold;\n"
" float rangeMaxClamped = max(fxaaEdgeThresholdMin, rangeMaxScaled);\n"
"\n"
" bool earlyExit = range < rangeMaxClamped;\n"
" #if (FxaaEarlyExit == 1)\n"
" if(earlyExit) { return rgbyM; }\n"
" #endif\n"
"\n"
" #if (FXAA_GATHER4_ALPHA == 0)\n"
" float lumaNW = FxaaLuma(FxaaTexOff(tex, posM, int2(-1,-1), fxaaRcpFrame.xy));\n"
" float lumaSE = FxaaLuma(FxaaTexOff(tex, posM, int2( 1, 1), fxaaRcpFrame.xy));\n"
" float lumaNE = FxaaLuma(FxaaTexOff(tex, posM, int2( 1,-1), fxaaRcpFrame.xy));\n"
" float lumaSW = FxaaLuma(FxaaTexOff(tex, posM, int2(-1, 1), fxaaRcpFrame.xy));\n"
" #else\n"
" float lumaNE = FxaaLuma(FxaaTexOff(tex, posM, int2( 1,-1), fxaaRcpFrame.xy));\n"
" float lumaSW = FxaaLuma(FxaaTexOff(tex, posM, int2(-1, 1), fxaaRcpFrame.xy));\n"
" #endif\n"
"\n"
" float lumaNS = lumaN + lumaS;\n"
" float lumaWE = lumaW + lumaE;\n"
" float subpixRcpRange = 1.0/range;\n"
" float subpixNSWE = lumaNS + lumaWE;\n"
" float edgeHorz1 = (-2.0 * lumaM) + lumaNS;\n"
" float edgeVert1 = (-2.0 * lumaM) + lumaWE;\n"
" float lumaNESE = lumaNE + lumaSE;\n"
" float lumaNWNE = lumaNW + lumaNE;\n"
" float edgeHorz2 = (-2.0 * lumaE) + lumaNESE;\n"
" float edgeVert2 = (-2.0 * lumaN) + lumaNWNE;\n"
"\n"
" float lumaNWSW = lumaNW + lumaSW;\n"
" float lumaSWSE = lumaSW + lumaSE;\n"
" float edgeHorz4 = (abs(edgeHorz1) * 2.0) + abs(edgeHorz2);\n"
" float edgeVert4 = (abs(edgeVert1) * 2.0) + abs(edgeVert2);\n"
" float edgeHorz3 = (-2.0 * lumaW) + lumaNWSW;\n"
" float edgeVert3 = (-2.0 * lumaS) + lumaSWSE;\n"
" float edgeHorz = abs(edgeHorz3) + edgeHorz4;\n"
" float edgeVert = abs(edgeVert3) + edgeVert4;\n"
"\n"
" float subpixNWSWNESE = lumaNWSW + lumaNESE;\n"
" float lengthSign = fxaaRcpFrame.x;\n"
" bool horzSpan = edgeHorz >= edgeVert;\n"
" float subpixA = subpixNSWE * 2.0 + subpixNWSWNESE;\n"
" if(!horzSpan) lumaN = lumaW;\n"
" if(!horzSpan) lumaS = lumaE;\n"
" if(horzSpan) lengthSign = fxaaRcpFrame.y;\n"
" float subpixB = (subpixA * (1.0/12.0)) - lumaM;\n"
"\n"
" float gradientN = lumaN - lumaM;\n"
" float gradientS = lumaS - lumaM;\n"
" float lumaNN = lumaN + lumaM;\n"
" float lumaSS = lumaS + lumaM;\n"
" bool pairN = abs(gradientN) >= abs(gradientS);\n"
" float gradient = max(abs(gradientN), abs(gradientS));\n"
" if(pairN) lengthSign = -lengthSign;\n"
" float subpixC = FxaaSat(abs(subpixB) * subpixRcpRange);\n"
"\n"
" float2 posB;\n"
" posB.x = posM.x;\n"
" posB.y = posM.y;\n"
" float2 offNP;\n"
" offNP.x = (!horzSpan) ? 0.0 : fxaaRcpFrame.x;\n"
" offNP.y = ( horzSpan) ? 0.0 : fxaaRcpFrame.y;\n"
" if(!horzSpan) posB.x += lengthSign * 0.5;\n"
" if( horzSpan) posB.y += lengthSign * 0.5;\n"
"\n"
" float2 posN;\n"
" posN.x = posB.x - offNP.x * FXAA_QUALITY__P0;\n"
" posN.y = posB.y - offNP.y * FXAA_QUALITY__P0;\n"
" float2 posP;\n"
" posP.x = posB.x + offNP.x * FXAA_QUALITY__P0;\n"
" posP.y = posB.y + offNP.y * FXAA_QUALITY__P0;\n"
" float subpixD = ((-2.0)*subpixC) + 3.0;\n"
" float lumaEndN = FxaaLuma(FxaaTexTop(tex, posN));\n"
" float subpixE = subpixC * subpixC;\n"
" float lumaEndP = FxaaLuma(FxaaTexTop(tex, posP));\n"
"\n"
" if(!pairN) lumaNN = lumaSS;\n"
" float gradientScaled = gradient * 1.0/4.0;\n"
" float lumaMM = lumaM - lumaNN * 0.5;\n"
" float subpixF = subpixD * subpixE;\n"
" bool lumaMLTZero = lumaMM < 0.0;\n"
" lumaEndN -= lumaNN * 0.5;\n"
" lumaEndP -= lumaNN * 0.5;\n"
" bool doneN = abs(lumaEndN) >= gradientScaled;\n"
" bool doneP = abs(lumaEndP) >= gradientScaled;\n"
" if(!doneN) posN.x -= offNP.x * FXAA_QUALITY__P1;\n"
" if(!doneN) posN.y -= offNP.y * FXAA_QUALITY__P1;\n"
" bool doneNP = (!doneN) || (!doneP);\n"
" if(!doneP) posP.x += offNP.x * FXAA_QUALITY__P1;\n"
" if(!doneP) posP.y += offNP.y * FXAA_QUALITY__P1;\n"
"\n"
/*
 * Hand-unrolled search along the edge: 11 nested blocks. Each one re-samples
 * luma at posN/posP for the directions that are not done yet and then
 * advances them by the next FXAA_QUALITY__Pn step (P2 through P12).
 */
" if(doneNP) {\n"
" if(!doneN) lumaEndN = FxaaLuma(FxaaTexTop(tex, posN.xy));\n"
" if(!doneP) lumaEndP = FxaaLuma(FxaaTexTop(tex, posP.xy));\n"
" if(!doneN) lumaEndN = lumaEndN - lumaNN * 0.5;\n"
" if(!doneP) lumaEndP = lumaEndP - lumaNN * 0.5;\n"
" doneN = abs(lumaEndN) >= gradientScaled;\n"
" doneP = abs(lumaEndP) >= gradientScaled;\n"
" if(!doneN) posN.x -= offNP.x * FXAA_QUALITY__P2;\n"
" if(!doneN) posN.y -= offNP.y * FXAA_QUALITY__P2;\n"
" doneNP = (!doneN) || (!doneP);\n"
" if(!doneP) posP.x += offNP.x * FXAA_QUALITY__P2;\n"
" if(!doneP) posP.y += offNP.y * FXAA_QUALITY__P2;\n"
"\n"
" if(doneNP) {\n"
" if(!doneN) lumaEndN = FxaaLuma(FxaaTexTop(tex, posN.xy));\n"
" if(!doneP) lumaEndP = FxaaLuma(FxaaTexTop(tex, posP.xy));\n"
" if(!doneN) lumaEndN = lumaEndN - lumaNN * 0.5;\n"
" if(!doneP) lumaEndP = lumaEndP - lumaNN * 0.5;\n"
" doneN = abs(lumaEndN) >= gradientScaled;\n"
" doneP = abs(lumaEndP) >= gradientScaled;\n"
" if(!doneN) posN.x -= offNP.x * FXAA_QUALITY__P3;\n"
" if(!doneN) posN.y -= offNP.y * FXAA_QUALITY__P3;\n"
" doneNP = (!doneN) || (!doneP);\n"
" if(!doneP) posP.x += offNP.x * FXAA_QUALITY__P3;\n"
" if(!doneP) posP.y += offNP.y * FXAA_QUALITY__P3;\n"
"\n"
" if(doneNP) {\n"
" if(!doneN) lumaEndN = FxaaLuma(FxaaTexTop(tex, posN.xy));\n"
" if(!doneP) lumaEndP = FxaaLuma(FxaaTexTop(tex, posP.xy));\n"
" if(!doneN) lumaEndN = lumaEndN - lumaNN * 0.5;\n"
" if(!doneP) lumaEndP = lumaEndP - lumaNN * 0.5;\n"
" doneN = abs(lumaEndN) >= gradientScaled;\n"
" doneP = abs(lumaEndP) >= gradientScaled;\n"
" if(!doneN) posN.x -= offNP.x * FXAA_QUALITY__P4;\n"
" if(!doneN) posN.y -= offNP.y * FXAA_QUALITY__P4;\n"
" doneNP = (!doneN) || (!doneP);\n"
" if(!doneP) posP.x += offNP.x * FXAA_QUALITY__P4;\n"
" if(!doneP) posP.y += offNP.y * FXAA_QUALITY__P4;\n"
"\n"
" if(doneNP) {\n"
" if(!doneN) lumaEndN = FxaaLuma(FxaaTexTop(tex, posN.xy));\n"
" if(!doneP) lumaEndP = FxaaLuma(FxaaTexTop(tex, posP.xy));\n"
" if(!doneN) lumaEndN = lumaEndN - lumaNN * 0.5;\n"
" if(!doneP) lumaEndP = lumaEndP - lumaNN * 0.5;\n"
" doneN = abs(lumaEndN) >= gradientScaled;\n"
" doneP = abs(lumaEndP) >= gradientScaled;\n"
" if(!doneN) posN.x -= offNP.x * FXAA_QUALITY__P5;\n"
" if(!doneN) posN.y -= offNP.y * FXAA_QUALITY__P5;\n"
" doneNP = (!doneN) || (!doneP);\n"
" if(!doneP) posP.x += offNP.x * FXAA_QUALITY__P5;\n"
" if(!doneP) posP.y += offNP.y * FXAA_QUALITY__P5;\n"
"\n"
" if(doneNP) {\n"
" if(!doneN) lumaEndN = FxaaLuma(FxaaTexTop(tex, posN.xy));\n"
" if(!doneP) lumaEndP = FxaaLuma(FxaaTexTop(tex, posP.xy));\n"
" if(!doneN) lumaEndN = lumaEndN - lumaNN * 0.5;\n"
" if(!doneP) lumaEndP = lumaEndP - lumaNN * 0.5;\n"
" doneN = abs(lumaEndN) >= gradientScaled;\n"
" doneP = abs(lumaEndP) >= gradientScaled;\n"
" if(!doneN) posN.x -= offNP.x * FXAA_QUALITY__P6;\n"
" if(!doneN) posN.y -= offNP.y * FXAA_QUALITY__P6;\n"
" doneNP = (!doneN) || (!doneP);\n"
" if(!doneP) posP.x += offNP.x * FXAA_QUALITY__P6;\n"
" if(!doneP) posP.y += offNP.y * FXAA_QUALITY__P6;\n"
"\n"
" if(doneNP) {\n"
" if(!doneN) lumaEndN = FxaaLuma(FxaaTexTop(tex, posN.xy));\n"
" if(!doneP) lumaEndP = FxaaLuma(FxaaTexTop(tex, posP.xy));\n"
" if(!doneN) lumaEndN = lumaEndN - lumaNN * 0.5;\n"
" if(!doneP) lumaEndP = lumaEndP - lumaNN * 0.5;\n"
" doneN = abs(lumaEndN) >= gradientScaled;\n"
" doneP = abs(lumaEndP) >= gradientScaled;\n"
" if(!doneN) posN.x -= offNP.x * FXAA_QUALITY__P7;\n"
" if(!doneN) posN.y -= offNP.y * FXAA_QUALITY__P7;\n"
" doneNP = (!doneN) || (!doneP);\n"
" if(!doneP) posP.x += offNP.x * FXAA_QUALITY__P7;\n"
" if(!doneP) posP.y += offNP.y * FXAA_QUALITY__P7;\n"
"\n"
" if(doneNP) {\n"
" if(!doneN) lumaEndN = FxaaLuma(FxaaTexTop(tex, posN.xy));\n"
" if(!doneP) lumaEndP = FxaaLuma(FxaaTexTop(tex, posP.xy));\n"
" if(!doneN) lumaEndN = lumaEndN - lumaNN * 0.5;\n"
" if(!doneP) lumaEndP = lumaEndP - lumaNN * 0.5;\n"
" doneN = abs(lumaEndN) >= gradientScaled;\n"
" doneP = abs(lumaEndP) >= gradientScaled;\n"
" if(!doneN) posN.x -= offNP.x * FXAA_QUALITY__P8;\n"
" if(!doneN) posN.y -= offNP.y * FXAA_QUALITY__P8;\n"
" doneNP = (!doneN) || (!doneP);\n"
" if(!doneP) posP.x += offNP.x * FXAA_QUALITY__P8;\n"
" if(!doneP) posP.y += offNP.y * FXAA_QUALITY__P8;\n"
"\n"
" if(doneNP) {\n"
" if(!doneN) lumaEndN = FxaaLuma(FxaaTexTop(tex, posN.xy));\n"
" if(!doneP) lumaEndP = FxaaLuma(FxaaTexTop(tex, posP.xy));\n"
" if(!doneN) lumaEndN = lumaEndN - lumaNN * 0.5;\n"
" if(!doneP) lumaEndP = lumaEndP - lumaNN * 0.5;\n"
" doneN = abs(lumaEndN) >= gradientScaled;\n"
" doneP = abs(lumaEndP) >= gradientScaled;\n"
" if(!doneN) posN.x -= offNP.x * FXAA_QUALITY__P9;\n"
" if(!doneN) posN.y -= offNP.y * FXAA_QUALITY__P9;\n"
" doneNP = (!doneN) || (!doneP);\n"
" if(!doneP) posP.x += offNP.x * FXAA_QUALITY__P9;\n"
" if(!doneP) posP.y += offNP.y * FXAA_QUALITY__P9;\n"
"\n"
" if(doneNP) {\n"
" if(!doneN) lumaEndN = FxaaLuma(FxaaTexTop(tex, posN.xy));\n"
" if(!doneP) lumaEndP = FxaaLuma(FxaaTexTop(tex, posP.xy));\n"
" if(!doneN) lumaEndN = lumaEndN - lumaNN * 0.5;\n"
" if(!doneP) lumaEndP = lumaEndP - lumaNN * 0.5;\n"
" doneN = abs(lumaEndN) >= gradientScaled;\n"
" doneP = abs(lumaEndP) >= gradientScaled;\n"
" if(!doneN) posN.x -= offNP.x * FXAA_QUALITY__P10;\n"
" if(!doneN) posN.y -= offNP.y * FXAA_QUALITY__P10;\n"
" doneNP = (!doneN) || (!doneP);\n"
" if(!doneP) posP.x += offNP.x * FXAA_QUALITY__P10;\n"
" if(!doneP) posP.y += offNP.y * FXAA_QUALITY__P10;\n"
"\n"
" if(doneNP) {\n"
" if(!doneN) lumaEndN = FxaaLuma(FxaaTexTop(tex, posN.xy));\n"
" if(!doneP) lumaEndP = FxaaLuma(FxaaTexTop(tex, posP.xy));\n"
" if(!doneN) lumaEndN = lumaEndN - lumaNN * 0.5;\n"
" if(!doneP) lumaEndP = lumaEndP - lumaNN * 0.5;\n"
" doneN = abs(lumaEndN) >= gradientScaled;\n"
" doneP = abs(lumaEndP) >= gradientScaled;\n"
" if(!doneN) posN.x -= offNP.x * FXAA_QUALITY__P11;\n"
" if(!doneN) posN.y -= offNP.y * FXAA_QUALITY__P11;\n"
" doneNP = (!doneN) || (!doneP);\n"
" if(!doneP) posP.x += offNP.x * FXAA_QUALITY__P11;\n"
" if(!doneP) posP.y += offNP.y * FXAA_QUALITY__P11;\n"
"\n"
" if(doneNP) {\n"
" if(!doneN) lumaEndN = FxaaLuma(FxaaTexTop(tex, posN.xy));\n"
" if(!doneP) lumaEndP = FxaaLuma(FxaaTexTop(tex, posP.xy));\n"
" if(!doneN) lumaEndN = lumaEndN - lumaNN * 0.5;\n"
" if(!doneP) lumaEndP = lumaEndP - lumaNN * 0.5;\n"
" doneN = abs(lumaEndN) >= gradientScaled;\n"
" doneP = abs(lumaEndP) >= gradientScaled;\n"
" if(!doneN) posN.x -= offNP.x * FXAA_QUALITY__P12;\n"
" if(!doneN) posN.y -= offNP.y * FXAA_QUALITY__P12;\n"
" doneNP = (!doneN) || (!doneP);\n"
" if(!doneP) posP.x += offNP.x * FXAA_QUALITY__P12;\n"
" if(!doneP) posP.y += offNP.y * FXAA_QUALITY__P12;\n"
" }}}}}}}}}}}\n"
"\n"
" float dstN = posM.x - posN.x;\n"
" float dstP = posP.x - posM.x;\n"
" if(!horzSpan) dstN = posM.y - posN.y;\n"
" if(!horzSpan) dstP = posP.y - posM.y;\n"
"\n"
" bool goodSpanN = (lumaEndN < 0.0) != lumaMLTZero;\n"
" float spanLength = (dstP + dstN);\n"
" bool goodSpanP = (lumaEndP < 0.0) != lumaMLTZero;\n"
" float spanLengthRcp = 1.0/spanLength;\n"
"\n"
" bool directionN = dstN < dstP;\n"
" float dst = min(dstN, dstP);\n"
" bool goodSpan = directionN ? goodSpanN : goodSpanP;\n"
" float subpixG = subpixF * subpixF;\n"
" float pixelOffset = (dst * (-spanLengthRcp)) + 0.5;\n"
" float subpixH = subpixG * fxaaSubpix;\n"
"\n"
" float pixelOffsetGood = goodSpan ? pixelOffset : 0.0;\n"
" float pixelOffsetSubpix = max(pixelOffsetGood, subpixH);\n"
" if(!horzSpan) posM.x += pixelOffsetSubpix * lengthSign;\n"
" if( horzSpan) posM.y += pixelOffsetSubpix * lengthSign;\n"
"\n"
" return float4(FxaaTexTop(tex, posM).xyz, lumaM);\n"
"}\n"
"\n"
"#if (FXAA_GLSL_130 == 1)\n"
"float4 FxaaPass(float4 FxaaColor, float2 uv0)\n"
"#else\n"
"float4 FxaaPass(float4 FxaaColor : COLOR0, float2 uv0 : TEXCOORD0)\n"
"#endif\n"
"{\n"
"\n"
" #if (SHADER_MODEL >= 0x400)\n"
" FxaaTex tex;\n"
" tex.tex = Texture;\n"
" tex.smpl = TextureSampler;\n"
"\n"
" Texture.GetDimensions(PixelSize.x, PixelSize.y);\n"
" FxaaColor = FxaaPixelShader(uv0, tex, 1.0/PixelSize.xy, FxaaSubpixMax, FxaaEdgeThreshold, FxaaEdgeThresholdMin);\n"
"\n"
" #elif (FXAA_GLSL_130 == 1)\n"
"\n"
" vec2 PixelSize = textureSize(TextureSampler, 0);\n"
" FxaaColor = FxaaPixelShader(uv0, TextureSampler, 1.0/PixelSize.xy, FxaaSubpixMax, FxaaEdgeThreshold, FxaaEdgeThresholdMin);\n"
"\n"
" #else\n"
" FxaaTex tex;\n"
" tex = TextureSampler;\n"
" FxaaColor = FxaaPixelShader(uv0, tex, PixelSize.xy, FxaaSubpixMax, FxaaEdgeThreshold, FxaaEdgeThresholdMin);\n"
" #endif\n"
"\n"
" return FxaaColor;\n"
"}\n"
"\n"
/* Entry points: GLSL ps_main() first, HLSL ps_main(VS_OUTPUT) in the #else branch. */
"/*------------------------------------------------------------------------------\n"
" [MAIN() & COMBINE PASS CODE SECTION]\n"
"------------------------------------------------------------------------------*/\n"
"#if (FXAA_GLSL_130 == 1)\n"
"\n"
"void ps_main()\n"
"{\n"
" vec4 color = texture(TextureSampler, PSin.t);\n"
" color = PreGammaPass(color, PSin.t);\n"
" color = FxaaPass(color, PSin.t);\n"
"\n"
" SV_Target0 = color;\n"
"}\n"
"\n"
"#else\n"
"\n"
"PS_OUTPUT ps_main(VS_OUTPUT input)\n"
"{\n"
" PS_OUTPUT output;\n"
"\n"
" #if (SHADER_MODEL >= 0x400)\n"
" float4 color = Texture.Sample(TextureSampler, input.t);\n"
"\n"
" color = PreGammaPass(color, input.t);\n"
" color = FxaaPass(color, input.t);\n"
" #else\n"
" float4 color = tex2D(TextureSampler, input.t);\n"
"\n"
" color = PreGammaPass(color, input.t);\n"
" color = FxaaPass(color, input.t);\n"
" #endif\n"
"\n"
" output.c = color;\n"
" \n"
" return output;\n"
"}\n"
"\n"
"#endif\n"
"\n"
"#endif\n"
;