diff --git a/linux_various/glsl2h.pl b/linux_various/glsl2h.pl index 5692d6ceb7..ecef87f2ae 100755 --- a/linux_various/glsl2h.pl +++ b/linux_various/glsl2h.pl @@ -20,13 +20,11 @@ eval { print "Disable MD5\n"; }; -my @gsdx_res = qw/convert.glsl interlace.glsl merge.glsl shadeboost.glsl tfx.glsl fxaa.fx/; +# Keep the old FXAA for now +my @gsdx_res = qw/convert.glsl interlace.glsl merge.glsl shadeboost.glsl tfx.glsl old_fxaa.fx/; my $gsdx_path = File::Spec->catdir(dirname(abs_path($0)), "..", "plugins", "GSdx", "res"); my $gsdx_out = File::Spec->catdir($gsdx_path, "glsl_source.h"); -# Keep the old FXAA for now -print "Warning: the rebuilding of GSdx ogl shader was temporary disabled\n"; -print "It will be reenabled when we got time to test Asmodean new fxaa shader\n"; -#glsl2h($gsdx_path, $gsdx_out, \@gsdx_res); +glsl2h($gsdx_path, $gsdx_out, \@gsdx_res); my @zz_res = qw/ps2hw_gl4.glsl/; my $zz_path = File::Spec->catdir(dirname(abs_path($0)), "..", "plugins", "zzogl-pg", "opengl"); diff --git a/plugins/GSdx/GSDeviceOGL.cpp b/plugins/GSdx/GSDeviceOGL.cpp index 3433389ebd..0fc30f12cf 100644 --- a/plugins/GSdx/GSDeviceOGL.cpp +++ b/plugins/GSdx/GSDeviceOGL.cpp @@ -286,7 +286,7 @@ bool GSDeviceOGL::Create(GSWnd* wnd) fxaa_macro += "#define FXAA_GATHER4_ALPHA 1\n"; } m_fxaa.cb = new GSUniformBufferOGL(g_fxaa_cb_index, sizeof(FXAAConstantBuffer)); - m_fxaa.ps = m_shader->Compile("fxaa.fx", "ps_main", GL_FRAGMENT_SHADER, fxaa_fx, fxaa_macro); + m_fxaa.ps = m_shader->Compile("fxaa.fx", "ps_main", GL_FRAGMENT_SHADER, old_fxaa_fx, fxaa_macro); // **************************************************************** // DATE diff --git a/plugins/GSdx/GSDeviceOGL.h b/plugins/GSdx/GSDeviceOGL.h index c33276b2ab..94a85d7141 100644 --- a/plugins/GSdx/GSDeviceOGL.h +++ b/plugins/GSdx/GSDeviceOGL.h @@ -476,7 +476,7 @@ class GSDeviceOGL : public GSDevice struct { GLuint vs; // program object - GLuint ps[9]; // program object + GLuint ps[10]; // program object GLuint ln; // sampler object 
GLuint pt; // sampler object GSDepthStencilOGL* dss; diff --git a/plugins/GSdx/res/convert.glsl b/plugins/GSdx/res/convert.glsl index b2f715e15a..381e9a42b0 100644 --- a/plugins/GSdx/res/convert.glsl +++ b/plugins/GSdx/res/convert.glsl @@ -111,15 +111,6 @@ vec4 sample_c() return texture(TextureSampler, PSin_t ); } -//uniform vec4 mask[4] = vec4[4] -//( -// vec4(1, 0, 0, 0), -// vec4(0, 1, 0, 0), -// vec4(0, 0, 1, 0), -// vec4(1, 1, 1, 0) -//); - - vec4 ps_crt(uint i) { vec4 mask[4] = vec4[4] @@ -132,12 +123,12 @@ vec4 ps_crt(uint i) return sample_c() * clamp((mask[i] + 0.5f), 0.0f, 1.0f); } -vec4 ps_scanlines(int i) +vec4 ps_scanlines(uint i) { - float4 mask[2] = + vec4 mask[2] = { - float4(1, 1, 1, 0), - float4(0, 0, 0, 0) + vec4(1, 1, 1, 0), + vec4(0, 0, 0, 0) }; return sample_c() * clamp((mask[i] + 0.5f), 0.0f, 1.0f); @@ -188,7 +179,7 @@ void ps_main5() // scanlines #ifdef ps_main6 void ps_main6() // diagonal { - uvec4 p = uvec4(PSin_p); + highp uvec4 p = uvec4(PSin_p); vec4 c = ps_crt((p.x + (p.y % 3u)) % 3u); @@ -199,7 +190,7 @@ void ps_main6() // diagonal #ifdef ps_main8 void ps_main8() // triangular { - uvec4 p = uvec4(PSin_p); + highp uvec4 p = uvec4(PSin_p); vec4 c = ps_crt(((p.x + ((p.y >> 1u) & 1u) * 3u) >> 1u) % 3u); @@ -207,6 +198,26 @@ void ps_main8() // triangular } #endif +#ifdef ps_main9 +void ps_main9() +{ + + const float PI = 3.14159265359f; + + vec2 texdim = vec2(textureSize(TextureSampler, 0)); + + vec4 c; + if (dFdy(PSin_t.y) * PSin_t.y > 0.5f) { + c = sample_c(); + } else { + float factor = (0.9f - 0.4f * cos(2.0f * PI * PSin_t.y * texdim.y)); + c = factor * texture(TextureSampler, vec2(PSin_t.x, (floor(PSin_t.y * texdim.y) + 0.5f) / texdim.y)); + } + + SV_Target0 = c; +} +#endif + // Used for DATE (stencil) // DATM == 1 #ifdef ps_main2 diff --git a/plugins/GSdx/res/glsl_source.h b/plugins/GSdx/res/glsl_source.h index 64cd1b48aa..0d234eed6f 100644 --- a/plugins/GSdx/res/glsl_source.h +++ b/plugins/GSdx/res/glsl_source.h @@ -136,15 +136,6 @@ 
static const char* convert_glsl = " return texture(TextureSampler, PSin_t );\n" "}\n" "\n" - "//uniform vec4 mask[4] = vec4[4]\n" - "//(\n" - "// vec4(1, 0, 0, 0),\n" - "// vec4(0, 1, 0, 0),\n" - "// vec4(0, 0, 1, 0),\n" - "// vec4(1, 1, 1, 0)\n" - "//);\n" - "\n" - "\n" "vec4 ps_crt(uint i)\n" "{\n" " vec4 mask[4] = vec4[4]\n" @@ -157,12 +148,12 @@ static const char* convert_glsl = " return sample_c() * clamp((mask[i] + 0.5f), 0.0f, 1.0f);\n" "}\n" "\n" - "vec4 ps_scanlines(int i)\n" + "vec4 ps_scanlines(uint i)\n" "{\n" - " float4 mask[2] =\n" + " vec4 mask[2] =\n" " {\n" - " float4(1, 1, 1, 0),\n" - " float4(0, 0, 0, 0)\n" + " vec4(1, 1, 1, 0),\n" + " vec4(0, 0, 0, 0)\n" " };\n" "\n" " return sample_c() * clamp((mask[i] + 0.5f), 0.0f, 1.0f);\n" @@ -213,7 +204,7 @@ static const char* convert_glsl = "#ifdef ps_main6\n" "void ps_main6() // diagonal\n" "{\n" - " uvec4 p = uvec4(PSin_p);\n" + " highp uvec4 p = uvec4(PSin_p);\n" "\n" " vec4 c = ps_crt((p.x + (p.y % 3u)) % 3u);\n" "\n" @@ -224,7 +215,7 @@ static const char* convert_glsl = "#ifdef ps_main8\n" "void ps_main8() // triangular\n" "{\n" - " uvec4 p = uvec4(PSin_p);\n" + " highp uvec4 p = uvec4(PSin_p);\n" "\n" " vec4 c = ps_crt(((p.x + ((p.y >> 1u) & 1u) * 3u) >> 1u) % 3u);\n" "\n" @@ -232,6 +223,26 @@ static const char* convert_glsl = "}\n" "#endif\n" "\n" + "#ifdef ps_main9\n" + "void ps_main9()\n" + "{\n" + "\n" + " const float PI = 3.14159265359f;\n" + "\n" + " vec2 texdim = vec2(textureSize(TextureSampler, 0)); \n" + "\n" + " vec4 c;\n" + " if (dFdy(PSin_t.y) * PSin_t.y > 0.5f) {\n" + " c = sample_c(); \n" + " } else {\n" + " float factor = (0.9f - 0.4f * cos(2.0f * PI * PSin_t.y * texdim.y));\n" + " c = factor * texture(TextureSampler, vec2(PSin_t.x, (floor(PSin_t.y * texdim.y) + 0.5f) / texdim.y));\n" + " }\n" + "\n" + " SV_Target0 = c;\n" + "}\n" + "#endif\n" + "\n" "// Used for DATE (stencil)\n" "// DATM == 1\n" "#ifdef ps_main2\n" @@ -1653,7 +1664,7 @@ static const char* tfx_glsl = "#endif\n" ; 
-static const char* fxaa_fx = +static const char* old_fxaa_fx = "#if defined(SHADER_MODEL) || defined(FXAA_GLSL_130) // make safe to include in resource file to enforce dependency\n" "\n" "#ifndef FXAA_GLSL_130\n" diff --git a/plugins/GSdx/res/old_fxaa.fx b/plugins/GSdx/res/old_fxaa.fx new file mode 100644 index 0000000000..dab7047852 --- /dev/null +++ b/plugins/GSdx/res/old_fxaa.fx @@ -0,0 +1,1254 @@ +#if defined(SHADER_MODEL) || defined(FXAA_GLSL_130) // make safe to include in resource file to enforce dependency + +#ifndef FXAA_GLSL_130 + #define FXAA_GLSL_130 0 +#endif + +#define FXAA_PC 1 +#define FXAA_QUALITY_SUBPIX 0.0 + +#ifdef SHADER_MODEL +#if SHADER_MODEL >= 0x400 + +#if SHADER_MODEL >= 0x500 + #define FXAA_HLSL_5 1 +#else + #define FXAA_HLSL_4 1 +#endif + +Texture2D Texture; +SamplerState TextureSampler; + +cbuffer cb0 +{ + float4 _rcpFrame; + float4 _rcpFrameOpt; +}; + +struct PS_INPUT +{ + float4 p : SV_Position; + float2 t : TEXCOORD0; +}; + +struct PS_OUTPUT +{ + float4 c : SV_Target0; +}; + +#elif SHADER_MODEL <= 0x300 + +#define FXAA_HLSL_3 1 + +sampler Texture : register(s0); + +float4 _rcpFrame : register(c0); +float4 _rcpFrameOpt : register(c1); + +struct PS_INPUT +{ +#if SHADER_MODEL < 0x300 + float4 p : TEXCOORD1; +#else + float4 p : VPOS; +#endif + float2 t : TEXCOORD0; +}; + +struct PS_OUTPUT +{ + float4 c : COLOR; +}; + +#endif +#endif + + +#if (FXAA_GLSL_130 == 1) +struct vertex_basic +{ + vec4 p; + vec2 t; +}; + +#ifdef DISABLE_GL42 +layout(std140) uniform cb13 +#else +layout(std140, binding = 13) uniform cb13 +#endif +{ + vec4 _rcpFrame; + vec4 _rcpFrameOpt; +}; + +#ifdef ENABLE_BINDLESS_TEX +layout(bindless_sampler, location = 0) uniform sampler2D TextureSampler; +#else +#ifdef DISABLE_GL42 +uniform sampler2D TextureSampler; +#else +layout(binding = 0) uniform sampler2D TextureSampler; +#endif +#endif + +#if !pGL_ES && __VERSION__ > 140 + +in SHADER +{ + vec4 p; + vec2 t; +} PSin; + +#define PSin_p (PSin.p) +#define PSin_t (PSin.t) + 
+#else + +#ifdef DISABLE_SSO +in vec4 SHADERp; +in vec2 SHADERt; +#else +layout(location = 0) in vec4 SHADERp; +layout(location = 1) in vec2 SHADERt; +#endif +#define PSin_p SHADERp +#define PSin_t SHADERt + +#endif + +layout(location = 0) out vec4 SV_Target0; + +#endif + +/*============================================================================ + + + NVIDIA FXAA 3.10 by TIMOTHY LOTTES + + +------------------------------------------------------------------------------ +COPYRIGHT (C) 2010, 2011 NVIDIA CORPORATION. ALL RIGHTS RESERVED. +------------------------------------------------------------------------------ +TO THE MAXIMUM EXTENT PERMITTED BY APPLICABLE LAW, THIS SOFTWARE IS PROVIDED +*AS IS* AND NVIDIA AND ITS SUPPLIERS DISCLAIM ALL WARRANTIES, EITHER EXPRESS +OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, IMPLIED WARRANTIES OF +MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. IN NO EVENT SHALL NVIDIA +OR ITS SUPPLIERS BE LIABLE FOR ANY SPECIAL, INCIDENTAL, INDIRECT, OR +CONSEQUENTIAL DAMAGES WHATSOEVER (INCLUDING, WITHOUT LIMITATION, DAMAGES FOR +LOSS OF BUSINESS PROFITS, BUSINESS INTERRUPTION, LOSS OF BUSINESS INFORMATION, +OR ANY OTHER PECUNIARY LOSS) ARISING OUT OF THE USE OF OR INABILITY TO USE +THIS SOFTWARE, EVEN IF NVIDIA HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH +DAMAGES. + +------------------------------------------------------------------------------ + INTEGRATION CHECKLIST +------------------------------------------------------------------------------ +(1.) +In the shader source, +setup defines for the desired configuration. +Example, + + #define FXAA_PC 1 + #define FXAA_HLSL_3 1 + #define FXAA_QUALITY_PRESET 12 + #define FXAA_QUALITY_EDGE_THRESHOLD (1.0/6.0) + #define FXAA_QUALITY_EDGE_THRESHOLD_MIN (1.0/12.0) + +(2.) +Then include this file, + + #include "Fxaa3.h" + +(3.) +Then call the FXAA pixel shader from within your desired shader, + + return FxaaPixelShader(pos, posPos, tex, rcpFrame, rcpFrameOpt); + +(4.) 
+Ensure pass prior to FXAA outputs RGBL. +See next section. + +(5.) +Setup engine to provide "rcpFrame" and "rcpFrameOpt" constants. +Not using constants will result in a performance loss. + + // {x_} = 1.0/screenWidthInPixels + // {_y} = 1.0/screenHeightInPixels + float2 rcpFrame + + // This must be from a constant/uniform. + // {x___} = 2.0/screenWidthInPixels + // {_y__} = 2.0/screenHeightInPixels + // {__z_} = 0.5/screenWidthInPixels + // {___w} = 0.5/screenHeightInPixels + float4 rcpFrameOpt + +(5.a.) +Optionally change to this for sharper FXAA Console, + + // This must be from a constant/uniform. + // {x___} = 2.0/screenWidthInPixels + // {_y__} = 2.0/screenHeightInPixels + // {__z_} = 0.333/screenWidthInPixels + // {___w} = 0.333/screenHeightInPixels + float4 rcpFrameOpt + +(6.) +Have FXAA vertex shader run as a full screen triangle, +and output "pos" and "posPos" such that inputs in the pixel shader provide, + + // {xy} = center of pixel + float2 pos, + + // {xy__} = upper left of pixel + // {__zw} = lower right of pixel + float4 posPos, + +(7.) +Ensure the texture sampler used by FXAA is set to bilinear filtering. + + +------------------------------------------------------------------------------ + INTEGRATION - RGBL AND COLORSPACE +------------------------------------------------------------------------------ +FXAA3 requires RGBL as input. + +RGB should be LDR (low dynamic range). +Specifically do FXAA after tonemapping. + +RGB data as returned by a texture fetch can be linear or non-linear. +Note an "sRGB format" texture counts as linear, +because the result of a texture fetch is linear data. +Regular "RGBA8" textures in the sRGB colorspace are non-linear. + +Luma must be stored in the alpha channel prior to running FXAA. +This luma should be in a perceptual space (could be gamma 2.0). 
+Example pass before FXAA where output is gamma 2.0 encoded, + + color.rgb = ToneMap(color.rgb); // linear color output + color.rgb = sqrt(color.rgb); // gamma 2.0 color output + return color; + +To use FXAA, + + color.rgb = ToneMap(color.rgb); // linear color output + color.rgb = sqrt(color.rgb); // gamma 2.0 color output + color.a = dot(color.rgb, float3(0.299, 0.587, 0.114)); // compute luma + return color; + +Another example where output is linear encoded, +say for instance writing to an sRGB formatted render target, +where the render target does the conversion back to sRGB after blending, + + color.rgb = ToneMap(color.rgb); // linear color output + return color; + +To use FXAA, + + color.rgb = ToneMap(color.rgb); // linear color output + color.a = sqrt(dot(color.rgb, float3(0.299, 0.587, 0.114))); // compute luma + return color; + +Getting luma correct is required for the algorithm to work correctly. + + +------------------------------------------------------------------------------ + BEING LINEARLY CORRECT? +------------------------------------------------------------------------------ +Applying FXAA to a framebuffer with linear RGB color will look worse. +This is very counter intuitive, but happens to be true in this case. +The reason is because dithering artifacts will be more visible +in a linear colorspace. + + +------------------------------------------------------------------------------ + COMPLEX INTEGRATION +------------------------------------------------------------------------------ +Q. What if the engine is blending into RGB before wanting to run FXAA? + +A. In the last opaque pass prior to FXAA, + have the pass write out luma into alpha. + Then blend into RGB only. + FXAA should be able to run ok + assuming the blending pass did not add any aliasing. + This should be the common case for particles and common blending passes. 
+ +============================================================================*/ + +/*============================================================================ + + INTEGRATION KNOBS + +============================================================================*/ +// +// FXAA_PS3 and FXAA_360 choose the console algorithm (FXAA3 CONSOLE). +// FXAA_360_OPT is a prototype for the new optimized 360 version. +// +// 1 = Use API. +// 0 = Don't use API. +// +/*--------------------------------------------------------------------------*/ +#ifndef FXAA_PS3 + #define FXAA_PS3 0 +#endif +/*--------------------------------------------------------------------------*/ +#ifndef FXAA_360 + #define FXAA_360 0 +#endif +/*--------------------------------------------------------------------------*/ +#ifndef FXAA_360_OPT + #define FXAA_360_OPT 0 +#endif +/*==========================================================================*/ +#ifndef FXAA_PC + // + // FXAA Quality + // The high quality PC algorithm. + // + #define FXAA_PC 0 +#endif +/*--------------------------------------------------------------------------*/ +#ifndef FXAA_PC_CONSOLE + // + // The console algorithm for PC is included + // for developers targeting really low spec machines. 
+ // + #define FXAA_PC_CONSOLE 0 +#endif +/*--------------------------------------------------------------------------*/ +#ifndef FXAA_GLSL_120 + #define FXAA_GLSL_120 0 +#endif +/*--------------------------------------------------------------------------*/ +#ifndef FXAA_GLSL_130 + #define FXAA_GLSL_130 0 +#endif +/*--------------------------------------------------------------------------*/ +#ifndef FXAA_HLSL_3 + #define FXAA_HLSL_3 0 +#endif +/*--------------------------------------------------------------------------*/ +#ifndef FXAA_HLSL_4 + #define FXAA_HLSL_4 0 +#endif +/*--------------------------------------------------------------------------*/ +#ifndef FXAA_HLSL_5 + #define FXAA_HLSL_5 0 +#endif +/*==========================================================================*/ +#ifndef FXAA_EARLY_EXIT + // + // Controls algorithm's early exit path. + // On PS3 turning this on adds 2 cycles to the shader. + // On 360 turning this off adds 10ths of a millisecond to the shader. + // Turning this off on console will result in a more blurry image. + // So this defaults to on. + // + // 1 = On. + // 0 = Off. + // + #define FXAA_EARLY_EXIT 1 +#endif +/*--------------------------------------------------------------------------*/ +#ifndef FXAA_DISCARD + // + // Only valid for PC OpenGL currently. + // + // 1 = Use discard on pixels which don't need AA. + // For APIs which enable concurrent TEX+ROP from same surface. + // 0 = Return unchanged color on pixels which don't need AA. + // + #define FXAA_DISCARD 0 +#endif +/*--------------------------------------------------------------------------*/ +#ifndef FXAA_FAST_PIXEL_OFFSET + // + // Used for GLSL 120 only. 
+ // + // 1 = GL API supports fast pixel offsets + // 0 = do not use fast pixel offsets + // + #ifdef GL_EXT_gpu_shader4 + #define FXAA_FAST_PIXEL_OFFSET 1 + #endif + #ifdef GL_NV_gpu_shader5 + #define FXAA_FAST_PIXEL_OFFSET 1 + #endif + #ifdef GL_ARB_gpu_shader5 + #define FXAA_FAST_PIXEL_OFFSET 1 + #endif + #ifndef FXAA_FAST_PIXEL_OFFSET + #define FXAA_FAST_PIXEL_OFFSET 0 + #endif +#endif +/*--------------------------------------------------------------------------*/ +#ifndef FXAA_GATHER4_ALPHA + // + // 1 = API supports gather4 on alpha channel. + // 0 = API does not support gather4 on alpha channel. + // + #if (FXAA_HLSL_5 == 1) + #define FXAA_GATHER4_ALPHA 1 + #endif + #ifdef GL_ARB_gpu_shader5 + #define FXAA_GATHER4_ALPHA 1 + #endif + #ifdef GL_NV_gpu_shader5 + #define FXAA_GATHER4_ALPHA 1 + #endif + #ifndef FXAA_GATHER4_ALPHA + #define FXAA_GATHER4_ALPHA 0 + #endif +#endif + +/*============================================================================ + FXAA CONSOLE - TUNING KNOBS +============================================================================*/ +#ifndef FXAA_CONSOLE_EDGE_SHARPNESS + // + // Controls the sharpness of edges. + // + // Due to the PS3 being ALU bound, + // there are only two safe values here: 4 and 8. + // These options use the shaders ability to a free *|/ by 4|8. + // + // 8.0 is sharper + // 4.0 is softer + // 2.0 is really soft (good for vector graphics inputs) + // + #if 1 + #define FXAA_CONSOLE_EDGE_SHARPNESS 8.0 + #endif + #if 0 + #define FXAA_CONSOLE_EDGE_SHARPNESS 4.0 + #endif + #if 0 + #define FXAA_CONSOLE_EDGE_SHARPNESS 2.0 + #endif +#endif +/*--------------------------------------------------------------------------*/ +#ifndef FXAA_CONSOLE_EDGE_THRESHOLD + // + // The minimum amount of local contrast required to apply algorithm. + // The console setting has a different mapping than the quality setting. + // + // This only applies when FXAA_EARLY_EXIT is 1. 
+ // + // Due to the PS3 being ALU bound, + // there are only two safe values here: 0.25 and 0.125. + // These options use the shaders ability to a free *|/ by 4|8. + // + // 0.125 leaves less aliasing, but is softer + // 0.25 leaves more aliasing, and is sharper + // + #if 1 + #define FXAA_CONSOLE_EDGE_THRESHOLD 0.125 + #else + #define FXAA_CONSOLE_EDGE_THRESHOLD 0.25 + #endif +#endif +/*--------------------------------------------------------------------------*/ +#ifndef FXAA_CONSOLE_EDGE_THRESHOLD_MIN + // + // Trims the algorithm from processing darks. + // The console setting has a different mapping than the quality setting. + // + // This only applies when FXAA_EARLY_EXIT is 1. + // + // This does not apply to PS3. + // PS3 was simplified to avoid more shader instructions. + // + #define FXAA_CONSOLE_EDGE_THRESHOLD_MIN 0.05 +#endif + +/*============================================================================ + FXAA QUALITY - TUNING KNOBS +============================================================================*/ +#ifndef FXAA_QUALITY_EDGE_THRESHOLD + // + // The minimum amount of local contrast required to apply algorithm. + // + // 1/3 - too little + // 1/4 - low quality + // 1/6 - default + // 1/8 - high quality (default) + // 1/16 - overkill + // + #define FXAA_QUALITY_EDGE_THRESHOLD (1.0/6.0) +#endif +/*--------------------------------------------------------------------------*/ +#ifndef FXAA_QUALITY_EDGE_THRESHOLD_MIN + // + // Trims the algorithm from processing darks. + // + // 1/32 - visible limit + // 1/16 - high quality + // 1/12 - upper limit (default, the start of visible unfiltered edges) + // + #define FXAA_QUALITY_EDGE_THRESHOLD_MIN (1.0/12.0) +#endif +/*--------------------------------------------------------------------------*/ +#ifndef FXAA_QUALITY_SUBPIX + // + // Choose the amount of sub-pixel aliasing removal. 
+ // + // 1 - upper limit (softer) + // 3/4 - default amount of filtering + // 1/2 - lower limit (sharper, less sub-pixel aliasing removal) + // + #define FXAA_QUALITY_SUBPIX (3.0/4.0) +#endif +/*--------------------------------------------------------------------------*/ +#ifndef FXAA_QUALITY_PRESET + // + // Choose the quality preset. + // + // OPTIONS + // ----------------------------------------------------------------------- + // 10 to 15 - default medium dither (10=fastest, 15=highest quality) + // 20 to 29 - less dither, more expensive (20=fastest, 29=highest quality) + // 39 - no dither, very expensive + // + // NOTES + // ----------------------------------------------------------------------- + // 12 = slightly faster then FXAA 3.9 and higher edge quality (default) + // 13 = about same speed as FXAA 3.9 and better than 12 + // 23 = closest to FXAA 3.9 visually and performance wise + // _ = the lowest digit is directly related to performance + // _ = the highest digit is directly related to style + // + #define FXAA_QUALITY_PRESET 12 +#endif + + +/*============================================================================ + + FXAA QUALITY - PRESETS + +============================================================================*/ + +/*============================================================================ + FXAA QUALITY - MEDIUM DITHER PRESETS +============================================================================*/ +#if (FXAA_QUALITY_PRESET == 10) + #define FXAA_QUALITY_PS 3 + #define FXAA_QUALITY_P0 1.5 + #define FXAA_QUALITY_P1 3.0 + #define FXAA_QUALITY_P2 12.0 +#endif +/*--------------------------------------------------------------------------*/ +#if (FXAA_QUALITY_PRESET == 11) + #define FXAA_QUALITY_PS 4 + #define FXAA_QUALITY_P0 1.0 + #define FXAA_QUALITY_P1 1.5 + #define FXAA_QUALITY_P2 3.0 + #define FXAA_QUALITY_P3 12.0 +#endif +/*--------------------------------------------------------------------------*/ +#if (FXAA_QUALITY_PRESET == 
12) + #define FXAA_QUALITY_PS 5 + #define FXAA_QUALITY_P0 1.0 + #define FXAA_QUALITY_P1 1.5 + #define FXAA_QUALITY_P2 2.0 + #define FXAA_QUALITY_P3 4.0 + #define FXAA_QUALITY_P4 12.0 +#endif +/*--------------------------------------------------------------------------*/ +#if (FXAA_QUALITY_PRESET == 13) + #define FXAA_QUALITY_PS 6 + #define FXAA_QUALITY_P0 1.0 + #define FXAA_QUALITY_P1 1.5 + #define FXAA_QUALITY_P2 2.0 + #define FXAA_QUALITY_P3 2.0 + #define FXAA_QUALITY_P4 4.0 + #define FXAA_QUALITY_P5 12.0 +#endif +/*--------------------------------------------------------------------------*/ +#if (FXAA_QUALITY_PRESET == 14) + #define FXAA_QUALITY_PS 7 + #define FXAA_QUALITY_P0 1.0 + #define FXAA_QUALITY_P1 1.5 + #define FXAA_QUALITY_P2 2.0 + #define FXAA_QUALITY_P3 2.0 + #define FXAA_QUALITY_P4 2.0 + #define FXAA_QUALITY_P5 4.0 + #define FXAA_QUALITY_P6 12.0 +#endif +/*--------------------------------------------------------------------------*/ +#if (FXAA_QUALITY_PRESET == 15) + #define FXAA_QUALITY_PS 8 + #define FXAA_QUALITY_P0 1.0 + #define FXAA_QUALITY_P1 1.5 + #define FXAA_QUALITY_P2 2.0 + #define FXAA_QUALITY_P3 2.0 + #define FXAA_QUALITY_P4 2.0 + #define FXAA_QUALITY_P5 2.0 + #define FXAA_QUALITY_P6 4.0 + #define FXAA_QUALITY_P7 12.0 +#endif + +/*============================================================================ + FXAA QUALITY - LOW DITHER PRESETS +============================================================================*/ +#if (FXAA_QUALITY_PRESET == 20) + #define FXAA_QUALITY_PS 3 + #define FXAA_QUALITY_P0 1.5 + #define FXAA_QUALITY_P1 2.0 + #define FXAA_QUALITY_P2 8.0 +#endif +/*--------------------------------------------------------------------------*/ +#if (FXAA_QUALITY_PRESET == 21) + #define FXAA_QUALITY_PS 4 + #define FXAA_QUALITY_P0 1.0 + #define FXAA_QUALITY_P1 1.5 + #define FXAA_QUALITY_P2 2.0 + #define FXAA_QUALITY_P3 8.0 +#endif +/*--------------------------------------------------------------------------*/ +#if 
(FXAA_QUALITY_PRESET == 22) + #define FXAA_QUALITY_PS 5 + #define FXAA_QUALITY_P0 1.0 + #define FXAA_QUALITY_P1 1.5 + #define FXAA_QUALITY_P2 2.0 + #define FXAA_QUALITY_P3 2.0 + #define FXAA_QUALITY_P4 8.0 +#endif +/*--------------------------------------------------------------------------*/ +#if (FXAA_QUALITY_PRESET == 23) + #define FXAA_QUALITY_PS 6 + #define FXAA_QUALITY_P0 1.0 + #define FXAA_QUALITY_P1 1.5 + #define FXAA_QUALITY_P2 2.0 + #define FXAA_QUALITY_P3 2.0 + #define FXAA_QUALITY_P4 2.0 + #define FXAA_QUALITY_P5 8.0 +#endif +/*--------------------------------------------------------------------------*/ +#if (FXAA_QUALITY_PRESET == 24) + #define FXAA_QUALITY_PS 7 + #define FXAA_QUALITY_P0 1.0 + #define FXAA_QUALITY_P1 1.5 + #define FXAA_QUALITY_P2 2.0 + #define FXAA_QUALITY_P3 2.0 + #define FXAA_QUALITY_P4 2.0 + #define FXAA_QUALITY_P5 3.0 + #define FXAA_QUALITY_P6 8.0 +#endif +/*--------------------------------------------------------------------------*/ +#if (FXAA_QUALITY_PRESET == 25) + #define FXAA_QUALITY_PS 8 + #define FXAA_QUALITY_P0 1.0 + #define FXAA_QUALITY_P1 1.5 + #define FXAA_QUALITY_P2 2.0 + #define FXAA_QUALITY_P3 2.0 + #define FXAA_QUALITY_P4 2.0 + #define FXAA_QUALITY_P5 2.0 + #define FXAA_QUALITY_P6 4.0 + #define FXAA_QUALITY_P7 8.0 +#endif +/*--------------------------------------------------------------------------*/ +#if (FXAA_QUALITY_PRESET == 26) + #define FXAA_QUALITY_PS 9 + #define FXAA_QUALITY_P0 1.0 + #define FXAA_QUALITY_P1 1.5 + #define FXAA_QUALITY_P2 2.0 + #define FXAA_QUALITY_P3 2.0 + #define FXAA_QUALITY_P4 2.0 + #define FXAA_QUALITY_P5 2.0 + #define FXAA_QUALITY_P6 2.0 + #define FXAA_QUALITY_P7 4.0 + #define FXAA_QUALITY_P8 8.0 +#endif +/*--------------------------------------------------------------------------*/ +#if (FXAA_QUALITY_PRESET == 27) + #define FXAA_QUALITY_PS 10 + #define FXAA_QUALITY_P0 1.0 + #define FXAA_QUALITY_P1 1.5 + #define FXAA_QUALITY_P2 2.0 + #define FXAA_QUALITY_P3 2.0 + #define FXAA_QUALITY_P4 
2.0 + #define FXAA_QUALITY_P5 2.0 + #define FXAA_QUALITY_P6 2.0 + #define FXAA_QUALITY_P7 2.0 + #define FXAA_QUALITY_P8 4.0 + #define FXAA_QUALITY_P9 8.0 +#endif +/*--------------------------------------------------------------------------*/ +#if (FXAA_QUALITY_PRESET == 28) + #define FXAA_QUALITY_PS 11 + #define FXAA_QUALITY_P0 1.0 + #define FXAA_QUALITY_P1 1.5 + #define FXAA_QUALITY_P2 2.0 + #define FXAA_QUALITY_P3 2.0 + #define FXAA_QUALITY_P4 2.0 + #define FXAA_QUALITY_P5 2.0 + #define FXAA_QUALITY_P6 2.0 + #define FXAA_QUALITY_P7 2.0 + #define FXAA_QUALITY_P8 2.0 + #define FXAA_QUALITY_P9 4.0 + #define FXAA_QUALITY_P10 8.0 +#endif +/*--------------------------------------------------------------------------*/ +#if (FXAA_QUALITY_PRESET == 29) + #define FXAA_QUALITY_PS 12 + #define FXAA_QUALITY_P0 1.0 + #define FXAA_QUALITY_P1 1.5 + #define FXAA_QUALITY_P2 2.0 + #define FXAA_QUALITY_P3 2.0 + #define FXAA_QUALITY_P4 2.0 + #define FXAA_QUALITY_P5 2.0 + #define FXAA_QUALITY_P6 2.0 + #define FXAA_QUALITY_P7 2.0 + #define FXAA_QUALITY_P8 2.0 + #define FXAA_QUALITY_P9 2.0 + #define FXAA_QUALITY_P10 4.0 + #define FXAA_QUALITY_P11 8.0 +#endif + +/*============================================================================ + FXAA QUALITY - EXTREME QUALITY +============================================================================*/ +#if (FXAA_QUALITY_PRESET == 39) + #define FXAA_QUALITY_PS 12 + #define FXAA_QUALITY_P0 1.0 + #define FXAA_QUALITY_P1 1.0 + #define FXAA_QUALITY_P2 1.0 + #define FXAA_QUALITY_P3 1.0 + #define FXAA_QUALITY_P4 1.0 + #define FXAA_QUALITY_P5 1.5 + #define FXAA_QUALITY_P6 2.0 + #define FXAA_QUALITY_P7 2.0 + #define FXAA_QUALITY_P8 2.0 + #define FXAA_QUALITY_P9 2.0 + #define FXAA_QUALITY_P10 4.0 + #define FXAA_QUALITY_P11 8.0 +#endif + + + +/*============================================================================ + + API PORTING + +============================================================================*/ +#if (FXAA_GLSL_120 == 1) + // 
Requires, + // #version 120 + // And at least, + // #extension GL_EXT_gpu_shader4 : enable + // (or set FXAA_FAST_PIXEL_OFFSET 1 to work like DX9) + #define half float + #define half2 vec2 + #define half3 vec3 + #define half4 vec4 + #define int2 ivec2 + #define float2 vec2 + #define float3 vec3 + #define float4 vec4 + #define FxaaInt2 ivec2 + #define FxaaFloat2 vec2 + #define FxaaFloat3 vec3 + #define FxaaFloat4 vec4 + #define FxaaDiscard discard + #define FxaaDot3(a, b) dot(a, b) + #define FxaaSat(x) clamp(x, 0.0, 1.0) + #define FxaaLerp(x,y,s) mix(x,y,s) + #define FxaaTex sampler2D + #define FxaaTexTop(t, p) texture2DLod(t, p, 0.0) + #if (FXAA_FAST_PIXEL_OFFSET == 1) + #define FxaaTexOff(t, p, o, r) texture2DLodOffset(t, p, 0.0, o) + #else + #define FxaaTexOff(t, p, o, r) texture2DLod(t, p + (o * r), 0.0) + #endif + #if (FXAA_GATHER4_ALPHA == 1) + // use #extension GL_ARB_gpu_shader5 : enable + #define FxaaTexAlpha4(t, p, r) textureGather(t, p, 3) + #define FxaaTexOffAlpha4(t, p, o, r) textureGatherOffset(t, p, o, 3) + #endif +#endif +/*--------------------------------------------------------------------------*/ +#if (FXAA_GLSL_130 == 1) + // Requires "#version 130" or better + #define half float + #define half2 vec2 + #define half3 vec3 + #define half4 vec4 + #define int2 ivec2 + #define float2 vec2 + #define float3 vec3 + #define float4 vec4 + #define FxaaInt2 ivec2 + #define FxaaFloat2 vec2 + #define FxaaFloat3 vec3 + #define FxaaFloat4 vec4 + #define FxaaDiscard discard + #define FxaaDot3(a, b) dot(a, b) + #define FxaaSat(x) clamp(x, 0.0, 1.0) + #define FxaaLerp(x,y,s) mix(x,y,s) + #define FxaaTex sampler2D + #define FxaaTexTop(t, p) textureLod(t, p, 0.0) + #define FxaaTexOff(t, p, o, r) textureLodOffset(t, p, 0.0, o) + #if (FXAA_GATHER4_ALPHA == 1) + // use #extension GL_ARB_gpu_shader5 : enable + #define FxaaTexAlpha4(t, p, r) textureGather(t, p, 3) + #define FxaaTexOffAlpha4(t, p, o, r) textureGatherOffset(t, p, o, 3) + #endif +#endif 
+/*--------------------------------------------------------------------------*/ +#if (FXAA_HLSL_3 == 1) || (FXAA_360 == 1) + #define int2 float2 + #define FxaaInt2 float2 + #define FxaaFloat2 float2 + #define FxaaFloat3 float3 + #define FxaaFloat4 float4 + #define FxaaDiscard clip(-1) + #define FxaaDot3(a, b) dot(a, b) + #define FxaaSat(x) saturate(x) + #define FxaaLerp(x,y,s) lerp(x,y,s) + #define FxaaTex sampler2D + #define FxaaTexTop(t, p) tex2Dlod(t, float4(p, 0.0, 0.0)) + #define FxaaTexOff(t, p, o, r) tex2Dlod(t, float4(p + (o * r), 0, 0)) +#endif +/*--------------------------------------------------------------------------*/ +#if (FXAA_HLSL_4 == 1) + #define FxaaInt2 int2 + #define FxaaFloat2 float2 + #define FxaaFloat3 float3 + #define FxaaFloat4 float4 + #define FxaaDiscard clip(-1) + #define FxaaDot3(a, b) dot(a, b) + #define FxaaSat(x) saturate(x) + #define FxaaLerp(x,y,s) lerp(x,y,s) + struct FxaaTex { SamplerState smpl; Texture2D tex; }; + #define FxaaTexTop(t, p) t.tex.SampleLevel(t.smpl, p, 0.0) + #define FxaaTexOff(t, p, o, r) t.tex.SampleLevel(t.smpl, p, 0.0, o) +#endif +/*--------------------------------------------------------------------------*/ +#if (FXAA_HLSL_5 == 1) + #define FxaaInt2 int2 + #define FxaaFloat2 float2 + #define FxaaFloat3 float3 + #define FxaaFloat4 float4 + #define FxaaDiscard clip(-1) + #define FxaaDot3(a, b) dot(a, b) + #define FxaaSat(x) saturate(x) + #define FxaaLerp(x,y,s) lerp(x,y,s) + struct FxaaTex { SamplerState smpl; Texture2D tex; }; + #define FxaaTexTop(t, p) t.tex.SampleLevel(t.smpl, p, 0.0) + #define FxaaTexOff(t, p, o, r) t.tex.SampleLevel(t.smpl, p, 0.0, o) + #define FxaaTexAlpha4(t, p, r) t.tex.GatherAlpha(t.smpl, p) + #define FxaaTexOffAlpha4(t, p, o, r) t.tex.GatherAlpha(t.smpl, p, o) +#endif + + + +/*============================================================================ + + FXAA3 QUALITY - PC + +============================================================================*/ +#if (FXAA_PC == 1) 
+/*--------------------------------------------------------------------------
+  FxaaPixelShader
+
+  Filters a single pixel. The routine
+    1. reads the luma (w channel) of the centre texel and its neighbours,
+    2. leaves the pixel untouched when the local luma range is below the
+       edge threshold,
+    3. otherwise decides whether the edge is a horizontal or vertical span,
+    4. walks along the span in both directions, in up to FXAA_QUALITY_PS
+       unrolled steps of size FXAA_QUALITY_P0, P1, ..., until the luma
+       leaves the edge,
+    5. re-samples the texture at the centre position shifted along the axis
+       that was not used for the walk.
+
+  Return value
+    FXAA_DISCARD == 1 : low-contrast pixels execute FxaaDiscard; otherwise
+                        the texel fetched at the shifted position is
+                        returned unchanged.
+    FXAA_DISCARD == 0 : low-contrast pixels return the unmodified centre
+                        texel; otherwise xyz comes from the shifted fetch
+                        and w is the centre luma.
+
+  posPos and rcpFrameOpt are accepted but never read in this body.
+
+  Compile-time inputs read here and defined outside this excerpt:
+  FXAA_GATHER4_ALPHA, FXAA_DISCARD, FXAA_QUALITY_EDGE_THRESHOLD,
+  FXAA_QUALITY_EDGE_THRESHOLD_MIN, FXAA_QUALITY_SUBPIX, FXAA_QUALITY_PS and
+  FXAA_QUALITY_P0 .. FXAA_QUALITY_P12.
+--------------------------------------------------------------------------*/
+float4 FxaaPixelShader(
+    // {xy} = center of pixel
+    float2 pos,
+    // {xyzw} = not used on FXAA3 Quality
+    float4 posPos,
+    // {rgb_} = color in linear or perceptual color space
+    // {___a} = luma in perceptual color space (not linear)
+    FxaaTex tex,
+    // This must be from a constant/uniform.
+    // {x_} = 1.0/screenWidthInPixels
+    // {_y} = 1.0/screenHeightInPixels
+    float2 rcpFrame,
+    // {xyzw} = not used on FXAA3 Quality
+    float4 rcpFrameOpt
+) {
+/*--------------------------------------------------------------------------
+  Centre texel plus the luma of the S, E, N and W neighbours.
+
+  Gather path (FXAA_GATHER4_ALPHA == 1): two gathers (at posM and at offset
+  (-1,-1)) supply the values, and lumaE/S/SE/NW/N/W (and lumaM when
+  FXAA_DISCARD == 1) are #defines onto components of luma4A / luma4B rather
+  than variables. Later assignments to lumaN / lumaS therefore write into
+  luma4B.z / luma4A.x on this path.
+
+  Non-gather path: four offset fetches, one per neighbour; lumaM is a
+  #define onto rgbyM.w.
+--------------------------------------------------------------------------*/
+    float2 posM;
+    posM.x = pos.x;
+    posM.y = pos.y;
+    #if (FXAA_GATHER4_ALPHA == 1)
+        #if (FXAA_DISCARD == 0)
+            float4 rgbyM = FxaaTexTop(tex, posM);
+            #define lumaM rgbyM.w
+        #endif
+        float4 luma4A = FxaaTexAlpha4(tex, posM, rcpFrame.xy);
+        float4 luma4B = FxaaTexOffAlpha4(tex, posM, FxaaInt2(-1, -1), rcpFrame.xy);
+        #if (FXAA_DISCARD == 1)
+            #define lumaM luma4A.w
+        #endif
+        #define lumaE luma4A.z
+        #define lumaS luma4A.x
+        #define lumaSE luma4A.y
+        #define lumaNW luma4B.w
+        #define lumaN luma4B.z
+        #define lumaW luma4B.x
+    #else
+        float4 rgbyM = FxaaTexTop(tex, posM);
+        #define lumaM rgbyM.w
+        float lumaS = FxaaTexOff(tex, posM, FxaaInt2( 0, 1), rcpFrame.xy).w;
+        float lumaE = FxaaTexOff(tex, posM, FxaaInt2( 1, 0), rcpFrame.xy).w;
+        float lumaN = FxaaTexOff(tex, posM, FxaaInt2( 0,-1), rcpFrame.xy).w;
+        float lumaW = FxaaTexOff(tex, posM, FxaaInt2(-1, 0), rcpFrame.xy).w;
+    #endif
+/*--------------------------------------------------------------------------
+  Local contrast test. range is max minus min luma over M, N, S, E, W. The
+  pixel is treated as low contrast when range is below
+  max(FXAA_QUALITY_EDGE_THRESHOLD_MIN, rangeMax * FXAA_QUALITY_EDGE_THRESHOLD).
+--------------------------------------------------------------------------*/
+    float maxSM = max(lumaS, lumaM);
+    float minSM = min(lumaS, lumaM);
+    float maxESM = max(lumaE, maxSM);
+    float minESM = min(lumaE, minSM);
+    float maxWN = max(lumaN, lumaW);
+    float minWN = min(lumaN, lumaW);
+    float rangeMax = max(maxWN, maxESM);
+    float rangeMin = min(minWN, minESM);
+    float rangeMaxScaled = rangeMax * FXAA_QUALITY_EDGE_THRESHOLD;
+    float range = rangeMax - rangeMin;
+    float rangeMaxClamped = max(FXAA_QUALITY_EDGE_THRESHOLD_MIN, rangeMaxScaled);
+    bool earlyExit = range < rangeMaxClamped;
+/*--------------------------------------------------------------------------
+  Low-contrast early out. The preprocessor picks exactly one statement as
+  the body of the if: discard, or return the untouched centre texel.
+--------------------------------------------------------------------------*/
+    if(earlyExit)
+        #if (FXAA_DISCARD == 1)
+            FxaaDiscard;
+        #else
+            return rgbyM;
+        #endif
+/*--------------------------------------------------------------------------
+  Diagonal neighbours. On the gather path lumaNW and lumaSE already exist
+  as #defines, so only NE and SW are fetched there.
+--------------------------------------------------------------------------*/
+    #if (FXAA_GATHER4_ALPHA == 0)
+        float lumaNW = FxaaTexOff(tex, posM, FxaaInt2(-1,-1), rcpFrame.xy).w;
+        float lumaSE = FxaaTexOff(tex, posM, FxaaInt2( 1, 1), rcpFrame.xy).w;
+        float lumaNE = FxaaTexOff(tex, posM, FxaaInt2( 1,-1), rcpFrame.xy).w;
+        float lumaSW = FxaaTexOff(tex, posM, FxaaInt2(-1, 1), rcpFrame.xy).w;
+    #else
+        float lumaNE = FxaaTexOff(tex, posM, FxaaInt2(1, -1), rcpFrame.xy).w;
+        float lumaSW = FxaaTexOff(tex, posM, FxaaInt2(-1, 1), rcpFrame.xy).w;
+    #endif
+/*--------------------------------------------------------------------------
+  Edge orientation. The next three sections build
+    edgeHorz = 2*|N + S - 2M| + |NE + SE - 2E| + |NW + SW - 2W|
+    edgeVert = 2*|W + E - 2M| + |NW + NE - 2N| + |SW + SE - 2S|
+  interleaved with the sums needed later for the sub-pixel term. All of
+  these read lumaN / lumaS before they are overwritten further down.
+--------------------------------------------------------------------------*/
+    float lumaNS = lumaN + lumaS;
+    float lumaWE = lumaW + lumaE;
+    float subpixRcpRange = 1.0/range;
+    float subpixNSWE = lumaNS + lumaWE;
+    float edgeHorz1 = (-2.0 * lumaM) + lumaNS;
+    float edgeVert1 = (-2.0 * lumaM) + lumaWE;
+/*--------------------------------------------------------------------------*/
+    float lumaNESE = lumaNE + lumaSE;
+    float lumaNWNE = lumaNW + lumaNE;
+    float edgeHorz2 = (-2.0 * lumaE) + lumaNESE;
+    float edgeVert2 = (-2.0 * lumaN) + lumaNWNE;
+/*--------------------------------------------------------------------------*/
+    float lumaNWSW = lumaNW + lumaSW;
+    float lumaSWSE = lumaSW + lumaSE;
+    float edgeHorz4 = (abs(edgeHorz1) * 2.0) + abs(edgeHorz2);
+    float edgeVert4 = (abs(edgeVert1) * 2.0) + abs(edgeVert2);
+    float edgeHorz3 = (-2.0 * lumaW) + lumaNWSW;
+    float edgeVert3 = (-2.0 * lumaS) + lumaSWSE;
+    float edgeHorz = abs(edgeHorz3) + edgeHorz4;
+    float edgeVert = abs(edgeVert3) + edgeVert4;
+/*--------------------------------------------------------------------------
+  horzSpan selects the orientation (ties count as horizontal). subpixA is
+  2*(N + S + W + E) + (NW + SW + NE + SE), the weighted neighbourhood sum.
+--------------------------------------------------------------------------*/
+    float subpixNWSWNESE = lumaNWSW + lumaNESE;
+    float lengthSign = rcpFrame.x;
+    bool horzSpan = edgeHorz >= edgeVert;
+    float subpixA = subpixNSWE * 2.0 + subpixNWSWNESE;
+/*--------------------------------------------------------------------------
+  From here on lumaN / lumaS hold the neighbour pair used for the gradient
+  test: W / E for a vertical span, the original N / S otherwise.
+  lengthSign is the texel size on the axis the final shift will use.
+  subpixB is the weighted neighbourhood average (subpixA / 12) minus M.
+--------------------------------------------------------------------------*/
+    if(!horzSpan) lumaN = lumaW;
+    if(!horzSpan) lumaS = lumaE;
+    if(horzSpan) lengthSign = rcpFrame.y;
+    float subpixB = (subpixA * (1.0/12.0)) - lumaM;
+/*--------------------------------------------------------------------------
+  Pick the side with the larger luma step. pairN is true when the "N" side
+  wins (ties included) and flips lengthSign to negative.
+--------------------------------------------------------------------------*/
+    float gradientN = lumaN - lumaM;
+    float gradientS = lumaS - lumaM;
+    float lumaNN = lumaN + lumaM;
+    float lumaSS = lumaS + lumaM;
+    bool pairN = abs(gradientN) >= abs(gradientS);
+    float gradient = max(abs(gradientN), abs(gradientS));
+    if(pairN) lengthSign = -lengthSign;
+    float subpixC = FxaaSat(abs(subpixB) * subpixRcpRange);
+/*--------------------------------------------------------------------------
+  posB  : search origin, the centre moved half of lengthSign on the shift
+          axis (x for a vertical span, y for a horizontal one).
+  offNP : one-texel step on the search axis (x for a horizontal span, y for
+          a vertical one); the other component is 0.
+--------------------------------------------------------------------------*/
+    float2 posB;
+    posB.x = posM.x;
+    posB.y = posM.y;
+    float2 offNP;
+    offNP.x = (!horzSpan) ? 0.0 : rcpFrame.x;
+    offNP.y = ( horzSpan) ? 0.0 : rcpFrame.y;
+    if(!horzSpan) posB.x += lengthSign * 0.5;
+    if( horzSpan) posB.y += lengthSign * 0.5;
+/*--------------------------------------------------------------------------
+  First search taps: FXAA_QUALITY_P0 steps either side of posB (posN in the
+  negative direction, posP in the positive one).
+  subpixD * subpixE, formed below as subpixF, equals (3 - 2c) * c * c with
+  c = subpixC.
+--------------------------------------------------------------------------*/
+    float2 posN;
+    posN.x = posB.x - offNP.x * FXAA_QUALITY_P0;
+    posN.y = posB.y - offNP.y * FXAA_QUALITY_P0;
+    float2 posP;
+    posP.x = posB.x + offNP.x * FXAA_QUALITY_P0;
+    posP.y = posB.y + offNP.y * FXAA_QUALITY_P0;
+    float subpixD = ((-2.0)*subpixC) + 3.0;
+    float lumaEndN = FxaaTexTop(tex, posN).w;
+    float subpixE = subpixC * subpixC;
+    float lumaEndP = FxaaTexTop(tex, posP).w;
+/*--------------------------------------------------------------------------
+  lumaNN now holds (chosen neighbour + M), so lumaNN * 0.5 is their mean.
+  gradientScaled is gradient / 4 (the expression evaluates left to right).
+  lumaMLTZero records whether the centre lies below that mean.
+--------------------------------------------------------------------------*/
+    if(!pairN) lumaNN = lumaSS;
+    float gradientScaled = gradient * 1.0/4.0;
+    float lumaMM = lumaM - lumaNN * 0.5;
+    float subpixF = subpixD * subpixE;
+    bool lumaMLTZero = lumaMM < 0.0;
+/*--------------------------------------------------------------------------
+  End-of-edge test. A direction is done once its tap differs from the mean
+  by at least gradientScaled. Directions that are not done advance by
+  FXAA_QUALITY_P1.
+  NOTE: despite its name, doneNP is true while at least one direction is
+  still NOT done, i.e. it means "keep searching".
+--------------------------------------------------------------------------*/
+    lumaEndN -= lumaNN * 0.5;
+    lumaEndP -= lumaNN * 0.5;
+    bool doneN = abs(lumaEndN) >= gradientScaled;
+    bool doneP = abs(lumaEndP) >= gradientScaled;
+    if(!doneN) posN.x -= offNP.x * FXAA_QUALITY_P1;
+    if(!doneN) posN.y -= offNP.y * FXAA_QUALITY_P1;
+    bool doneNP = (!doneN) || (!doneP);
+    if(!doneP) posP.x += offNP.x * FXAA_QUALITY_P1;
+    if(!doneP) posP.y += offNP.y * FXAA_QUALITY_P1;
+/*--------------------------------------------------------------------------
+  Unrolled search. Every nesting level repeats the same step: re-sample the
+  directions that are not done, re-test them, and advance the remaining
+  ones by the next FXAA_QUALITY_Pn. The first level (P2) is always
+  compiled; level n (3 .. 12) is compiled only when FXAA_QUALITY_PS > n,
+  and each level is entered at run time only while doneNP is true.
+--------------------------------------------------------------------------*/
+    if(doneNP) {
+        if(!doneN) lumaEndN = FxaaTexTop(tex, posN.xy).w;
+        if(!doneP) lumaEndP = FxaaTexTop(tex, posP.xy).w;
+        if(!doneN) lumaEndN = lumaEndN - lumaNN * 0.5;
+        if(!doneP) lumaEndP = lumaEndP - lumaNN * 0.5;
+        doneN = abs(lumaEndN) >= gradientScaled;
+        doneP = abs(lumaEndP) >= gradientScaled;
+        if(!doneN) posN.x -= offNP.x * FXAA_QUALITY_P2;
+        if(!doneN) posN.y -= offNP.y * FXAA_QUALITY_P2;
+        doneNP = (!doneN) || (!doneP);
+        if(!doneP) posP.x += offNP.x * FXAA_QUALITY_P2;
+        if(!doneP) posP.y += offNP.y * FXAA_QUALITY_P2;
+/*--------------------------------------------------------------------------*/
+        #if (FXAA_QUALITY_PS > 3)
+        if(doneNP) {
+            if(!doneN) lumaEndN = FxaaTexTop(tex, posN.xy).w;
+            if(!doneP) lumaEndP = FxaaTexTop(tex, posP.xy).w;
+            if(!doneN) lumaEndN = lumaEndN - lumaNN * 0.5;
+            if(!doneP) lumaEndP = lumaEndP - lumaNN * 0.5;
+            doneN = abs(lumaEndN) >= gradientScaled;
+            doneP = abs(lumaEndP) >= gradientScaled;
+            if(!doneN) posN.x -= offNP.x * FXAA_QUALITY_P3;
+            if(!doneN) posN.y -= offNP.y * FXAA_QUALITY_P3;
+            doneNP = (!doneN) || (!doneP);
+            if(!doneP) posP.x += offNP.x * FXAA_QUALITY_P3;
+            if(!doneP) posP.y += offNP.y * FXAA_QUALITY_P3;
+/*--------------------------------------------------------------------------*/
+            #if (FXAA_QUALITY_PS > 4)
+            if(doneNP) {
+                if(!doneN) lumaEndN = FxaaTexTop(tex, posN.xy).w;
+                if(!doneP) lumaEndP = FxaaTexTop(tex, posP.xy).w;
+                if(!doneN) lumaEndN = lumaEndN - lumaNN * 0.5;
+                if(!doneP) lumaEndP = lumaEndP - lumaNN * 0.5;
+                doneN = abs(lumaEndN) >= gradientScaled;
+                doneP = abs(lumaEndP) >= gradientScaled;
+                if(!doneN) posN.x -= offNP.x * FXAA_QUALITY_P4;
+                if(!doneN) posN.y -= offNP.y * FXAA_QUALITY_P4;
+                doneNP = (!doneN) || (!doneP);
+                if(!doneP) posP.x += offNP.x * FXAA_QUALITY_P4;
+                if(!doneP) posP.y += offNP.y * FXAA_QUALITY_P4;
+/*--------------------------------------------------------------------------*/
+                #if (FXAA_QUALITY_PS > 5)
+                if(doneNP) {
+                    if(!doneN) lumaEndN = FxaaTexTop(tex, posN.xy).w;
+                    if(!doneP) lumaEndP = FxaaTexTop(tex, posP.xy).w;
+                    if(!doneN) lumaEndN = lumaEndN - lumaNN * 0.5;
+                    if(!doneP) lumaEndP = lumaEndP - lumaNN * 0.5;
+                    doneN = abs(lumaEndN) >= gradientScaled;
+                    doneP = abs(lumaEndP) >= gradientScaled;
+                    if(!doneN) posN.x -= offNP.x * FXAA_QUALITY_P5;
+                    if(!doneN) posN.y -= offNP.y * FXAA_QUALITY_P5;
+                    doneNP = (!doneN) || (!doneP);
+                    if(!doneP) posP.x += offNP.x * FXAA_QUALITY_P5;
+                    if(!doneP) posP.y += offNP.y * FXAA_QUALITY_P5;
+/*--------------------------------------------------------------------------*/
+                    #if (FXAA_QUALITY_PS > 6)
+                    if(doneNP) {
+                        if(!doneN) lumaEndN = FxaaTexTop(tex, posN.xy).w;
+                        if(!doneP) lumaEndP = FxaaTexTop(tex, posP.xy).w;
+                        if(!doneN) lumaEndN = lumaEndN - lumaNN * 0.5;
+                        if(!doneP) lumaEndP = lumaEndP - lumaNN * 0.5;
+                        doneN = abs(lumaEndN) >= gradientScaled;
+                        doneP = abs(lumaEndP) >= gradientScaled;
+                        if(!doneN) posN.x -= offNP.x * FXAA_QUALITY_P6;
+                        if(!doneN) posN.y -= offNP.y * FXAA_QUALITY_P6;
+                        doneNP = (!doneN) || (!doneP);
+                        if(!doneP) posP.x += offNP.x * FXAA_QUALITY_P6;
+                        if(!doneP) posP.y += offNP.y * FXAA_QUALITY_P6;
+/*--------------------------------------------------------------------------*/
+                        #if (FXAA_QUALITY_PS > 7)
+                        if(doneNP) {
+                            if(!doneN) lumaEndN = FxaaTexTop(tex, posN.xy).w;
+                            if(!doneP) lumaEndP = FxaaTexTop(tex, posP.xy).w;
+                            if(!doneN) lumaEndN = lumaEndN - lumaNN * 0.5;
+                            if(!doneP) lumaEndP = lumaEndP - lumaNN * 0.5;
+                            doneN = abs(lumaEndN) >= gradientScaled;
+                            doneP = abs(lumaEndP) >= gradientScaled;
+                            if(!doneN) posN.x -= offNP.x * FXAA_QUALITY_P7;
+                            if(!doneN) posN.y -= offNP.y * FXAA_QUALITY_P7;
+                            doneNP = (!doneN) || (!doneP);
+                            if(!doneP) posP.x += offNP.x * FXAA_QUALITY_P7;
+                            if(!doneP) posP.y += offNP.y * FXAA_QUALITY_P7;
+/*--------------------------------------------------------------------------*/
+                            #if (FXAA_QUALITY_PS > 8)
+                            if(doneNP) {
+                                if(!doneN) lumaEndN = FxaaTexTop(tex, posN.xy).w;
+                                if(!doneP) lumaEndP = FxaaTexTop(tex, posP.xy).w;
+                                if(!doneN) lumaEndN = lumaEndN - lumaNN * 0.5;
+                                if(!doneP) lumaEndP = lumaEndP - lumaNN * 0.5;
+                                doneN = abs(lumaEndN) >= gradientScaled;
+                                doneP = abs(lumaEndP) >= gradientScaled;
+                                if(!doneN) posN.x -= offNP.x * FXAA_QUALITY_P8;
+                                if(!doneN) posN.y -= offNP.y * FXAA_QUALITY_P8;
+                                doneNP = (!doneN) || (!doneP);
+                                if(!doneP) posP.x += offNP.x * FXAA_QUALITY_P8;
+                                if(!doneP) posP.y += offNP.y * FXAA_QUALITY_P8;
+/*--------------------------------------------------------------------------*/
+                                #if (FXAA_QUALITY_PS > 9)
+                                if(doneNP) {
+                                    if(!doneN) lumaEndN = FxaaTexTop(tex, posN.xy).w;
+                                    if(!doneP) lumaEndP = FxaaTexTop(tex, posP.xy).w;
+                                    if(!doneN) lumaEndN = lumaEndN - lumaNN * 0.5;
+                                    if(!doneP) lumaEndP = lumaEndP - lumaNN * 0.5;
+                                    doneN = abs(lumaEndN) >= gradientScaled;
+                                    doneP = abs(lumaEndP) >= gradientScaled;
+                                    if(!doneN) posN.x -= offNP.x * FXAA_QUALITY_P9;
+                                    if(!doneN) posN.y -= offNP.y * FXAA_QUALITY_P9;
+                                    doneNP = (!doneN) || (!doneP);
+                                    if(!doneP) posP.x += offNP.x * FXAA_QUALITY_P9;
+                                    if(!doneP) posP.y += offNP.y * FXAA_QUALITY_P9;
+/*--------------------------------------------------------------------------*/
+                                    #if (FXAA_QUALITY_PS > 10)
+                                    if(doneNP) {
+                                        if(!doneN) lumaEndN = FxaaTexTop(tex, posN.xy).w;
+                                        if(!doneP) lumaEndP = FxaaTexTop(tex, posP.xy).w;
+                                        if(!doneN) lumaEndN = lumaEndN - lumaNN * 0.5;
+                                        if(!doneP) lumaEndP = lumaEndP - lumaNN * 0.5;
+                                        doneN = abs(lumaEndN) >= gradientScaled;
+                                        doneP = abs(lumaEndP) >= gradientScaled;
+                                        if(!doneN) posN.x -= offNP.x * FXAA_QUALITY_P10;
+                                        if(!doneN) posN.y -= offNP.y * FXAA_QUALITY_P10;
+                                        doneNP = (!doneN) || (!doneP);
+                                        if(!doneP) posP.x += offNP.x * FXAA_QUALITY_P10;
+                                        if(!doneP) posP.y += offNP.y * FXAA_QUALITY_P10;
+/*--------------------------------------------------------------------------*/
+                                        #if (FXAA_QUALITY_PS > 11)
+                                        if(doneNP) {
+                                            if(!doneN) lumaEndN = FxaaTexTop(tex, posN.xy).w;
+                                            if(!doneP) lumaEndP = FxaaTexTop(tex, posP.xy).w;
+                                            if(!doneN) lumaEndN = lumaEndN - lumaNN * 0.5;
+                                            if(!doneP) lumaEndP = lumaEndP - lumaNN * 0.5;
+                                            doneN = abs(lumaEndN) >= gradientScaled;
+                                            doneP = abs(lumaEndP) >= gradientScaled;
+                                            if(!doneN) posN.x -= offNP.x * FXAA_QUALITY_P11;
+                                            if(!doneN) posN.y -= offNP.y * FXAA_QUALITY_P11;
+                                            doneNP = (!doneN) || (!doneP);
+                                            if(!doneP) posP.x += offNP.x * FXAA_QUALITY_P11;
+                                            if(!doneP) posP.y += offNP.y * FXAA_QUALITY_P11;
+/*--------------------------------------------------------------------------*/
+                                            #if (FXAA_QUALITY_PS > 12)
+                                            if(doneNP) {
+                                                if(!doneN) lumaEndN = FxaaTexTop(tex, posN.xy).w;
+                                                if(!doneP) lumaEndP = FxaaTexTop(tex, posP.xy).w;
+                                                if(!doneN) lumaEndN = lumaEndN - lumaNN * 0.5;
+                                                if(!doneP) lumaEndP = lumaEndP - lumaNN * 0.5;
+                                                doneN = abs(lumaEndN) >= gradientScaled;
+                                                doneP = abs(lumaEndP) >= gradientScaled;
+                                                if(!doneN) posN.x -= offNP.x * FXAA_QUALITY_P12;
+                                                if(!doneN) posN.y -= offNP.y * FXAA_QUALITY_P12;
+                                                doneNP = (!doneN) || (!doneP);
+                                                if(!doneP) posP.x += offNP.x * FXAA_QUALITY_P12;
+                                                if(!doneP) posP.y += offNP.y * FXAA_QUALITY_P12;
+/*--------------------------------------------------------------------------*/
+                                            }
+                                            #endif
+/*--------------------------------------------------------------------------*/
+                                        }
+                                        #endif
+/*--------------------------------------------------------------------------*/
+                                    }
+                                    #endif
+/*--------------------------------------------------------------------------*/
+                                }
+                                #endif
+/*--------------------------------------------------------------------------*/
+                            }
+                            #endif
+/*--------------------------------------------------------------------------*/
+                        }
+                        #endif
+/*--------------------------------------------------------------------------*/
+                    }
+                    #endif
+/*--------------------------------------------------------------------------*/
+                }
+                #endif
+/*--------------------------------------------------------------------------*/
+            }
+            #endif
+/*--------------------------------------------------------------------------*/
+        }
+        #endif
+/*--------------------------------------------------------------------------*/
+    }
+/*--------------------------------------------------------------------------
+  Distance from the centre to each end of the span, measured on the search
+  axis (x for a horizontal span, y for a vertical one).
+--------------------------------------------------------------------------*/
+    float dstN = posM.x - posN.x;
+    float dstP = posP.x - posM.x;
+    if(!horzSpan) dstN = posM.y - posN.y;
+    if(!horzSpan) dstP = posP.y - posM.y;
+/*--------------------------------------------------------------------------
+  An end is "good" when the sign of its luma delta differs from the sign of
+  the centre delta recorded in lumaMLTZero.
+--------------------------------------------------------------------------*/
+    bool goodSpanN = (lumaEndN < 0.0) != lumaMLTZero;
+    float spanLength = (dstP + dstN);
+    bool goodSpanP = (lumaEndP < 0.0) != lumaMLTZero;
+    float spanLengthRcp = 1.0/spanLength;
+/*--------------------------------------------------------------------------
+  Use the nearer end: pixelOffset = 0.5 - dst / spanLength.
+  subpixH = subpixF * subpixF * FXAA_QUALITY_SUBPIX is the sub-pixel term.
+--------------------------------------------------------------------------*/
+    bool directionN = dstN < dstP;
+    float dst = min(dstN, dstP);
+    bool goodSpan = directionN ? goodSpanN : goodSpanP;
+    float subpixG = subpixF * subpixF;
+    float pixelOffset = (dst * (-spanLengthRcp)) + 0.5;
+    float subpixH = subpixG * FXAA_QUALITY_SUBPIX;
+/*--------------------------------------------------------------------------
+  Final shift: the larger of the edge offset (0.0 when the nearer end is
+  not "good") and the sub-pixel term, scaled by the signed texel size and
+  applied on the shift axis. The texture is then re-sampled at posM.
+--------------------------------------------------------------------------*/
+    float pixelOffsetGood = goodSpan ? pixelOffset : 0.0;
+    float pixelOffsetSubpix = max(pixelOffsetGood, subpixH);
+    if(!horzSpan) posM.x += pixelOffsetSubpix * lengthSign;
+    if( horzSpan) posM.y += pixelOffsetSubpix * lengthSign;
+    #if (FXAA_DISCARD == 1)
+        return FxaaTexTop(tex, posM);
+    #else
+        return float4(FxaaTexTop(tex, posM).xyz, lumaM);
+    #endif
+}
+/*==========================================================================*/
+#endif // FXAA_PC == 1
+/*
+  HLSL entry point, compiled only when SHADER_MODEL is defined.
+  Builds the FxaaTex argument (the tex / smpl members for
+  SHADER_MODEL >= 0x400, a direct assignment of Texture otherwise), runs
+  FxaaPixelShader on input.t with a zeroed posPos, and stores the result in
+  output.c. PS_INPUT, PS_OUTPUT, Texture, TextureSampler, _rcpFrame and
+  _rcpFrameOpt are declared outside this excerpt.
+*/
+#ifdef SHADER_MODEL
+PS_OUTPUT ps_main(PS_INPUT input)
+{
+    PS_OUTPUT output;
+
+    float2 pos = input.t;
+    float4 posPos = (float4)0;
+
+    FxaaTex tex;
+
+    #if SHADER_MODEL >= 0x400
+
+    tex.tex = Texture;
+    tex.smpl = TextureSampler;
+
+    #else
+
+    tex = Texture;
+
+    #endif
+
+    output.c = FxaaPixelShader(pos, posPos, tex, _rcpFrame.xy, _rcpFrameOpt);
+
+    return output;
+}
+#endif
+/*
+  GLSL entry point, compiled only when FXAA_GLSL_130 == 1.
+  Runs FxaaPixelShader on PSin_t with a zeroed posPos, passing
+  TextureSampler directly as the texture argument, and writes the result
+  to SV_Target0. PSin_t, SV_Target0, TextureSampler, _rcpFrame and
+  _rcpFrameOpt are declared outside this excerpt.
+*/
+#if (FXAA_GLSL_130 == 1)
+void ps_main()
+{
+    vec2 pos = PSin_t;
+    vec4 posPos = vec4(0.0, 0.0, 0.0, 0.0);
+
+    SV_Target0 = FxaaPixelShader(pos, posPos, TextureSampler, _rcpFrame.xy, _rcpFrameOpt);
+}
+#endif
+
+#endif // closes a conditional opened before this excerpt