mirror of https://github.com/PCSX2/pcsx2.git
2084 lines
90 KiB
C
2084 lines
90 KiB
C
/*
|
|
* Copyright (C) 2011-2013 Gregory hainaut
|
|
* Copyright (C) 2007-2009 Gabest
|
|
*
|
|
* This file was generated by glsl2h.pl script
|
|
*
|
|
* This Program is free software; you can redistribute it and/or modify
|
|
* it under the terms of the GNU General Public License as published by
|
|
* the Free Software Foundation; either version 2, or (at your option)
|
|
* any later version.
|
|
*
|
|
* This Program is distributed in the hope that it will be useful,
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
* GNU General Public License for more details.
|
|
*
|
|
* You should have received a copy of the GNU General Public License
|
|
* along with this Program; see the file COPYING. If not, write to
|
|
* the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
|
|
* http://www.gnu.org/copyleft/gpl.html
|
|
*
|
|
*/
|
|
|
|
#pragma once
|
|
|
|
#include "stdafx.h"
|
|
|
|
static const char* fxaa_fx =
|
|
"#if defined(SHADER_MODEL) || defined(FXAA_GLSL_130) // make safe to include in resource file to enforce dependency\n"
|
|
"\n"
|
|
"#define FXAA_PC 1\n"
|
|
"#define FXAA_QUALITY_SUBPIX 0.0\n"
|
|
"\n"
|
|
"#ifdef SHADER_MODEL\n"
|
|
"#if SHADER_MODEL >= 0x400\n"
|
|
"\n"
|
|
"#if SHADER_MODEL >= 0x500\n"
|
|
" #define FXAA_HLSL_5 1\n"
|
|
"#else \n"
|
|
" #define FXAA_HLSL_4 1\n"
|
|
"#endif\n"
|
|
"\n"
|
|
"Texture2D Texture;\n"
|
|
"SamplerState TextureSampler;\n"
|
|
"\n"
|
|
"cbuffer cb0\n"
|
|
"{\n"
|
|
" float4 _rcpFrame;\n"
|
|
" float4 _rcpFrameOpt;\n"
|
|
"};\n"
|
|
"\n"
|
|
"struct PS_INPUT\n"
|
|
"{\n"
|
|
" float4 p : SV_Position;\n"
|
|
" float2 t : TEXCOORD0;\n"
|
|
"};\n"
|
|
"\n"
|
|
"struct PS_OUTPUT\n"
|
|
"{\n"
|
|
" float4 c : SV_Target0;\n"
|
|
"};\n"
|
|
"\n"
|
|
"#elif SHADER_MODEL <= 0x300\n"
|
|
"\n"
|
|
"#define FXAA_HLSL_3 1\n"
|
|
"\n"
|
|
"sampler Texture : register(s0);\n"
|
|
"\n"
|
|
"float4 _rcpFrame : register(c0);\n"
|
|
"float4 _rcpFrameOpt : register(c1);\n"
|
|
"\n"
|
|
"struct PS_INPUT\n"
|
|
"{\n"
|
|
"#if SHADER_MODEL < 0x300\n"
|
|
" float4 p : TEXCOORD1;\n"
|
|
"#else\n"
|
|
" float4 p : VPOS;\n"
|
|
"#endif\n"
|
|
" float2 t : TEXCOORD0;\n"
|
|
"};\n"
|
|
"\n"
|
|
"struct PS_OUTPUT\n"
|
|
"{\n"
|
|
" float4 c : COLOR;\n"
|
|
"};\n"
|
|
"\n"
|
|
"#endif\n"
|
|
"#endif\n"
|
|
"\n"
|
|
"\n"
|
|
"#ifdef FXAA_GLSL_130\n"
|
|
"struct vertex_basic\n"
|
|
"{\n"
|
|
" vec4 p;\n"
|
|
" vec2 t;\n"
|
|
"};\n"
|
|
"\n"
|
|
"#ifdef DISABLE_GL42\n"
|
|
"layout(std140) uniform cb13\n"
|
|
"#else\n"
|
|
"layout(std140, binding = 13) uniform cb13\n"
|
|
"#endif\n"
|
|
"{\n"
|
|
" vec4 _rcpFrame;\n"
|
|
" vec4 _rcpFrameOpt;\n"
|
|
"};\n"
|
|
"\n"
|
|
"#ifdef DISABLE_GL42\n"
|
|
"uniform sampler2D TextureSampler;\n"
|
|
"#else\n"
|
|
"layout(binding = 0) uniform sampler2D TextureSampler;\n"
|
|
"#endif\n"
|
|
"\n"
|
|
"#if __VERSION__ > 140 && !(defined(NO_STRUCT))\n"
|
|
"layout(location = 0) in vertex_basic PSin;\n"
|
|
"#define PSin_p (PSin.p)\n"
|
|
"#define PSin_t (PSin.t)\n"
|
|
"#else\n"
|
|
"#ifdef DISABLE_SSO\n"
|
|
"in vec4 SHADERp;\n"
|
|
"in vec2 SHADERt;\n"
|
|
"#else\n"
|
|
"layout(location = 0) in vec4 SHADERp;\n"
|
|
"layout(location = 1) in vec2 SHADERt;\n"
|
|
"#endif\n"
|
|
"#define PSin_p SHADERp\n"
|
|
"#define PSin_t SHADERt\n"
|
|
"#endif\n"
|
|
"\n"
|
|
"layout(location = 0) out vec4 SV_Target0;\n"
|
|
"\n"
|
|
"#endif\n"
|
|
"\n"
|
|
"/*============================================================================\n"
|
|
"\n"
|
|
"\n"
|
|
" NVIDIA FXAA 3.10 by TIMOTHY LOTTES\n"
|
|
"\n"
|
|
"\n"
|
|
"------------------------------------------------------------------------------\n"
|
|
"COPYRIGHT (C) 2010, 2011 NVIDIA CORPORATION. ALL RIGHTS RESERVED.\n"
|
|
"------------------------------------------------------------------------------\n"
|
|
"TO THE MAXIMUM EXTENT PERMITTED BY APPLICABLE LAW, THIS SOFTWARE IS PROVIDED\n"
|
|
"*AS IS* AND NVIDIA AND ITS SUPPLIERS DISCLAIM ALL WARRANTIES, EITHER EXPRESS\n"
|
|
"OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, IMPLIED WARRANTIES OF\n"
|
|
"MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. IN NO EVENT SHALL NVIDIA\n"
|
|
"OR ITS SUPPLIERS BE LIABLE FOR ANY SPECIAL, INCIDENTAL, INDIRECT, OR\n"
|
|
"CONSEQUENTIAL DAMAGES WHATSOEVER (INCLUDING, WITHOUT LIMITATION, DAMAGES FOR\n"
|
|
"LOSS OF BUSINESS PROFITS, BUSINESS INTERRUPTION, LOSS OF BUSINESS INFORMATION,\n"
|
|
"OR ANY OTHER PECUNIARY LOSS) ARISING OUT OF THE USE OF OR INABILITY TO USE\n"
|
|
"THIS SOFTWARE, EVEN IF NVIDIA HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH\n"
|
|
"DAMAGES.\n"
|
|
"\n"
|
|
"------------------------------------------------------------------------------\n"
|
|
" INTEGRATION CHECKLIST\n"
|
|
"------------------------------------------------------------------------------\n"
|
|
"(1.)\n"
|
|
"In the shader source,\n"
|
|
"setup defines for the desired configuration.\n"
|
|
"Example,\n"
|
|
"\n"
|
|
" #define FXAA_PC 1\n"
|
|
" #define FXAA_HLSL_3 1\n"
|
|
" #define FXAA_QUALITY_PRESET 12\n"
|
|
" #define FXAA_QUALITY_EDGE_THRESHOLD (1.0/6.0)\n"
|
|
" #define FXAA_QUALITY_EDGE_THRESHOLD_MIN (1.0/12.0)\n"
|
|
"\n"
|
|
"(2.)\n"
|
|
"Then include this file,\n"
|
|
"\n"
|
|
" #include \"Fxaa3.h\"\n"
|
|
"\n"
|
|
"(3.)\n"
|
|
"Then call the FXAA pixel shader from within your desired shader,\n"
|
|
"\n"
|
|
" return FxaaPixelShader(pos, posPos, tex, rcpFrame, rcpFrameOpt);\n"
|
|
"\n"
|
|
"(4.)\n"
|
|
"Insure pass prior to FXAA outputs RGBL.\n"
|
|
"See next section.\n"
|
|
"\n"
|
|
"(5.)\n"
|
|
"Setup engine to provide \"rcpFrame\" and \"rcpFrameOpt\" constants.\n"
|
|
"Not using constants will result in a performance loss.\n"
|
|
"\n"
|
|
" // {x_} = 1.0/screenWidthInPixels\n"
|
|
" // {_y} = 1.0/screenHeightInPixels\n"
|
|
" float2 rcpFrame\n"
|
|
"\n"
|
|
" // This must be from a constant/uniform.\n"
|
|
" // {x___} = 2.0/screenWidthInPixels\n"
|
|
" // {_y__} = 2.0/screenHeightInPixels\n"
|
|
" // {__z_} = 0.5/screenWidthInPixels\n"
|
|
" // {___w} = 0.5/screenHeightInPixels\n"
|
|
" float4 rcpFrameOpt\n"
|
|
"\n"
|
|
"(5.a.) \n"
|
|
"Optionally change to this for sharper FXAA Console,\n"
|
|
"\n"
|
|
" // This must be from a constant/uniform.\n"
|
|
" // {x___} = 2.0/screenWidthInPixels\n"
|
|
" // {_y__} = 2.0/screenHeightInPixels\n"
|
|
" // {__z_} = 0.333/screenWidthInPixels\n"
|
|
" // {___w} = 0.333/screenHeightInPixels\n"
|
|
" float4 rcpFrameOpt\n"
|
|
"\n"
|
|
"(6.)\n"
|
|
"Have FXAA vertex shader run as a full screen triangle,\n"
|
|
"and output \"pos\" and \"posPos\" such that inputs in the pixel shader provide,\n"
|
|
"\n"
|
|
" // {xy} = center of pixel\n"
|
|
" float2 pos,\n"
|
|
"\n"
|
|
" // {xy__} = upper left of pixel\n"
|
|
" // {__zw} = lower right of pixel\n"
|
|
" float4 posPos,\n"
|
|
"\n"
|
|
"(7.)\n"
|
|
"Insure the texture sampler used by FXAA is set to bilinear filtering.\n"
|
|
"\n"
|
|
"\n"
|
|
"------------------------------------------------------------------------------\n"
|
|
" INTEGRATION - RGBL AND COLORSPACE\n"
|
|
"------------------------------------------------------------------------------\n"
|
|
"FXAA3 requires RGBL as input.\n"
|
|
"\n"
|
|
"RGB should be LDR (low dynamic range).\n"
|
|
"Specifically do FXAA after tonemapping.\n"
|
|
"\n"
|
|
"RGB data as returned by a texture fetch can be linear or non-linear.\n"
|
|
"Note an \"sRGB format\" texture counts as linear,\n"
|
|
"because the result of a texture fetch is linear data.\n"
|
|
"Regular \"RGBA8\" textures in the sRGB colorspace are non-linear.\n"
|
|
"\n"
|
|
"Luma must be stored in the alpha channel prior to running FXAA.\n"
|
|
"This luma should be in a perceptual space (could be gamma 2.0).\n"
|
|
"Example pass before FXAA where output is gamma 2.0 encoded,\n"
|
|
"\n"
|
|
" color.rgb = ToneMap(color.rgb); // linear color output\n"
|
|
" color.rgb = sqrt(color.rgb); // gamma 2.0 color output\n"
|
|
" return color;\n"
|
|
"\n"
|
|
"To use FXAA,\n"
|
|
"\n"
|
|
" color.rgb = ToneMap(color.rgb); // linear color output\n"
|
|
" color.rgb = sqrt(color.rgb); // gamma 2.0 color output\n"
|
|
" color.a = dot(color.rgb, float3(0.299, 0.587, 0.114)); // compute luma\n"
|
|
" return color;\n"
|
|
"\n"
|
|
"Another example where output is linear encoded,\n"
|
|
"say for instance writing to an sRGB formated render target,\n"
|
|
"where the render target does the conversion back to sRGB after blending,\n"
|
|
"\n"
|
|
" color.rgb = ToneMap(color.rgb); // linear color output\n"
|
|
" return color;\n"
|
|
"\n"
|
|
"To use FXAA,\n"
|
|
"\n"
|
|
" color.rgb = ToneMap(color.rgb); // linear color output\n"
|
|
" color.a = sqrt(dot(color.rgb, float3(0.299, 0.587, 0.114))); // compute luma\n"
|
|
" return color;\n"
|
|
"\n"
|
|
"Getting luma correct is required for the algorithm to work correctly.\n"
|
|
"\n"
|
|
"\n"
|
|
"------------------------------------------------------------------------------\n"
|
|
" BEING LINEARLY CORRECT?\n"
|
|
"------------------------------------------------------------------------------\n"
|
|
"Applying FXAA to a framebuffer with linear RGB color will look worse.\n"
|
|
"This is very counter intuitive, but happends to be true in this case.\n"
|
|
"The reason is because dithering artifacts will be more visiable \n"
|
|
"in a linear colorspace.\n"
|
|
"\n"
|
|
"\n"
|
|
"------------------------------------------------------------------------------\n"
|
|
" COMPLEX INTEGRATION\n"
|
|
"------------------------------------------------------------------------------\n"
|
|
"Q. What if the engine is blending into RGB before wanting to run FXAA?\n"
|
|
"\n"
|
|
"A. In the last opaque pass prior to FXAA,\n"
|
|
" have the pass write out luma into alpha.\n"
|
|
" Then blend into RGB only.\n"
|
|
" FXAA should be able to run ok\n"
|
|
" assuming the blending pass did not any add aliasing.\n"
|
|
" This should be the common case for particles and common blending passes.\n"
|
|
"\n"
|
|
"============================================================================*/\n"
|
|
"\n"
|
|
"/*============================================================================\n"
|
|
"\n"
|
|
" INTEGRATION KNOBS\n"
|
|
"\n"
|
|
"============================================================================*/\n"
|
|
"//\n"
|
|
"// FXAA_PS3 and FXAA_360 choose the console algorithm (FXAA3 CONSOLE).\n"
|
|
"// FXAA_360_OPT is a prototype for the new optimized 360 version.\n"
|
|
"//\n"
|
|
"// 1 = Use API.\n"
|
|
"// 0 = Don't use API.\n"
|
|
"//\n"
|
|
"/*--------------------------------------------------------------------------*/\n"
|
|
"#ifndef FXAA_PS3\n"
|
|
" #define FXAA_PS3 0\n"
|
|
"#endif\n"
|
|
"/*--------------------------------------------------------------------------*/\n"
|
|
"#ifndef FXAA_360\n"
|
|
" #define FXAA_360 0\n"
|
|
"#endif\n"
|
|
"/*--------------------------------------------------------------------------*/\n"
|
|
"#ifndef FXAA_360_OPT\n"
|
|
" #define FXAA_360_OPT 0\n"
|
|
"#endif\n"
|
|
"/*==========================================================================*/\n"
|
|
"#ifndef FXAA_PC\n"
|
|
" //\n"
|
|
" // FXAA Quality\n"
|
|
" // The high quality PC algorithm.\n"
|
|
" //\n"
|
|
" #define FXAA_PC 0\n"
|
|
"#endif\n"
|
|
"/*--------------------------------------------------------------------------*/\n"
|
|
"#ifndef FXAA_PC_CONSOLE\n"
|
|
" //\n"
|
|
" // The console algorithm for PC is included\n"
|
|
" // for developers targeting really low spec machines.\n"
|
|
" //\n"
|
|
" #define FXAA_PC_CONSOLE 0\n"
|
|
"#endif\n"
|
|
"/*--------------------------------------------------------------------------*/\n"
|
|
"#ifndef FXAA_GLSL_120\n"
|
|
" #define FXAA_GLSL_120 0\n"
|
|
"#endif\n"
|
|
"/*--------------------------------------------------------------------------*/\n"
|
|
"#ifndef FXAA_GLSL_130\n"
|
|
" #define FXAA_GLSL_130 0\n"
|
|
"#endif\n"
|
|
"/*--------------------------------------------------------------------------*/\n"
|
|
"#ifndef FXAA_HLSL_3\n"
|
|
" #define FXAA_HLSL_3 0\n"
|
|
"#endif\n"
|
|
"/*--------------------------------------------------------------------------*/\n"
|
|
"#ifndef FXAA_HLSL_4\n"
|
|
" #define FXAA_HLSL_4 0\n"
|
|
"#endif\n"
|
|
"/*--------------------------------------------------------------------------*/\n"
|
|
"#ifndef FXAA_HLSL_5\n"
|
|
" #define FXAA_HLSL_5 0\n"
|
|
"#endif\n"
|
|
"/*==========================================================================*/\n"
|
|
"#ifndef FXAA_EARLY_EXIT\n"
|
|
" //\n"
|
|
" // Controls algorithm's early exit path.\n"
|
|
" // On PS3 turning this on adds 2 cycles to the shader.\n"
|
|
" // On 360 turning this off adds 10ths of a millisecond to the shader.\n"
|
|
" // Turning this off on console will result in a more blurry image.\n"
|
|
" // So this defaults to on.\n"
|
|
" //\n"
|
|
" // 1 = On.\n"
|
|
" // 0 = Off.\n"
|
|
" //\n"
|
|
" #define FXAA_EARLY_EXIT 1\n"
|
|
"#endif\n"
|
|
"/*--------------------------------------------------------------------------*/\n"
|
|
"#ifndef FXAA_DISCARD\n"
|
|
" //\n"
|
|
" // Only valid for PC OpenGL currently.\n"
|
|
" //\n"
|
|
" // 1 = Use discard on pixels which don't need AA.\n"
|
|
" // For APIs which enable concurrent TEX+ROP from same surface.\n"
|
|
" // 0 = Return unchanged color on pixels which don't need AA.\n"
|
|
" //\n"
|
|
" #define FXAA_DISCARD 0\n"
|
|
"#endif\n"
|
|
"/*--------------------------------------------------------------------------*/\n"
|
|
"#ifndef FXAA_FAST_PIXEL_OFFSET\n"
|
|
" //\n"
|
|
" // Used for GLSL 120 only.\n"
|
|
" //\n"
|
|
" // 1 = GL API supports fast pixel offsets\n"
|
|
" // 0 = do not use fast pixel offsets\n"
|
|
" //\n"
|
|
" #ifdef GL_EXT_gpu_shader4\n"
|
|
" #define FXAA_FAST_PIXEL_OFFSET 1\n"
|
|
" #endif\n"
|
|
" #ifdef GL_NV_gpu_shader5\n"
|
|
" #define FXAA_FAST_PIXEL_OFFSET 1\n"
|
|
" #endif\n"
|
|
" #ifdef GL_ARB_gpu_shader5\n"
|
|
" #define FXAA_FAST_PIXEL_OFFSET 1\n"
|
|
" #endif\n"
|
|
" #ifndef FXAA_FAST_PIXEL_OFFSET\n"
|
|
" #define FXAA_FAST_PIXEL_OFFSET 0\n"
|
|
" #endif\n"
|
|
"#endif\n"
|
|
"/*--------------------------------------------------------------------------*/\n"
|
|
"#ifndef FXAA_GATHER4_ALPHA\n"
|
|
" //\n"
|
|
" // 1 = API supports gather4 on alpha channel.\n"
|
|
" // 0 = API does not support gather4 on alpha channel.\n"
|
|
" //\n"
|
|
" #if (FXAA_HLSL_5 == 1)\n"
|
|
" #define FXAA_GATHER4_ALPHA 1\n"
|
|
" #endif\n"
|
|
" #ifdef GL_ARB_gpu_shader5\n"
|
|
" #define FXAA_GATHER4_ALPHA 1\n"
|
|
" #endif\n"
|
|
" #ifdef GL_NV_gpu_shader5\n"
|
|
" #define FXAA_GATHER4_ALPHA 1\n"
|
|
" #endif\n"
|
|
" #ifndef FXAA_GATHER4_ALPHA\n"
|
|
" #define FXAA_GATHER4_ALPHA 0\n"
|
|
" #endif\n"
|
|
"#endif\n"
|
|
"\n"
|
|
"/*============================================================================\n"
|
|
" FXAA CONSOLE - TUNING KNOBS\n"
|
|
"============================================================================*/\n"
|
|
"#ifndef FXAA_CONSOLE_EDGE_SHARPNESS\n"
|
|
" //\n"
|
|
" // Consoles the sharpness of edges.\n"
|
|
" //\n"
|
|
" // Due to the PS3 being ALU bound,\n"
|
|
" // there are only two safe values here: 4 and 8.\n"
|
|
" // These options use the shaders ability to a free *|/ by 4|8.\n"
|
|
" //\n"
|
|
" // 8.0 is sharper\n"
|
|
" // 4.0 is softer\n"
|
|
" // 2.0 is really soft (good for vector graphics inputs)\n"
|
|
" //\n"
|
|
" #if 1\n"
|
|
" #define FXAA_CONSOLE_EDGE_SHARPNESS 8.0\n"
|
|
" #endif\n"
|
|
" #if 0\n"
|
|
" #define FXAA_CONSOLE_EDGE_SHARPNESS 4.0\n"
|
|
" #endif\n"
|
|
" #if 0\n"
|
|
" #define FXAA_CONSOLE_EDGE_SHARPNESS 2.0\n"
|
|
" #endif\n"
|
|
"#endif\n"
|
|
"/*--------------------------------------------------------------------------*/\n"
|
|
"#ifndef FXAA_CONSOLE_EDGE_THRESHOLD\n"
|
|
" //\n"
|
|
" // The minimum amount of local contrast required to apply algorithm.\n"
|
|
" // The console setting has a different mapping than the quality setting.\n"
|
|
" //\n"
|
|
" // This only applies when FXAA_EARLY_EXIT is 1.\n"
|
|
" //\n"
|
|
" // Due to the PS3 being ALU bound,\n"
|
|
" // there are only two safe values here: 0.25 and 0.125.\n"
|
|
" // These options use the shaders ability to a free *|/ by 4|8.\n"
|
|
" //\n"
|
|
" // 0.125 leaves less aliasing, but is softer\n"
|
|
" // 0.25 leaves more aliasing, and is sharper\n"
|
|
" //\n"
|
|
" #if 1\n"
|
|
" #define FXAA_CONSOLE_EDGE_THRESHOLD 0.125\n"
|
|
" #else\n"
|
|
" #define FXAA_CONSOLE_EDGE_THRESHOLD 0.25\n"
|
|
" #endif\n"
|
|
"#endif\n"
|
|
"/*--------------------------------------------------------------------------*/\n"
|
|
"#ifndef FXAA_CONSOLE_EDGE_THRESHOLD_MIN\n"
|
|
" //\n"
|
|
" // Trims the algorithm from processing darks.\n"
|
|
" // The console setting has a different mapping than the quality setting.\n"
|
|
" //\n"
|
|
" // This only applies when FXAA_EARLY_EXIT is 1.\n"
|
|
" //\n"
|
|
" // This does not apply to PS3.\n"
|
|
" // PS3 was simplified to avoid more shader instructions.\n"
|
|
" //\n"
|
|
" #define FXAA_CONSOLE_EDGE_THRESHOLD_MIN 0.05\n"
|
|
"#endif\n"
|
|
"\n"
|
|
"/*============================================================================\n"
|
|
" FXAA QUALITY - TUNING KNOBS\n"
|
|
"============================================================================*/\n"
|
|
"#ifndef FXAA_QUALITY_EDGE_THRESHOLD\n"
|
|
" //\n"
|
|
" // The minimum amount of local contrast required to apply algorithm.\n"
|
|
" //\n"
|
|
" // 1/3 - too little\n"
|
|
" // 1/4 - low quality\n"
|
|
" // 1/6 - default\n"
|
|
" // 1/8 - high quality (default)\n"
|
|
" // 1/16 - overkill\n"
|
|
" //\n"
|
|
" #define FXAA_QUALITY_EDGE_THRESHOLD (1.0/6.0)\n"
|
|
"#endif\n"
|
|
"/*--------------------------------------------------------------------------*/\n"
|
|
"#ifndef FXAA_QUALITY_EDGE_THRESHOLD_MIN\n"
|
|
" //\n"
|
|
" // Trims the algorithm from processing darks.\n"
|
|
" //\n"
|
|
" // 1/32 - visible limit\n"
|
|
" // 1/16 - high quality\n"
|
|
" // 1/12 - upper limit (default, the start of visible unfiltered edges)\n"
|
|
" //\n"
|
|
" #define FXAA_QUALITY_EDGE_THRESHOLD_MIN (1.0/12.0)\n"
|
|
"#endif\n"
|
|
"/*--------------------------------------------------------------------------*/\n"
|
|
"#ifndef FXAA_QUALITY_SUBPIX\n"
|
|
" //\n"
|
|
" // Choose the amount of sub-pixel aliasing removal.\n"
|
|
" //\n"
|
|
" // 1 - upper limit (softer)\n"
|
|
" // 3/4 - default amount of filtering\n"
|
|
" // 1/2 - lower limit (sharper, less sub-pixel aliasing removal)\n"
|
|
" //\n"
|
|
" #define FXAA_QUALITY_SUBPIX (3.0/4.0)\n"
|
|
"#endif\n"
|
|
"/*--------------------------------------------------------------------------*/\n"
|
|
"#ifndef FXAA_QUALITY_PRESET\n"
|
|
" //\n"
|
|
" // Choose the quality preset.\n"
|
|
" // \n"
|
|
" // OPTIONS\n"
|
|
" // -----------------------------------------------------------------------\n"
|
|
" // 10 to 15 - default medium dither (10=fastest, 15=highest quality)\n"
|
|
" // 20 to 29 - less dither, more expensive (20=fastest, 29=highest quality)\n"
|
|
" // 39 - no dither, very expensive \n"
|
|
" //\n"
|
|
" // NOTES\n"
|
|
" // -----------------------------------------------------------------------\n"
|
|
" // 12 = slightly faster then FXAA 3.9 and higher edge quality (default)\n"
|
|
" // 13 = about same speed as FXAA 3.9 and better than 12\n"
|
|
" // 23 = closest to FXAA 3.9 visually and performance wise\n"
|
|
" // _ = the lowest digit is directly related to performance\n"
|
|
" // _ = the highest digit is directly related to style\n"
|
|
" // \n"
|
|
" #define FXAA_QUALITY_PRESET 12\n"
|
|
"#endif\n"
|
|
"\n"
|
|
"\n"
|
|
"/*============================================================================\n"
|
|
"\n"
|
|
" FXAA QUALITY - PRESETS\n"
|
|
"\n"
|
|
"============================================================================*/\n"
|
|
"\n"
|
|
"/*============================================================================\n"
|
|
" FXAA QUALITY - MEDIUM DITHER PRESETS\n"
|
|
"============================================================================*/\n"
|
|
"#if (FXAA_QUALITY_PRESET == 10)\n"
|
|
" #define FXAA_QUALITY_PS 3\n"
|
|
" #define FXAA_QUALITY_P0 1.5\n"
|
|
" #define FXAA_QUALITY_P1 3.0\n"
|
|
" #define FXAA_QUALITY_P2 12.0\n"
|
|
"#endif\n"
|
|
"/*--------------------------------------------------------------------------*/\n"
|
|
"#if (FXAA_QUALITY_PRESET == 11)\n"
|
|
" #define FXAA_QUALITY_PS 4\n"
|
|
" #define FXAA_QUALITY_P0 1.0\n"
|
|
" #define FXAA_QUALITY_P1 1.5\n"
|
|
" #define FXAA_QUALITY_P2 3.0\n"
|
|
" #define FXAA_QUALITY_P3 12.0\n"
|
|
"#endif\n"
|
|
"/*--------------------------------------------------------------------------*/\n"
|
|
"#if (FXAA_QUALITY_PRESET == 12)\n"
|
|
" #define FXAA_QUALITY_PS 5\n"
|
|
" #define FXAA_QUALITY_P0 1.0\n"
|
|
" #define FXAA_QUALITY_P1 1.5\n"
|
|
" #define FXAA_QUALITY_P2 2.0\n"
|
|
" #define FXAA_QUALITY_P3 4.0\n"
|
|
" #define FXAA_QUALITY_P4 12.0\n"
|
|
"#endif\n"
|
|
"/*--------------------------------------------------------------------------*/\n"
|
|
"#if (FXAA_QUALITY_PRESET == 13)\n"
|
|
" #define FXAA_QUALITY_PS 6\n"
|
|
" #define FXAA_QUALITY_P0 1.0\n"
|
|
" #define FXAA_QUALITY_P1 1.5\n"
|
|
" #define FXAA_QUALITY_P2 2.0\n"
|
|
" #define FXAA_QUALITY_P3 2.0\n"
|
|
" #define FXAA_QUALITY_P4 4.0\n"
|
|
" #define FXAA_QUALITY_P5 12.0\n"
|
|
"#endif\n"
|
|
"/*--------------------------------------------------------------------------*/\n"
|
|
"#if (FXAA_QUALITY_PRESET == 14)\n"
|
|
" #define FXAA_QUALITY_PS 7\n"
|
|
" #define FXAA_QUALITY_P0 1.0\n"
|
|
" #define FXAA_QUALITY_P1 1.5\n"
|
|
" #define FXAA_QUALITY_P2 2.0\n"
|
|
" #define FXAA_QUALITY_P3 2.0\n"
|
|
" #define FXAA_QUALITY_P4 2.0\n"
|
|
" #define FXAA_QUALITY_P5 4.0\n"
|
|
" #define FXAA_QUALITY_P6 12.0\n"
|
|
"#endif\n"
|
|
"/*--------------------------------------------------------------------------*/\n"
|
|
"#if (FXAA_QUALITY_PRESET == 15)\n"
|
|
" #define FXAA_QUALITY_PS 8\n"
|
|
" #define FXAA_QUALITY_P0 1.0\n"
|
|
" #define FXAA_QUALITY_P1 1.5\n"
|
|
" #define FXAA_QUALITY_P2 2.0\n"
|
|
" #define FXAA_QUALITY_P3 2.0\n"
|
|
" #define FXAA_QUALITY_P4 2.0\n"
|
|
" #define FXAA_QUALITY_P5 2.0\n"
|
|
" #define FXAA_QUALITY_P6 4.0\n"
|
|
" #define FXAA_QUALITY_P7 12.0\n"
|
|
"#endif\n"
|
|
"\n"
|
|
"/*============================================================================\n"
|
|
" FXAA QUALITY - LOW DITHER PRESETS\n"
|
|
"============================================================================*/\n"
|
|
"#if (FXAA_QUALITY_PRESET == 20)\n"
|
|
" #define FXAA_QUALITY_PS 3\n"
|
|
" #define FXAA_QUALITY_P0 1.5\n"
|
|
" #define FXAA_QUALITY_P1 2.0\n"
|
|
" #define FXAA_QUALITY_P2 8.0\n"
|
|
"#endif\n"
|
|
"/*--------------------------------------------------------------------------*/\n"
|
|
"#if (FXAA_QUALITY_PRESET == 21)\n"
|
|
" #define FXAA_QUALITY_PS 4\n"
|
|
" #define FXAA_QUALITY_P0 1.0\n"
|
|
" #define FXAA_QUALITY_P1 1.5\n"
|
|
" #define FXAA_QUALITY_P2 2.0\n"
|
|
" #define FXAA_QUALITY_P3 8.0\n"
|
|
"#endif\n"
|
|
"/*--------------------------------------------------------------------------*/\n"
|
|
"#if (FXAA_QUALITY_PRESET == 22)\n"
|
|
" #define FXAA_QUALITY_PS 5\n"
|
|
" #define FXAA_QUALITY_P0 1.0\n"
|
|
" #define FXAA_QUALITY_P1 1.5\n"
|
|
" #define FXAA_QUALITY_P2 2.0\n"
|
|
" #define FXAA_QUALITY_P3 2.0\n"
|
|
" #define FXAA_QUALITY_P4 8.0\n"
|
|
"#endif\n"
|
|
"/*--------------------------------------------------------------------------*/\n"
|
|
"#if (FXAA_QUALITY_PRESET == 23)\n"
|
|
" #define FXAA_QUALITY_PS 6\n"
|
|
" #define FXAA_QUALITY_P0 1.0\n"
|
|
" #define FXAA_QUALITY_P1 1.5\n"
|
|
" #define FXAA_QUALITY_P2 2.0\n"
|
|
" #define FXAA_QUALITY_P3 2.0\n"
|
|
" #define FXAA_QUALITY_P4 2.0\n"
|
|
" #define FXAA_QUALITY_P5 8.0\n"
|
|
"#endif\n"
|
|
"/*--------------------------------------------------------------------------*/\n"
|
|
"#if (FXAA_QUALITY_PRESET == 24)\n"
|
|
" #define FXAA_QUALITY_PS 7\n"
|
|
" #define FXAA_QUALITY_P0 1.0\n"
|
|
" #define FXAA_QUALITY_P1 1.5\n"
|
|
" #define FXAA_QUALITY_P2 2.0\n"
|
|
" #define FXAA_QUALITY_P3 2.0\n"
|
|
" #define FXAA_QUALITY_P4 2.0\n"
|
|
" #define FXAA_QUALITY_P5 3.0\n"
|
|
" #define FXAA_QUALITY_P6 8.0\n"
|
|
"#endif\n"
|
|
"/*--------------------------------------------------------------------------*/\n"
|
|
"#if (FXAA_QUALITY_PRESET == 25)\n"
|
|
" #define FXAA_QUALITY_PS 8\n"
|
|
" #define FXAA_QUALITY_P0 1.0\n"
|
|
" #define FXAA_QUALITY_P1 1.5\n"
|
|
" #define FXAA_QUALITY_P2 2.0\n"
|
|
" #define FXAA_QUALITY_P3 2.0\n"
|
|
" #define FXAA_QUALITY_P4 2.0\n"
|
|
" #define FXAA_QUALITY_P5 2.0\n"
|
|
" #define FXAA_QUALITY_P6 4.0\n"
|
|
" #define FXAA_QUALITY_P7 8.0\n"
|
|
"#endif\n"
|
|
"/*--------------------------------------------------------------------------*/\n"
|
|
"#if (FXAA_QUALITY_PRESET == 26)\n"
|
|
" #define FXAA_QUALITY_PS 9\n"
|
|
" #define FXAA_QUALITY_P0 1.0\n"
|
|
" #define FXAA_QUALITY_P1 1.5\n"
|
|
" #define FXAA_QUALITY_P2 2.0\n"
|
|
" #define FXAA_QUALITY_P3 2.0\n"
|
|
" #define FXAA_QUALITY_P4 2.0\n"
|
|
" #define FXAA_QUALITY_P5 2.0\n"
|
|
" #define FXAA_QUALITY_P6 2.0\n"
|
|
" #define FXAA_QUALITY_P7 4.0\n"
|
|
" #define FXAA_QUALITY_P8 8.0\n"
|
|
"#endif\n"
|
|
"/*--------------------------------------------------------------------------*/\n"
|
|
"#if (FXAA_QUALITY_PRESET == 27)\n"
|
|
" #define FXAA_QUALITY_PS 10\n"
|
|
" #define FXAA_QUALITY_P0 1.0\n"
|
|
" #define FXAA_QUALITY_P1 1.5\n"
|
|
" #define FXAA_QUALITY_P2 2.0\n"
|
|
" #define FXAA_QUALITY_P3 2.0\n"
|
|
" #define FXAA_QUALITY_P4 2.0\n"
|
|
" #define FXAA_QUALITY_P5 2.0\n"
|
|
" #define FXAA_QUALITY_P6 2.0\n"
|
|
" #define FXAA_QUALITY_P7 2.0\n"
|
|
" #define FXAA_QUALITY_P8 4.0\n"
|
|
" #define FXAA_QUALITY_P9 8.0\n"
|
|
"#endif\n"
|
|
"/*--------------------------------------------------------------------------*/\n"
|
|
"#if (FXAA_QUALITY_PRESET == 28)\n"
|
|
" #define FXAA_QUALITY_PS 11\n"
|
|
" #define FXAA_QUALITY_P0 1.0\n"
|
|
" #define FXAA_QUALITY_P1 1.5\n"
|
|
" #define FXAA_QUALITY_P2 2.0\n"
|
|
" #define FXAA_QUALITY_P3 2.0\n"
|
|
" #define FXAA_QUALITY_P4 2.0\n"
|
|
" #define FXAA_QUALITY_P5 2.0\n"
|
|
" #define FXAA_QUALITY_P6 2.0\n"
|
|
" #define FXAA_QUALITY_P7 2.0\n"
|
|
" #define FXAA_QUALITY_P8 2.0\n"
|
|
" #define FXAA_QUALITY_P9 4.0\n"
|
|
" #define FXAA_QUALITY_P10 8.0\n"
|
|
"#endif\n"
|
|
"/*--------------------------------------------------------------------------*/\n"
|
|
"#if (FXAA_QUALITY_PRESET == 29)\n"
|
|
" #define FXAA_QUALITY_PS 12\n"
|
|
" #define FXAA_QUALITY_P0 1.0\n"
|
|
" #define FXAA_QUALITY_P1 1.5\n"
|
|
" #define FXAA_QUALITY_P2 2.0\n"
|
|
" #define FXAA_QUALITY_P3 2.0\n"
|
|
" #define FXAA_QUALITY_P4 2.0\n"
|
|
" #define FXAA_QUALITY_P5 2.0\n"
|
|
" #define FXAA_QUALITY_P6 2.0\n"
|
|
" #define FXAA_QUALITY_P7 2.0\n"
|
|
" #define FXAA_QUALITY_P8 2.0\n"
|
|
" #define FXAA_QUALITY_P9 2.0\n"
|
|
" #define FXAA_QUALITY_P10 4.0\n"
|
|
" #define FXAA_QUALITY_P11 8.0\n"
|
|
"#endif\n"
|
|
"\n"
|
|
"/*============================================================================\n"
|
|
" FXAA QUALITY - EXTREME QUALITY\n"
|
|
"============================================================================*/\n"
|
|
"#if (FXAA_QUALITY_PRESET == 39)\n"
|
|
" #define FXAA_QUALITY_PS 12\n"
|
|
" #define FXAA_QUALITY_P0 1.0\n"
|
|
" #define FXAA_QUALITY_P1 1.0\n"
|
|
" #define FXAA_QUALITY_P2 1.0\n"
|
|
" #define FXAA_QUALITY_P3 1.0\n"
|
|
" #define FXAA_QUALITY_P4 1.0\n"
|
|
" #define FXAA_QUALITY_P5 1.5\n"
|
|
" #define FXAA_QUALITY_P6 2.0\n"
|
|
" #define FXAA_QUALITY_P7 2.0\n"
|
|
" #define FXAA_QUALITY_P8 2.0\n"
|
|
" #define FXAA_QUALITY_P9 2.0\n"
|
|
" #define FXAA_QUALITY_P10 4.0\n"
|
|
" #define FXAA_QUALITY_P11 8.0\n"
|
|
"#endif\n"
|
|
"\n"
|
|
"\n"
|
|
"\n"
|
|
"/*============================================================================\n"
|
|
"\n"
|
|
" API PORTING\n"
|
|
"\n"
|
|
"============================================================================*/\n"
|
|
"#if (FXAA_GLSL_120 == 1)\n"
|
|
" // Requires,\n"
|
|
" // #version 120\n"
|
|
" // And at least,\n"
|
|
" // #extension GL_EXT_gpu_shader4 : enable\n"
|
|
" // (or set FXAA_FAST_PIXEL_OFFSET 1 to work like DX9)\n"
|
|
" #define half float\n"
|
|
" #define half2 vec2\n"
|
|
" #define half3 vec3\n"
|
|
" #define half4 vec4\n"
|
|
" #define int2 ivec2\n"
|
|
" #define float2 vec2\n"
|
|
" #define float3 vec3\n"
|
|
" #define float4 vec4\n"
|
|
" #define FxaaInt2 ivec2\n"
|
|
" #define FxaaFloat2 vec2\n"
|
|
" #define FxaaFloat3 vec3\n"
|
|
" #define FxaaFloat4 vec4\n"
|
|
" #define FxaaDiscard discard\n"
|
|
" #define FxaaDot3(a, b) dot(a, b)\n"
|
|
" #define FxaaSat(x) clamp(x, 0.0, 1.0)\n"
|
|
" #define FxaaLerp(x,y,s) mix(x,y,s)\n"
|
|
" #define FxaaTex sampler2D\n"
|
|
" #define FxaaTexTop(t, p) texture2DLod(t, p, 0.0)\n"
|
|
" #if (FXAA_FAST_PIXEL_OFFSET == 1)\n"
|
|
" #define FxaaTexOff(t, p, o, r) texture2DLodOffset(t, p, 0.0, o)\n"
|
|
" #else\n"
|
|
" #define FxaaTexOff(t, p, o, r) texture2DLod(t, p + (o * r), 0.0)\n"
|
|
" #endif\n"
|
|
" #if (FXAA_GATHER4_ALPHA == 1)\n"
|
|
" // use #extension GL_ARB_gpu_shader5 : enable\n"
|
|
" #define FxaaTexAlpha4(t, p, r) textureGather(t, p, 3)\n"
|
|
" #define FxaaTexOffAlpha4(t, p, o, r) textureGatherOffset(t, p, o, 3)\n"
|
|
" #endif\n"
|
|
"#endif\n"
|
|
"/*--------------------------------------------------------------------------*/\n"
|
|
"#if (FXAA_GLSL_130 == 1)\n"
|
|
" // Requires \"#version 130\" or better\n"
|
|
" #define half float\n"
|
|
" #define half2 vec2\n"
|
|
" #define half3 vec3\n"
|
|
" #define half4 vec4\n"
|
|
" #define int2 ivec2\n"
|
|
" #define float2 vec2\n"
|
|
" #define float3 vec3\n"
|
|
" #define float4 vec4\n"
|
|
" #define FxaaInt2 ivec2\n"
|
|
" #define FxaaFloat2 vec2\n"
|
|
" #define FxaaFloat3 vec3\n"
|
|
" #define FxaaFloat4 vec4\n"
|
|
" #define FxaaDiscard discard\n"
|
|
" #define FxaaDot3(a, b) dot(a, b)\n"
|
|
" #define FxaaSat(x) clamp(x, 0.0, 1.0)\n"
|
|
" #define FxaaLerp(x,y,s) mix(x,y,s)\n"
|
|
" #define FxaaTex sampler2D\n"
|
|
" #define FxaaTexTop(t, p) textureLod(t, p, 0.0)\n"
|
|
" #define FxaaTexOff(t, p, o, r) textureLodOffset(t, p, 0.0, o)\n"
|
|
" #if (FXAA_GATHER4_ALPHA == 1)\n"
|
|
" // use #extension GL_ARB_gpu_shader5 : enable\n"
|
|
" #define FxaaTexAlpha4(t, p, r) textureGather(t, p, 3)\n"
|
|
" #define FxaaTexOffAlpha4(t, p, o, r) textureGatherOffset(t, p, o, 3)\n"
|
|
" #endif\n"
|
|
"#endif\n"
|
|
"/*--------------------------------------------------------------------------*/\n"
|
|
"#if (FXAA_HLSL_3 == 1) || (FXAA_360 == 1)\n"
|
|
" #define int2 float2\n"
|
|
" #define FxaaInt2 float2\n"
|
|
" #define FxaaFloat2 float2\n"
|
|
" #define FxaaFloat3 float3\n"
|
|
" #define FxaaFloat4 float4\n"
|
|
" #define FxaaDiscard clip(-1)\n"
|
|
" #define FxaaDot3(a, b) dot(a, b)\n"
|
|
" #define FxaaSat(x) saturate(x)\n"
|
|
" #define FxaaLerp(x,y,s) lerp(x,y,s)\n"
|
|
" #define FxaaTex sampler2D\n"
|
|
" #define FxaaTexTop(t, p) tex2Dlod(t, float4(p, 0.0, 0.0))\n"
|
|
" #define FxaaTexOff(t, p, o, r) tex2Dlod(t, float4(p + (o * r), 0, 0))\n"
|
|
"#endif\n"
|
|
"/*--------------------------------------------------------------------------*/\n"
|
|
"#if (FXAA_HLSL_4 == 1)\n"
|
|
" #define FxaaInt2 int2\n"
|
|
" #define FxaaFloat2 float2\n"
|
|
" #define FxaaFloat3 float3\n"
|
|
" #define FxaaFloat4 float4\n"
|
|
" #define FxaaDiscard clip(-1)\n"
|
|
" #define FxaaDot3(a, b) dot(a, b)\n"
|
|
" #define FxaaSat(x) saturate(x)\n"
|
|
" #define FxaaLerp(x,y,s) lerp(x,y,s)\n"
|
|
" struct FxaaTex { SamplerState smpl; Texture2D tex; };\n"
|
|
" #define FxaaTexTop(t, p) t.tex.SampleLevel(t.smpl, p, 0.0)\n"
|
|
" #define FxaaTexOff(t, p, o, r) t.tex.SampleLevel(t.smpl, p, 0.0, o)\n"
|
|
"#endif\n"
|
|
"/*--------------------------------------------------------------------------*/\n"
|
|
"#if (FXAA_HLSL_5 == 1)\n"
|
|
" #define FxaaInt2 int2\n"
|
|
" #define FxaaFloat2 float2\n"
|
|
" #define FxaaFloat3 float3\n"
|
|
" #define FxaaFloat4 float4\n"
|
|
" #define FxaaDiscard clip(-1)\n"
|
|
" #define FxaaDot3(a, b) dot(a, b)\n"
|
|
" #define FxaaSat(x) saturate(x)\n"
|
|
" #define FxaaLerp(x,y,s) lerp(x,y,s)\n"
|
|
" struct FxaaTex { SamplerState smpl; Texture2D tex; };\n"
|
|
" #define FxaaTexTop(t, p) t.tex.SampleLevel(t.smpl, p, 0.0)\n"
|
|
" #define FxaaTexOff(t, p, o, r) t.tex.SampleLevel(t.smpl, p, 0.0, o)\n"
|
|
" #define FxaaTexAlpha4(t, p, r) t.tex.GatherAlpha(t.smpl, p)\n"
|
|
" #define FxaaTexOffAlpha4(t, p, o, r) t.tex.GatherAlpha(t.smpl, p, o)\n"
|
|
"#endif\n"
|
|
"\n"
|
|
"\n"
|
|
"\n"
|
|
"/*============================================================================\n"
|
|
"\n"
|
|
" FXAA3 CONSOLE - 360 PIXEL SHADER\n"
|
|
"\n"
|
|
"------------------------------------------------------------------------------\n"
|
|
"Might be some optimizations left here,\n"
|
|
"as of this latest change didn't have a PIX dump to verify if TEX bound.\n"
|
|
"============================================================================*/\n"
|
|
"#if (FXAA_360 == 1)\n"
|
|
"/*--------------------------------------------------------------------------*/\n"
|
|
"half4 FxaaPixelShader(\n"
|
|
" // {xy} = center of pixel\n"
|
|
" float2 pos,\n"
|
|
" // {xy__} = upper left of pixel\n"
|
|
" // {__zw} = lower right of pixel\n"
|
|
" float4 posPos,\n"
|
|
" // {rgb_} = color in linear or perceptual color space\n"
|
|
" // {___a} = alpha output is junk value\n"
|
|
" FxaaTex tex,\n"
|
|
" // This must be from a constant/uniform.\n"
|
|
" // {xy} = rcpFrame not used on PC version of FXAA Console\n"
|
|
" float2 rcpFrame,\n"
|
|
" // This must be from a constant/uniform.\n"
|
|
" // {x___} = 2.0/screenWidthInPixels\n"
|
|
" // {_y__} = 2.0/screenHeightInPixels\n"
|
|
" // {__z_} = 0.5/screenWidthInPixels\n"
|
|
" // {___w} = 0.5/screenHeightInPixels\n"
|
|
" float4 rcpFrameOpt\n"
|
|
") {\n"
|
|
"/*--------------------------------------------------------------------------*/\n"
|
|
" half4 lumaNwNeSwSe;\n"
|
|
" lumaNwNeSwSe.x = FxaaTexTop(tex, posPos.xy).w;\n"
|
|
" lumaNwNeSwSe.y = FxaaTexTop(tex, posPos.zy).w;\n"
|
|
" lumaNwNeSwSe.z = FxaaTexTop(tex, posPos.xw).w;\n"
|
|
" lumaNwNeSwSe.w = FxaaTexTop(tex, posPos.zw).w;\n"
|
|
"/*--------------------------------------------------------------------------*/\n"
|
|
" half4 rgbyM = FxaaTexTop(tex, pos.xy);\n"
|
|
"/*--------------------------------------------------------------------------*/\n"
|
|
" lumaNwNeSwSe.y += 1.0/384.0;\n"
|
|
"/*--------------------------------------------------------------------------*/\n"
|
|
" half2 lumaMinTemp = min(lumaNwNeSwSe.xy, lumaNwNeSwSe.zw);\n"
|
|
" half2 lumaMaxTemp = max(lumaNwNeSwSe.xy, lumaNwNeSwSe.zw);\n"
|
|
"/*--------------------------------------------------------------------------*/\n"
|
|
" half lumaMin = min(lumaMinTemp.x, lumaMinTemp.y);\n"
|
|
" half lumaMax = max(lumaMaxTemp.x, lumaMaxTemp.y);\n"
|
|
"/*--------------------------------------------------------------------------*/\n"
|
|
" half lumaMinM = min(lumaMin, rgbyM.w);\n"
|
|
" half lumaMaxM = max(lumaMax, rgbyM.w);\n"
|
|
" if((lumaMaxM - lumaMinM) < max(FXAA_CONSOLE_EDGE_THRESHOLD_MIN, lumaMax * FXAA_CONSOLE_EDGE_THRESHOLD)) return rgbyM;\n"
|
|
"/*--------------------------------------------------------------------------*/\n"
|
|
" half2 dir;\n"
|
|
" dir.x = dot(lumaNwNeSwSe, float4(-1.0, -1.0, 1.0, 1.0));\n"
|
|
" dir.y = dot(lumaNwNeSwSe, float4( 1.0, -1.0, 1.0,-1.0));\n"
|
|
"/*--------------------------------------------------------------------------*/\n"
|
|
" half2 dir1;\n"
|
|
" dir1 = normalize(dir.xy);\n"
|
|
"/*--------------------------------------------------------------------------*/\n"
|
|
" half dirAbsMinTimesC = min(abs(dir1.x), abs(dir1.y)) * FXAA_CONSOLE_EDGE_SHARPNESS;\n"
|
|
" half2 dir2;\n"
|
|
" dir2 = clamp(dir1.xy / dirAbsMinTimesC, -2.0, 2.0);\n"
|
|
"/*--------------------------------------------------------------------------*/\n"
|
|
" half4 rgbyN1 = FxaaTexTop(tex, pos.xy - dir1 * rcpFrameOpt.zw);\n"
|
|
" half4 rgbyP1 = FxaaTexTop(tex, pos.xy + dir1 * rcpFrameOpt.zw);\n"
|
|
" half4 rgbyN2 = FxaaTexTop(tex, pos.xy - dir2 * rcpFrameOpt.xy);\n"
|
|
" half4 rgbyP2 = FxaaTexTop(tex, pos.xy + dir2 * rcpFrameOpt.xy);\n"
|
|
"/*--------------------------------------------------------------------------*/\n"
|
|
" half4 rgbyA = rgbyN1 * 0.5 + rgbyP1 * 0.5;\n"
|
|
" half4 rgbyB = rgbyN2 * 0.25 + rgbyP2 * 0.25 + rgbyA * 0.5;\n"
|
|
"/*--------------------------------------------------------------------------*/\n"
|
|
" bool twoTap = (rgbyB.w < lumaMin) || (rgbyB.w > lumaMax);\n"
|
|
" if(twoTap) rgbyB.xyz = rgbyA.xyz;\n"
|
|
" return rgbyB; }\n"
|
|
"/*==========================================================================*/\n"
|
|
"#endif\n"
|
|
"\n"
|
|
"\n"
|
|
"\n"
|
|
"/*============================================================================\n"
|
|
"\n"
|
|
" FXAA3 CONSOLE - 360 PIXEL SHADER OPTIMIZED PROTOTYPE\n"
|
|
"\n"
|
|
"------------------------------------------------------------------------------\n"
|
|
"This prototype optimized version thanks to suggestions from Andy Luedke.\n"
|
|
"Should be fully tex bound in all cases.\n"
|
|
"As of the FXAA 3.10 release I have not tested this code,\n"
|
|
"but at least the missing \";\" was fixed.\n"
|
|
"If it does not work, please let me know so I can fix it.\n"
|
|
"------------------------------------------------------------------------------\n"
|
|
"Extra requirements,\n"
|
|
"(1.) Different inputs: no posPos.\n"
|
|
"(2.) Different inputs: alias three samplers with different exp bias settings!\n"
|
|
"(3.) New constants: setup fxaaConst as described below.\n"
|
|
"============================================================================*/\n"
|
|
"#if (FXAA_360_OPT == 1)\n"
|
|
"/*--------------------------------------------------------------------------*/\n"
|
|
"[reduceTempRegUsage(4)]\n"
|
|
"float4 FxaaPixelShader(\n"
|
|
" // {xy} = center of pixel\n"
|
|
" float2 pos,\n"
|
|
" // Three samplers,\n"
|
|
" // texExpBias0 = exponent bias 0\n"
|
|
" // texExpBiasNeg1 = exponent bias -1\n"
|
|
" // texExpBiasNeg2 = exponent bias -2\n"
|
|
" // {rgb_} = color in linear or perceptual color space\n"
|
|
" // {___a} = alpha output is junk value\n"
|
|
" uniform sampler2D texExpBias0,\n"
|
|
" uniform sampler2D texExpBiasNeg1,\n"
|
|
" uniform sampler2D texExpBiasNeg2,\n"
|
|
" // These must be in physical constant registers and NOT immedates\n"
|
|
" // Immedates will result in compiler un-optimizing\n"
|
|
" // width = screen width in pixels\n"
|
|
" // height = screen height in pixels\n"
|
|
" fxaaConstDir, // float4(1.0, -1.0, 0.25, -0.25);\n"
|
|
" fxaaConstInner, // float4(0.5/width, 0.5/height, -0.5/width, -0.5/height);\n"
|
|
" fxaaConstOuter // float4(8.0/width, 8.0/height, -4.0/width, -4.0/height);\n"
|
|
") {\n"
|
|
"/*--------------------------------------------------------------------------*/\n"
|
|
" float4 lumaNwNeSwSe;\n"
|
|
" asm { \n"
|
|
" tfetch2D lumaNwNeSwSe.w___, texExpBias0, pos.xy, OffsetX = -0.5, OffsetY = -0.5, UseComputedLOD=false\n"
|
|
" tfetch2D lumaNwNeSwSe._w__, texExpBias0, pos.xy, OffsetX = 0.5, OffsetY = -0.5, UseComputedLOD=false\n"
|
|
" tfetch2D lumaNwNeSwSe.__w_, texExpBias0, pos.xy, OffsetX = -0.5, OffsetY = 0.5, UseComputedLOD=false\n"
|
|
" tfetch2D lumaNwNeSwSe.___w, texExpBias0, pos.xy, OffsetX = 0.5, OffsetY = 0.5, UseComputedLOD=false\n"
|
|
" };\n"
|
|
"/*--------------------------------------------------------------------------*/\n"
|
|
" lumaNwNeSwSe.y += 1.0/384.0;\n"
|
|
" float2 lumaMinTemp = min(lumaNwNeSwSe.xy, lumaNwNeSwSe.zw);\n"
|
|
" float2 lumaMaxTemp = max(lumaNwNeSwSe.xy, lumaNwNeSwSe.zw);\n"
|
|
" float lumaMin = min(lumaMinTemp.x, lumaMinTemp.y);\n"
|
|
" float lumaMax = max(lumaMaxTemp.x, lumaMaxTemp.y);\n"
|
|
"/*--------------------------------------------------------------------------*/\n"
|
|
" float4 rgbyM = tex2Dlod(texExpBias0, float4(pos.xy, 0.0, 0.0));\n"
|
|
" float4 lumaMinM = min(lumaMin, rgbyM.w);\n"
|
|
" float4 lumaMaxM = max(lumaMax, rgbyM.w);\n"
|
|
" if((lumaMaxM - lumaMinM) < max(FXAA_CONSOLE_EDGE_THRESHOLD_MIN, lumaMax * FXAA_CONSOLE_EDGE_THRESHOLD)) return rgbyM;\n"
|
|
"/*--------------------------------------------------------------------------*/\n"
|
|
" float2 dir;\n"
|
|
" dir.x = dot(lumaNwNeSwSe, fxaaConstDir.yyxx);\n"
|
|
" dir.y = dot(lumaNwNeSwSe, fxaaConstDir.xyxy);\n"
|
|
" dir = normalize(dir);\n"
|
|
"/*--------------------------------------------------------------------------*/\n"
|
|
" float4 dir1 = dir.xyxy * fxaaConstInner.xyzw;\n"
|
|
"/*--------------------------------------------------------------------------*/\n"
|
|
" float4 dir2;\n"
|
|
" float dirAbsMinTimesC = min(abs(dir.x), abs(dir.y));\n"
|
|
" dir2 = saturate(fxaaConstOuter.zzww * dir.xyxy / FXAA_CONSOLE_EDGE_SHARPNESS / dirAbsMinTimesC + 0.5);\n"
|
|
" dir2 = dir2 * fxaaConstOuter.xyxy + fxaaConstOuter.zwzw;\n"
|
|
"/*--------------------------------------------------------------------------*/\n"
|
|
" float4 rgbyN1 = tex2Dlod(texExpBiasNeg1, float4(pos.xy + dir1.xy, 0.0, 0.0));\n"
|
|
" float4 rgbyP1 = tex2Dlod(texExpBiasNeg1, float4(pos.xy + dir1.zw, 0.0, 0.0));\n"
|
|
" float4 rgbyN2 = tex2Dlod(texExpBiasNeg2, float4(pos.xy + dir2.xy, 0.0, 0.0));\n"
|
|
" float4 rgbyP2 = tex2Dlod(texExpBiasNeg2, float4(pos.xy + dir2.zw, 0.0, 0.0));\n"
|
|
"/*--------------------------------------------------------------------------*/\n"
|
|
" half4 rgbyA = rgbyN1 + rgbyP1;\n"
|
|
" half4 rgbyB = rgbyN2 + rgbyP2 * 0.5 + rgbyA;\n"
|
|
"/*--------------------------------------------------------------------------*/\n"
|
|
" float4 rgbyR = ((rgbyB.w - lumaMax) > 0.0) ? rgbyA : rgbyB;\n"
|
|
" rgbyR = ((rgbyB.w - lumaMin) > 0.0) ? rgbyR : rgbyA;\n"
|
|
" return rgbyR; }\n"
|
|
"/*==========================================================================*/\n"
|
|
"#endif\n"
|
|
"\n"
|
|
"\n"
|
|
"\n"
|
|
"\n"
|
|
"/*============================================================================\n"
|
|
"\n"
|
|
" FXAA3 CONSOLE - OPTIMIZED PS3 PIXEL SHADER (NO EARLY EXIT)\n"
|
|
"\n"
|
|
"==============================================================================\n"
|
|
"The code below does not exactly match the assembly.\n"
|
|
"I have a feeling that 12 cycles is possible, but was not able to get there.\n"
|
|
"Might have to increase register count to get full performance.\n"
|
|
"Note this shader does not use perspective interpolation.\n"
|
|
"\n"
|
|
"Use the following cgc options,\n"
|
|
"\n"
|
|
" --fenable-bx2 --fastmath --fastprecision --nofloatbindings\n"
|
|
"\n"
|
|
"------------------------------------------------------------------------------\n"
|
|
" NVSHADERPERF OUTPUT\n"
|
|
"------------------------------------------------------------------------------\n"
|
|
"For reference and to aid in debug, output of NVShaderPerf should match this,\n"
|
|
"\n"
|
|
"Shader to schedule:\n"
|
|
" 0: texpkb h0.w(TRUE), v5.zyxx, #0\n"
|
|
" 2: addh h2.z(TRUE), h0.w, constant(0.001953, 0.000000, 0.000000, 0.000000).x\n"
|
|
" 4: texpkb h0.w(TRUE), v5.xwxx, #0\n"
|
|
" 6: addh h0.z(TRUE), -h2, h0.w\n"
|
|
" 7: texpkb h1.w(TRUE), v5, #0\n"
|
|
" 9: addh h0.x(TRUE), h0.z, -h1.w\n"
|
|
" 10: addh h3.w(TRUE), h0.z, h1\n"
|
|
" 11: texpkb h2.w(TRUE), v5.zwzz, #0\n"
|
|
" 13: addh h0.z(TRUE), h3.w, -h2.w\n"
|
|
" 14: addh h0.x(TRUE), h2.w, h0\n"
|
|
" 15: nrmh h1.xz(TRUE), h0_n\n"
|
|
" 16: minh_m8 h0.x(TRUE), |h1|, |h1.z|\n"
|
|
" 17: maxh h4.w(TRUE), h0, h1\n"
|
|
" 18: divx h2.xy(TRUE), h1_n.xzzw, h0_n\n"
|
|
" 19: movr r1.zw(TRUE), v4.xxxy\n"
|
|
" 20: madr r2.xz(TRUE), -h1, constant(cConst5.x, cConst5.y, cConst5.z, cConst5.w).zzww, r1.zzww\n"
|
|
" 22: minh h5.w(TRUE), h0, h1\n"
|
|
" 23: texpkb h0(TRUE), r2.xzxx, #0\n"
|
|
" 25: madr r0.zw(TRUE), h1.xzxz, constant(cConst5.x, cConst5.y, cConst5.z, cConst5.w), r1\n"
|
|
" 27: maxh h4.x(TRUE), h2.z, h2.w\n"
|
|
" 28: texpkb h1(TRUE), r0.zwzz, #0\n"
|
|
" 30: addh_d2 h1(TRUE), h0, h1\n"
|
|
" 31: madr r0.xy(TRUE), -h2, constant(cConst5.x, cConst5.y, cConst5.z, cConst5.w).xyxx, r1.zwzz\n"
|
|
" 33: texpkb h0(TRUE), r0, #0\n"
|
|
" 35: minh h4.z(TRUE), h2, h2.w\n"
|
|
" 36: fenct TRUE\n"
|
|
" 37: madr r1.xy(TRUE), h2, constant(cConst5.x, cConst5.y, cConst5.z, cConst5.w).xyxx, r1.zwzz\n"
|
|
" 39: texpkb h2(TRUE), r1, #0\n"
|
|
" 41: addh_d2 h0(TRUE), h0, h2\n"
|
|
" 42: maxh h2.w(TRUE), h4, h4.x\n"
|
|
" 43: minh h2.x(TRUE), h5.w, h4.z\n"
|
|
" 44: addh_d2 h0(TRUE), h0, h1\n"
|
|
" 45: slth h2.x(TRUE), h0.w, h2\n"
|
|
" 46: sgth h2.w(TRUE), h0, h2\n"
|
|
" 47: movh h0(TRUE), h0\n"
|
|
" 48: addx.c0 rc(TRUE), h2, h2.w\n"
|
|
" 49: movh h0(c0.NE.x), h1\n"
|
|
"\n"
|
|
"IPU0 ------ Simplified schedule: --------\n"
|
|
"Pass | Unit | uOp | PC: Op\n"
|
|
"-----+--------+------+-------------------------\n"
|
|
" 1 | SCT0/1 | mov | 0: TXLr h0.w, g[TEX1].zyxx, const.xxxx, TEX0;\n"
|
|
" | TEX | txl | 0: TXLr h0.w, g[TEX1].zyxx, const.xxxx, TEX0;\n"
|
|
" | SCB1 | add | 2: ADDh h2.z, h0.--w-, const.--x-;\n"
|
|
" | | |\n"
|
|
" 2 | SCT0/1 | mov | 4: TXLr h0.w, g[TEX1].xwxx, const.xxxx, TEX0;\n"
|
|
" | TEX | txl | 4: TXLr h0.w, g[TEX1].xwxx, const.xxxx, TEX0;\n"
|
|
" | SCB1 | add | 6: ADDh h0.z,-h2, h0.--w-;\n"
|
|
" | | |\n"
|
|
" 3 | SCT0/1 | mov | 7: TXLr h1.w, g[TEX1], const.xxxx, TEX0;\n"
|
|
" | TEX | txl | 7: TXLr h1.w, g[TEX1], const.xxxx, TEX0;\n"
|
|
" | SCB0 | add | 9: ADDh h0.x, h0.z---,-h1.w---;\n"
|
|
" | SCB1 | add | 10: ADDh h3.w, h0.---z, h1;\n"
|
|
" | | |\n"
|
|
" 4 | SCT0/1 | mov | 11: TXLr h2.w, g[TEX1].zwzz, const.xxxx, TEX0;\n"
|
|
" | TEX | txl | 11: TXLr h2.w, g[TEX1].zwzz, const.xxxx, TEX0;\n"
|
|
" | SCB0 | add | 14: ADDh h0.x, h2.w---, h0;\n"
|
|
" | SCB1 | add | 13: ADDh h0.z, h3.--w-,-h2.--w-;\n"
|
|
" | | |\n"
|
|
" 5 | SCT1 | mov | 15: NRMh h1.xz, h0;\n"
|
|
" | SRB | nrm | 15: NRMh h1.xz, h0;\n"
|
|
" | SCB0 | min | 16: MINh*8 h0.x, |h1|, |h1.z---|;\n"
|
|
" | SCB1 | max | 17: MAXh h4.w, h0, h1;\n"
|
|
" | | |\n"
|
|
" 6 | SCT0 | div | 18: DIVx h2.xy, h1.xz--, h0;\n"
|
|
" | SCT1 | mov | 19: MOVr r1.zw, g[TEX0].--xy;\n"
|
|
" | SCB0 | mad | 20: MADr r2.xz,-h1, const.z-w-, r1.z-w-;\n"
|
|
" | SCB1 | min | 22: MINh h5.w, h0, h1;\n"
|
|
" | | |\n"
|
|
" 7 | SCT0/1 | mov | 23: TXLr h0, r2.xzxx, const.xxxx, TEX0;\n"
|
|
" | TEX | txl | 23: TXLr h0, r2.xzxx, const.xxxx, TEX0;\n"
|
|
" | SCB0 | max | 27: MAXh h4.x, h2.z---, h2.w---;\n"
|
|
" | SCB1 | mad | 25: MADr r0.zw, h1.--xz, const, r1;\n"
|
|
" | | |\n"
|
|
" 8 | SCT0/1 | mov | 28: TXLr h1, r0.zwzz, const.xxxx, TEX0;\n"
|
|
" | TEX | txl | 28: TXLr h1, r0.zwzz, const.xxxx, TEX0;\n"
|
|
" | SCB0/1 | add | 30: ADDh/2 h1, h0, h1;\n"
|
|
" | | |\n"
|
|
" 9 | SCT0 | mad | 31: MADr r0.xy,-h2, const.xy--, r1.zw--;\n"
|
|
" | SCT1 | mov | 33: TXLr h0, r0, const.zzzz, TEX0;\n"
|
|
" | TEX | txl | 33: TXLr h0, r0, const.zzzz, TEX0;\n"
|
|
" | SCB1 | min | 35: MINh h4.z, h2, h2.--w-;\n"
|
|
" | | |\n"
|
|
" 10 | SCT0 | mad | 37: MADr r1.xy, h2, const.xy--, r1.zw--;\n"
|
|
" | SCT1 | mov | 39: TXLr h2, r1, const.zzzz, TEX0;\n"
|
|
" | TEX | txl | 39: TXLr h2, r1, const.zzzz, TEX0;\n"
|
|
" | SCB0/1 | add | 41: ADDh/2 h0, h0, h2;\n"
|
|
" | | |\n"
|
|
" 11 | SCT0 | min | 43: MINh h2.x, h5.w---, h4.z---;\n"
|
|
" | SCT1 | max | 42: MAXh h2.w, h4, h4.---x;\n"
|
|
" | SCB0/1 | add | 44: ADDh/2 h0, h0, h1;\n"
|
|
" | | |\n"
|
|
" 12 | SCT0 | set | 45: SLTh h2.x, h0.w---, h2;\n"
|
|
" | SCT1 | set | 46: SGTh h2.w, h0, h2;\n"
|
|
" | SCB0/1 | mul | 47: MOVh h0, h0;\n"
|
|
" | | |\n"
|
|
" 13 | SCT0 | mad | 48: ADDxc0_s rc, h2, h2.w---;\n"
|
|
" | SCB0/1 | mul | 49: MOVh h0(NE0.xxxx), h1;\n"
|
|
" \n"
|
|
"Pass SCT TEX SCB\n"
|
|
" 1: 0\% 100\% 25\%\n"
|
|
" 2: 0\% 100\% 25\%\n"
|
|
" 3: 0\% 100\% 50\%\n"
|
|
" 4: 0\% 100\% 50\%\n"
|
|
" 5: 0\% 0\% 50\%\n"
|
|
" 6: 100\% 0\% 75\%\n"
|
|
" 7: 0\% 100\% 75\%\n"
|
|
" 8: 0\% 100\% 100\%\n"
|
|
" 9: 0\% 100\% 25\%\n"
|
|
" 10: 0\% 100\% 100\%\n"
|
|
" 11: 50\% 0\% 100\%\n"
|
|
" 12: 50\% 0\% 100\%\n"
|
|
" 13: 25\% 0\% 100\%\n"
|
|
"\n"
|
|
"MEAN: 17\% 61\% 67\%\n"
|
|
"\n"
|
|
"Pass SCT0 SCT1 TEX SCB0 SCB1\n"
|
|
" 1: 0\% 0\% 100\% 0\% 100\%\n"
|
|
" 2: 0\% 0\% 100\% 0\% 100\%\n"
|
|
" 3: 0\% 0\% 100\% 100\% 100\%\n"
|
|
" 4: 0\% 0\% 100\% 100\% 100\%\n"
|
|
" 5: 0\% 0\% 0\% 100\% 100\%\n"
|
|
" 6: 100\% 100\% 0\% 100\% 100\%\n"
|
|
" 7: 0\% 0\% 100\% 100\% 100\%\n"
|
|
" 8: 0\% 0\% 100\% 100\% 100\%\n"
|
|
" 9: 0\% 0\% 100\% 0\% 100\%\n"
|
|
" 10: 0\% 0\% 100\% 100\% 100\%\n"
|
|
" 11: 100\% 100\% 0\% 100\% 100\%\n"
|
|
" 12: 100\% 100\% 0\% 100\% 100\%\n"
|
|
" 13: 100\% 0\% 0\% 100\% 100\%\n"
|
|
"\n"
|
|
"MEAN: 30\% 23\% 61\% 76\% 100\%\n"
|
|
"Fragment Performance Setup: Driver RSX Compiler, GPU RSX, Flags 0x5\n"
|
|
"Results 13 cycles, 3 r regs, 923,076,923 pixels/s\n"
|
|
"============================================================================*/\n"
|
|
"#if (FXAA_PS3 == 1) && (FXAA_EARLY_EXIT == 0)\n"
|
|
"/*--------------------------------------------------------------------------*/\n"
|
|
"#pragma disablepc all\n"
|
|
"#pragma option O3\n"
|
|
"#pragma option OutColorPrec=fp16\n"
|
|
"#pragma texformat default RGBA8\n"
|
|
"/*==========================================================================*/\n"
|
|
"half4 FxaaPixelShader(\n"
|
|
" // {xy} = center of pixel\n"
|
|
" float2 pos,\n"
|
|
" // {xy__} = upper left of pixel\n"
|
|
" // {__zw} = lower right of pixel\n"
|
|
" float4 posPos,\n"
|
|
" // {rgb_} = color in linear or perceptual color space\n"
|
|
" // {___a} = luma in perceptual color space (not linear)\n"
|
|
" sampler2D tex,\n"
|
|
" // This must be from a constant/uniform.\n"
|
|
" // {xy} = rcpFrame not used on PS3\n"
|
|
" float2 rcpFrame,\n"
|
|
" // This must be from a constant/uniform.\n"
|
|
" // {x___} = 2.0/screenWidthInPixels\n"
|
|
" // {_y__} = 2.0/screenHeightInPixels\n"
|
|
" // {__z_} = 0.5/screenWidthInPixels\n"
|
|
" // {___w} = 0.5/screenHeightInPixels\n"
|
|
" float4 rcpFrameOpt\n"
|
|
") {\n"
|
|
"/*--------------------------------------------------------------------------*/\n"
|
|
"// (1)\n"
|
|
" half4 dir;\n"
|
|
" half4 lumaNe = h4tex2Dlod(tex, half4(posPos.zy, 0, 0));\n"
|
|
" lumaNe.w += half(1.0/512.0);\n"
|
|
" dir.x = -lumaNe.w;\n"
|
|
" dir.z = -lumaNe.w;\n"
|
|
"/*--------------------------------------------------------------------------*/\n"
|
|
"// (2)\n"
|
|
" half4 lumaSw = h4tex2Dlod(tex, half4(posPos.xw, 0, 0));\n"
|
|
" dir.x += lumaSw.w;\n"
|
|
" dir.z += lumaSw.w;\n"
|
|
"/*--------------------------------------------------------------------------*/\n"
|
|
"// (3)\n"
|
|
" half4 lumaNw = h4tex2Dlod(tex, half4(posPos.xy, 0, 0));\n"
|
|
" dir.x -= lumaNw.w;\n"
|
|
" dir.z += lumaNw.w;\n"
|
|
"/*--------------------------------------------------------------------------*/\n"
|
|
"// (4)\n"
|
|
" half4 lumaSe = h4tex2Dlod(tex, half4(posPos.zw, 0, 0));\n"
|
|
" dir.x += lumaSe.w;\n"
|
|
" dir.z -= lumaSe.w;\n"
|
|
"/*--------------------------------------------------------------------------*/\n"
|
|
"// (5)\n"
|
|
" half4 dir1_pos;\n"
|
|
" dir1_pos.xy = normalize(dir.xyz).xz;\n"
|
|
" half dirAbsMinTimesC = min(abs(dir1_pos.x), abs(dir1_pos.y)) * half(FXAA_CONSOLE_EDGE_SHARPNESS);\n"
|
|
"/*--------------------------------------------------------------------------*/\n"
|
|
"// (6)\n"
|
|
" half4 dir2_pos;\n"
|
|
" dir2_pos.xy = clamp(dir1_pos.xy / dirAbsMinTimesC, half(-2.0), half(2.0));\n"
|
|
" dir1_pos.zw = pos.xy;\n"
|
|
" dir2_pos.zw = pos.xy;\n"
|
|
" half4 temp1N;\n"
|
|
" temp1N.xy = dir1_pos.zw - dir1_pos.xy * rcpFrameOpt.zw;\n"
|
|
"/*--------------------------------------------------------------------------*/\n"
|
|
"// (7)\n"
|
|
" temp1N = h4tex2Dlod(tex, half4(temp1N.xy, 0.0, 0.0));\n"
|
|
" half4 rgby1;\n"
|
|
" rgby1.xy = dir1_pos.zw + dir1_pos.xy * rcpFrameOpt.zw;\n"
|
|
"/*--------------------------------------------------------------------------*/\n"
|
|
"// (8)\n"
|
|
" rgby1 = h4tex2Dlod(tex, half4(rgby1.xy, 0.0, 0.0));\n"
|
|
" rgby1 = (temp1N + rgby1) * 0.5;\n"
|
|
"/*--------------------------------------------------------------------------*/\n"
|
|
"// (9)\n"
|
|
" half4 temp2N;\n"
|
|
" temp2N.xy = dir2_pos.zw - dir2_pos.xy * rcpFrameOpt.xy;\n"
|
|
" temp2N = h4tex2Dlod(tex, half4(temp2N.xy, 0.0, 0.0));\n"
|
|
"/*--------------------------------------------------------------------------*/\n"
|
|
"// (10)\n"
|
|
" half4 rgby2;\n"
|
|
" rgby2.xy = dir2_pos.zw + dir2_pos.xy * rcpFrameOpt.xy;\n"
|
|
" rgby2 = h4tex2Dlod(tex, half4(rgby2.xy, 0.0, 0.0));\n"
|
|
" rgby2 = (temp2N + rgby2) * 0.5;\n"
|
|
"/*--------------------------------------------------------------------------*/\n"
|
|
"// (11)\n"
|
|
" // compilier moves these scalar ops up to other cycles\n"
|
|
" half lumaMin = min(min(lumaNw.w, lumaSw.w), min(lumaNe.w, lumaSe.w));\n"
|
|
" half lumaMax = max(max(lumaNw.w, lumaSw.w), max(lumaNe.w, lumaSe.w));\n"
|
|
" rgby2 = (rgby2 + rgby1) * 0.5;\n"
|
|
"/*--------------------------------------------------------------------------*/\n"
|
|
"// (12)\n"
|
|
" bool twoTapLt = rgby2.w < lumaMin;\n"
|
|
" bool twoTapGt = rgby2.w > lumaMax;\n"
|
|
"/*--------------------------------------------------------------------------*/\n"
|
|
"// (13)\n"
|
|
" if(twoTapLt || twoTapGt) rgby2 = rgby1;\n"
|
|
"/*--------------------------------------------------------------------------*/\n"
|
|
" return rgby2; }\n"
|
|
"/*==========================================================================*/\n"
|
|
"#endif\n"
|
|
"\n"
|
|
"\n"
|
|
"\n"
|
|
"/*============================================================================\n"
|
|
"\n"
|
|
" FXAA3 CONSOLE - OPTIMIZED PS3 PIXEL SHADER (WITH EARLY EXIT)\n"
|
|
"\n"
|
|
"==============================================================================\n"
|
|
"The code mostly matches the assembly.\n"
|
|
"I have a feeling that 14 cycles is possible, but was not able to get there.\n"
|
|
"Might have to increase register count to get full performance.\n"
|
|
"Note this shader does not use perspective interpolation.\n"
|
|
"\n"
|
|
"Use the following cgc options,\n"
|
|
"\n"
|
|
" --fenable-bx2 --fastmath --fastprecision --nofloatbindings\n"
|
|
"\n"
|
|
"------------------------------------------------------------------------------\n"
|
|
" NVSHADERPERF OUTPUT\n"
|
|
"------------------------------------------------------------------------------\n"
|
|
"For reference and to aid in debug, output of NVShaderPerf should match this,\n"
|
|
"\n"
|
|
"Shader to schedule:\n"
|
|
" 0: texpkb h0.w(TRUE), v5.zyxx, #0\n"
|
|
" 2: addh h2.y(TRUE), h0.w, constant(0.001953, 0.000000, 0.000000, 0.000000).x\n"
|
|
" 4: texpkb h1.w(TRUE), v5.xwxx, #0\n"
|
|
" 6: addh h0.x(TRUE), h1.w, -h2.y\n"
|
|
" 7: texpkb h2.w(TRUE), v5.zwzz, #0\n"
|
|
" 9: minh h4.w(TRUE), h2.y, h2\n"
|
|
" 10: maxh h5.x(TRUE), h2.y, h2.w\n"
|
|
" 11: texpkb h0.w(TRUE), v5, #0\n"
|
|
" 13: addh h3.w(TRUE), -h0, h0.x\n"
|
|
" 14: addh h0.x(TRUE), h0.w, h0\n"
|
|
" 15: addh h0.z(TRUE), -h2.w, h0.x\n"
|
|
" 16: addh h0.x(TRUE), h2.w, h3.w\n"
|
|
" 17: minh h5.y(TRUE), h0.w, h1.w\n"
|
|
" 18: nrmh h2.xz(TRUE), h0_n\n"
|
|
" 19: minh_m8 h2.w(TRUE), |h2.x|, |h2.z|\n"
|
|
" 20: divx h4.xy(TRUE), h2_n.xzzw, h2_n.w\n"
|
|
" 21: movr r1.zw(TRUE), v4.xxxy\n"
|
|
" 22: maxh h2.w(TRUE), h0, h1\n"
|
|
" 23: fenct TRUE\n"
|
|
" 24: madr r0.xy(TRUE), -h2.xzzw, constant(cConst5.x, cConst5.y, cConst5.z, cConst5.w).zwzz, r1.zwzz\n"
|
|
" 26: texpkb h0(TRUE), r0, #0\n"
|
|
" 28: maxh h5.x(TRUE), h2.w, h5\n"
|
|
" 29: minh h5.w(TRUE), h5.y, h4\n"
|
|
" 30: madr r1.xy(TRUE), h2.xzzw, constant(cConst5.x, cConst5.y, cConst5.z, cConst5.w).zwzz, r1.zwzz\n"
|
|
" 32: texpkb h2(TRUE), r1, #0\n"
|
|
" 34: addh_d2 h2(TRUE), h0, h2\n"
|
|
" 35: texpkb h1(TRUE), v4, #0\n"
|
|
" 37: maxh h5.y(TRUE), h5.x, h1.w\n"
|
|
" 38: minh h4.w(TRUE), h1, h5\n"
|
|
" 39: madr r0.xy(TRUE), -h4, constant(cConst5.x, cConst5.y, cConst5.z, cConst5.w).xyxx, r1.zwzz\n"
|
|
" 41: texpkb h0(TRUE), r0, #0\n"
|
|
" 43: addh_m8 h5.z(TRUE), h5.y, -h4.w\n"
|
|
" 44: madr r2.xy(TRUE), h4, constant(cConst5.x, cConst5.y, cConst5.z, cConst5.w).xyxx, r1.zwzz\n"
|
|
" 46: texpkb h3(TRUE), r2, #0\n"
|
|
" 48: addh_d2 h0(TRUE), h0, h3\n"
|
|
" 49: addh_d2 h3(TRUE), h0, h2\n"
|
|
" 50: movh h0(TRUE), h3\n"
|
|
" 51: slth h3.x(TRUE), h3.w, h5.w\n"
|
|
" 52: sgth h3.w(TRUE), h3, h5.x\n"
|
|
" 53: addx.c0 rc(TRUE), h3.x, h3\n"
|
|
" 54: slth.c0 rc(TRUE), h5.z, h5\n"
|
|
" 55: movh h0(c0.NE.w), h2\n"
|
|
" 56: movh h0(c0.NE.x), h1\n"
|
|
"\n"
|
|
"IPU0 ------ Simplified schedule: --------\n"
|
|
"Pass | Unit | uOp | PC: Op\n"
|
|
"-----+--------+------+-------------------------\n"
|
|
" 1 | SCT0/1 | mov | 0: TXLr h0.w, g[TEX1].zyxx, const.xxxx, TEX0;\n"
|
|
" | TEX | txl | 0: TXLr h0.w, g[TEX1].zyxx, const.xxxx, TEX0;\n"
|
|
" | SCB0 | add | 2: ADDh h2.y, h0.-w--, const.-x--;\n"
|
|
" | | |\n"
|
|
" 2 | SCT0/1 | mov | 4: TXLr h1.w, g[TEX1].xwxx, const.xxxx, TEX0;\n"
|
|
" | TEX | txl | 4: TXLr h1.w, g[TEX1].xwxx, const.xxxx, TEX0;\n"
|
|
" | SCB0 | add | 6: ADDh h0.x, h1.w---,-h2.y---;\n"
|
|
" | | |\n"
|
|
" 3 | SCT0/1 | mov | 7: TXLr h2.w, g[TEX1].zwzz, const.xxxx, TEX0;\n"
|
|
" | TEX | txl | 7: TXLr h2.w, g[TEX1].zwzz, const.xxxx, TEX0;\n"
|
|
" | SCB0 | max | 10: MAXh h5.x, h2.y---, h2.w---;\n"
|
|
" | SCB1 | min | 9: MINh h4.w, h2.---y, h2;\n"
|
|
" | | |\n"
|
|
" 4 | SCT0/1 | mov | 11: TXLr h0.w, g[TEX1], const.xxxx, TEX0;\n"
|
|
" | TEX | txl | 11: TXLr h0.w, g[TEX1], const.xxxx, TEX0;\n"
|
|
" | SCB0 | add | 14: ADDh h0.x, h0.w---, h0;\n"
|
|
" | SCB1 | add | 13: ADDh h3.w,-h0, h0.---x;\n"
|
|
" | | |\n"
|
|
" 5 | SCT0 | mad | 16: ADDh h0.x, h2.w---, h3.w---;\n"
|
|
" | SCT1 | mad | 15: ADDh h0.z,-h2.--w-, h0.--x-;\n"
|
|
" | SCB0 | min | 17: MINh h5.y, h0.-w--, h1.-w--;\n"
|
|
" | | |\n"
|
|
" 6 | SCT1 | mov | 18: NRMh h2.xz, h0;\n"
|
|
" | SRB | nrm | 18: NRMh h2.xz, h0;\n"
|
|
" | SCB1 | min | 19: MINh*8 h2.w, |h2.---x|, |h2.---z|;\n"
|
|
" | | |\n"
|
|
" 7 | SCT0 | div | 20: DIVx h4.xy, h2.xz--, h2.ww--;\n"
|
|
" | SCT1 | mov | 21: MOVr r1.zw, g[TEX0].--xy;\n"
|
|
" | SCB1 | max | 22: MAXh h2.w, h0, h1;\n"
|
|
" | | |\n"
|
|
" 8 | SCT0 | mad | 24: MADr r0.xy,-h2.xz--, const.zw--, r1.zw--;\n"
|
|
" | SCT1 | mov | 26: TXLr h0, r0, const.xxxx, TEX0;\n"
|
|
" | TEX | txl | 26: TXLr h0, r0, const.xxxx, TEX0;\n"
|
|
" | SCB0 | max | 28: MAXh h5.x, h2.w---, h5;\n"
|
|
" | SCB1 | min | 29: MINh h5.w, h5.---y, h4;\n"
|
|
" | | |\n"
|
|
" 9 | SCT0 | mad | 30: MADr r1.xy, h2.xz--, const.zw--, r1.zw--;\n"
|
|
" | SCT1 | mov | 32: TXLr h2, r1, const.xxxx, TEX0;\n"
|
|
" | TEX | txl | 32: TXLr h2, r1, const.xxxx, TEX0;\n"
|
|
" | SCB0/1 | add | 34: ADDh/2 h2, h0, h2;\n"
|
|
" | | |\n"
|
|
" 10 | SCT0/1 | mov | 35: TXLr h1, g[TEX0], const.xxxx, TEX0;\n"
|
|
" | TEX | txl | 35: TXLr h1, g[TEX0], const.xxxx, TEX0;\n"
|
|
" | SCB0 | max | 37: MAXh h5.y, h5.-x--, h1.-w--;\n"
|
|
" | SCB1 | min | 38: MINh h4.w, h1, h5;\n"
|
|
" | | |\n"
|
|
" 11 | SCT0 | mad | 39: MADr r0.xy,-h4, const.xy--, r1.zw--;\n"
|
|
" | SCT1 | mov | 41: TXLr h0, r0, const.zzzz, TEX0;\n"
|
|
" | TEX | txl | 41: TXLr h0, r0, const.zzzz, TEX0;\n"
|
|
" | SCB0 | mad | 44: MADr r2.xy, h4, const.xy--, r1.zw--;\n"
|
|
" | SCB1 | add | 43: ADDh*8 h5.z, h5.--y-,-h4.--w-;\n"
|
|
" | | |\n"
|
|
" 12 | SCT0/1 | mov | 46: TXLr h3, r2, const.xxxx, TEX0;\n"
|
|
" | TEX | txl | 46: TXLr h3, r2, const.xxxx, TEX0;\n"
|
|
" | SCB0/1 | add | 48: ADDh/2 h0, h0, h3;\n"
|
|
" | | |\n"
|
|
" 13 | SCT0/1 | mad | 49: ADDh/2 h3, h0, h2;\n"
|
|
" | SCB0/1 | mul | 50: MOVh h0, h3;\n"
|
|
" | | |\n"
|
|
" 14 | SCT0 | set | 51: SLTh h3.x, h3.w---, h5.w---;\n"
|
|
" | SCT1 | set | 52: SGTh h3.w, h3, h5.---x;\n"
|
|
" | SCB0 | set | 54: SLThc0 rc, h5.z---, h5;\n"
|
|
" | SCB1 | add | 53: ADDxc0_s rc, h3.---x, h3;\n"
|
|
" | | |\n"
|
|
" 15 | SCT0/1 | mul | 55: MOVh h0(NE0.wwww), h2;\n"
|
|
" | SCB0/1 | mul | 56: MOVh h0(NE0.xxxx), h1;\n"
|
|
" \n"
|
|
"Pass SCT TEX SCB\n"
|
|
" 1: 0\% 100\% 25\%\n"
|
|
" 2: 0\% 100\% 25\%\n"
|
|
" 3: 0\% 100\% 50\%\n"
|
|
" 4: 0\% 100\% 50\%\n"
|
|
" 5: 50\% 0\% 25\%\n"
|
|
" 6: 0\% 0\% 25\%\n"
|
|
" 7: 100\% 0\% 25\%\n"
|
|
" 8: 0\% 100\% 50\%\n"
|
|
" 9: 0\% 100\% 100\%\n"
|
|
" 10: 0\% 100\% 50\%\n"
|
|
" 11: 0\% 100\% 75\%\n"
|
|
" 12: 0\% 100\% 100\%\n"
|
|
" 13: 100\% 0\% 100\%\n"
|
|
" 14: 50\% 0\% 50\%\n"
|
|
" 15: 100\% 0\% 100\%\n"
|
|
"\n"
|
|
"MEAN: 26\% 60\% 56\%\n"
|
|
"\n"
|
|
"Pass SCT0 SCT1 TEX SCB0 SCB1\n"
|
|
" 1: 0\% 0\% 100\% 100\% 0\%\n"
|
|
" 2: 0\% 0\% 100\% 100\% 0\%\n"
|
|
" 3: 0\% 0\% 100\% 100\% 100\%\n"
|
|
" 4: 0\% 0\% 100\% 100\% 100\%\n"
|
|
" 5: 100\% 100\% 0\% 100\% 0\%\n"
|
|
" 6: 0\% 0\% 0\% 0\% 100\%\n"
|
|
" 7: 100\% 100\% 0\% 0\% 100\%\n"
|
|
" 8: 0\% 0\% 100\% 100\% 100\%\n"
|
|
" 9: 0\% 0\% 100\% 100\% 100\%\n"
|
|
" 10: 0\% 0\% 100\% 100\% 100\%\n"
|
|
" 11: 0\% 0\% 100\% 100\% 100\%\n"
|
|
" 12: 0\% 0\% 100\% 100\% 100\%\n"
|
|
" 13: 100\% 100\% 0\% 100\% 100\%\n"
|
|
" 14: 100\% 100\% 0\% 100\% 100\%\n"
|
|
" 15: 100\% 100\% 0\% 100\% 100\%\n"
|
|
"\n"
|
|
"MEAN: 33\% 33\% 60\% 86\% 80\%\n"
|
|
"Fragment Performance Setup: Driver RSX Compiler, GPU RSX, Flags 0x5\n"
|
|
"Results 15 cycles, 3 r regs, 800,000,000 pixels/s\n"
|
|
"============================================================================*/\n"
|
|
"#if (FXAA_PS3 == 1) && (FXAA_EARLY_EXIT == 1)\n"
|
|
"/*--------------------------------------------------------------------------*/\n"
|
|
"#pragma disablepc all\n"
|
|
"#pragma option O2\n"
|
|
"#pragma option OutColorPrec=fp16\n"
|
|
"#pragma texformat default RGBA8\n"
|
|
"/*==========================================================================*/\n"
|
|
"half4 FxaaPixelShader(\n"
|
|
" // {xy} = center of pixel\n"
|
|
" float2 pos,\n"
|
|
" // {xy__} = upper left of pixel\n"
|
|
" // {__zw} = lower right of pixel\n"
|
|
" float4 posPos,\n"
|
|
" // {rgb_} = color in linear or perceptual color space\n"
|
|
" // {___a} = luma in perceptual color space (not linear)\n"
|
|
" sampler2D tex,\n"
|
|
" // This must be from a constant/uniform.\n"
|
|
" // {xy} = rcpFrame not used on PS3\n"
|
|
" float2 rcpFrame,\n"
|
|
" // This must be from a constant/uniform.\n"
|
|
" // {x___} = 2.0/screenWidthInPixels\n"
|
|
" // {_y__} = 2.0/screenHeightInPixels\n"
|
|
" // {__z_} = 0.5/screenWidthInPixels\n"
|
|
" // {___w} = 0.5/screenHeightInPixels\n"
|
|
" float4 rcpFrameOpt\n"
|
|
") {\n"
|
|
"/*--------------------------------------------------------------------------*/\n"
|
|
"// (1)\n"
|
|
" half4 rgbyNe = h4tex2Dlod(tex, half4(posPos.zy, 0, 0));\n"
|
|
" half lumaNe = rgbyNe.w + half(1.0/512.0);\n"
|
|
"/*--------------------------------------------------------------------------*/\n"
|
|
"// (2)\n"
|
|
" half4 lumaSw = h4tex2Dlod(tex, half4(posPos.xw, 0, 0));\n"
|
|
" half lumaSwNegNe = lumaSw.w - lumaNe;\n"
|
|
"/*--------------------------------------------------------------------------*/\n"
|
|
"// (3)\n"
|
|
" half4 lumaNw = h4tex2Dlod(tex, half4(posPos.xy, 0, 0));\n"
|
|
" half lumaMaxNwSw = max(lumaNw.w, lumaSw.w);\n"
|
|
" half lumaMinNwSw = min(lumaNw.w, lumaSw.w);\n"
|
|
"/*--------------------------------------------------------------------------*/\n"
|
|
"// (4)\n"
|
|
" half4 lumaSe = h4tex2Dlod(tex, half4(posPos.zw, 0, 0));\n"
|
|
" half dirZ = lumaNw.w + lumaSwNegNe;\n"
|
|
" half dirX = -lumaNw.w + lumaSwNegNe;\n"
|
|
"/*--------------------------------------------------------------------------*/\n"
|
|
"// (5)\n"
|
|
" half3 dir;\n"
|
|
" dir.y = 0.0;\n"
|
|
" dir.x = lumaSe.w + dirX;\n"
|
|
" dir.z = -lumaSe.w + dirZ;\n"
|
|
" half lumaMinNeSe = min(lumaNe, lumaSe.w);\n"
|
|
"/*--------------------------------------------------------------------------*/\n"
|
|
"// (6)\n"
|
|
" half4 dir1_pos;\n"
|
|
" dir1_pos.xy = normalize(dir).xz;\n"
|
|
" half dirAbsMinTimes8 = min(abs(dir1_pos.x), abs(dir1_pos.y)) * half(FXAA_CONSOLE_EDGE_SHARPNESS);\n"
|
|
"/*--------------------------------------------------------------------------*/\n"
|
|
"// (7)\n"
|
|
" half4 dir2_pos;\n"
|
|
" dir2_pos.xy = clamp(dir1_pos.xy / dirAbsMinTimes8, half(-2.0), half(2.0));\n"
|
|
" dir1_pos.zw = pos.xy;\n"
|
|
" dir2_pos.zw = pos.xy;\n"
|
|
" half lumaMaxNeSe = max(lumaNe, lumaSe.w);\n"
|
|
"/*--------------------------------------------------------------------------*/\n"
|
|
"// (8)\n"
|
|
" half4 temp1N;\n"
|
|
" temp1N.xy = dir1_pos.zw - dir1_pos.xy * rcpFrameOpt.zw;\n"
|
|
" temp1N = h4tex2Dlod(tex, half4(temp1N.xy, 0.0, 0.0));\n"
|
|
" half lumaMax = max(lumaMaxNwSw, lumaMaxNeSe);\n"
|
|
" half lumaMin = min(lumaMinNwSw, lumaMinNeSe);\n"
|
|
"/*--------------------------------------------------------------------------*/\n"
|
|
"// (9)\n"
|
|
" half4 rgby1;\n"
|
|
" rgby1.xy = dir1_pos.zw + dir1_pos.xy * rcpFrameOpt.zw;\n"
|
|
" rgby1 = h4tex2Dlod(tex, half4(rgby1.xy, 0.0, 0.0));\n"
|
|
" rgby1 = (temp1N + rgby1) * 0.5;\n"
|
|
"/*--------------------------------------------------------------------------*/\n"
|
|
"// (10)\n"
|
|
" half4 rgbyM = h4tex2Dlod(tex, half4(pos.xy, 0.0, 0.0));\n"
|
|
" half lumaMaxM = max(lumaMax, rgbyM.w);\n"
|
|
" half lumaMinM = min(lumaMin, rgbyM.w);\n"
|
|
"/*--------------------------------------------------------------------------*/\n"
|
|
"// (11)\n"
|
|
" half4 temp2N;\n"
|
|
" temp2N.xy = dir2_pos.zw - dir2_pos.xy * rcpFrameOpt.xy;\n"
|
|
" temp2N = h4tex2Dlod(tex, half4(temp2N.xy, 0.0, 0.0));\n"
|
|
" half4 rgby2;\n"
|
|
" rgby2.xy = dir2_pos.zw + dir2_pos.xy * rcpFrameOpt.xy;\n"
|
|
" half lumaRangeM = (lumaMaxM - lumaMinM) / FXAA_CONSOLE_EDGE_THRESHOLD;\n"
|
|
"/*--------------------------------------------------------------------------*/\n"
|
|
"// (12)\n"
|
|
" rgby2 = h4tex2Dlod(tex, half4(rgby2.xy, 0.0, 0.0));\n"
|
|
" rgby2 = (temp2N + rgby2) * 0.5;\n"
|
|
"/*--------------------------------------------------------------------------*/\n"
|
|
"// (13)\n"
|
|
" rgby2 = (rgby2 + rgby1) * 0.5;\n"
|
|
"/*--------------------------------------------------------------------------*/\n"
|
|
"// (14)\n"
|
|
" bool twoTapLt = rgby2.w < lumaMin;\n"
|
|
" bool twoTapGt = rgby2.w > lumaMax;\n"
|
|
" bool earlyExit = lumaRangeM < lumaMax;\n"
|
|
" bool twoTap = twoTapLt || twoTapGt;\n"
|
|
"/*--------------------------------------------------------------------------*/\n"
|
|
"// (15)\n"
|
|
" if(twoTap) rgby2 = rgby1;\n"
|
|
" if(earlyExit) rgby2 = rgbyM;\n"
|
|
"/*--------------------------------------------------------------------------*/\n"
|
|
" return rgby2; }\n"
|
|
"/*==========================================================================*/\n"
|
|
"#endif\n"
|
|
"\n"
|
|
"\n"
|
|
"\n"
|
|
"/*============================================================================\n"
|
|
"\n"
|
|
" FXAA3 CONSOLE - PC PIXEL SHADER\n"
|
|
"\n"
|
|
"------------------------------------------------------------------------------\n"
|
|
"Using a modified version of the PS3 version here to best target old hardware.\n"
|
|
"============================================================================*/\n"
|
|
"#if (FXAA_PC_CONSOLE == 1)\n"
|
|
"/*--------------------------------------------------------------------------*/\n"
|
|
"half4 FxaaPixelShader(\n"
|
|
" // {xy} = center of pixel\n"
|
|
" float2 pos,\n"
|
|
" // {xy__} = upper left of pixel\n"
|
|
" // {__zw} = lower right of pixel\n"
|
|
" float4 posPos,\n"
|
|
" // {rgb_} = color in linear or perceptual color space\n"
|
|
" // {___a} = alpha output is junk value\n"
|
|
" FxaaTex tex,\n"
|
|
" // This must be from a constant/uniform.\n"
|
|
" // {xy} = rcpFrame not used on PC version of FXAA Console\n"
|
|
" float2 rcpFrame,\n"
|
|
" // This must be from a constant/uniform.\n"
|
|
" // {x___} = 2.0/screenWidthInPixels\n"
|
|
" // {_y__} = 2.0/screenHeightInPixels\n"
|
|
" // {__z_} = 0.5/screenWidthInPixels\n"
|
|
" // {___w} = 0.5/screenHeightInPixels\n"
|
|
" float4 rcpFrameOpt\n"
|
|
") {\n"
|
|
"/*--------------------------------------------------------------------------*/\n"
|
|
" half4 dir;\n"
|
|
" dir.y = 0.0;\n"
|
|
" half4 lumaNe = FxaaTexTop(tex, posPos.zy);\n"
|
|
" lumaNe.w += half(1.0/384.0);\n"
|
|
" dir.x = -lumaNe.w;\n"
|
|
" dir.z = -lumaNe.w;\n"
|
|
"/*--------------------------------------------------------------------------*/\n"
|
|
" half4 lumaSw = FxaaTexTop(tex, posPos.xw);\n"
|
|
" dir.x += lumaSw.w;\n"
|
|
" dir.z += lumaSw.w;\n"
|
|
"/*--------------------------------------------------------------------------*/\n"
|
|
" half4 lumaNw = FxaaTexTop(tex, posPos.xy);\n"
|
|
" dir.x -= lumaNw.w;\n"
|
|
" dir.z += lumaNw.w;\n"
|
|
"/*--------------------------------------------------------------------------*/\n"
|
|
" half4 lumaSe = FxaaTexTop(tex, posPos.zw);\n"
|
|
" dir.x += lumaSe.w;\n"
|
|
" dir.z -= lumaSe.w;\n"
|
|
"/*==========================================================================*/\n"
|
|
" #if (FXAA_EARLY_EXIT == 1)\n"
|
|
" half4 rgbyM = FxaaTexTop(tex, pos.xy);\n"
|
|
"/*--------------------------------------------------------------------------*/\n"
|
|
" half lumaMin = min(min(lumaNw.w, lumaSw.w), min(lumaNe.w, lumaSe.w));\n"
|
|
" half lumaMax = max(max(lumaNw.w, lumaSw.w), max(lumaNe.w, lumaSe.w));\n"
|
|
"/*--------------------------------------------------------------------------*/\n"
|
|
" half lumaMinM = min(lumaMin, rgbyM.w);\n"
|
|
" half lumaMaxM = max(lumaMax, rgbyM.w);\n"
|
|
"/*--------------------------------------------------------------------------*/\n"
|
|
" if((lumaMaxM - lumaMinM) < max(FXAA_CONSOLE_EDGE_THRESHOLD_MIN, lumaMax * FXAA_CONSOLE_EDGE_THRESHOLD))\n"
|
|
" #if (FXAA_DISCARD == 1)\n"
|
|
" FxaaDiscard;\n"
|
|
" #else\n"
|
|
" return rgbyM;\n"
|
|
" #endif\n"
|
|
" #endif\n"
|
|
"/*==========================================================================*/\n"
|
|
" half4 dir1_pos;\n"
|
|
" dir1_pos.xy = normalize(dir.xyz).xz;\n"
|
|
" half dirAbsMinTimesC = min(abs(dir1_pos.x), abs(dir1_pos.y)) * half(FXAA_CONSOLE_EDGE_SHARPNESS);\n"
|
|
"/*--------------------------------------------------------------------------*/\n"
|
|
" half4 dir2_pos;\n"
|
|
" dir2_pos.xy = clamp(dir1_pos.xy / dirAbsMinTimesC, half(-2.0), half(2.0));\n"
|
|
" dir1_pos.zw = pos.xy;\n"
|
|
" dir2_pos.zw = pos.xy;\n"
|
|
" half4 temp1N;\n"
|
|
" temp1N.xy = dir1_pos.zw - dir1_pos.xy * rcpFrameOpt.zw;\n"
|
|
"/*--------------------------------------------------------------------------*/\n"
|
|
" temp1N = FxaaTexTop(tex, temp1N.xy);\n"
|
|
" half4 rgby1;\n"
|
|
" rgby1.xy = dir1_pos.zw + dir1_pos.xy * rcpFrameOpt.zw;\n"
|
|
"/*--------------------------------------------------------------------------*/\n"
|
|
" rgby1 = FxaaTexTop(tex, rgby1.xy);\n"
|
|
" rgby1 = (temp1N + rgby1) * 0.5;\n"
|
|
"/*--------------------------------------------------------------------------*/\n"
|
|
" half4 temp2N;\n"
|
|
" temp2N.xy = dir2_pos.zw - dir2_pos.xy * rcpFrameOpt.xy;\n"
|
|
" temp2N = FxaaTexTop(tex, temp2N.xy);\n"
|
|
"/*--------------------------------------------------------------------------*/\n"
|
|
" half4 rgby2;\n"
|
|
" rgby2.xy = dir2_pos.zw + dir2_pos.xy * rcpFrameOpt.xy;\n"
|
|
" rgby2 = FxaaTexTop(tex, rgby2.xy);\n"
|
|
" rgby2 = (temp2N + rgby2) * 0.5;\n"
|
|
"/*--------------------------------------------------------------------------*/\n"
|
|
" #if (FXAA_EARLY_EXIT == 0)\n"
|
|
" half lumaMin = min(min(lumaNw.w, lumaSw.w), min(lumaNe.w, lumaSe.w));\n"
|
|
" half lumaMax = max(max(lumaNw.w, lumaSw.w), max(lumaNe.w, lumaSe.w));\n"
|
|
" #endif\n"
|
|
" rgby2 = (rgby2 + rgby1) * 0.5;\n"
|
|
"/*--------------------------------------------------------------------------*/\n"
|
|
" bool twoTapLt = rgby2.w < lumaMin;\n"
|
|
" bool twoTapGt = rgby2.w > lumaMax;\n"
|
|
"/*--------------------------------------------------------------------------*/\n"
|
|
" if(twoTapLt || twoTapGt) rgby2 = rgby1;\n"
|
|
"/*--------------------------------------------------------------------------*/\n"
|
|
" return rgby2; }\n"
|
|
"/*==========================================================================*/\n"
|
|
"#endif\n"
|
|
"\n"
|
|
"\n"
|
|
"\n"
|
|
"/*============================================================================\n"
|
|
"\n"
|
|
" FXAA3 QUALITY - PC\n"
|
|
"\n"
|
|
"============================================================================*/\n"
|
|
"#if (FXAA_PC == 1)\n"
|
|
"/*--------------------------------------------------------------------------*/\n"
|
|
"float4 FxaaPixelShader(\n"
|
|
" // {xy} = center of pixel\n"
|
|
" float2 pos,\n"
|
|
" // {xyzw} = not used on FXAA3 Quality\n"
|
|
" float4 posPos,\n"
|
|
" // {rgb_} = color in linear or perceptual color space\n"
|
|
" // {___a} = luma in perceptual color space (not linear)\n"
|
|
" FxaaTex tex,\n"
|
|
" // This must be from a constant/uniform.\n"
|
|
" // {x_} = 1.0/screenWidthInPixels\n"
|
|
" // {_y} = 1.0/screenHeightInPixels\n"
|
|
" float2 rcpFrame,\n"
|
|
" // {xyzw} = not used on FXAA3 Quality\n"
|
|
" float4 rcpFrameOpt\n"
|
|
") {\n"
|
|
"/*--------------------------------------------------------------------------*/\n"
|
|
" float2 posM;\n"
|
|
" posM.x = pos.x;\n"
|
|
" posM.y = pos.y;\n"
|
|
" #if (FXAA_GATHER4_ALPHA == 1)\n"
|
|
" #if (FXAA_DISCARD == 0)\n"
|
|
" float4 rgbyM = FxaaTexTop(tex, posM);\n"
|
|
" #define lumaM rgbyM.w\n"
|
|
" #endif\n"
|
|
" float4 luma4A = FxaaTexAlpha4(tex, posM, rcpFrame.xy);\n"
|
|
" float4 luma4B = FxaaTexOffAlpha4(tex, posM, FxaaInt2(-1, -1), rcpFrame.xy);\n"
|
|
" #if (FXAA_DISCARD == 1)\n"
|
|
" #define lumaM luma4A.w\n"
|
|
" #endif\n"
|
|
" #define lumaE luma4A.z\n"
|
|
" #define lumaS luma4A.x\n"
|
|
" #define lumaSE luma4A.y\n"
|
|
" #define lumaNW luma4B.w\n"
|
|
" #define lumaN luma4B.z\n"
|
|
" #define lumaW luma4B.x\n"
|
|
" #else\n"
|
|
" float4 rgbyM = FxaaTexTop(tex, posM);\n"
|
|
" #define lumaM rgbyM.w\n"
|
|
" float lumaS = FxaaTexOff(tex, posM, FxaaInt2( 0, 1), rcpFrame.xy).w;\n"
|
|
" float lumaE = FxaaTexOff(tex, posM, FxaaInt2( 1, 0), rcpFrame.xy).w;\n"
|
|
" float lumaN = FxaaTexOff(tex, posM, FxaaInt2( 0,-1), rcpFrame.xy).w;\n"
|
|
" float lumaW = FxaaTexOff(tex, posM, FxaaInt2(-1, 0), rcpFrame.xy).w;\n"
|
|
" #endif\n"
|
|
"/*--------------------------------------------------------------------------*/\n"
|
|
" float maxSM = max(lumaS, lumaM);\n"
|
|
" float minSM = min(lumaS, lumaM);\n"
|
|
" float maxESM = max(lumaE, maxSM);\n"
|
|
" float minESM = min(lumaE, minSM);\n"
|
|
" float maxWN = max(lumaN, lumaW);\n"
|
|
" float minWN = min(lumaN, lumaW);\n"
|
|
" float rangeMax = max(maxWN, maxESM);\n"
|
|
" float rangeMin = min(minWN, minESM);\n"
|
|
" float rangeMaxScaled = rangeMax * FXAA_QUALITY_EDGE_THRESHOLD;\n"
|
|
" float range = rangeMax - rangeMin;\n"
|
|
" float rangeMaxClamped = max(FXAA_QUALITY_EDGE_THRESHOLD_MIN, rangeMaxScaled);\n"
|
|
" bool earlyExit = range < rangeMaxClamped;\n"
|
|
"/*--------------------------------------------------------------------------*/\n"
|
|
" if(earlyExit)\n"
|
|
" #if (FXAA_DISCARD == 1)\n"
|
|
" FxaaDiscard;\n"
|
|
" #else\n"
|
|
" return rgbyM;\n"
|
|
" #endif\n"
|
|
"/*--------------------------------------------------------------------------*/\n"
|
|
" #if (FXAA_GATHER4_ALPHA == 0)\n"
|
|
" float lumaNW = FxaaTexOff(tex, posM, FxaaInt2(-1,-1), rcpFrame.xy).w;\n"
|
|
" float lumaSE = FxaaTexOff(tex, posM, FxaaInt2( 1, 1), rcpFrame.xy).w;\n"
|
|
" float lumaNE = FxaaTexOff(tex, posM, FxaaInt2( 1,-1), rcpFrame.xy).w;\n"
|
|
" float lumaSW = FxaaTexOff(tex, posM, FxaaInt2(-1, 1), rcpFrame.xy).w;\n"
|
|
" #else\n"
|
|
" float lumaNE = FxaaTexOff(tex, posM, FxaaInt2(1, -1), rcpFrame.xy).w;\n"
|
|
" float lumaSW = FxaaTexOff(tex, posM, FxaaInt2(-1, 1), rcpFrame.xy).w;\n"
|
|
" #endif\n"
|
|
"/*--------------------------------------------------------------------------*/\n"
|
|
" float lumaNS = lumaN + lumaS;\n"
|
|
" float lumaWE = lumaW + lumaE;\n"
|
|
" float subpixRcpRange = 1.0/range;\n"
|
|
" float subpixNSWE = lumaNS + lumaWE;\n"
|
|
" float edgeHorz1 = (-2.0 * lumaM) + lumaNS;\n"
|
|
" float edgeVert1 = (-2.0 * lumaM) + lumaWE;\n"
|
|
"/*--------------------------------------------------------------------------*/\n"
|
|
" float lumaNESE = lumaNE + lumaSE;\n"
|
|
" float lumaNWNE = lumaNW + lumaNE;\n"
|
|
" float edgeHorz2 = (-2.0 * lumaE) + lumaNESE;\n"
|
|
" float edgeVert2 = (-2.0 * lumaN) + lumaNWNE;\n"
|
|
"/*--------------------------------------------------------------------------*/\n"
|
|
" float lumaNWSW = lumaNW + lumaSW;\n"
|
|
" float lumaSWSE = lumaSW + lumaSE;\n"
|
|
" float edgeHorz4 = (abs(edgeHorz1) * 2.0) + abs(edgeHorz2);\n"
|
|
" float edgeVert4 = (abs(edgeVert1) * 2.0) + abs(edgeVert2);\n"
|
|
" float edgeHorz3 = (-2.0 * lumaW) + lumaNWSW;\n"
|
|
" float edgeVert3 = (-2.0 * lumaS) + lumaSWSE;\n"
|
|
" float edgeHorz = abs(edgeHorz3) + edgeHorz4;\n"
|
|
" float edgeVert = abs(edgeVert3) + edgeVert4;\n"
|
|
"/*--------------------------------------------------------------------------*/\n"
|
|
" float subpixNWSWNESE = lumaNWSW + lumaNESE;\n"
|
|
" float lengthSign = rcpFrame.x;\n"
|
|
" bool horzSpan = edgeHorz >= edgeVert;\n"
|
|
" float subpixA = subpixNSWE * 2.0 + subpixNWSWNESE;\n"
|
|
"/*--------------------------------------------------------------------------*/\n"
|
|
" if(!horzSpan) lumaN = lumaW;\n"
|
|
" if(!horzSpan) lumaS = lumaE;\n"
|
|
" if(horzSpan) lengthSign = rcpFrame.y;\n"
|
|
" float subpixB = (subpixA * (1.0/12.0)) - lumaM;\n"
|
|
"/*--------------------------------------------------------------------------*/\n"
|
|
" float gradientN = lumaN - lumaM;\n"
|
|
" float gradientS = lumaS - lumaM;\n"
|
|
" float lumaNN = lumaN + lumaM;\n"
|
|
" float lumaSS = lumaS + lumaM;\n"
|
|
" bool pairN = abs(gradientN) >= abs(gradientS);\n"
|
|
" float gradient = max(abs(gradientN), abs(gradientS));\n"
|
|
" if(pairN) lengthSign = -lengthSign;\n"
|
|
" float subpixC = FxaaSat(abs(subpixB) * subpixRcpRange);\n"
|
|
"/*--------------------------------------------------------------------------*/\n"
|
|
" float2 posB;\n"
|
|
" posB.x = posM.x;\n"
|
|
" posB.y = posM.y;\n"
|
|
" float2 offNP;\n"
|
|
" offNP.x = (!horzSpan) ? 0.0 : rcpFrame.x;\n"
|
|
" offNP.y = ( horzSpan) ? 0.0 : rcpFrame.y;\n"
|
|
" if(!horzSpan) posB.x += lengthSign * 0.5;\n"
|
|
" if( horzSpan) posB.y += lengthSign * 0.5;\n"
|
|
"/*--------------------------------------------------------------------------*/\n"
|
|
" float2 posN;\n"
|
|
" posN.x = posB.x - offNP.x * FXAA_QUALITY_P0;\n"
|
|
" posN.y = posB.y - offNP.y * FXAA_QUALITY_P0;\n"
|
|
" float2 posP;\n"
|
|
" posP.x = posB.x + offNP.x * FXAA_QUALITY_P0;\n"
|
|
" posP.y = posB.y + offNP.y * FXAA_QUALITY_P0;\n"
|
|
" float subpixD = ((-2.0)*subpixC) + 3.0;\n"
|
|
" float lumaEndN = FxaaTexTop(tex, posN).w;\n"
|
|
" float subpixE = subpixC * subpixC;\n"
|
|
" float lumaEndP = FxaaTexTop(tex, posP).w;\n"
|
|
"/*--------------------------------------------------------------------------*/\n"
|
|
" if(!pairN) lumaNN = lumaSS;\n"
|
|
" float gradientScaled = gradient * 1.0/4.0;\n"
|
|
" float lumaMM = lumaM - lumaNN * 0.5;\n"
|
|
" float subpixF = subpixD * subpixE;\n"
|
|
" bool lumaMLTZero = lumaMM < 0.0;\n"
|
|
"/*--------------------------------------------------------------------------*/\n"
|
|
" lumaEndN -= lumaNN * 0.5;\n"
|
|
" lumaEndP -= lumaNN * 0.5;\n"
|
|
" bool doneN = abs(lumaEndN) >= gradientScaled;\n"
|
|
" bool doneP = abs(lumaEndP) >= gradientScaled;\n"
|
|
" if(!doneN) posN.x -= offNP.x * FXAA_QUALITY_P1;\n"
|
|
" if(!doneN) posN.y -= offNP.y * FXAA_QUALITY_P1;\n"
|
|
" bool doneNP = (!doneN) || (!doneP);\n"
|
|
" if(!doneP) posP.x += offNP.x * FXAA_QUALITY_P1;\n"
|
|
" if(!doneP) posP.y += offNP.y * FXAA_QUALITY_P1;\n"
|
|
"/*--------------------------------------------------------------------------*/\n"
|
|
" if(doneNP) {\n"
|
|
" if(!doneN) lumaEndN = FxaaTexTop(tex, posN.xy).w;\n"
|
|
" if(!doneP) lumaEndP = FxaaTexTop(tex, posP.xy).w;\n"
|
|
" if(!doneN) lumaEndN = lumaEndN - lumaNN * 0.5;\n"
|
|
" if(!doneP) lumaEndP = lumaEndP - lumaNN * 0.5;\n"
|
|
" doneN = abs(lumaEndN) >= gradientScaled;\n"
|
|
" doneP = abs(lumaEndP) >= gradientScaled;\n"
|
|
" if(!doneN) posN.x -= offNP.x * FXAA_QUALITY_P2;\n"
|
|
" if(!doneN) posN.y -= offNP.y * FXAA_QUALITY_P2;\n"
|
|
" doneNP = (!doneN) || (!doneP);\n"
|
|
" if(!doneP) posP.x += offNP.x * FXAA_QUALITY_P2;\n"
|
|
" if(!doneP) posP.y += offNP.y * FXAA_QUALITY_P2;\n"
|
|
"/*--------------------------------------------------------------------------*/\n"
|
|
" #if (FXAA_QUALITY_PS > 3)\n"
|
|
" if(doneNP) {\n"
|
|
" if(!doneN) lumaEndN = FxaaTexTop(tex, posN.xy).w;\n"
|
|
" if(!doneP) lumaEndP = FxaaTexTop(tex, posP.xy).w;\n"
|
|
" if(!doneN) lumaEndN = lumaEndN - lumaNN * 0.5;\n"
|
|
" if(!doneP) lumaEndP = lumaEndP - lumaNN * 0.5;\n"
|
|
" doneN = abs(lumaEndN) >= gradientScaled;\n"
|
|
" doneP = abs(lumaEndP) >= gradientScaled;\n"
|
|
" if(!doneN) posN.x -= offNP.x * FXAA_QUALITY_P3;\n"
|
|
" if(!doneN) posN.y -= offNP.y * FXAA_QUALITY_P3;\n"
|
|
" doneNP = (!doneN) || (!doneP);\n"
|
|
" if(!doneP) posP.x += offNP.x * FXAA_QUALITY_P3;\n"
|
|
" if(!doneP) posP.y += offNP.y * FXAA_QUALITY_P3;\n"
|
|
"/*--------------------------------------------------------------------------*/\n"
|
|
" #if (FXAA_QUALITY_PS > 4)\n"
|
|
" if(doneNP) {\n"
|
|
" if(!doneN) lumaEndN = FxaaTexTop(tex, posN.xy).w;\n"
|
|
" if(!doneP) lumaEndP = FxaaTexTop(tex, posP.xy).w;\n"
|
|
" if(!doneN) lumaEndN = lumaEndN - lumaNN * 0.5;\n"
|
|
" if(!doneP) lumaEndP = lumaEndP - lumaNN * 0.5;\n"
|
|
" doneN = abs(lumaEndN) >= gradientScaled;\n"
|
|
" doneP = abs(lumaEndP) >= gradientScaled;\n"
|
|
" if(!doneN) posN.x -= offNP.x * FXAA_QUALITY_P4;\n"
|
|
" if(!doneN) posN.y -= offNP.y * FXAA_QUALITY_P4;\n"
|
|
" doneNP = (!doneN) || (!doneP);\n"
|
|
" if(!doneP) posP.x += offNP.x * FXAA_QUALITY_P4;\n"
|
|
" if(!doneP) posP.y += offNP.y * FXAA_QUALITY_P4;\n"
|
|
"/*--------------------------------------------------------------------------*/\n"
|
|
" #if (FXAA_QUALITY_PS > 5)\n"
|
|
" if(doneNP) {\n"
|
|
" if(!doneN) lumaEndN = FxaaTexTop(tex, posN.xy).w;\n"
|
|
" if(!doneP) lumaEndP = FxaaTexTop(tex, posP.xy).w;\n"
|
|
" if(!doneN) lumaEndN = lumaEndN - lumaNN * 0.5;\n"
|
|
" if(!doneP) lumaEndP = lumaEndP - lumaNN * 0.5;\n"
|
|
" doneN = abs(lumaEndN) >= gradientScaled;\n"
|
|
" doneP = abs(lumaEndP) >= gradientScaled;\n"
|
|
" if(!doneN) posN.x -= offNP.x * FXAA_QUALITY_P5;\n"
|
|
" if(!doneN) posN.y -= offNP.y * FXAA_QUALITY_P5;\n"
|
|
" doneNP = (!doneN) || (!doneP);\n"
|
|
" if(!doneP) posP.x += offNP.x * FXAA_QUALITY_P5;\n"
|
|
" if(!doneP) posP.y += offNP.y * FXAA_QUALITY_P5;\n"
|
|
"/*--------------------------------------------------------------------------*/\n"
|
|
" #if (FXAA_QUALITY_PS > 6)\n"
|
|
" if(doneNP) {\n"
|
|
" if(!doneN) lumaEndN = FxaaTexTop(tex, posN.xy).w;\n"
|
|
" if(!doneP) lumaEndP = FxaaTexTop(tex, posP.xy).w;\n"
|
|
" if(!doneN) lumaEndN = lumaEndN - lumaNN * 0.5;\n"
|
|
" if(!doneP) lumaEndP = lumaEndP - lumaNN * 0.5;\n"
|
|
" doneN = abs(lumaEndN) >= gradientScaled;\n"
|
|
" doneP = abs(lumaEndP) >= gradientScaled;\n"
|
|
" if(!doneN) posN.x -= offNP.x * FXAA_QUALITY_P6;\n"
|
|
" if(!doneN) posN.y -= offNP.y * FXAA_QUALITY_P6;\n"
|
|
" doneNP = (!doneN) || (!doneP);\n"
|
|
" if(!doneP) posP.x += offNP.x * FXAA_QUALITY_P6;\n"
|
|
" if(!doneP) posP.y += offNP.y * FXAA_QUALITY_P6;\n"
|
|
"/*--------------------------------------------------------------------------*/\n"
|
|
" #if (FXAA_QUALITY_PS > 7)\n"
|
|
" if(doneNP) {\n"
|
|
" if(!doneN) lumaEndN = FxaaTexTop(tex, posN.xy).w;\n"
|
|
" if(!doneP) lumaEndP = FxaaTexTop(tex, posP.xy).w;\n"
|
|
" if(!doneN) lumaEndN = lumaEndN - lumaNN * 0.5;\n"
|
|
" if(!doneP) lumaEndP = lumaEndP - lumaNN * 0.5;\n"
|
|
" doneN = abs(lumaEndN) >= gradientScaled;\n"
|
|
" doneP = abs(lumaEndP) >= gradientScaled;\n"
|
|
" if(!doneN) posN.x -= offNP.x * FXAA_QUALITY_P7;\n"
|
|
" if(!doneN) posN.y -= offNP.y * FXAA_QUALITY_P7;\n"
|
|
" doneNP = (!doneN) || (!doneP);\n"
|
|
" if(!doneP) posP.x += offNP.x * FXAA_QUALITY_P7;\n"
|
|
" if(!doneP) posP.y += offNP.y * FXAA_QUALITY_P7;\n"
|
|
"/*--------------------------------------------------------------------------*/\n"
|
|
" #if (FXAA_QUALITY_PS > 8)\n"
|
|
" if(doneNP) {\n"
|
|
" if(!doneN) lumaEndN = FxaaTexTop(tex, posN.xy).w;\n"
|
|
" if(!doneP) lumaEndP = FxaaTexTop(tex, posP.xy).w;\n"
|
|
" if(!doneN) lumaEndN = lumaEndN - lumaNN * 0.5;\n"
|
|
" if(!doneP) lumaEndP = lumaEndP - lumaNN * 0.5;\n"
|
|
" doneN = abs(lumaEndN) >= gradientScaled;\n"
|
|
" doneP = abs(lumaEndP) >= gradientScaled;\n"
|
|
" if(!doneN) posN.x -= offNP.x * FXAA_QUALITY_P8;\n"
|
|
" if(!doneN) posN.y -= offNP.y * FXAA_QUALITY_P8;\n"
|
|
" doneNP = (!doneN) || (!doneP);\n"
|
|
" if(!doneP) posP.x += offNP.x * FXAA_QUALITY_P8;\n"
|
|
" if(!doneP) posP.y += offNP.y * FXAA_QUALITY_P8;\n"
|
|
"/*--------------------------------------------------------------------------*/\n"
|
|
" #if (FXAA_QUALITY_PS > 9)\n"
|
|
" if(doneNP) {\n"
|
|
" if(!doneN) lumaEndN = FxaaTexTop(tex, posN.xy).w;\n"
|
|
" if(!doneP) lumaEndP = FxaaTexTop(tex, posP.xy).w;\n"
|
|
" if(!doneN) lumaEndN = lumaEndN - lumaNN * 0.5;\n"
|
|
" if(!doneP) lumaEndP = lumaEndP - lumaNN * 0.5;\n"
|
|
" doneN = abs(lumaEndN) >= gradientScaled;\n"
|
|
" doneP = abs(lumaEndP) >= gradientScaled;\n"
|
|
" if(!doneN) posN.x -= offNP.x * FXAA_QUALITY_P9;\n"
|
|
" if(!doneN) posN.y -= offNP.y * FXAA_QUALITY_P9;\n"
|
|
" doneNP = (!doneN) || (!doneP);\n"
|
|
" if(!doneP) posP.x += offNP.x * FXAA_QUALITY_P9;\n"
|
|
" if(!doneP) posP.y += offNP.y * FXAA_QUALITY_P9;\n"
|
|
"/*--------------------------------------------------------------------------*/\n"
|
|
" #if (FXAA_QUALITY_PS > 10)\n"
|
|
" if(doneNP) {\n"
|
|
" if(!doneN) lumaEndN = FxaaTexTop(tex, posN.xy).w;\n"
|
|
" if(!doneP) lumaEndP = FxaaTexTop(tex, posP.xy).w;\n"
|
|
" if(!doneN) lumaEndN = lumaEndN - lumaNN * 0.5;\n"
|
|
" if(!doneP) lumaEndP = lumaEndP - lumaNN * 0.5;\n"
|
|
" doneN = abs(lumaEndN) >= gradientScaled;\n"
|
|
" doneP = abs(lumaEndP) >= gradientScaled;\n"
|
|
" if(!doneN) posN.x -= offNP.x * FXAA_QUALITY_P10;\n"
|
|
" if(!doneN) posN.y -= offNP.y * FXAA_QUALITY_P10;\n"
|
|
" doneNP = (!doneN) || (!doneP);\n"
|
|
" if(!doneP) posP.x += offNP.x * FXAA_QUALITY_P10;\n"
|
|
" if(!doneP) posP.y += offNP.y * FXAA_QUALITY_P10;\n"
|
|
"/*--------------------------------------------------------------------------*/\n"
|
|
" #if (FXAA_QUALITY_PS > 11)\n"
|
|
" if(doneNP) {\n"
|
|
" if(!doneN) lumaEndN = FxaaTexTop(tex, posN.xy).w;\n"
|
|
" if(!doneP) lumaEndP = FxaaTexTop(tex, posP.xy).w;\n"
|
|
" if(!doneN) lumaEndN = lumaEndN - lumaNN * 0.5;\n"
|
|
" if(!doneP) lumaEndP = lumaEndP - lumaNN * 0.5;\n"
|
|
" doneN = abs(lumaEndN) >= gradientScaled;\n"
|
|
" doneP = abs(lumaEndP) >= gradientScaled;\n"
|
|
" if(!doneN) posN.x -= offNP.x * FXAA_QUALITY_P11;\n"
|
|
" if(!doneN) posN.y -= offNP.y * FXAA_QUALITY_P11;\n"
|
|
" doneNP = (!doneN) || (!doneP);\n"
|
|
" if(!doneP) posP.x += offNP.x * FXAA_QUALITY_P11;\n"
|
|
" if(!doneP) posP.y += offNP.y * FXAA_QUALITY_P11;\n"
|
|
"/*--------------------------------------------------------------------------*/\n"
|
|
" #if (FXAA_QUALITY_PS > 12)\n"
|
|
" if(doneNP) {\n"
|
|
" if(!doneN) lumaEndN = FxaaTexTop(tex, posN.xy).w;\n"
|
|
" if(!doneP) lumaEndP = FxaaTexTop(tex, posP.xy).w;\n"
|
|
" if(!doneN) lumaEndN = lumaEndN - lumaNN * 0.5;\n"
|
|
" if(!doneP) lumaEndP = lumaEndP - lumaNN * 0.5;\n"
|
|
" doneN = abs(lumaEndN) >= gradientScaled;\n"
|
|
" doneP = abs(lumaEndP) >= gradientScaled;\n"
|
|
" if(!doneN) posN.x -= offNP.x * FXAA_QUALITY_P12;\n"
|
|
" if(!doneN) posN.y -= offNP.y * FXAA_QUALITY_P12;\n"
|
|
" doneNP = (!doneN) || (!doneP);\n"
|
|
" if(!doneP) posP.x += offNP.x * FXAA_QUALITY_P12;\n"
|
|
" if(!doneP) posP.y += offNP.y * FXAA_QUALITY_P12;\n"
|
|
"/*--------------------------------------------------------------------------*/\n"
|
|
" }\n"
|
|
" #endif\n"
|
|
"/*--------------------------------------------------------------------------*/\n"
|
|
" }\n"
|
|
" #endif\n"
|
|
"/*--------------------------------------------------------------------------*/\n"
|
|
" }\n"
|
|
" #endif\n"
|
|
"/*--------------------------------------------------------------------------*/\n"
|
|
" }\n"
|
|
" #endif\n"
|
|
"/*--------------------------------------------------------------------------*/\n"
|
|
" }\n"
|
|
" #endif\n"
|
|
"/*--------------------------------------------------------------------------*/\n"
|
|
" }\n"
|
|
" #endif\n"
|
|
"/*--------------------------------------------------------------------------*/\n"
|
|
" }\n"
|
|
" #endif\n"
|
|
"/*--------------------------------------------------------------------------*/\n"
|
|
" }\n"
|
|
" #endif\n"
|
|
"/*--------------------------------------------------------------------------*/\n"
|
|
" }\n"
|
|
" #endif\n"
|
|
"/*--------------------------------------------------------------------------*/\n"
|
|
" }\n"
|
|
" #endif\n"
|
|
"/*--------------------------------------------------------------------------*/\n"
|
|
" }\n"
|
|
"/*--------------------------------------------------------------------------*/\n"
|
|
" float dstN = posM.x - posN.x;\n"
|
|
" float dstP = posP.x - posM.x;\n"
|
|
" if(!horzSpan) dstN = posM.y - posN.y;\n"
|
|
" if(!horzSpan) dstP = posP.y - posM.y;\n"
|
|
"/*--------------------------------------------------------------------------*/\n"
|
|
" bool goodSpanN = (lumaEndN < 0.0) != lumaMLTZero;\n"
|
|
" float spanLength = (dstP + dstN);\n"
|
|
" bool goodSpanP = (lumaEndP < 0.0) != lumaMLTZero;\n"
|
|
" float spanLengthRcp = 1.0/spanLength;\n"
|
|
"/*--------------------------------------------------------------------------*/\n"
|
|
" bool directionN = dstN < dstP;\n"
|
|
" float dst = min(dstN, dstP);\n"
|
|
" bool goodSpan = directionN ? goodSpanN : goodSpanP;\n"
|
|
" float subpixG = subpixF * subpixF;\n"
|
|
" float pixelOffset = (dst * (-spanLengthRcp)) + 0.5;\n"
|
|
" float subpixH = subpixG * FXAA_QUALITY_SUBPIX;\n"
|
|
"/*--------------------------------------------------------------------------*/\n"
|
|
" float pixelOffsetGood = goodSpan ? pixelOffset : 0.0;\n"
|
|
" float pixelOffsetSubpix = max(pixelOffsetGood, subpixH);\n"
|
|
" if(!horzSpan) posM.x += pixelOffsetSubpix * lengthSign;\n"
|
|
" if( horzSpan) posM.y += pixelOffsetSubpix * lengthSign;\n"
|
|
" #if (FXAA_DISCARD == 1)\n"
|
|
" return FxaaTexTop(tex, posM);\n"
|
|
" #else\n"
|
|
" return float4(FxaaTexTop(tex, posM).xyz, lumaM);\n"
|
|
" #endif\n"
|
|
"}\n"
|
|
"/*==========================================================================*/\n"
|
|
"#endif\n"
|
|
"\n"
|
|
"#ifdef SHADER_MODEL\n"
|
|
"PS_OUTPUT ps_main(PS_INPUT input)\n"
|
|
"{\n"
|
|
" PS_OUTPUT output;\n"
|
|
"\n"
|
|
" float2 pos = input.t;\n"
|
|
" float4 posPos = (float4)0;\n"
|
|
"\n"
|
|
" FxaaTex tex;\n"
|
|
"\n"
|
|
" #if SHADER_MODEL >= 0x400\n"
|
|
"\n"
|
|
" tex.tex = Texture;\n"
|
|
" tex.smpl = TextureSampler;\n"
|
|
"\n"
|
|
" #else\n"
|
|
"\n"
|
|
" tex = Texture;\n"
|
|
"\n"
|
|
" #endif\n"
|
|
"\n"
|
|
" output.c = FxaaPixelShader(pos, posPos, tex, _rcpFrame.xy, _rcpFrameOpt);\n"
|
|
"\n"
|
|
" return output;\n"
|
|
"}\n"
|
|
"#endif\n"
|
|
"\n"
|
|
"#ifdef FXAA_GLSL_130\n"
|
|
"void ps_main()\n"
|
|
"{\n"
|
|
" vec2 pos = PSin_t;\n"
|
|
" vec4 posPos = vec4(0.0, 0.0, 0.0, 0.0);\n"
|
|
"\n"
|
|
" SV_Target0 = FxaaPixelShader(pos, posPos, TextureSampler, _rcpFrame.xy, _rcpFrameOpt);\n"
|
|
"}\n"
|
|
"#endif\n"
|
|
"\n"
|
|
"#endif\n"
|
|
;
|