mirror of https://github.com/PCSX2/pcsx2.git
1996 lines
80 KiB
HLSL
1996 lines
80 KiB
HLSL
#ifdef SHADER_MODEL // make safe to include in resource file to enforce dependency
|
|
|
|
// Select the FXAA Quality (PC) algorithm for this shader.
#define FXAA_PC 1

// Resource and I/O declarations, chosen by SHADER_MODEL:
//   >= 0x400 : Texture2D/SamplerState objects, cbuffer constants, SV_* semantics
//   <= 0x300 : sampler and constants bound to explicit registers, VPOS/COLOR semantics
#if SHADER_MODEL >= 0x400

// FXAA API flavour: FXAA_HLSL_5 from 0x500 upwards, FXAA_HLSL_4 below that.
#if SHADER_MODEL < 0x500
#define FXAA_HLSL_4 1
#else
#define FXAA_HLSL_5 1
#endif

// Source image and the sampler used to read it.
Texture2D Texture;
SamplerState TextureSampler;

// NOTE(review): presumably forwarded as the rcpFrame / rcpFrameOpt arguments
// of FxaaPixelShader (see the integration checklist) - confirm at the call site.
cbuffer cb0
{
    float4 _rcpFrame;
    float4 _rcpFrameOpt;
};

// Pixel shader input: position and texture coordinate.
struct PS_INPUT
{
    float4 p : SV_Position;
    float2 t : TEXCOORD0;
};

// Pixel shader output: a single color target.
struct PS_OUTPUT
{
    float4 c : SV_Target0;
};

#elif SHADER_MODEL <= 0x300

#define FXAA_HLSL_3 1

// Source image sampler in register s0.
sampler Texture : register(s0);

// Same two constants as the cbuffer path, bound to c0 and c1.
float4 _rcpFrame : register(c0);
float4 _rcpFrameOpt : register(c1);

// Pixel shader input. VPOS is used at exactly 0x300; lower models fall back
// to TEXCOORD1 for the position.
struct PS_INPUT
{
#if SHADER_MODEL >= 0x300
    float4 p : VPOS;
#else
    float4 p : TEXCOORD1;
#endif
    float2 t : TEXCOORD0;
};

// Pixel shader output: a single color.
struct PS_OUTPUT
{
    float4 c : COLOR;
};

#endif
|
|
|
|
/*============================================================================
|
|
|
|
|
|
NVIDIA FXAA 3.10 by TIMOTHY LOTTES
|
|
|
|
|
|
------------------------------------------------------------------------------
|
|
COPYRIGHT (C) 2010, 2011 NVIDIA CORPORATION. ALL RIGHTS RESERVED.
|
|
------------------------------------------------------------------------------
|
|
TO THE MAXIMUM EXTENT PERMITTED BY APPLICABLE LAW, THIS SOFTWARE IS PROVIDED
|
|
*AS IS* AND NVIDIA AND ITS SUPPLIERS DISCLAIM ALL WARRANTIES, EITHER EXPRESS
|
|
OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, IMPLIED WARRANTIES OF
|
|
MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. IN NO EVENT SHALL NVIDIA
|
|
OR ITS SUPPLIERS BE LIABLE FOR ANY SPECIAL, INCIDENTAL, INDIRECT, OR
|
|
CONSEQUENTIAL DAMAGES WHATSOEVER (INCLUDING, WITHOUT LIMITATION, DAMAGES FOR
|
|
LOSS OF BUSINESS PROFITS, BUSINESS INTERRUPTION, LOSS OF BUSINESS INFORMATION,
|
|
OR ANY OTHER PECUNIARY LOSS) ARISING OUT OF THE USE OF OR INABILITY TO USE
|
|
THIS SOFTWARE, EVEN IF NVIDIA HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
|
|
DAMAGES.
|
|
|
|
------------------------------------------------------------------------------
|
|
INTEGRATION CHECKLIST
|
|
------------------------------------------------------------------------------
|
|
(1.)
|
|
In the shader source,
|
|
setup defines for the desired configuration.
|
|
Example,
|
|
|
|
#define FXAA_PC 1
|
|
#define FXAA_HLSL_3 1
|
|
#define FXAA_QUALITY__PRESET 12
|
|
#define FXAA_QUALITY__EDGE_THRESHOLD (1.0/6.0)
|
|
#define FXAA_QUALITY__EDGE_THRESHOLD_MIN (1.0/12.0)
|
|
|
|
(2.)
|
|
Then include this file,
|
|
|
|
#include "Fxaa3.h"
|
|
|
|
(3.)
|
|
Then call the FXAA pixel shader from within your desired shader,
|
|
|
|
return FxaaPixelShader(pos, posPos, tex, rcpFrame, rcpFrameOpt);
|
|
|
|
(4.)
|
|
Ensure the pass prior to FXAA outputs RGBL.
|
|
See next section.
|
|
|
|
(5.)
|
|
Setup engine to provide "rcpFrame" and "rcpFrameOpt" constants.
|
|
Not using constants will result in a performance loss.
|
|
|
|
// {x_} = 1.0/screenWidthInPixels
|
|
// {_y} = 1.0/screenHeightInPixels
|
|
float2 rcpFrame
|
|
|
|
// This must be from a constant/uniform.
|
|
// {x___} = 2.0/screenWidthInPixels
|
|
// {_y__} = 2.0/screenHeightInPixels
|
|
// {__z_} = 0.5/screenWidthInPixels
|
|
// {___w} = 0.5/screenHeightInPixels
|
|
float4 rcpFrameOpt
|
|
|
|
(5.a.)
|
|
Optionally change to this for sharper FXAA Console,
|
|
|
|
// This must be from a constant/uniform.
|
|
// {x___} = 2.0/screenWidthInPixels
|
|
// {_y__} = 2.0/screenHeightInPixels
|
|
// {__z_} = 0.333/screenWidthInPixels
|
|
// {___w} = 0.333/screenHeightInPixels
|
|
float4 rcpFrameOpt
|
|
|
|
(6.)
|
|
Have FXAA vertex shader run as a full screen triangle,
|
|
and output "pos" and "posPos" such that inputs in the pixel shader provide,
|
|
|
|
// {xy} = center of pixel
|
|
float2 pos,
|
|
|
|
// {xy__} = upper left of pixel
|
|
// {__zw} = lower right of pixel
|
|
float4 posPos,
|
|
|
|
(7.)
|
|
Ensure the texture sampler used by FXAA is set to bilinear filtering.
|
|
|
|
|
|
------------------------------------------------------------------------------
|
|
INTEGRATION - RGBL AND COLORSPACE
|
|
------------------------------------------------------------------------------
|
|
FXAA3 requires RGBL as input.
|
|
|
|
RGB should be LDR (low dynamic range).
|
|
Specifically do FXAA after tonemapping.
|
|
|
|
RGB data as returned by a texture fetch can be linear or non-linear.
|
|
Note an "sRGB format" texture counts as linear,
|
|
because the result of a texture fetch is linear data.
|
|
Regular "RGBA8" textures in the sRGB colorspace are non-linear.
|
|
|
|
Luma must be stored in the alpha channel prior to running FXAA.
|
|
This luma should be in a perceptual space (could be gamma 2.0).
|
|
Example pass before FXAA where output is gamma 2.0 encoded,
|
|
|
|
color.rgb = ToneMap(color.rgb); // linear color output
|
|
color.rgb = sqrt(color.rgb); // gamma 2.0 color output
|
|
return color;
|
|
|
|
To use FXAA,
|
|
|
|
color.rgb = ToneMap(color.rgb); // linear color output
|
|
color.rgb = sqrt(color.rgb); // gamma 2.0 color output
|
|
color.a = dot(color.rgb, float3(0.299, 0.587, 0.114)); // compute luma
|
|
return color;
|
|
|
|
Another example where output is linear encoded,
|
|
say for instance writing to an sRGB formatted render target,
|
|
where the render target does the conversion back to sRGB after blending,
|
|
|
|
color.rgb = ToneMap(color.rgb); // linear color output
|
|
return color;
|
|
|
|
To use FXAA,
|
|
|
|
color.rgb = ToneMap(color.rgb); // linear color output
|
|
color.a = sqrt(dot(color.rgb, float3(0.299, 0.587, 0.114))); // compute luma
|
|
return color;
|
|
|
|
Getting luma correct is required for the algorithm to work correctly.
|
|
|
|
|
|
------------------------------------------------------------------------------
|
|
BEING LINEARLY CORRECT?
|
|
------------------------------------------------------------------------------
|
|
Applying FXAA to a framebuffer with linear RGB color will look worse.
|
|
This is very counterintuitive, but happens to be true in this case.
|
|
The reason is that dithering artifacts will be more visible
|
|
in a linear colorspace.
|
|
|
|
|
|
------------------------------------------------------------------------------
|
|
COMPLEX INTEGRATION
|
|
------------------------------------------------------------------------------
|
|
Q. What if the engine is blending into RGB before wanting to run FXAA?
|
|
|
|
A. In the last opaque pass prior to FXAA,
|
|
have the pass write out luma into alpha.
|
|
Then blend into RGB only.
|
|
FXAA should be able to run ok
|
|
assuming the blending pass did not add any aliasing.
|
|
This should be the common case for particles and common blending passes.
|
|
|
|
============================================================================*/
|
|
|
|
/*============================================================================
|
|
|
|
INTEGRATION KNOBS
|
|
|
|
============================================================================*/
|
|
//
|
|
// FXAA_PS3 and FXAA_360 choose the console algorithm (FXAA3 CONSOLE).
|
|
// FXAA_360_OPT is a prototype for the new optimized 360 version.
|
|
//
|
|
// 1 = Use API.
|
|
// 0 = Don't use API.
|
|
//
|
|
/*--------------------------------------------------------------------------*/
|
|
// Console platform selectors. Each defaults to 0 (off) unless the including
// shader defined it beforehand.
#if !defined(FXAA_PS3)
#define FXAA_PS3 0
#endif

#if !defined(FXAA_360)
#define FXAA_360 0
#endif

// Prototype of the optimized 360 shader.
#if !defined(FXAA_360_OPT)
#define FXAA_360_OPT 0
#endif
|
|
/*==========================================================================*/
|
|
// FXAA Quality: the high quality PC algorithm. Off unless already defined.
#if !defined(FXAA_PC)
#define FXAA_PC 0
#endif

// FXAA Console on PC: the console algorithm, included for developers
// targeting really low spec machines. Off unless already defined.
#if !defined(FXAA_PC_CONSOLE)
#define FXAA_PC_CONSOLE 0
#endif
|
|
/*--------------------------------------------------------------------------*/
|
|
// Shading-language selectors. Any selector the including shader did not
// define is set to 0 here so that later "#if" tests are well formed.
#if !defined(FXAA_GLSL_120)
#define FXAA_GLSL_120 0
#endif

#if !defined(FXAA_GLSL_130)
#define FXAA_GLSL_130 0
#endif

#if !defined(FXAA_HLSL_3)
#define FXAA_HLSL_3 0
#endif

#if !defined(FXAA_HLSL_4)
#define FXAA_HLSL_4 0
#endif

#if !defined(FXAA_HLSL_5)
#define FXAA_HLSL_5 0
#endif
|
|
/*==========================================================================*/
|
|
// FXAA_EARLY_EXIT - controls the algorithm's early exit path.
//
//   1 = On (default).
//   0 = Off.
//
// Cost notes:
//   - On PS3, turning this on adds 2 cycles to the shader.
//   - On 360, turning this off adds 10ths of a millisecond to the shader.
//   - Turning this off on console results in a more blurry image,
//     which is why the default is on.
#if !defined(FXAA_EARLY_EXIT)
#define FXAA_EARLY_EXIT 1
#endif
|
|
/*--------------------------------------------------------------------------*/
|
|
// FXAA_DISCARD - only valid for PC OpenGL currently.
//
//   1 = Use discard on pixels which don't need AA.
//       For APIs which enable concurrent TEX+ROP from the same surface.
//   0 = Return unchanged color on pixels which don't need AA (default).
#if !defined(FXAA_DISCARD)
#define FXAA_DISCARD 0
#endif
|
|
/*--------------------------------------------------------------------------*/
|
|
// FXAA_FAST_PIXEL_OFFSET - used for GLSL 120 only.
//
//   1 = GL API supports fast pixel offsets
//   0 = do not use fast pixel offsets
//
// When not set by the including shader, it is enabled if any of the
// GL_EXT_gpu_shader4, GL_NV_gpu_shader5 or GL_ARB_gpu_shader5 macros is
// defined, and disabled otherwise.
#if !defined(FXAA_FAST_PIXEL_OFFSET)
#if defined(GL_EXT_gpu_shader4) || defined(GL_NV_gpu_shader5) || defined(GL_ARB_gpu_shader5)
#define FXAA_FAST_PIXEL_OFFSET 1
#else
#define FXAA_FAST_PIXEL_OFFSET 0
#endif
#endif
|
|
/*--------------------------------------------------------------------------*/
|
|
// FXAA_GATHER4_ALPHA
//
//   1 = API supports gather4 on alpha channel.
//   0 = API does not support gather4 on alpha channel.
//
// When not set by the including shader, it is enabled for FXAA_HLSL_5 and
// when either GL_ARB_gpu_shader5 or GL_NV_gpu_shader5 is defined.
#if !defined(FXAA_GATHER4_ALPHA)
#if (FXAA_HLSL_5 == 1) || defined(GL_ARB_gpu_shader5) || defined(GL_NV_gpu_shader5)
#define FXAA_GATHER4_ALPHA 1
#else
#define FXAA_GATHER4_ALPHA 0
#endif
#endif
|
|
|
|
/*============================================================================
|
|
FXAA CONSOLE - TUNING KNOBS
|
|
============================================================================*/
|
|
// FXAA_CONSOLE__EDGE_SHARPNESS - controls the sharpness of edges.
//
// Due to the PS3 being ALU bound, there are only two safe values
// here: 4 and 8. These options use the shader's ability to do a free
// multiply or divide by 4 or 8.
//
//   8.0 is sharper (default)
//   4.0 is softer
//   2.0 is really soft (good for vector graphics inputs)
//
// To pick another value, define this macro before this point.
#if !defined(FXAA_CONSOLE__EDGE_SHARPNESS)
#define FXAA_CONSOLE__EDGE_SHARPNESS 8.0
#endif
|
|
/*--------------------------------------------------------------------------*/
|
|
// FXAA_CONSOLE__EDGE_THRESHOLD - the minimum amount of local contrast
// required to apply the algorithm. The console setting has a different
// mapping than the quality setting.
//
// This only applies when FXAA_EARLY_EXIT is 1.
//
// Due to the PS3 being ALU bound, there are only two safe values
// here: 0.25 and 0.125. These options use the shader's ability to do a
// free multiply or divide by 4 or 8.
//
//   0.125 leaves less aliasing, but is softer (default)
//   0.25  leaves more aliasing, and is sharper
#if !defined(FXAA_CONSOLE__EDGE_THRESHOLD)
#define FXAA_CONSOLE__EDGE_THRESHOLD 0.125
#endif
|
|
/*--------------------------------------------------------------------------*/
|
|
// FXAA_CONSOLE__EDGE_THRESHOLD_MIN - trims the algorithm from processing
// darks. The console setting has a different mapping than the quality
// setting.
//
// This only applies when FXAA_EARLY_EXIT is 1.
//
// This does not apply to PS3: PS3 was simplified to avoid more shader
// instructions.
#if !defined(FXAA_CONSOLE__EDGE_THRESHOLD_MIN)
#define FXAA_CONSOLE__EDGE_THRESHOLD_MIN 0.05
#endif
|
|
|
|
/*============================================================================
|
|
FXAA QUALITY - TUNING KNOBS
|
|
============================================================================*/
|
|
// FXAA_QUALITY__EDGE_THRESHOLD - the minimum amount of local contrast
// required to apply the algorithm.
//
//   1/3  - too little
//   1/4  - low quality
//   1/6  - default (the value defined here)
//   1/8  - high quality
//   1/16 - overkill
#if !defined(FXAA_QUALITY__EDGE_THRESHOLD)
#define FXAA_QUALITY__EDGE_THRESHOLD (1.0/6.0)
#endif
|
|
/*--------------------------------------------------------------------------*/
|
|
// FXAA_QUALITY__EDGE_THRESHOLD_MIN - trims the algorithm from processing
// darks.
//
//   1/32 - visible limit
//   1/16 - high quality
//   1/12 - upper limit (default, the start of visible unfiltered edges)
#if !defined(FXAA_QUALITY__EDGE_THRESHOLD_MIN)
#define FXAA_QUALITY__EDGE_THRESHOLD_MIN (1.0/12.0)
#endif
|
|
/*--------------------------------------------------------------------------*/
|
|
// FXAA_QUALITY__SUBPIX - the amount of sub-pixel aliasing removal.
//
//   1   - upper limit (softer)
//   3/4 - default amount of filtering
//   1/2 - lower limit (sharper, less sub-pixel aliasing removal)
#if !defined(FXAA_QUALITY__SUBPIX)
#define FXAA_QUALITY__SUBPIX (3.0/4.0)
#endif
|
|
/*--------------------------------------------------------------------------*/
|
|
// FXAA_QUALITY__PRESET - chooses the quality preset.
//
// OPTIONS
//   10 to 15 - default medium dither (10=fastest, 15=highest quality)
//   20 to 29 - less dither, more expensive (20=fastest, 29=highest quality)
//   39       - no dither, very expensive
//
// NOTES
//   12 = slightly faster than FXAA 3.9 and higher edge quality (default)
//   13 = about same speed as FXAA 3.9 and better than 12
//   23 = closest to FXAA 3.9 visually and performance wise
//    _ = the lowest digit is directly related to performance
//   _  = the highest digit is directly related to style
#if !defined(FXAA_QUALITY__PRESET)
#define FXAA_QUALITY__PRESET 12
#endif
|
|
|
|
|
|
/*============================================================================
|
|
|
|
FXAA QUALITY - PRESETS
|
|
|
|
============================================================================*/
|
|
|
|
/*============================================================================
|
|
FXAA QUALITY - MEDIUM DITHER PRESETS
|
|
============================================================================*/
|
|
// Medium dither presets (10 to 15). Exactly one branch can match, so the
// presets are written as a single #if/#elif chain.
// NOTE(review): FXAA_QUALITY__PS always equals the number of
// FXAA_QUALITY__Pn entries in a preset; presumably it is the search step
// count and Pn the per-step distance - confirm against the quality shader.
#if (FXAA_QUALITY__PRESET == 10)
    #define FXAA_QUALITY__PS 3
    #define FXAA_QUALITY__P0 1.5
    #define FXAA_QUALITY__P1 3.0
    #define FXAA_QUALITY__P2 12.0
#elif (FXAA_QUALITY__PRESET == 11)
    #define FXAA_QUALITY__PS 4
    #define FXAA_QUALITY__P0 1.0
    #define FXAA_QUALITY__P1 1.5
    #define FXAA_QUALITY__P2 3.0
    #define FXAA_QUALITY__P3 12.0
#elif (FXAA_QUALITY__PRESET == 12)
    #define FXAA_QUALITY__PS 5
    #define FXAA_QUALITY__P0 1.0
    #define FXAA_QUALITY__P1 1.5
    #define FXAA_QUALITY__P2 2.0
    #define FXAA_QUALITY__P3 4.0
    #define FXAA_QUALITY__P4 12.0
#elif (FXAA_QUALITY__PRESET == 13)
    #define FXAA_QUALITY__PS 6
    #define FXAA_QUALITY__P0 1.0
    #define FXAA_QUALITY__P1 1.5
    #define FXAA_QUALITY__P2 2.0
    #define FXAA_QUALITY__P3 2.0
    #define FXAA_QUALITY__P4 4.0
    #define FXAA_QUALITY__P5 12.0
#elif (FXAA_QUALITY__PRESET == 14)
    #define FXAA_QUALITY__PS 7
    #define FXAA_QUALITY__P0 1.0
    #define FXAA_QUALITY__P1 1.5
    #define FXAA_QUALITY__P2 2.0
    #define FXAA_QUALITY__P3 2.0
    #define FXAA_QUALITY__P4 2.0
    #define FXAA_QUALITY__P5 4.0
    #define FXAA_QUALITY__P6 12.0
#elif (FXAA_QUALITY__PRESET == 15)
    #define FXAA_QUALITY__PS 8
    #define FXAA_QUALITY__P0 1.0
    #define FXAA_QUALITY__P1 1.5
    #define FXAA_QUALITY__P2 2.0
    #define FXAA_QUALITY__P3 2.0
    #define FXAA_QUALITY__P4 2.0
    #define FXAA_QUALITY__P5 2.0
    #define FXAA_QUALITY__P6 4.0
    #define FXAA_QUALITY__P7 12.0
#endif
|
|
|
|
/*============================================================================
|
|
FXAA QUALITY - LOW DITHER PRESETS
|
|
============================================================================*/
|
|
// Low dither presets (20 to 29). Exactly one branch can match, so the
// presets are written as a single #if/#elif chain.
// NOTE(review): FXAA_QUALITY__PS always equals the number of
// FXAA_QUALITY__Pn entries in a preset; presumably it is the search step
// count and Pn the per-step distance - confirm against the quality shader.
#if (FXAA_QUALITY__PRESET == 20)
    #define FXAA_QUALITY__PS 3
    #define FXAA_QUALITY__P0 1.5
    #define FXAA_QUALITY__P1 2.0
    #define FXAA_QUALITY__P2 8.0
#elif (FXAA_QUALITY__PRESET == 21)
    #define FXAA_QUALITY__PS 4
    #define FXAA_QUALITY__P0 1.0
    #define FXAA_QUALITY__P1 1.5
    #define FXAA_QUALITY__P2 2.0
    #define FXAA_QUALITY__P3 8.0
#elif (FXAA_QUALITY__PRESET == 22)
    #define FXAA_QUALITY__PS 5
    #define FXAA_QUALITY__P0 1.0
    #define FXAA_QUALITY__P1 1.5
    #define FXAA_QUALITY__P2 2.0
    #define FXAA_QUALITY__P3 2.0
    #define FXAA_QUALITY__P4 8.0
#elif (FXAA_QUALITY__PRESET == 23)
    #define FXAA_QUALITY__PS 6
    #define FXAA_QUALITY__P0 1.0
    #define FXAA_QUALITY__P1 1.5
    #define FXAA_QUALITY__P2 2.0
    #define FXAA_QUALITY__P3 2.0
    #define FXAA_QUALITY__P4 2.0
    #define FXAA_QUALITY__P5 8.0
#elif (FXAA_QUALITY__PRESET == 24)
    #define FXAA_QUALITY__PS 7
    #define FXAA_QUALITY__P0 1.0
    #define FXAA_QUALITY__P1 1.5
    #define FXAA_QUALITY__P2 2.0
    #define FXAA_QUALITY__P3 2.0
    #define FXAA_QUALITY__P4 2.0
    #define FXAA_QUALITY__P5 3.0
    #define FXAA_QUALITY__P6 8.0
#elif (FXAA_QUALITY__PRESET == 25)
    #define FXAA_QUALITY__PS 8
    #define FXAA_QUALITY__P0 1.0
    #define FXAA_QUALITY__P1 1.5
    #define FXAA_QUALITY__P2 2.0
    #define FXAA_QUALITY__P3 2.0
    #define FXAA_QUALITY__P4 2.0
    #define FXAA_QUALITY__P5 2.0
    #define FXAA_QUALITY__P6 4.0
    #define FXAA_QUALITY__P7 8.0
#elif (FXAA_QUALITY__PRESET == 26)
    #define FXAA_QUALITY__PS 9
    #define FXAA_QUALITY__P0 1.0
    #define FXAA_QUALITY__P1 1.5
    #define FXAA_QUALITY__P2 2.0
    #define FXAA_QUALITY__P3 2.0
    #define FXAA_QUALITY__P4 2.0
    #define FXAA_QUALITY__P5 2.0
    #define FXAA_QUALITY__P6 2.0
    #define FXAA_QUALITY__P7 4.0
    #define FXAA_QUALITY__P8 8.0
#elif (FXAA_QUALITY__PRESET == 27)
    #define FXAA_QUALITY__PS 10
    #define FXAA_QUALITY__P0 1.0
    #define FXAA_QUALITY__P1 1.5
    #define FXAA_QUALITY__P2 2.0
    #define FXAA_QUALITY__P3 2.0
    #define FXAA_QUALITY__P4 2.0
    #define FXAA_QUALITY__P5 2.0
    #define FXAA_QUALITY__P6 2.0
    #define FXAA_QUALITY__P7 2.0
    #define FXAA_QUALITY__P8 4.0
    #define FXAA_QUALITY__P9 8.0
#elif (FXAA_QUALITY__PRESET == 28)
    #define FXAA_QUALITY__PS 11
    #define FXAA_QUALITY__P0 1.0
    #define FXAA_QUALITY__P1 1.5
    #define FXAA_QUALITY__P2 2.0
    #define FXAA_QUALITY__P3 2.0
    #define FXAA_QUALITY__P4 2.0
    #define FXAA_QUALITY__P5 2.0
    #define FXAA_QUALITY__P6 2.0
    #define FXAA_QUALITY__P7 2.0
    #define FXAA_QUALITY__P8 2.0
    #define FXAA_QUALITY__P9 4.0
    #define FXAA_QUALITY__P10 8.0
#elif (FXAA_QUALITY__PRESET == 29)
    #define FXAA_QUALITY__PS 12
    #define FXAA_QUALITY__P0 1.0
    #define FXAA_QUALITY__P1 1.5
    #define FXAA_QUALITY__P2 2.0
    #define FXAA_QUALITY__P3 2.0
    #define FXAA_QUALITY__P4 2.0
    #define FXAA_QUALITY__P5 2.0
    #define FXAA_QUALITY__P6 2.0
    #define FXAA_QUALITY__P7 2.0
    #define FXAA_QUALITY__P8 2.0
    #define FXAA_QUALITY__P9 2.0
    #define FXAA_QUALITY__P10 4.0
    #define FXAA_QUALITY__P11 8.0
#endif
|
|
|
|
/*============================================================================
|
|
FXAA QUALITY - EXTREME QUALITY
|
|
============================================================================*/
|
|
// Extreme quality preset (39): no dither, very expensive.
// NOTE(review): FXAA_QUALITY__PS equals the number of FXAA_QUALITY__Pn
// entries; presumably it is the search step count and Pn the per-step
// distance - confirm against the quality shader.
#if (FXAA_QUALITY__PRESET == 39)
    #define FXAA_QUALITY__PS  12
    #define FXAA_QUALITY__P0  1.0
    #define FXAA_QUALITY__P1  1.0
    #define FXAA_QUALITY__P2  1.0
    #define FXAA_QUALITY__P3  1.0
    #define FXAA_QUALITY__P4  1.0
    #define FXAA_QUALITY__P5  1.5
    #define FXAA_QUALITY__P6  2.0
    #define FXAA_QUALITY__P7  2.0
    #define FXAA_QUALITY__P8  2.0
    #define FXAA_QUALITY__P9  2.0
    #define FXAA_QUALITY__P10 4.0
    #define FXAA_QUALITY__P11 8.0
#endif
|
|
|
|
|
|
|
|
/*============================================================================
|
|
|
|
API PORTING
|
|
|
|
============================================================================*/
|
|
// API porting layer for GLSL 1.20: maps the HLSL-style type names and the
// Fxaa* helper macros used by the shader body onto GLSL built-ins.
#if FXAA_GLSL_120
    // Requires,
    //   #version 120
    // And at least,
    //   #extension GL_EXT_gpu_shader4 : enable
    // (or set FXAA_FAST_PIXEL_OFFSET 0 to compute offsets manually, like the
    //  DX9 path does)
    #define half float
    #define half2 vec2
    #define half3 vec3
    #define half4 vec4
    #define int2 ivec2
    #define float2 vec2
    #define float3 vec3
    #define float4 vec4
    #define FxaaInt2 ivec2
    #define FxaaFloat2 vec2
    #define FxaaFloat3 vec3
    #define FxaaFloat4 vec4
    #define FxaaDiscard discard
    #define FxaaDot3(a, b) dot(a, b)
    #define FxaaSat(x) clamp(x, 0.0, 1.0)
    #define FxaaLerp(x,y,s) mix(x,y,s)
    #define FxaaTex sampler2D
    // Fetch from the top mip level (LOD 0).
    #define FxaaTexTop(t, p) texture2DLod(t, p, 0.0)
    // Fetch at pixel offset "o"; "r" is the scale that converts "o" into
    // texture-coordinate units when no hardware offset is used.
    #if (FXAA_FAST_PIXEL_OFFSET == 1)
        #define FxaaTexOff(t, p, o, r) texture2DLodOffset(t, p, 0.0, o)
    #else
        // Arguments are parenthesized so that compound expressions passed
        // as p, o or r keep their intended grouping.
        #define FxaaTexOff(t, p, o, r) texture2DLod(t, (p) + ((o) * (r)), 0.0)
    #endif
    #if (FXAA_GATHER4_ALPHA == 1)
        // use #extension GL_ARB_gpu_shader5 : enable
        // Component index 3 selects the alpha channel.
        #define FxaaTexAlpha4(t, p, r) textureGather(t, p, 3)
        #define FxaaTexOffAlpha4(t, p, o, r) textureGatherOffset(t, p, o, 3)
    #endif
#endif
|
|
/*--------------------------------------------------------------------------*/
|
|
// API porting layer for GLSL 1.30: maps the HLSL-style type names and the
// Fxaa* helper macros used by the shader body onto GLSL built-ins.
// Requires "#version 130" or better.
#if (FXAA_GLSL_130 != 0)
    // HLSL-style scalar and vector type names.
    #define half   float
    #define half2  vec2
    #define half3  vec3
    #define half4  vec4
    #define float2 vec2
    #define float3 vec3
    #define float4 vec4
    #define int2   ivec2

    // FXAA's own type aliases.
    #define FxaaFloat2 vec2
    #define FxaaFloat3 vec3
    #define FxaaFloat4 vec4
    #define FxaaInt2   ivec2

    // Arithmetic helpers and discard.
    #define FxaaDot3(a, b) dot(a, b)
    #define FxaaLerp(x,y,s) mix(x,y,s)
    #define FxaaSat(x) clamp(x, 0.0, 1.0)
    #define FxaaDiscard discard

    // Texture access; all fetches use LOD 0.
    #define FxaaTex sampler2D
    #define FxaaTexTop(t, p) textureLod(t, p, 0.0)
    #define FxaaTexOff(t, p, o, r) textureLodOffset(t, p, 0.0, o)

    #if (FXAA_GATHER4_ALPHA == 1)
        // use #extension GL_ARB_gpu_shader5 : enable
        // Component index 3 selects the alpha channel.
        #define FxaaTexAlpha4(t, p, r) textureGather(t, p, 3)
        #define FxaaTexOffAlpha4(t, p, o, r) textureGatherOffset(t, p, o, 3)
    #endif
#endif
|
|
/*--------------------------------------------------------------------------*/
|
|
// API porting layer for HLSL 3 and Xbox 360: defines the Fxaa* helper
// macros used by the shader body in terms of HLSL intrinsics.
#if (FXAA_HLSL_3 == 1) || (FXAA_360 == 1)
    // Integer pixel offsets are represented as float2 on this path.
    #define int2 float2
    #define FxaaInt2 float2
    #define FxaaFloat2 float2
    #define FxaaFloat3 float3
    #define FxaaFloat4 float4
    #define FxaaDiscard clip(-1)
    #define FxaaDot3(a, b) dot(a, b)
    #define FxaaSat(x) saturate(x)
    #define FxaaLerp(x,y,s) lerp(x,y,s)
    #define FxaaTex sampler2D
    // Fetch from the top mip level (LOD 0).
    #define FxaaTexTop(t, p) tex2Dlod(t, float4(p, 0.0, 0.0))
    // Fetch at pixel offset "o", scaled by "r" into texture-coordinate units.
    // Arguments are parenthesized so that compound expressions passed as
    // p, o or r keep their intended grouping.
    #define FxaaTexOff(t, p, o, r) tex2Dlod(t, float4((p) + ((o) * (r)), 0, 0))
#endif
|
|
/*--------------------------------------------------------------------------*/
|
|
// API porting layer for HLSL 4: defines the Fxaa* helper macros used by
// the shader body in terms of HLSL intrinsics and texture-object methods.
#if (FXAA_HLSL_4 != 0)
    // A texture and the sampler state used to read it, passed as one value.
    struct FxaaTex { SamplerState smpl; Texture2D tex; };

    // Texture access; all fetches use LOD 0. "o" is an integer texel offset.
    #define FxaaTexTop(t, p) t.tex.SampleLevel(t.smpl, p, 0.0)
    #define FxaaTexOff(t, p, o, r) t.tex.SampleLevel(t.smpl, p, 0.0, o)

    // Type aliases.
    #define FxaaFloat2 float2
    #define FxaaFloat3 float3
    #define FxaaFloat4 float4
    #define FxaaInt2 int2

    // Arithmetic helpers and discard.
    #define FxaaDot3(a, b) dot(a, b)
    #define FxaaLerp(x,y,s) lerp(x,y,s)
    #define FxaaSat(x) saturate(x)
    #define FxaaDiscard clip(-1)
#endif
|
|
/*--------------------------------------------------------------------------*/
|
|
// API porting layer for HLSL 5: same helpers as the HLSL 4 layer plus the
// alpha-channel gather macros.
#if (FXAA_HLSL_5 != 0)
    // A texture and the sampler state used to read it, passed as one value.
    struct FxaaTex { SamplerState smpl; Texture2D tex; };

    // Texture access; all fetches use LOD 0. "o" is an integer texel offset.
    #define FxaaTexTop(t, p) t.tex.SampleLevel(t.smpl, p, 0.0)
    #define FxaaTexOff(t, p, o, r) t.tex.SampleLevel(t.smpl, p, 0.0, o)

    // Gather the alpha channel of four texels in a single fetch.
    #define FxaaTexAlpha4(t, p, r) t.tex.GatherAlpha(t.smpl, p)
    #define FxaaTexOffAlpha4(t, p, o, r) t.tex.GatherAlpha(t.smpl, p, o)

    // Type aliases.
    #define FxaaFloat2 float2
    #define FxaaFloat3 float3
    #define FxaaFloat4 float4
    #define FxaaInt2 int2

    // Arithmetic helpers and discard.
    #define FxaaDot3(a, b) dot(a, b)
    #define FxaaLerp(x,y,s) lerp(x,y,s)
    #define FxaaSat(x) saturate(x)
    #define FxaaDiscard clip(-1)
#endif
|
|
|
|
|
|
|
|
/*============================================================================
|
|
|
|
FXAA3 CONSOLE - 360 PIXEL SHADER
|
|
|
|
------------------------------------------------------------------------------
|
|
Might be some optimizations left here,
|
|
as of this latest change didn't have a PIX dump to verify if TEX bound.
|
|
============================================================================*/
|
|
#if (FXAA_360 == 1)
|
|
/*--------------------------------------------------------------------------*/
|
|
// FXAA3 Console pixel shader, Xbox 360 variant.
//
// Reads luma from the alpha channel at the four pixel corners and at the
// pixel center. Returns the center sample unchanged when local contrast is
// below the console thresholds; otherwise returns a blend of taps taken
// along a direction derived from the corner lumas.
half4 FxaaPixelShader(
    // {xy} = center of pixel
    float2 pos,
    // {xy__} = upper left of pixel
    // {__zw} = lower right of pixel
    float4 posPos,
    // {rgb_} = color in linear or perceptual color space
    // {___a} = read by this function as luma (the .w of every fetch)
    FxaaTex tex,
    // This must be from a constant/uniform.
    // Not referenced by this variant.
    float2 rcpFrame,
    // This must be from a constant/uniform.
    // {x___} = 2.0/screenWidthInPixels
    // {_y__} = 2.0/screenHeightInPixels
    // {__z_} = 0.5/screenWidthInPixels
    // {___w} = 0.5/screenHeightInPixels
    float4 rcpFrameOpt
) {
    // Luma at the four corners, packed as (NW, NE, SW, SE).
    half4 cornerLuma;
    cornerLuma.x = FxaaTexTop(tex, posPos.xy).w;
    cornerLuma.y = FxaaTexTop(tex, posPos.zy).w;
    cornerLuma.z = FxaaTexTop(tex, posPos.xw).w;
    cornerLuma.w = FxaaTexTop(tex, posPos.zw).w;

    // Center sample: color in .xyz, luma in .w.
    half4 centerTap = FxaaTexTop(tex, pos.xy);

    // Small constant bias on the NE luma.
    // NOTE(review): presumably keeps the direction computed below from being
    // exactly zero before normalize() - confirm.
    cornerLuma.y += 1.0/384.0;

    // Min and max luma over the four corners.
    half2 pairMin = min(cornerLuma.xy, cornerLuma.zw);
    half2 pairMax = max(cornerLuma.xy, cornerLuma.zw);
    half cornerMin = min(pairMin.x, pairMin.y);
    half cornerMax = max(pairMax.x, pairMax.y);

    // Early exit: luma range including the center is below the threshold.
    half rangeMin = min(cornerMin, centerTap.w);
    half rangeMax = max(cornerMax, centerTap.w);
    if((rangeMax - rangeMin) < max(FXAA_CONSOLE__EDGE_THRESHOLD_MIN, cornerMax * FXAA_CONSOLE__EDGE_THRESHOLD))
    {
        return centerTap;
    }

    // Sampling direction: x = (SW + SE) - (NW + NE), y = (NW + SW) - (NE + SE).
    half2 edgeDir;
    edgeDir.x = dot(cornerLuma, float4(-1.0, -1.0, 1.0, 1.0));
    edgeDir.y = dot(cornerLuma, float4( 1.0, -1.0, 1.0,-1.0));

    half2 unitDir = normalize(edgeDir.xy);

    // Second direction: unit direction divided by its smaller component
    // (scaled by the sharpness knob), limited to [-2, 2] per axis.
    half minAxisTimesSharpness = min(abs(unitDir.x), abs(unitDir.y)) * FXAA_CONSOLE__EDGE_SHARPNESS;
    half2 wideDir = clamp(unitDir.xy / minAxisTimesSharpness, -2.0, 2.0);

    // Two taps along the unit direction and two along the wide direction.
    half4 nearNeg = FxaaTexTop(tex, pos.xy - unitDir * rcpFrameOpt.zw);
    half4 nearPos = FxaaTexTop(tex, pos.xy + unitDir * rcpFrameOpt.zw);
    half4 farNeg = FxaaTexTop(tex, pos.xy - wideDir * rcpFrameOpt.xy);
    half4 farPos = FxaaTexTop(tex, pos.xy + wideDir * rcpFrameOpt.xy);

    half4 twoTapAvg = nearNeg * 0.5 + nearPos * 0.5;
    half4 fourTapAvg = farNeg * 0.25 + farPos * 0.25 + twoTapAvg * 0.5;

    // If the four-tap luma falls outside the corner luma range, use the
    // two-tap color instead (the returned .w stays the four-tap value).
    if((fourTapAvg.w < cornerMin) || (fourTapAvg.w > cornerMax))
    {
        fourTapAvg.xyz = twoTapAvg.xyz;
    }
    return fourTapAvg;
}
|
|
/*==========================================================================*/
|
|
#endif
|
|
|
|
|
|
|
|
/*============================================================================
|
|
|
|
FXAA3 CONSOLE - 360 PIXEL SHADER OPTIMIZED PROTOTYPE
|
|
|
|
------------------------------------------------------------------------------
|
|
This prototype optimized version thanks to suggestions from Andy Luedke.
|
|
Should be fully tex bound in all cases.
|
|
As of the FXAA 3.10 release I have not tested this code,
|
|
but at least the missing ";" was fixed.
|
|
If it does not work, please let me know so I can fix it.
|
|
------------------------------------------------------------------------------
|
|
Extra requirements,
|
|
(1.) Different inputs: no posPos.
|
|
(2.) Different inputs: alias three samplers with different exp bias settings!
|
|
(3.) New constants: setup fxaaConst as described below.
|
|
============================================================================*/
|
|
#if (FXAA_360_OPT == 1)
|
|
/*--------------------------------------------------------------------------*/
|
|
[reduceTempRegUsage(4)]
float4 FxaaPixelShader(
    // {xy} = center of pixel
    float2 pos,
    // Three samplers,
    //   texExpBias0    = exponent bias 0
    //   texExpBiasNeg1 = exponent bias -1
    //   texExpBiasNeg2 = exponent bias -2
    // {rgb_} = color in linear or perceptual color space
    // {___a} = read as luma by the code below (every tap uses .w for edge
    //          detection); the alpha of the returned color is a by-product
    uniform sampler2D texExpBias0,
    uniform sampler2D texExpBiasNeg1,
    uniform sampler2D texExpBiasNeg2,
    // These must be in physical constant registers and NOT immediates.
    // Immediates will result in compiler un-optimizing.
    //   width  = screen width in pixels
    //   height = screen height in pixels
    float4 fxaaConstDir,   // float4(1.0, -1.0, 0.25, -0.25);
    float4 fxaaConstInner, // float4(0.5/width, 0.5/height, -0.5/width, -0.5/height);
    float4 fxaaConstOuter  // float4(8.0/width, 8.0/height, -4.0/width, -4.0/height);
) {
/*--------------------------------------------------------------------------*/
    // Fetch the four diagonal neighbours (half-texel offsets) and pack one
    // luma value per component: x = NW, y = NE, z = SW, w = SE.
    float4 lumaNwNeSwSe;
    asm {
        tfetch2D lumaNwNeSwSe.w___, texExpBias0, pos.xy, OffsetX = -0.5, OffsetY = -0.5, UseComputedLOD=false
        tfetch2D lumaNwNeSwSe._w__, texExpBias0, pos.xy, OffsetX = 0.5, OffsetY = -0.5, UseComputedLOD=false
        tfetch2D lumaNwNeSwSe.__w_, texExpBias0, pos.xy, OffsetX = -0.5, OffsetY = 0.5, UseComputedLOD=false
        tfetch2D lumaNwNeSwSe.___w, texExpBias0, pos.xy, OffsetX = 0.5, OffsetY = 0.5, UseComputedLOD=false
    };
/*--------------------------------------------------------------------------*/
    // Small bias on the NE luma.
    // NOTE(review): presumably keeps the direction vector from being exactly
    // zero before normalize() - confirm against the FXAA whitepaper.
    lumaNwNeSwSe.y += 1.0/384.0;
    float2 lumaMinTemp = min(lumaNwNeSwSe.xy, lumaNwNeSwSe.zw);
    float2 lumaMaxTemp = max(lumaNwNeSwSe.xy, lumaNwNeSwSe.zw);
    float lumaMin = min(lumaMinTemp.x, lumaMinTemp.y);
    float lumaMax = max(lumaMaxTemp.x, lumaMaxTemp.y);
/*--------------------------------------------------------------------------*/
    // Early exit: return the unfiltered center texel when the local luma
    // range is below the contrast threshold.
    // The range bounds are scalars; declaring them float (not float4) keeps
    // the if() condition scalar.
    float4 rgbyM = tex2Dlod(texExpBias0, float4(pos.xy, 0.0, 0.0));
    float lumaMinM = min(lumaMin, rgbyM.w);
    float lumaMaxM = max(lumaMax, rgbyM.w);
    if((lumaMaxM - lumaMinM) < max(FXAA_CONSOLE__EDGE_THRESHOLD_MIN, lumaMax * FXAA_CONSOLE__EDGE_THRESHOLD)) return rgbyM;
/*--------------------------------------------------------------------------*/
    // With the documented fxaaConstDir = (1, -1, ...):
    //   dir.x = -NW - NE + SW + SE
    //   dir.y =  NW - NE + SW - SE
    float2 dir;
    dir.x = dot(lumaNwNeSwSe, fxaaConstDir.yyxx);
    dir.y = dot(lumaNwNeSwSe, fxaaConstDir.xyxy);
    dir = normalize(dir);
/*--------------------------------------------------------------------------*/
    // Inner pair of taps: {xy} and {zw} are the two opposite offsets along dir.
    float4 dir1 = dir.xyxy * fxaaConstInner.xyzw;
/*--------------------------------------------------------------------------*/
    // Outer pair of taps: direction scaled by sharpness and the smaller
    // component, clamped through saturate(), then remapped with fxaaConstOuter.
    float4 dir2;
    float dirAbsMinTimesC = min(abs(dir.x), abs(dir.y));
    dir2 = saturate(fxaaConstOuter.zzww * dir.xyxy / FXAA_CONSOLE__EDGE_SHARPNESS / dirAbsMinTimesC + 0.5);
    dir2 = dir2 * fxaaConstOuter.xyxy + fxaaConstOuter.zwzw;
/*--------------------------------------------------------------------------*/
    float4 rgbyN1 = tex2Dlod(texExpBiasNeg1, float4(pos.xy + dir1.xy, 0.0, 0.0));
    float4 rgbyP1 = tex2Dlod(texExpBiasNeg1, float4(pos.xy + dir1.zw, 0.0, 0.0));
    float4 rgbyN2 = tex2Dlod(texExpBiasNeg2, float4(pos.xy + dir2.xy, 0.0, 0.0));
    float4 rgbyP2 = tex2Dlod(texExpBiasNeg2, float4(pos.xy + dir2.zw, 0.0, 0.0));
/*--------------------------------------------------------------------------*/
    // The sampler exponent bias supplies the tap weights (assuming a bias of
    // -1 scales fetched values by 0.5 and -2 by 0.25 - TODO confirm on HW):
    //   rgbyA = 0.5*N1 + 0.5*P1
    //   rgbyB = 0.25*N2 + 0.25*P2 + 0.5*rgbyA
    // so the explicit 0.5 belongs on rgbyA, not on rgbyP2.
    half4 rgbyA = rgbyN1 + rgbyP1;
    half4 rgbyB = rgbyN2 + rgbyP2 + rgbyA * 0.5;
/*--------------------------------------------------------------------------*/
    // Fall back to the 2-tap result when the 4-tap luma leaves the local
    // [lumaMin, lumaMax] range.
    float4 rgbyR = ((rgbyB.w - lumaMax) > 0.0) ? rgbyA : rgbyB;
    rgbyR = ((rgbyB.w - lumaMin) > 0.0) ? rgbyR : rgbyA;
    return rgbyR; }
|
|
/*==========================================================================*/
|
|
#endif
|
|
|
|
|
|
|
|
|
|
/*============================================================================
|
|
|
|
FXAA3 CONSOLE - OPTIMIZED PS3 PIXEL SHADER (NO EARLY EXIT)
|
|
|
|
==============================================================================
|
|
The code below does not exactly match the assembly.
|
|
I have a feeling that 12 cycles is possible, but was not able to get there.
|
|
Might have to increase register count to get full performance.
|
|
Note this shader does not use perspective interpolation.
|
|
|
|
Use the following cgc options,
|
|
|
|
--fenable-bx2 --fastmath --fastprecision --nofloatbindings
|
|
|
|
------------------------------------------------------------------------------
|
|
NVSHADERPERF OUTPUT
|
|
------------------------------------------------------------------------------
|
|
For reference and to aid in debug, output of NVShaderPerf should match this,
|
|
|
|
Shader to schedule:
|
|
0: texpkb h0.w(TRUE), v5.zyxx, #0
|
|
2: addh h2.z(TRUE), h0.w, constant(0.001953, 0.000000, 0.000000, 0.000000).x
|
|
4: texpkb h0.w(TRUE), v5.xwxx, #0
|
|
6: addh h0.z(TRUE), -h2, h0.w
|
|
7: texpkb h1.w(TRUE), v5, #0
|
|
9: addh h0.x(TRUE), h0.z, -h1.w
|
|
10: addh h3.w(TRUE), h0.z, h1
|
|
11: texpkb h2.w(TRUE), v5.zwzz, #0
|
|
13: addh h0.z(TRUE), h3.w, -h2.w
|
|
14: addh h0.x(TRUE), h2.w, h0
|
|
15: nrmh h1.xz(TRUE), h0_n
|
|
16: minh_m8 h0.x(TRUE), |h1|, |h1.z|
|
|
17: maxh h4.w(TRUE), h0, h1
|
|
18: divx h2.xy(TRUE), h1_n.xzzw, h0_n
|
|
19: movr r1.zw(TRUE), v4.xxxy
|
|
20: madr r2.xz(TRUE), -h1, constant(cConst5.x, cConst5.y, cConst5.z, cConst5.w).zzww, r1.zzww
|
|
22: minh h5.w(TRUE), h0, h1
|
|
23: texpkb h0(TRUE), r2.xzxx, #0
|
|
25: madr r0.zw(TRUE), h1.xzxz, constant(cConst5.x, cConst5.y, cConst5.z, cConst5.w), r1
|
|
27: maxh h4.x(TRUE), h2.z, h2.w
|
|
28: texpkb h1(TRUE), r0.zwzz, #0
|
|
30: addh_d2 h1(TRUE), h0, h1
|
|
31: madr r0.xy(TRUE), -h2, constant(cConst5.x, cConst5.y, cConst5.z, cConst5.w).xyxx, r1.zwzz
|
|
33: texpkb h0(TRUE), r0, #0
|
|
35: minh h4.z(TRUE), h2, h2.w
|
|
36: fenct TRUE
|
|
37: madr r1.xy(TRUE), h2, constant(cConst5.x, cConst5.y, cConst5.z, cConst5.w).xyxx, r1.zwzz
|
|
39: texpkb h2(TRUE), r1, #0
|
|
41: addh_d2 h0(TRUE), h0, h2
|
|
42: maxh h2.w(TRUE), h4, h4.x
|
|
43: minh h2.x(TRUE), h5.w, h4.z
|
|
44: addh_d2 h0(TRUE), h0, h1
|
|
45: slth h2.x(TRUE), h0.w, h2
|
|
46: sgth h2.w(TRUE), h0, h2
|
|
47: movh h0(TRUE), h0
|
|
48: addx.c0 rc(TRUE), h2, h2.w
|
|
49: movh h0(c0.NE.x), h1
|
|
|
|
IPU0 ------ Simplified schedule: --------
|
|
Pass | Unit | uOp | PC: Op
|
|
-----+--------+------+-------------------------
|
|
1 | SCT0/1 | mov | 0: TXLr h0.w, g[TEX1].zyxx, const.xxxx, TEX0;
|
|
| TEX | txl | 0: TXLr h0.w, g[TEX1].zyxx, const.xxxx, TEX0;
|
|
| SCB1 | add | 2: ADDh h2.z, h0.--w-, const.--x-;
|
|
| | |
|
|
2 | SCT0/1 | mov | 4: TXLr h0.w, g[TEX1].xwxx, const.xxxx, TEX0;
|
|
| TEX | txl | 4: TXLr h0.w, g[TEX1].xwxx, const.xxxx, TEX0;
|
|
| SCB1 | add | 6: ADDh h0.z,-h2, h0.--w-;
|
|
| | |
|
|
3 | SCT0/1 | mov | 7: TXLr h1.w, g[TEX1], const.xxxx, TEX0;
|
|
| TEX | txl | 7: TXLr h1.w, g[TEX1], const.xxxx, TEX0;
|
|
| SCB0 | add | 9: ADDh h0.x, h0.z---,-h1.w---;
|
|
| SCB1 | add | 10: ADDh h3.w, h0.---z, h1;
|
|
| | |
|
|
4 | SCT0/1 | mov | 11: TXLr h2.w, g[TEX1].zwzz, const.xxxx, TEX0;
|
|
| TEX | txl | 11: TXLr h2.w, g[TEX1].zwzz, const.xxxx, TEX0;
|
|
| SCB0 | add | 14: ADDh h0.x, h2.w---, h0;
|
|
| SCB1 | add | 13: ADDh h0.z, h3.--w-,-h2.--w-;
|
|
| | |
|
|
5 | SCT1 | mov | 15: NRMh h1.xz, h0;
|
|
| SRB | nrm | 15: NRMh h1.xz, h0;
|
|
| SCB0 | min | 16: MINh*8 h0.x, |h1|, |h1.z---|;
|
|
| SCB1 | max | 17: MAXh h4.w, h0, h1;
|
|
| | |
|
|
6 | SCT0 | div | 18: DIVx h2.xy, h1.xz--, h0;
|
|
| SCT1 | mov | 19: MOVr r1.zw, g[TEX0].--xy;
|
|
| SCB0 | mad | 20: MADr r2.xz,-h1, const.z-w-, r1.z-w-;
|
|
| SCB1 | min | 22: MINh h5.w, h0, h1;
|
|
| | |
|
|
7 | SCT0/1 | mov | 23: TXLr h0, r2.xzxx, const.xxxx, TEX0;
|
|
| TEX | txl | 23: TXLr h0, r2.xzxx, const.xxxx, TEX0;
|
|
| SCB0 | max | 27: MAXh h4.x, h2.z---, h2.w---;
|
|
| SCB1 | mad | 25: MADr r0.zw, h1.--xz, const, r1;
|
|
| | |
|
|
8 | SCT0/1 | mov | 28: TXLr h1, r0.zwzz, const.xxxx, TEX0;
|
|
| TEX | txl | 28: TXLr h1, r0.zwzz, const.xxxx, TEX0;
|
|
| SCB0/1 | add | 30: ADDh/2 h1, h0, h1;
|
|
| | |
|
|
9 | SCT0 | mad | 31: MADr r0.xy,-h2, const.xy--, r1.zw--;
|
|
| SCT1 | mov | 33: TXLr h0, r0, const.zzzz, TEX0;
|
|
| TEX | txl | 33: TXLr h0, r0, const.zzzz, TEX0;
|
|
| SCB1 | min | 35: MINh h4.z, h2, h2.--w-;
|
|
| | |
|
|
10 | SCT0 | mad | 37: MADr r1.xy, h2, const.xy--, r1.zw--;
|
|
| SCT1 | mov | 39: TXLr h2, r1, const.zzzz, TEX0;
|
|
| TEX | txl | 39: TXLr h2, r1, const.zzzz, TEX0;
|
|
| SCB0/1 | add | 41: ADDh/2 h0, h0, h2;
|
|
| | |
|
|
11 | SCT0 | min | 43: MINh h2.x, h5.w---, h4.z---;
|
|
| SCT1 | max | 42: MAXh h2.w, h4, h4.---x;
|
|
| SCB0/1 | add | 44: ADDh/2 h0, h0, h1;
|
|
| | |
|
|
12 | SCT0 | set | 45: SLTh h2.x, h0.w---, h2;
|
|
| SCT1 | set | 46: SGTh h2.w, h0, h2;
|
|
| SCB0/1 | mul | 47: MOVh h0, h0;
|
|
| | |
|
|
13 | SCT0 | mad | 48: ADDxc0_s rc, h2, h2.w---;
|
|
| SCB0/1 | mul | 49: MOVh h0(NE0.xxxx), h1;
|
|
|
|
Pass SCT TEX SCB
|
|
1: 0% 100% 25%
|
|
2: 0% 100% 25%
|
|
3: 0% 100% 50%
|
|
4: 0% 100% 50%
|
|
5: 0% 0% 50%
|
|
6: 100% 0% 75%
|
|
7: 0% 100% 75%
|
|
8: 0% 100% 100%
|
|
9: 0% 100% 25%
|
|
10: 0% 100% 100%
|
|
11: 50% 0% 100%
|
|
12: 50% 0% 100%
|
|
13: 25% 0% 100%
|
|
|
|
MEAN: 17% 61% 67%
|
|
|
|
Pass SCT0 SCT1 TEX SCB0 SCB1
|
|
1: 0% 0% 100% 0% 100%
|
|
2: 0% 0% 100% 0% 100%
|
|
3: 0% 0% 100% 100% 100%
|
|
4: 0% 0% 100% 100% 100%
|
|
5: 0% 0% 0% 100% 100%
|
|
6: 100% 100% 0% 100% 100%
|
|
7: 0% 0% 100% 100% 100%
|
|
8: 0% 0% 100% 100% 100%
|
|
9: 0% 0% 100% 0% 100%
|
|
10: 0% 0% 100% 100% 100%
|
|
11: 100% 100% 0% 100% 100%
|
|
12: 100% 100% 0% 100% 100%
|
|
13: 100% 0% 0% 100% 100%
|
|
|
|
MEAN: 30% 23% 61% 76% 100%
|
|
Fragment Performance Setup: Driver RSX Compiler, GPU RSX, Flags 0x5
|
|
Results 13 cycles, 3 r regs, 923,076,923 pixels/s
|
|
============================================================================*/
|
|
#if (FXAA_PS3 == 1) && (FXAA_EARLY_EXIT == 0)
|
|
/*--------------------------------------------------------------------------*/
|
|
#pragma disablepc all
|
|
#pragma option O3
|
|
#pragma option OutColorPrec=fp16
|
|
#pragma texformat default RGBA8
|
|
/*==========================================================================*/
|
|
half4 FxaaPixelShader(
    // {xy} = center of pixel
    float2 pos,
    // {xy__} = upper left of pixel
    // {__zw} = lower right of pixel
    float4 posPos,
    // {rgb_} = color in linear or perceptual color space
    // {___a} = luma in perceptual color space (not linear)
    sampler2D tex,
    // This must be from a constant/uniform.
    // {xy} = rcpFrame not used on PS3
    float2 rcpFrame,
    // This must be from a constant/uniform.
    // {x___} = 2.0/screenWidthInPixels
    // {_y__} = 2.0/screenHeightInPixels
    // {__z_} = 0.5/screenWidthInPixels
    // {___w} = 0.5/screenHeightInPixels
    float4 rcpFrameOpt
) {
// The "(N)" markers below group statements by the pass they are meant to
// occupy in the schedule listed in the header comment; keep the statement
// order when editing.
/*--------------------------------------------------------------------------*/
// (1)
    half4 dir;
    // dir.y is read by normalize(dir.xyz) in step (5) and must be defined.
    // NOTE(review): re-run NVShaderPerf to confirm the cycle count is unchanged.
    dir.y = 0.0;
    half4 lumaNe = h4tex2Dlod(tex, half4(posPos.zy, 0, 0));
    lumaNe.w += half(1.0/512.0);
    dir.x = -lumaNe.w;
    dir.z = -lumaNe.w;
/*--------------------------------------------------------------------------*/
// (2)
    half4 lumaSw = h4tex2Dlod(tex, half4(posPos.xw, 0, 0));
    dir.x += lumaSw.w;
    dir.z += lumaSw.w;
/*--------------------------------------------------------------------------*/
// (3)
    half4 lumaNw = h4tex2Dlod(tex, half4(posPos.xy, 0, 0));
    dir.x -= lumaNw.w;
    dir.z += lumaNw.w;
/*--------------------------------------------------------------------------*/
// (4)
    half4 lumaSe = h4tex2Dlod(tex, half4(posPos.zw, 0, 0));
    dir.x += lumaSe.w;
    dir.z -= lumaSe.w;
/*--------------------------------------------------------------------------*/
// (5)
    // {xy} of dir1_pos = unit edge direction, {zw} = pixel center (set in 6).
    half4 dir1_pos;
    dir1_pos.xy = normalize(dir.xyz).xz;
    half dirAbsMinTimesC = min(abs(dir1_pos.x), abs(dir1_pos.y)) * half(FXAA_CONSOLE__EDGE_SHARPNESS);
/*--------------------------------------------------------------------------*/
// (6)
    // {xy} of dir2_pos = direction for the outer taps, clamped to [-2, 2].
    half4 dir2_pos;
    dir2_pos.xy = clamp(dir1_pos.xy / dirAbsMinTimesC, half(-2.0), half(2.0));
    dir1_pos.zw = pos.xy;
    dir2_pos.zw = pos.xy;
    half4 temp1N;
    temp1N.xy = dir1_pos.zw - dir1_pos.xy * rcpFrameOpt.zw;
/*--------------------------------------------------------------------------*/
// (7)
    temp1N = h4tex2Dlod(tex, half4(temp1N.xy, 0.0, 0.0));
    half4 rgby1;
    rgby1.xy = dir1_pos.zw + dir1_pos.xy * rcpFrameOpt.zw;
/*--------------------------------------------------------------------------*/
// (8)
    // rgby1 = average of the two inner taps.
    rgby1 = h4tex2Dlod(tex, half4(rgby1.xy, 0.0, 0.0));
    rgby1 = (temp1N + rgby1) * 0.5;
/*--------------------------------------------------------------------------*/
// (9)
    half4 temp2N;
    temp2N.xy = dir2_pos.zw - dir2_pos.xy * rcpFrameOpt.xy;
    temp2N = h4tex2Dlod(tex, half4(temp2N.xy, 0.0, 0.0));
/*--------------------------------------------------------------------------*/
// (10)
    // rgby2 = average of the two outer taps.
    half4 rgby2;
    rgby2.xy = dir2_pos.zw + dir2_pos.xy * rcpFrameOpt.xy;
    rgby2 = h4tex2Dlod(tex, half4(rgby2.xy, 0.0, 0.0));
    rgby2 = (temp2N + rgby2) * 0.5;
/*--------------------------------------------------------------------------*/
// (11)
    // compiler moves these scalar ops up to other cycles
    half lumaMin = min(min(lumaNw.w, lumaSw.w), min(lumaNe.w, lumaSe.w));
    half lumaMax = max(max(lumaNw.w, lumaSw.w), max(lumaNe.w, lumaSe.w));
    // rgby2 now holds the 4-tap blend (inner and outer averages, 50/50).
    rgby2 = (rgby2 + rgby1) * 0.5;
/*--------------------------------------------------------------------------*/
// (12)
    bool twoTapLt = rgby2.w < lumaMin;
    bool twoTapGt = rgby2.w > lumaMax;
/*--------------------------------------------------------------------------*/
// (13)
    // Use the 2-tap result when the 4-tap luma falls outside the corner range.
    if(twoTapLt || twoTapGt) rgby2 = rgby1;
/*--------------------------------------------------------------------------*/
    return rgby2; }
|
|
/*==========================================================================*/
|
|
#endif
|
|
|
|
|
|
|
|
/*============================================================================
|
|
|
|
FXAA3 CONSOLE - OPTIMIZED PS3 PIXEL SHADER (WITH EARLY EXIT)
|
|
|
|
==============================================================================
|
|
The code mostly matches the assembly.
|
|
I have a feeling that 14 cycles is possible, but was not able to get there.
|
|
Might have to increase register count to get full performance.
|
|
Note this shader does not use perspective interpolation.
|
|
|
|
Use the following cgc options,
|
|
|
|
--fenable-bx2 --fastmath --fastprecision --nofloatbindings
|
|
|
|
------------------------------------------------------------------------------
|
|
NVSHADERPERF OUTPUT
|
|
------------------------------------------------------------------------------
|
|
For reference and to aid in debug, output of NVShaderPerf should match this,
|
|
|
|
Shader to schedule:
|
|
0: texpkb h0.w(TRUE), v5.zyxx, #0
|
|
2: addh h2.y(TRUE), h0.w, constant(0.001953, 0.000000, 0.000000, 0.000000).x
|
|
4: texpkb h1.w(TRUE), v5.xwxx, #0
|
|
6: addh h0.x(TRUE), h1.w, -h2.y
|
|
7: texpkb h2.w(TRUE), v5.zwzz, #0
|
|
9: minh h4.w(TRUE), h2.y, h2
|
|
10: maxh h5.x(TRUE), h2.y, h2.w
|
|
11: texpkb h0.w(TRUE), v5, #0
|
|
13: addh h3.w(TRUE), -h0, h0.x
|
|
14: addh h0.x(TRUE), h0.w, h0
|
|
15: addh h0.z(TRUE), -h2.w, h0.x
|
|
16: addh h0.x(TRUE), h2.w, h3.w
|
|
17: minh h5.y(TRUE), h0.w, h1.w
|
|
18: nrmh h2.xz(TRUE), h0_n
|
|
19: minh_m8 h2.w(TRUE), |h2.x|, |h2.z|
|
|
20: divx h4.xy(TRUE), h2_n.xzzw, h2_n.w
|
|
21: movr r1.zw(TRUE), v4.xxxy
|
|
22: maxh h2.w(TRUE), h0, h1
|
|
23: fenct TRUE
|
|
24: madr r0.xy(TRUE), -h2.xzzw, constant(cConst5.x, cConst5.y, cConst5.z, cConst5.w).zwzz, r1.zwzz
|
|
26: texpkb h0(TRUE), r0, #0
|
|
28: maxh h5.x(TRUE), h2.w, h5
|
|
29: minh h5.w(TRUE), h5.y, h4
|
|
30: madr r1.xy(TRUE), h2.xzzw, constant(cConst5.x, cConst5.y, cConst5.z, cConst5.w).zwzz, r1.zwzz
|
|
32: texpkb h2(TRUE), r1, #0
|
|
34: addh_d2 h2(TRUE), h0, h2
|
|
35: texpkb h1(TRUE), v4, #0
|
|
37: maxh h5.y(TRUE), h5.x, h1.w
|
|
38: minh h4.w(TRUE), h1, h5
|
|
39: madr r0.xy(TRUE), -h4, constant(cConst5.x, cConst5.y, cConst5.z, cConst5.w).xyxx, r1.zwzz
|
|
41: texpkb h0(TRUE), r0, #0
|
|
43: addh_m8 h5.z(TRUE), h5.y, -h4.w
|
|
44: madr r2.xy(TRUE), h4, constant(cConst5.x, cConst5.y, cConst5.z, cConst5.w).xyxx, r1.zwzz
|
|
46: texpkb h3(TRUE), r2, #0
|
|
48: addh_d2 h0(TRUE), h0, h3
|
|
49: addh_d2 h3(TRUE), h0, h2
|
|
50: movh h0(TRUE), h3
|
|
51: slth h3.x(TRUE), h3.w, h5.w
|
|
52: sgth h3.w(TRUE), h3, h5.x
|
|
53: addx.c0 rc(TRUE), h3.x, h3
|
|
54: slth.c0 rc(TRUE), h5.z, h5
|
|
55: movh h0(c0.NE.w), h2
|
|
56: movh h0(c0.NE.x), h1
|
|
|
|
IPU0 ------ Simplified schedule: --------
|
|
Pass | Unit | uOp | PC: Op
|
|
-----+--------+------+-------------------------
|
|
1 | SCT0/1 | mov | 0: TXLr h0.w, g[TEX1].zyxx, const.xxxx, TEX0;
|
|
| TEX | txl | 0: TXLr h0.w, g[TEX1].zyxx, const.xxxx, TEX0;
|
|
| SCB0 | add | 2: ADDh h2.y, h0.-w--, const.-x--;
|
|
| | |
|
|
2 | SCT0/1 | mov | 4: TXLr h1.w, g[TEX1].xwxx, const.xxxx, TEX0;
|
|
| TEX | txl | 4: TXLr h1.w, g[TEX1].xwxx, const.xxxx, TEX0;
|
|
| SCB0 | add | 6: ADDh h0.x, h1.w---,-h2.y---;
|
|
| | |
|
|
3 | SCT0/1 | mov | 7: TXLr h2.w, g[TEX1].zwzz, const.xxxx, TEX0;
|
|
| TEX | txl | 7: TXLr h2.w, g[TEX1].zwzz, const.xxxx, TEX0;
|
|
| SCB0 | max | 10: MAXh h5.x, h2.y---, h2.w---;
|
|
| SCB1 | min | 9: MINh h4.w, h2.---y, h2;
|
|
| | |
|
|
4 | SCT0/1 | mov | 11: TXLr h0.w, g[TEX1], const.xxxx, TEX0;
|
|
| TEX | txl | 11: TXLr h0.w, g[TEX1], const.xxxx, TEX0;
|
|
| SCB0 | add | 14: ADDh h0.x, h0.w---, h0;
|
|
| SCB1 | add | 13: ADDh h3.w,-h0, h0.---x;
|
|
| | |
|
|
5 | SCT0 | mad | 16: ADDh h0.x, h2.w---, h3.w---;
|
|
| SCT1 | mad | 15: ADDh h0.z,-h2.--w-, h0.--x-;
|
|
| SCB0 | min | 17: MINh h5.y, h0.-w--, h1.-w--;
|
|
| | |
|
|
6 | SCT1 | mov | 18: NRMh h2.xz, h0;
|
|
| SRB | nrm | 18: NRMh h2.xz, h0;
|
|
| SCB1 | min | 19: MINh*8 h2.w, |h2.---x|, |h2.---z|;
|
|
| | |
|
|
7 | SCT0 | div | 20: DIVx h4.xy, h2.xz--, h2.ww--;
|
|
| SCT1 | mov | 21: MOVr r1.zw, g[TEX0].--xy;
|
|
| SCB1 | max | 22: MAXh h2.w, h0, h1;
|
|
| | |
|
|
8 | SCT0 | mad | 24: MADr r0.xy,-h2.xz--, const.zw--, r1.zw--;
|
|
| SCT1 | mov | 26: TXLr h0, r0, const.xxxx, TEX0;
|
|
| TEX | txl | 26: TXLr h0, r0, const.xxxx, TEX0;
|
|
| SCB0 | max | 28: MAXh h5.x, h2.w---, h5;
|
|
| SCB1 | min | 29: MINh h5.w, h5.---y, h4;
|
|
| | |
|
|
9 | SCT0 | mad | 30: MADr r1.xy, h2.xz--, const.zw--, r1.zw--;
|
|
| SCT1 | mov | 32: TXLr h2, r1, const.xxxx, TEX0;
|
|
| TEX | txl | 32: TXLr h2, r1, const.xxxx, TEX0;
|
|
| SCB0/1 | add | 34: ADDh/2 h2, h0, h2;
|
|
| | |
|
|
10 | SCT0/1 | mov | 35: TXLr h1, g[TEX0], const.xxxx, TEX0;
|
|
| TEX | txl | 35: TXLr h1, g[TEX0], const.xxxx, TEX0;
|
|
| SCB0 | max | 37: MAXh h5.y, h5.-x--, h1.-w--;
|
|
| SCB1 | min | 38: MINh h4.w, h1, h5;
|
|
| | |
|
|
11 | SCT0 | mad | 39: MADr r0.xy,-h4, const.xy--, r1.zw--;
|
|
| SCT1 | mov | 41: TXLr h0, r0, const.zzzz, TEX0;
|
|
| TEX | txl | 41: TXLr h0, r0, const.zzzz, TEX0;
|
|
| SCB0 | mad | 44: MADr r2.xy, h4, const.xy--, r1.zw--;
|
|
| SCB1 | add | 43: ADDh*8 h5.z, h5.--y-,-h4.--w-;
|
|
| | |
|
|
12 | SCT0/1 | mov | 46: TXLr h3, r2, const.xxxx, TEX0;
|
|
| TEX | txl | 46: TXLr h3, r2, const.xxxx, TEX0;
|
|
| SCB0/1 | add | 48: ADDh/2 h0, h0, h3;
|
|
| | |
|
|
13 | SCT0/1 | mad | 49: ADDh/2 h3, h0, h2;
|
|
| SCB0/1 | mul | 50: MOVh h0, h3;
|
|
| | |
|
|
14 | SCT0 | set | 51: SLTh h3.x, h3.w---, h5.w---;
|
|
| SCT1 | set | 52: SGTh h3.w, h3, h5.---x;
|
|
| SCB0 | set | 54: SLThc0 rc, h5.z---, h5;
|
|
| SCB1 | add | 53: ADDxc0_s rc, h3.---x, h3;
|
|
| | |
|
|
15 | SCT0/1 | mul | 55: MOVh h0(NE0.wwww), h2;
|
|
| SCB0/1 | mul | 56: MOVh h0(NE0.xxxx), h1;
|
|
|
|
Pass SCT TEX SCB
|
|
1: 0% 100% 25%
|
|
2: 0% 100% 25%
|
|
3: 0% 100% 50%
|
|
4: 0% 100% 50%
|
|
5: 50% 0% 25%
|
|
6: 0% 0% 25%
|
|
7: 100% 0% 25%
|
|
8: 0% 100% 50%
|
|
9: 0% 100% 100%
|
|
10: 0% 100% 50%
|
|
11: 0% 100% 75%
|
|
12: 0% 100% 100%
|
|
13: 100% 0% 100%
|
|
14: 50% 0% 50%
|
|
15: 100% 0% 100%
|
|
|
|
MEAN: 26% 60% 56%
|
|
|
|
Pass SCT0 SCT1 TEX SCB0 SCB1
|
|
1: 0% 0% 100% 100% 0%
|
|
2: 0% 0% 100% 100% 0%
|
|
3: 0% 0% 100% 100% 100%
|
|
4: 0% 0% 100% 100% 100%
|
|
5: 100% 100% 0% 100% 0%
|
|
6: 0% 0% 0% 0% 100%
|
|
7: 100% 100% 0% 0% 100%
|
|
8: 0% 0% 100% 100% 100%
|
|
9: 0% 0% 100% 100% 100%
|
|
10: 0% 0% 100% 100% 100%
|
|
11: 0% 0% 100% 100% 100%
|
|
12: 0% 0% 100% 100% 100%
|
|
13: 100% 100% 0% 100% 100%
|
|
14: 100% 100% 0% 100% 100%
|
|
15: 100% 100% 0% 100% 100%
|
|
|
|
MEAN: 33% 33% 60% 86% 80%
|
|
Fragment Performance Setup: Driver RSX Compiler, GPU RSX, Flags 0x5
|
|
Results 15 cycles, 3 r regs, 800,000,000 pixels/s
|
|
============================================================================*/
|
|
#if (FXAA_PS3 == 1) && (FXAA_EARLY_EXIT == 1)
|
|
/*--------------------------------------------------------------------------*/
|
|
#pragma disablepc all
|
|
#pragma option O2
|
|
#pragma option OutColorPrec=fp16
|
|
#pragma texformat default RGBA8
|
|
/*==========================================================================*/
|
|
half4 FxaaPixelShader(
    // {xy} = center of pixel
    float2 pos,
    // {xy__} = upper left of pixel
    // {__zw} = lower right of pixel
    float4 posPos,
    // {rgb_} = color in linear or perceptual color space
    // {___a} = luma in perceptual color space (not linear)
    sampler2D tex,
    // This must be from a constant/uniform.
    // {xy} = rcpFrame not used on PS3
    float2 rcpFrame,
    // This must be from a constant/uniform.
    // {x___} = 2.0/screenWidthInPixels
    // {_y__} = 2.0/screenHeightInPixels
    // {__z_} = 0.5/screenWidthInPixels
    // {___w} = 0.5/screenHeightInPixels
    float4 rcpFrameOpt
) {
// The "(N)" markers group statements by the pass they are meant to occupy in
// the schedule listed in the header comment; statement order is deliberate.
/*--------------------------------------------------------------------------*/
// (1) north-east corner tap, luma nudged by 1/512
    half4 tapNe = h4tex2Dlod(tex, half4(posPos.zy, 0, 0));
    half ne = tapNe.w + half(1.0/512.0);
/*--------------------------------------------------------------------------*/
// (2) south-west corner tap
    half4 tapSw = h4tex2Dlod(tex, half4(posPos.xw, 0, 0));
    half swMinusNe = tapSw.w - ne;
/*--------------------------------------------------------------------------*/
// (3) north-west corner tap, luma bounds of the west column
    half4 tapNw = h4tex2Dlod(tex, half4(posPos.xy, 0, 0));
    half maxWest = max(tapNw.w, tapSw.w);
    half minWest = min(tapNw.w, tapSw.w);
/*--------------------------------------------------------------------------*/
// (4) south-east corner tap, partial sums of the direction vector
    half4 tapSe = h4tex2Dlod(tex, half4(posPos.zw, 0, 0));
    half gradZ = tapNw.w + swMinusNe;
    half gradX = -tapNw.w + swMinusNe;
/*--------------------------------------------------------------------------*/
// (5) finish the direction vector (y is fixed at zero)
    half3 edgeDir;
    edgeDir.y = 0.0;
    edgeDir.x = tapSe.w + gradX;
    edgeDir.z = -tapSe.w + gradZ;
    half minEast = min(ne, tapSe.w);
/*--------------------------------------------------------------------------*/
// (6) unit direction for the inner taps
    half2 stepNear = normalize(edgeDir).xz;
    half nearAbsMinScaled = min(abs(stepNear.x), abs(stepNear.y)) * half(FXAA_CONSOLE__EDGE_SHARPNESS);
/*--------------------------------------------------------------------------*/
// (7) direction for the outer taps, clamped to [-2, 2]
    half2 stepFar = clamp(stepNear / nearAbsMinScaled, half(-2.0), half(2.0));
    // The pixel center is narrowed to half here, as in the packed-register form.
    half2 center = pos.xy;
    half maxEast = max(ne, tapSe.w);
/*--------------------------------------------------------------------------*/
// (8) negative inner tap, corner luma range
    half2 uvNearN = center - stepNear * rcpFrameOpt.zw;
    half4 nearN = h4tex2Dlod(tex, half4(uvNearN, 0.0, 0.0));
    half lumaMax = max(maxWest, maxEast);
    half lumaMin = min(minWest, minEast);
/*--------------------------------------------------------------------------*/
// (9) positive inner tap, then the 2-tap average
    half2 uvNearP = center + stepNear * rcpFrameOpt.zw;
    half4 nearAvg = h4tex2Dlod(tex, half4(uvNearP, 0.0, 0.0));
    nearAvg = (nearN + nearAvg) * 0.5;
/*--------------------------------------------------------------------------*/
// (10) center texel, luma range including the center
    half4 rgbyM = h4tex2Dlod(tex, half4(pos.xy, 0.0, 0.0));
    half rangeHi = max(lumaMax, rgbyM.w);
    half rangeLo = min(lumaMin, rgbyM.w);
/*--------------------------------------------------------------------------*/
// (11) negative outer tap, coordinates of the positive outer tap
    half2 uvFarN = center - stepFar * rcpFrameOpt.xy;
    half4 farN = h4tex2Dlod(tex, half4(uvFarN, 0.0, 0.0));
    half2 uvFarP = center + stepFar * rcpFrameOpt.xy;
    half scaledRange = (rangeHi - rangeLo) / FXAA_CONSOLE__EDGE_THRESHOLD;
/*--------------------------------------------------------------------------*/
// (12) positive outer tap, then the outer average
    half4 blend = h4tex2Dlod(tex, half4(uvFarP, 0.0, 0.0));
    blend = (farN + blend) * 0.5;
/*--------------------------------------------------------------------------*/
// (13) 4-tap result: inner and outer averages, 50/50
    blend = (blend + nearAvg) * 0.5;
/*--------------------------------------------------------------------------*/
// (14) selection predicates
    bool belowMin = blend.w < lumaMin;
    bool aboveMax = blend.w > lumaMax;
    bool earlyExit = scaledRange < lumaMax;
    bool useNearOnly = belowMin || aboveMax;
/*--------------------------------------------------------------------------*/
// (15) 2-tap fallback first, then the early-exit override with the center texel
    if(useNearOnly) blend = nearAvg;
    if(earlyExit) blend = rgbyM;
/*--------------------------------------------------------------------------*/
    return blend; }
|
|
/*==========================================================================*/
|
|
#endif
|
|
|
|
|
|
|
|
/*============================================================================
|
|
|
|
FXAA3 CONSOLE - PC PIXEL SHADER
|
|
|
|
------------------------------------------------------------------------------
|
|
Using a modified version of the PS3 version here to best target old hardware.
|
|
============================================================================*/
|
|
#if (FXAA_PC_CONSOLE == 1)
|
|
/*--------------------------------------------------------------------------*/
|
|
half4 FxaaPixelShader(
    // {xy} = center of pixel
    float2 pos,
    // {xy__} = upper left of pixel
    // {__zw} = lower right of pixel
    float4 posPos,
    // {rgb_} = color in linear or perceptual color space
    // {___a} = read as luma by this shader (every tap uses .w)
    FxaaTex tex,
    // This must be from a constant/uniform.
    // {xy} = rcpFrame not used on PC version of FXAA Console
    float2 rcpFrame,
    // This must be from a constant/uniform.
    // {x___} = 2.0/screenWidthInPixels
    // {_y__} = 2.0/screenHeightInPixels
    // {__z_} = 0.5/screenWidthInPixels
    // {___w} = 0.5/screenHeightInPixels
    float4 rcpFrameOpt
) {
/*--------------------------------------------------------------------------*/
    // Four corner taps; only their .w (luma) is used. NE gets a 1/384 nudge.
    half4 tapNe = FxaaTexTop(tex, posPos.zy);
    tapNe.w += half(1.0/384.0);
    half4 tapSw = FxaaTexTop(tex, posPos.xw);
    half4 tapNw = FxaaTexTop(tex, posPos.xy);
    half4 tapSe = FxaaTexTop(tex, posPos.zw);
/*--------------------------------------------------------------------------*/
    // Direction from the corner luma differences. Terms are added in the
    // order NE, SW, NW, SE; y stays zero.
    half3 edgeDir;
    edgeDir.x = -tapNe.w + tapSw.w - tapNw.w + tapSe.w;
    edgeDir.y = 0.0;
    edgeDir.z = -tapNe.w + tapSw.w + tapNw.w - tapSe.w;
/*--------------------------------------------------------------------------*/
    // Luma range of the four corners, used by both the early exit and the
    // final 2-tap / 4-tap selection.
    half lumaMin = min(min(tapNw.w, tapSw.w), min(tapNe.w, tapSe.w));
    half lumaMax = max(max(tapNw.w, tapSw.w), max(tapNe.w, tapSe.w));
/*==========================================================================*/
#if (FXAA_EARLY_EXIT == 1)
    // Leave low-contrast pixels untouched (or discard them).
    half4 rgbyM = FxaaTexTop(tex, pos.xy);
    half rangeLo = min(lumaMin, rgbyM.w);
    half rangeHi = max(lumaMax, rgbyM.w);
    if((rangeHi - rangeLo) < max(FXAA_CONSOLE__EDGE_THRESHOLD_MIN, lumaMax * FXAA_CONSOLE__EDGE_THRESHOLD))
    #if (FXAA_DISCARD == 1)
        FxaaDiscard;
    #else
        return rgbyM;
    #endif
#endif
/*==========================================================================*/
    // Unit direction for the inner taps, and a scaled copy clamped to
    // [-2, 2] for the outer taps.
    half2 stepNear = normalize(edgeDir).xz;
    half dirAbsMinTimesC = min(abs(stepNear.x), abs(stepNear.y)) * half(FXAA_CONSOLE__EDGE_SHARPNESS);
    half2 stepFar = clamp(stepNear / dirAbsMinTimesC, half(-2.0), half(2.0));
    // The pixel center goes through a half2, as it did in the packed form.
    half2 center = pos.xy;
/*--------------------------------------------------------------------------*/
    // Inner pair: +/- stepNear scaled by rcpFrameOpt.zw.
    half2 uvNearN = center - stepNear * rcpFrameOpt.zw;
    half2 uvNearP = center + stepNear * rcpFrameOpt.zw;
    half4 nearN = FxaaTexTop(tex, uvNearN);
    half4 nearP = FxaaTexTop(tex, uvNearP);
    half4 rgby1 = (nearN + nearP) * 0.5;
/*--------------------------------------------------------------------------*/
    // Outer pair: +/- stepFar scaled by rcpFrameOpt.xy.
    half2 uvFarN = center - stepFar * rcpFrameOpt.xy;
    half2 uvFarP = center + stepFar * rcpFrameOpt.xy;
    half4 farN = FxaaTexTop(tex, uvFarN);
    half4 farP = FxaaTexTop(tex, uvFarP);
    half4 rgby2 = (farN + farP) * 0.5;
/*--------------------------------------------------------------------------*/
    // 4-tap result: inner and outer averages, 50/50.
    rgby2 = (rgby2 + rgby1) * 0.5;
/*--------------------------------------------------------------------------*/
    // Use the 2-tap result when the 4-tap luma leaves the corner range.
    bool outsideRange = (rgby2.w < lumaMin) || (rgby2.w > lumaMax);
    return outsideRange ? rgby1 : rgby2; }
|
|
/*==========================================================================*/
|
|
#endif
|
|
|
|
|
|
|
|
/*============================================================================

                             FXAA3 QUALITY - PC

============================================================================*/
#if (FXAA_PC == 1)
/*--------------------------------------------------------------------------*/
// FxaaPixelShader: edge-directed anti-aliasing of one pixel.
//
// Outline of what the body does, in order:
//   1. Reads luma (the .w channel of each texture sample) for the centre
//      pixel and its N/S/E/W neighbours and computes the local luma range.
//   2. If the range is below
//        max(FXAA_QUALITY__EDGE_THRESHOLD_MIN,
//            rangeMax * FXAA_QUALITY__EDGE_THRESHOLD)
//      the pixel takes the early-exit path: the unmodified centre sample is
//      returned, or FxaaDiscard is issued when FXAA_DISCARD == 1.
//   3. Reads the diagonal neighbours and decides whether the edge is
//      horizontal or vertical (horzSpan).
//   4. Steps along the edge in both directions, using the step sizes
//      FXAA_QUALITY__P0 .. FXAA_QUALITY__P12 (how many are compiled in is
//      selected by FXAA_QUALITY__PS), until the luma at each end departs
//      from the edge average by at least gradient/4.
//   5. Shifts the sample position perpendicular to the edge by the larger of
//      the span-based offset and the sub-pixel offset, then resamples.
//
// Returns:
//   FXAA_DISCARD == 1 : the resampled texel as-is.
//   otherwise         : resampled rgb with the ORIGINAL centre luma in .w.
//
// posPos and rcpFrameOpt are accepted for interface compatibility but are
// not referenced anywhere in this function.
//
// NOTE: the statement order below is kept exactly as in the original source;
// do not reorder without re-validating the output.
/*--------------------------------------------------------------------------*/
float4 FxaaPixelShader(
    // {xy} = center of pixel
    float2 pos,
    // {xyzw} = not used on FXAA3 Quality
    float4 posPos,
    // {rgb_} = color in linear or perceptual color space
    // {___a} = luma in perceptual color space (not linear)
    FxaaTex tex,
    // This must be from a constant/uniform.
    // {x_} = 1.0/screenWidthInPixels
    // {_y} = 1.0/screenHeightInPixels
    float2 rcpFrame,
    // {xyzw} = not used on FXAA3 Quality
    float4 rcpFrameOpt
) {
/*--------------------------------------------------------------------------*/
    // Centre and 4-neighbour luma fetch. With FXAA_GATHER4_ALPHA the lumas
    // come from two gather fetches and the luma* names are macros aliasing
    // the gathered components; otherwise they are ordinary float locals.
    float2 posM;
    posM.x = pos.x;
    posM.y = pos.y;
    #if (FXAA_GATHER4_ALPHA == 1)
        #if (FXAA_DISCARD == 0)
            float4 rgbyM = FxaaTexTop(tex, posM);
            #define lumaM rgbyM.w
        #endif
        float4 luma4A = FxaaTexAlpha4(tex, posM, rcpFrame.xy);
        float4 luma4B = FxaaTexOffAlpha4(tex, posM, FxaaInt2(-1, -1), rcpFrame.xy);
        #if (FXAA_DISCARD == 1)
            #define lumaM luma4A.w
        #endif
        #define lumaE luma4A.z
        #define lumaS luma4A.x
        #define lumaSE luma4A.y
        #define lumaNW luma4B.w
        #define lumaN luma4B.z
        #define lumaW luma4B.x
    #else
        float4 rgbyM = FxaaTexTop(tex, posM);
        #define lumaM rgbyM.w
        float lumaS = FxaaTexOff(tex, posM, FxaaInt2( 0, 1), rcpFrame.xy).w;
        float lumaE = FxaaTexOff(tex, posM, FxaaInt2( 1, 0), rcpFrame.xy).w;
        float lumaN = FxaaTexOff(tex, posM, FxaaInt2( 0,-1), rcpFrame.xy).w;
        float lumaW = FxaaTexOff(tex, posM, FxaaInt2(-1, 0), rcpFrame.xy).w;
    #endif
/*--------------------------------------------------------------------------*/
    // Local contrast: min/max luma over the centre and its 4 neighbours.
    float maxSM = max(lumaS, lumaM);
    float minSM = min(lumaS, lumaM);
    float maxESM = max(lumaE, maxSM);
    float minESM = min(lumaE, minSM);
    float maxWN = max(lumaN, lumaW);
    float minWN = min(lumaN, lumaW);
    float rangeMax = max(maxWN, maxESM);
    float rangeMin = min(minWN, minESM);
    float rangeMaxScaled = rangeMax * FXAA_QUALITY__EDGE_THRESHOLD;
    float range = rangeMax - rangeMin;
    float rangeMaxClamped = max(FXAA_QUALITY__EDGE_THRESHOLD_MIN, rangeMaxScaled);
    bool earlyExit = range < rangeMaxClamped;
/*--------------------------------------------------------------------------*/
    // The preprocessor selects the single statement governed by this `if`.
    if(earlyExit)
        #if (FXAA_DISCARD == 1)
            FxaaDiscard;
        #else
            return rgbyM;
        #endif
/*--------------------------------------------------------------------------*/
    // Diagonal neighbours. In the gather path NW and SE are already
    // available as macros, so only NE and SW are fetched here.
    #if (FXAA_GATHER4_ALPHA == 0)
        float lumaNW = FxaaTexOff(tex, posM, FxaaInt2(-1,-1), rcpFrame.xy).w;
        float lumaSE = FxaaTexOff(tex, posM, FxaaInt2( 1, 1), rcpFrame.xy).w;
        float lumaNE = FxaaTexOff(tex, posM, FxaaInt2( 1,-1), rcpFrame.xy).w;
        float lumaSW = FxaaTexOff(tex, posM, FxaaInt2(-1, 1), rcpFrame.xy).w;
    #else
        float lumaNE = FxaaTexOff(tex, posM, FxaaInt2(1, -1), rcpFrame.xy).w;
        float lumaSW = FxaaTexOff(tex, posM, FxaaInt2(-1, 1), rcpFrame.xy).w;
    #endif
/*--------------------------------------------------------------------------*/
    // Second-difference terms over the 3x3 neighbourhood; the middle
    // row/column is weighted 2x when accumulated into edgeHorz / edgeVert.
    float lumaNS = lumaN + lumaS;
    float lumaWE = lumaW + lumaE;
    float subpixRcpRange = 1.0/range;
    float subpixNSWE = lumaNS + lumaWE;
    float edgeHorz1 = (-2.0 * lumaM) + lumaNS;
    float edgeVert1 = (-2.0 * lumaM) + lumaWE;
/*--------------------------------------------------------------------------*/
    float lumaNESE = lumaNE + lumaSE;
    float lumaNWNE = lumaNW + lumaNE;
    float edgeHorz2 = (-2.0 * lumaE) + lumaNESE;
    float edgeVert2 = (-2.0 * lumaN) + lumaNWNE;
/*--------------------------------------------------------------------------*/
    float lumaNWSW = lumaNW + lumaSW;
    float lumaSWSE = lumaSW + lumaSE;
    float edgeHorz4 = (abs(edgeHorz1) * 2.0) + abs(edgeHorz2);
    float edgeVert4 = (abs(edgeVert1) * 2.0) + abs(edgeVert2);
    float edgeHorz3 = (-2.0 * lumaW) + lumaNWSW;
    float edgeVert3 = (-2.0 * lumaS) + lumaSWSE;
    float edgeHorz = abs(edgeHorz3) + edgeHorz4;
    float edgeVert = abs(edgeVert3) + edgeVert4;
/*--------------------------------------------------------------------------*/
    float subpixNWSWNESE = lumaNWSW + lumaNESE;
    float lengthSign = rcpFrame.x;
    bool horzSpan = edgeHorz >= edgeVert;
    float subpixA = subpixNSWE * 2.0 + subpixNWSWNESE;
/*--------------------------------------------------------------------------*/
    // For a vertical span, reuse the N/S names for the W/E neighbours so the
    // code below is written once. lengthSign becomes one texel in the
    // direction perpendicular to the span.
    if(!horzSpan) lumaN = lumaW;
    if(!horzSpan) lumaS = lumaE;
    if(horzSpan) lengthSign = rcpFrame.y;
    float subpixB = (subpixA * (1.0/12.0)) - lumaM;
/*--------------------------------------------------------------------------*/
    // Pick the side (N or S after the aliasing above) with the steeper luma
    // gradient; pairN flips lengthSign so the offset points toward that side.
    float gradientN = lumaN - lumaM;
    float gradientS = lumaS - lumaM;
    float lumaNN = lumaN + lumaM;
    float lumaSS = lumaS + lumaM;
    bool pairN = abs(gradientN) >= abs(gradientS);
    float gradient = max(abs(gradientN), abs(gradientS));
    if(pairN) lengthSign = -lengthSign;
    float subpixC = FxaaSat(abs(subpixB) * subpixRcpRange);
/*--------------------------------------------------------------------------*/
    // posB: start position, moved half a texel across the edge.
    // offNP: per-step offset along the edge (non-zero on one axis only).
    float2 posB;
    posB.x = posM.x;
    posB.y = posM.y;
    float2 offNP;
    offNP.x = (!horzSpan) ? 0.0 : rcpFrame.x;
    offNP.y = ( horzSpan) ? 0.0 : rcpFrame.y;
    if(!horzSpan) posB.x += lengthSign * 0.5;
    if( horzSpan) posB.y += lengthSign * 0.5;
/*--------------------------------------------------------------------------*/
    // First search step in the negative (posN) and positive (posP) direction.
    float2 posN;
    posN.x = posB.x - offNP.x * FXAA_QUALITY__P0;
    posN.y = posB.y - offNP.y * FXAA_QUALITY__P0;
    float2 posP;
    posP.x = posB.x + offNP.x * FXAA_QUALITY__P0;
    posP.y = posB.y + offNP.y * FXAA_QUALITY__P0;
    float subpixD = ((-2.0)*subpixC) + 3.0;
    float lumaEndN = FxaaTexTop(tex, posN).w;
    float subpixE = subpixC * subpixC;
    float lumaEndP = FxaaTexTop(tex, posP).w;
/*--------------------------------------------------------------------------*/
    // After this point lumaNN * 0.5 is the average luma of the chosen pixel
    // pair; the end-of-span lumas are compared relative to it.
    if(!pairN) lumaNN = lumaSS;
    float gradientScaled = gradient * 1.0/4.0;
    float lumaMM = lumaM - lumaNN * 0.5;
    float subpixF = subpixD * subpixE;
    bool lumaMLTZero = lumaMM < 0.0;
/*--------------------------------------------------------------------------*/
    // NOTE: despite its name, doneNP is true while at least one direction is
    // NOT finished, i.e. it means "keep searching".
    lumaEndN -= lumaNN * 0.5;
    lumaEndP -= lumaNN * 0.5;
    bool doneN = abs(lumaEndN) >= gradientScaled;
    bool doneP = abs(lumaEndP) >= gradientScaled;
    if(!doneN) posN.x -= offNP.x * FXAA_QUALITY__P1;
    if(!doneN) posN.y -= offNP.y * FXAA_QUALITY__P1;
    bool doneNP = (!doneN) || (!doneP);
    if(!doneP) posP.x += offNP.x * FXAA_QUALITY__P1;
    if(!doneP) posP.y += offNP.y * FXAA_QUALITY__P1;
/*--------------------------------------------------------------------------*/
    // Unrolled search: every nested level repeats the same sample / test /
    // advance sequence with the next FXAA_QUALITY__Pn step size. Levels
    // beyond the first are compiled in according to FXAA_QUALITY__PS.
    if(doneNP) {
        if(!doneN) lumaEndN = FxaaTexTop(tex, posN.xy).w;
        if(!doneP) lumaEndP = FxaaTexTop(tex, posP.xy).w;
        if(!doneN) lumaEndN = lumaEndN - lumaNN * 0.5;
        if(!doneP) lumaEndP = lumaEndP - lumaNN * 0.5;
        doneN = abs(lumaEndN) >= gradientScaled;
        doneP = abs(lumaEndP) >= gradientScaled;
        if(!doneN) posN.x -= offNP.x * FXAA_QUALITY__P2;
        if(!doneN) posN.y -= offNP.y * FXAA_QUALITY__P2;
        doneNP = (!doneN) || (!doneP);
        if(!doneP) posP.x += offNP.x * FXAA_QUALITY__P2;
        if(!doneP) posP.y += offNP.y * FXAA_QUALITY__P2;
/*--------------------------------------------------------------------------*/
        #if (FXAA_QUALITY__PS > 3)
        if(doneNP) {
            if(!doneN) lumaEndN = FxaaTexTop(tex, posN.xy).w;
            if(!doneP) lumaEndP = FxaaTexTop(tex, posP.xy).w;
            if(!doneN) lumaEndN = lumaEndN - lumaNN * 0.5;
            if(!doneP) lumaEndP = lumaEndP - lumaNN * 0.5;
            doneN = abs(lumaEndN) >= gradientScaled;
            doneP = abs(lumaEndP) >= gradientScaled;
            if(!doneN) posN.x -= offNP.x * FXAA_QUALITY__P3;
            if(!doneN) posN.y -= offNP.y * FXAA_QUALITY__P3;
            doneNP = (!doneN) || (!doneP);
            if(!doneP) posP.x += offNP.x * FXAA_QUALITY__P3;
            if(!doneP) posP.y += offNP.y * FXAA_QUALITY__P3;
/*--------------------------------------------------------------------------*/
            #if (FXAA_QUALITY__PS > 4)
            if(doneNP) {
                if(!doneN) lumaEndN = FxaaTexTop(tex, posN.xy).w;
                if(!doneP) lumaEndP = FxaaTexTop(tex, posP.xy).w;
                if(!doneN) lumaEndN = lumaEndN - lumaNN * 0.5;
                if(!doneP) lumaEndP = lumaEndP - lumaNN * 0.5;
                doneN = abs(lumaEndN) >= gradientScaled;
                doneP = abs(lumaEndP) >= gradientScaled;
                if(!doneN) posN.x -= offNP.x * FXAA_QUALITY__P4;
                if(!doneN) posN.y -= offNP.y * FXAA_QUALITY__P4;
                doneNP = (!doneN) || (!doneP);
                if(!doneP) posP.x += offNP.x * FXAA_QUALITY__P4;
                if(!doneP) posP.y += offNP.y * FXAA_QUALITY__P4;
/*--------------------------------------------------------------------------*/
                #if (FXAA_QUALITY__PS > 5)
                if(doneNP) {
                    if(!doneN) lumaEndN = FxaaTexTop(tex, posN.xy).w;
                    if(!doneP) lumaEndP = FxaaTexTop(tex, posP.xy).w;
                    if(!doneN) lumaEndN = lumaEndN - lumaNN * 0.5;
                    if(!doneP) lumaEndP = lumaEndP - lumaNN * 0.5;
                    doneN = abs(lumaEndN) >= gradientScaled;
                    doneP = abs(lumaEndP) >= gradientScaled;
                    if(!doneN) posN.x -= offNP.x * FXAA_QUALITY__P5;
                    if(!doneN) posN.y -= offNP.y * FXAA_QUALITY__P5;
                    doneNP = (!doneN) || (!doneP);
                    if(!doneP) posP.x += offNP.x * FXAA_QUALITY__P5;
                    if(!doneP) posP.y += offNP.y * FXAA_QUALITY__P5;
/*--------------------------------------------------------------------------*/
                    #if (FXAA_QUALITY__PS > 6)
                    if(doneNP) {
                        if(!doneN) lumaEndN = FxaaTexTop(tex, posN.xy).w;
                        if(!doneP) lumaEndP = FxaaTexTop(tex, posP.xy).w;
                        if(!doneN) lumaEndN = lumaEndN - lumaNN * 0.5;
                        if(!doneP) lumaEndP = lumaEndP - lumaNN * 0.5;
                        doneN = abs(lumaEndN) >= gradientScaled;
                        doneP = abs(lumaEndP) >= gradientScaled;
                        if(!doneN) posN.x -= offNP.x * FXAA_QUALITY__P6;
                        if(!doneN) posN.y -= offNP.y * FXAA_QUALITY__P6;
                        doneNP = (!doneN) || (!doneP);
                        if(!doneP) posP.x += offNP.x * FXAA_QUALITY__P6;
                        if(!doneP) posP.y += offNP.y * FXAA_QUALITY__P6;
/*--------------------------------------------------------------------------*/
                        #if (FXAA_QUALITY__PS > 7)
                        if(doneNP) {
                            if(!doneN) lumaEndN = FxaaTexTop(tex, posN.xy).w;
                            if(!doneP) lumaEndP = FxaaTexTop(tex, posP.xy).w;
                            if(!doneN) lumaEndN = lumaEndN - lumaNN * 0.5;
                            if(!doneP) lumaEndP = lumaEndP - lumaNN * 0.5;
                            doneN = abs(lumaEndN) >= gradientScaled;
                            doneP = abs(lumaEndP) >= gradientScaled;
                            if(!doneN) posN.x -= offNP.x * FXAA_QUALITY__P7;
                            if(!doneN) posN.y -= offNP.y * FXAA_QUALITY__P7;
                            doneNP = (!doneN) || (!doneP);
                            if(!doneP) posP.x += offNP.x * FXAA_QUALITY__P7;
                            if(!doneP) posP.y += offNP.y * FXAA_QUALITY__P7;
/*--------------------------------------------------------------------------*/
    #if (FXAA_QUALITY__PS > 8)
    if(doneNP) {
        if(!doneN) lumaEndN = FxaaTexTop(tex, posN.xy).w;
        if(!doneP) lumaEndP = FxaaTexTop(tex, posP.xy).w;
        if(!doneN) lumaEndN = lumaEndN - lumaNN * 0.5;
        if(!doneP) lumaEndP = lumaEndP - lumaNN * 0.5;
        doneN = abs(lumaEndN) >= gradientScaled;
        doneP = abs(lumaEndP) >= gradientScaled;
        if(!doneN) posN.x -= offNP.x * FXAA_QUALITY__P8;
        if(!doneN) posN.y -= offNP.y * FXAA_QUALITY__P8;
        doneNP = (!doneN) || (!doneP);
        if(!doneP) posP.x += offNP.x * FXAA_QUALITY__P8;
        if(!doneP) posP.y += offNP.y * FXAA_QUALITY__P8;
/*--------------------------------------------------------------------------*/
        #if (FXAA_QUALITY__PS > 9)
        if(doneNP) {
            if(!doneN) lumaEndN = FxaaTexTop(tex, posN.xy).w;
            if(!doneP) lumaEndP = FxaaTexTop(tex, posP.xy).w;
            if(!doneN) lumaEndN = lumaEndN - lumaNN * 0.5;
            if(!doneP) lumaEndP = lumaEndP - lumaNN * 0.5;
            doneN = abs(lumaEndN) >= gradientScaled;
            doneP = abs(lumaEndP) >= gradientScaled;
            if(!doneN) posN.x -= offNP.x * FXAA_QUALITY__P9;
            if(!doneN) posN.y -= offNP.y * FXAA_QUALITY__P9;
            doneNP = (!doneN) || (!doneP);
            if(!doneP) posP.x += offNP.x * FXAA_QUALITY__P9;
            if(!doneP) posP.y += offNP.y * FXAA_QUALITY__P9;
/*--------------------------------------------------------------------------*/
            #if (FXAA_QUALITY__PS > 10)
            if(doneNP) {
                if(!doneN) lumaEndN = FxaaTexTop(tex, posN.xy).w;
                if(!doneP) lumaEndP = FxaaTexTop(tex, posP.xy).w;
                if(!doneN) lumaEndN = lumaEndN - lumaNN * 0.5;
                if(!doneP) lumaEndP = lumaEndP - lumaNN * 0.5;
                doneN = abs(lumaEndN) >= gradientScaled;
                doneP = abs(lumaEndP) >= gradientScaled;
                if(!doneN) posN.x -= offNP.x * FXAA_QUALITY__P10;
                if(!doneN) posN.y -= offNP.y * FXAA_QUALITY__P10;
                doneNP = (!doneN) || (!doneP);
                if(!doneP) posP.x += offNP.x * FXAA_QUALITY__P10;
                if(!doneP) posP.y += offNP.y * FXAA_QUALITY__P10;
/*--------------------------------------------------------------------------*/
                #if (FXAA_QUALITY__PS > 11)
                if(doneNP) {
                    if(!doneN) lumaEndN = FxaaTexTop(tex, posN.xy).w;
                    if(!doneP) lumaEndP = FxaaTexTop(tex, posP.xy).w;
                    if(!doneN) lumaEndN = lumaEndN - lumaNN * 0.5;
                    if(!doneP) lumaEndP = lumaEndP - lumaNN * 0.5;
                    doneN = abs(lumaEndN) >= gradientScaled;
                    doneP = abs(lumaEndP) >= gradientScaled;
                    if(!doneN) posN.x -= offNP.x * FXAA_QUALITY__P11;
                    if(!doneN) posN.y -= offNP.y * FXAA_QUALITY__P11;
                    doneNP = (!doneN) || (!doneP);
                    if(!doneP) posP.x += offNP.x * FXAA_QUALITY__P11;
                    if(!doneP) posP.y += offNP.y * FXAA_QUALITY__P11;
/*--------------------------------------------------------------------------*/
                    #if (FXAA_QUALITY__PS > 12)
                    if(doneNP) {
                        if(!doneN) lumaEndN = FxaaTexTop(tex, posN.xy).w;
                        if(!doneP) lumaEndP = FxaaTexTop(tex, posP.xy).w;
                        if(!doneN) lumaEndN = lumaEndN - lumaNN * 0.5;
                        if(!doneP) lumaEndP = lumaEndP - lumaNN * 0.5;
                        doneN = abs(lumaEndN) >= gradientScaled;
                        doneP = abs(lumaEndP) >= gradientScaled;
                        if(!doneN) posN.x -= offNP.x * FXAA_QUALITY__P12;
                        if(!doneN) posN.y -= offNP.y * FXAA_QUALITY__P12;
                        doneNP = (!doneN) || (!doneP);
                        if(!doneP) posP.x += offNP.x * FXAA_QUALITY__P12;
                        if(!doneP) posP.y += offNP.y * FXAA_QUALITY__P12;
/*--------------------------------------------------------------------------*/
                    }
                    #endif
/*--------------------------------------------------------------------------*/
                }
                #endif
/*--------------------------------------------------------------------------*/
            }
            #endif
/*--------------------------------------------------------------------------*/
        }
        #endif
/*--------------------------------------------------------------------------*/
    }
    #endif
/*--------------------------------------------------------------------------*/
                        }
                        #endif
/*--------------------------------------------------------------------------*/
                    }
                    #endif
/*--------------------------------------------------------------------------*/
                }
                #endif
/*--------------------------------------------------------------------------*/
            }
            #endif
/*--------------------------------------------------------------------------*/
        }
        #endif
/*--------------------------------------------------------------------------*/
    }
/*--------------------------------------------------------------------------*/
    // Distance from the centre to each end of the span, measured along the
    // axis selected by the two conditional overrides below.
    float dstN = posM.x - posN.x;
    float dstP = posP.x - posM.x;
    if(!horzSpan) dstN = posM.y - posN.y;
    if(!horzSpan) dstP = posP.y - posM.y;
/*--------------------------------------------------------------------------*/
    // A span end is "good" when the sign of its luma delta differs from the
    // sign of the centre pixel's delta (lumaMLTZero).
    bool goodSpanN = (lumaEndN < 0.0) != lumaMLTZero;
    float spanLength = (dstP + dstN);
    bool goodSpanP = (lumaEndP < 0.0) != lumaMLTZero;
    float spanLengthRcp = 1.0/spanLength;
/*--------------------------------------------------------------------------*/
    // Use the nearer end of the span; the offset is 0.5 - dst/spanLength.
    bool directionN = dstN < dstP;
    float dst = min(dstN, dstP);
    bool goodSpan = directionN ? goodSpanN : goodSpanP;
    float subpixG = subpixF * subpixF;
    float pixelOffset = (dst * (-spanLengthRcp)) + 0.5;
    float subpixH = subpixG * FXAA_QUALITY__SUBPIX;
/*--------------------------------------------------------------------------*/
    // Final offset is the larger of the span-based and sub-pixel offsets,
    // applied perpendicular to the span, followed by a single resample.
    float pixelOffsetGood = goodSpan ? pixelOffset : 0.0;
    float pixelOffsetSubpix = max(pixelOffsetGood, subpixH);
    if(!horzSpan) posM.x += pixelOffsetSubpix * lengthSign;
    if( horzSpan) posM.y += pixelOffsetSubpix * lengthSign;
    #if (FXAA_DISCARD == 1)
        return FxaaTexTop(tex, posM);
    #else
        return float4(FxaaTexTop(tex, posM).xyz, lumaM);
    #endif
}
/*==========================================================================*/
#endif
|
|
|
|
// Pixel-shader entry point: runs FxaaPixelShader on the bound texture.
//
// input.t is used as the pixel position passed to FxaaPixelShader.
// posPos is passed as zero, and _rcpFrame.xy / _rcpFrameOpt are forwarded
// from the shader constants declared at the top of this file.
// NOTE(review): _rcpFrame.xy is expected to hold 1/width and 1/height of the
// source image, as FxaaPixelShader's parameter comments describe; confirm
// against the host code that fills the constants.
PS_OUTPUT ps_main(PS_INPUT input)
{
	PS_OUTPUT output;

	float2 pos = input.t;
	float4 posPos = (float4)0;

	FxaaTex tex;

	#if SHADER_MODEL >= 0x400

	// On SM4+ the texture and its sampler state are separate objects, so
	// both are assigned to the fields of FxaaTex.
	tex.tex = Texture;
	tex.smpl = TextureSampler;

	#else

	// On SM3 and below Texture is a combined sampler and is assigned directly.
	tex = Texture;

	#endif

	output.c = FxaaPixelShader(pos, posPos, tex, _rcpFrame.xy, _rcpFrameOpt);

	return output;
}
|
|
|
|
#endif
|